In [3]:
# Cell 1

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load your dataset
df = pd.read_csv("fruit_classification_dataset.csv")

df.head()


Unnamed: 0,size (cm),shape,weight (g),avg_price (₹),color,taste,fruit_name
0,25.4,round,3089.2,137.1,green,sweet,watermelon
1,24.6,round,3283.9,163.8,green,sweet,watermelon
2,7.8,round,319.0,91.3,green,sweet,custard apple
3,20.0,oval,1607.0,85.7,orange,sweet,papaya
4,10.2,long,131.5,37.8,yellow,sweet,banana


In [5]:
# Cell 2

# Columns in dataset
feature_cols = ["size (cm)", "shape", "weight (g)", "avg_price (₹)", "color", "taste"]
label_col = "fruit_name"

df = df.copy()

# Encode categorical feature columns
for col in ["shape", "color", "taste"]:
    df[col] = pd.factorize(df[col])[0]

# Encode label column
y_numeric, unique_labels = pd.factorize(df[label_col])

X = df[feature_cols].values
y = y_numeric
num_classes = len(unique_labels)

# One-hot encode target
Y = np.eye(num_classes)[y]

# Scale numeric features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split dataset
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)


In [6]:
# Cell 3

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def softmax(z):
    exp = np.exp(z - np.max(z, axis=1, keepdims=True))
    return exp / np.sum(exp, axis=1, keepdims=True)


In [7]:
# Cell 4

class SimpleNN:
    def __init__(self, input_dim, hidden_dim, output_dim, lr=0.01):
        self.lr = lr
        
        # Weight initialization
        self.W1 = np.random.randn(input_dim, hidden_dim) * 0.01
        self.b1 = np.zeros((1, hidden_dim))
        
        self.W2 = np.random.randn(hidden_dim, output_dim) * 0.01
        self.b2 = np.zeros((1, output_dim))

    def forward(self, X):
        self.Z1 = X @ self.W1 + self.b1
        self.A1 = relu(self.Z1)

        self.Z2 = self.A1 @ self.W2 + self.b2
        self.A2 = softmax(self.Z2)
        return self.A2

    def backward(self, X, Y, out):
        m = X.shape[0]

        # Output layer gradients
        dZ2 = out - Y
        dW2 = (1/m) * (self.A1.T @ dZ2)
        db2 = (1/m) * np.sum(dZ2, axis=0, keepdims=True)

        # Hidden layer gradients
        dA1 = dZ2 @ self.W2.T
        dZ1 = dA1 * relu_derivative(self.Z1)
        dW1 = (1/m) * (X.T @ dZ1)
        db1 = (1/m) * np.sum(dZ1, axis=0, keepdims=True)

        # Update weights
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2

    def train(self, X, Y, epochs=2000):
        for epoch in range(epochs):
            out = self.forward(X)
            self.backward(X, Y, out)

            if epoch % 200 == 0:
                loss = -np.mean(np.sum(Y * np.log(out + 1e-10), axis=1))
                print(f"Epoch {epoch} | Loss = {loss:.4f}")

    def predict(self, X):
        out = self.forward(X)
        return np.argmax(out, axis=1)


In [8]:
# Cell 5

input_dim = X_train.shape[1]   # 6 features
hidden_dim = 10                # Tunable
output_dim = num_classes

nn = SimpleNN(input_dim, hidden_dim, output_dim, lr=0.01)
nn.train(X_train, Y_train, epochs=2000)


Epoch 0 | Loss = 2.9957
Epoch 200 | Loss = 2.9950
Epoch 400 | Loss = 2.9941
Epoch 600 | Loss = 2.9923
Epoch 800 | Loss = 2.9885
Epoch 1000 | Loss = 2.9799
Epoch 1200 | Loss = 2.9596
Epoch 1400 | Loss = 2.9126
Epoch 1600 | Loss = 2.8185
Epoch 1800 | Loss = 2.6889


In [9]:
# Cell 6

y_pred = nn.predict(X_test)
y_true = np.argmax(Y_test, axis=1)

accuracy = np.sum(y_pred == y_true) / len(y_true)

print("Test Accuracy:", round(accuracy*100, 2), "%")


Test Accuracy: 15.45 %
