In [47]:
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import torch.nn as nn

In [48]:
# Location of the avocado ripeness CSV. NOTE: this is an absolute,
# machine-specific path; adjust it when running elsewhere.
DATA_PATH = "/Users/jesseseid/Jseid628/avocado_ripeness_dataset.csv"
data = pd.read_csv(DATA_PATH)

# Preview the first rows of the raw table
data.head()

Unnamed: 0,firmness,hue,saturation,brightness,color_category,sound_db,weight_g,size_cm3,ripeness
0,14.5,19,40,26,black,34,175,261,ripe
1,71.7,53,69,75,green,69,206,185,pre-conditioned
2,88.5,60,94,46,dark green,79,220,143,hard
3,93.8,105,87,41,dark green,75,299,140,hard
4,42.5,303,58,32,purple,63,200,227,breaking


In [49]:
#Pre-processing

#Categorical to Numerical
le = LabelEncoder()
data['color_category'] = le.fit_transform(data['color_category'])

# Map every ripeness class to an integer id, numbered in order of first
# appearance. The mapping is derived from the data itself (no hard-coded class
# count) and applied with Series.map, which returns a new integer Series
# instead of relying on an in-place replace() through a column selection --
# that pattern is not guaranteed to write back into `data`.
ripe_types = data['ripeness'].unique()
ripeness_to_id = {ripe_type: class_id for class_id, ripe_type in enumerate(ripe_types)}

# pop() removes the target column from `data` and returns it for encoding,
# so `data` (aliased as `features`) ends up holding only the input columns.
labels = data.pop('ripeness').map(ripeness_to_id)
features = data
data.head()

Unnamed: 0,firmness,hue,saturation,brightness,color_category,sound_db,weight_g,size_cm3
0,14.5,19,40,26,0,34,175,261
1,71.7,53,69,75,2,69,206,185
2,88.5,60,94,46,1,79,220,143
3,93.8,105,87,41,1,75,299,140
4,42.5,303,58,32,3,63,200,227


In [50]:
#Hold out a test set first, then scale, then convert to tensors
features_train, features_test, labels_train, labels_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    random_state=42,
)

# The scaler is fitted on the training rows only; the held-out rows are
# transformed with those same fitted statistics.
scaler = MinMaxScaler()
features_train_scaled = scaler.fit_transform(features_train)
features_test_scaled = scaler.transform(features_test)

# Features become float32 tensors, labels become int64 tensors
features_train_tensor = torch.tensor(features_train_scaled, dtype=torch.float32)
features_test_tensor = torch.tensor(features_test_scaled, dtype=torch.float32)

labels_train_tensor = torch.tensor(labels_train.values, dtype=torch.int64)
labels_test_tensor = torch.tensor(labels_test.values, dtype=torch.int64)

# Sanity check: show the shape of each tensor
test_train_tensors = [
    features_train_tensor,
    features_test_tensor,
    labels_train_tensor,
    labels_test_tensor,
]
for tensor in test_train_tensors:
    print(tensor.shape)

torch.Size([200, 8])
torch.Size([50, 8])
torch.Size([200])
torch.Size([50])


In [51]:
#Neural Network Definition
class NeuralNetwork(nn.Module):
    """Fully connected classifier with a single ReLU hidden layer.

    Args:
        in_features: Number of input features per sample (default 8).
        hidden_units: Width of the hidden layer (default 50).
        num_classes: Number of output logits, one per class (default 5).

    The defaults reproduce the original fixed 8 -> 50 -> 5 architecture, so
    ``NeuralNetwork()`` builds the same network as before.
    """

    def __init__(self, in_features=8, hidden_units=50, num_classes=5):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_units)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return raw logits for ``x``; no softmax is applied here."""
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Seed PyTorch's RNG right before the weights are created so that the random
# initialisation is the same on every run of this cell.
torch.manual_seed(42)
model = NeuralNetwork()

In [52]:
#Objective and optimizer
# Cross-entropy loss between the model outputs and the integer labels
criterion = nn.CrossEntropyLoss()
# Plain SGD over all of the model's parameters
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.1)

In [53]:
#Training
num_epochs = 2000

# Put the model in training mode explicitly: evaluation code may have switched
# it to eval mode, and this cell can be re-run after evaluation.
model.train()

for epoch in range(num_epochs):
    # Clear the gradients from the previous step; backward() accumulates
    # into .grad rather than overwriting it.
    optimizer.zero_grad()
    outputs = model(features_train_tensor)  # full-batch forward pass
    loss = criterion(outputs, labels_train_tensor)
    loss.backward() #backpropagation
    optimizer.step() #Updates the weights

    # Report the loss every 200 epochs
    if (epoch % 200 == 0):
        print(f'Epoch [{epoch}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [0/2000], Loss: 1.6336
Epoch [200/2000], Loss: 0.4242
Epoch [400/2000], Loss: 0.2032
Epoch [600/2000], Loss: 0.1196
Epoch [800/2000], Loss: 0.0813
Epoch [1000/2000], Loss: 0.0606
Epoch [1200/2000], Loss: 0.0478
Epoch [1400/2000], Loss: 0.0393
Epoch [1600/2000], Loss: 0.0333
Epoch [1800/2000], Loss: 0.0288


In [54]:
def check_accuracy(model, features, labels):
    """Return the fraction of samples whose predicted class equals its label.

    Args:
        model: ``nn.Module`` returning one row of class scores per sample.
        features: Input tensor passed to ``model`` as-is.
        labels: Tensor of integer class indices, one per row of ``features``.

    Returns:
        Accuracy as a Python float in ``[0, 1]``.

    The model is switched to eval mode for the forward pass and put back in
    whatever mode it was in before the call, so evaluating in the middle of
    training does not silently leave the model in eval mode.
    """
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            outputs = model(features)
            # The predicted class is the index of the largest score in each row
            predicted = outputs.argmax(dim=1)
            accuracy = (predicted == labels).sum().item() / labels.size(0)
    finally:
        # Restore the caller's train/eval mode even if the forward pass fails
        model.train(was_training)

    return accuracy

# Accuracy of the trained model on the held-out test tensors
check_accuracy(
    model=model,
    features=features_test_tensor,
    labels=labels_test_tensor,
)

1.0

In [55]:
#Test
# Use the check_accuracy helper instead of repeating the eval / no_grad /
# arg-max steps inline, so the accuracy computation lives in one place.
accuracy = check_accuracy(model, features_test_tensor, labels_test_tensor)

# Print the accuracy
print(f'Accuracy on Test Data: {accuracy * 100:.2f}%')

Accuracy on Test Data: 100.00%
