# 1. Import Libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import joblib

# 2. Load Data

In [2]:
# Read the nut measurements from disk
data = pd.read_csv('nuts.csv')

# Label encoding:
#   0 -> M3
#   1 -> M4
#   2 -> M5

# The last column is assumed to be the label; everything before it is a feature
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values
print(X.shape, y.shape)

# Hold out 20% of the rows for testing (fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training split only, then apply it to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted scaler so it can be reloaded at inference time
joblib.dump(scaler, 'scaler.pkl')

# Wrap the splits as PyTorch tensors: float32 features, int64 (long) labels
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(targets, dtype=torch.long) for targets in (y_train, y_test)
)

# Pair features with labels, then batch them; only the training loader shuffles
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

(429, 10) (429,)


# 3. Training the Models

## 3.1 Neural Network

In [3]:
# Define the neural network architecture
class NutClassifier(nn.Module):
    """Fully connected classifier with two hidden layers (64 and 32 units).

    Args:
        input_size: Number of input features per sample.
        num_classes: Number of output classes (width of the final layer).

    ``forward`` returns the raw outputs of the last linear layer; no softmax
    is applied inside the module.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        # Attribute names double as the state_dict key prefixes.
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, num_classes)

    def forward(self, x):
        """Run ``x`` through fc1 -> ReLU -> fc2 -> ReLU -> fc3."""
        hidden = self.fc1(x).relu()
        hidden = self.fc2(hidden).relu()
        return self.fc3(hidden)

# Seed torch's global RNG so weight initialisation and the shuffled batch order
# drawn by train_loader are repeatable across kernel restarts. The value matches
# the random_state=42 used for the train/test split.
torch.manual_seed(42)

# Determine the number of unique classes from the full label vector
num_classes = len(np.unique(y))
print(f'Number of classes: {num_classes}')

# Instantiate the model with the correct number of input features and classes
model = NutClassifier(input_size=X_train.shape[1], num_classes=num_classes)

# CrossEntropyLoss takes the raw (un-normalised) outputs of the network
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Reported value is the mean of the per-batch losses for this epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

# Evaluate the model on the test set
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        # Index of the largest output per row is the predicted class
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on test set: {100 * correct / total}%')

Number of classes: 3
Epoch 1/20, Loss: 1.0900390906767412
Epoch 2/20, Loss: 1.0302428982474587
Epoch 3/20, Loss: 0.9666505022482439
Epoch 4/20, Loss: 0.8820253718983043
Epoch 5/20, Loss: 0.779521416534077
Epoch 6/20, Loss: 0.6722957708618857
Epoch 7/20, Loss: 0.5664061226628043
Epoch 8/20, Loss: 0.46976642175154254
Epoch 9/20, Loss: 0.38355389779264276
Epoch 10/20, Loss: 0.316023436459628
Epoch 11/20, Loss: 0.25780214640227234
Epoch 12/20, Loss: 0.21449925005435944
Epoch 13/20, Loss: 0.17857433381405743
Epoch 14/20, Loss: 0.15534769811413504
Epoch 15/20, Loss: 0.13059652528979562
Epoch 16/20, Loss: 0.11839736117558046
Epoch 17/20, Loss: 0.10292959755117242
Epoch 18/20, Loss: 0.09183757345784795
Epoch 19/20, Loss: 0.08278955062004653
Epoch 20/20, Loss: 0.07746890492059967
Accuracy on test set: 100.0%


## 3.2 Naive Bayes

In [4]:
import os

from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Train Naive Bayes model on the standardized training features
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)

# Test the model on the held-out split
y_pred_nb = nb_model.predict(X_test)
nb_accuracy = accuracy_score(y_test, y_pred_nb)
print(f'Naive Bayes Accuracy: {nb_accuracy * 100:.2f}%')

# Save the model. joblib.dump does not create missing parent folders, so make
# sure the output directory exists first (no-op when it is already there).
os.makedirs('./model/models', exist_ok=True)
joblib.dump(nb_model, './model/models/naive_bayes_model.pkl')

Naive Bayes Accuracy: 83.72%


['./model/models/naive_bayes_model.pkl']

## 3.3 Decision Tree

In [5]:
import os

from sklearn.tree import DecisionTreeClassifier

# Train Decision Tree model (fixed random_state for a repeatable tree)
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Test the model on the held-out split
y_pred_dt = dt_model.predict(X_test)
dt_accuracy = accuracy_score(y_test, y_pred_dt)
print(f'Decision Tree Accuracy: {dt_accuracy * 100:.2f}%')

# Save the model. joblib.dump does not create missing parent folders, so make
# sure the output directory exists first (no-op when it is already there).
os.makedirs('./model/models', exist_ok=True)
joblib.dump(dt_model, './model/models/decision_tree_model.pkl')

Decision Tree Accuracy: 90.70%


['./model/models/decision_tree_model.pkl']

## 3.4 Random Forest

In [6]:
import os

from sklearn.ensemble import RandomForestClassifier

# Train Random Forest model (fixed random_state for a repeatable forest)
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Test the model on the held-out split
y_pred_rf = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_test, y_pred_rf)
print(f'Random Forest Accuracy: {rf_accuracy * 100:.2f}%')

# Save the model. joblib.dump does not create missing parent folders, so make
# sure the output directory exists first (no-op when it is already there).
os.makedirs('./model/models', exist_ok=True)
joblib.dump(rf_model, './model/models/random_forest_model.pkl')

Random Forest Accuracy: 95.35%


['./model/models/random_forest_model.pkl']

## 3.5 Support Vector Machine (SVM)

In [7]:
import os

from sklearn.svm import SVC

# Train SVM model with a linear kernel
svm_model = SVC(kernel='linear', random_state=42)
svm_model.fit(X_train, y_train)

# Test the model on the held-out split
y_pred_svm = svm_model.predict(X_test)
svm_accuracy = accuracy_score(y_test, y_pred_svm)
print(f'SVM Accuracy: {svm_accuracy * 100:.2f}%')

# Save the model. joblib.dump does not create missing parent folders, so make
# sure the output directory exists first (no-op when it is already there).
os.makedirs('./model/models', exist_ok=True)
joblib.dump(svm_model, './model/models/svm_model.pkl')

SVM Accuracy: 96.51%


['./model/models/svm_model.pkl']

## 3.6 k-Nearest Neighbors (k-NN)

In [8]:
import os

from sklearn.neighbors import KNeighborsClassifier

# Train k-NN model that votes among the 3 nearest training samples
knn_model = KNeighborsClassifier(n_neighbors=3)
knn_model.fit(X_train, y_train)

# Test the model on the held-out split
y_pred_knn = knn_model.predict(X_test)
knn_accuracy = accuracy_score(y_test, y_pred_knn)
print(f'k-NN Accuracy: {knn_accuracy * 100:.2f}%')

# Save the model. joblib.dump does not create missing parent folders, so make
# sure the output directory exists first (no-op when it is already there).
os.makedirs('./model/models', exist_ok=True)
joblib.dump(knn_model, './model/models/knn_model.pkl')

k-NN Accuracy: 91.86%


['./model/models/knn_model.pkl']

# 4. Save Model

In [9]:
# Save only the neural network's state dict (its learned weights), not the
# module object itself; reloading requires constructing a NutClassifier first
# and then calling load_state_dict on it.
torch.save(model.state_dict(), 'nut_classifier_weights.pth')

# 5. Example Usage

In [20]:
import joblib
import numpy as np
import torch

# Load the scaler that was fitted on the training split, so the example is
# standardized with the same statistics as the training data.
# NOTE: joblib.load unpickles arbitrary objects; only load files you produced.
scaler = joblib.load('scaler.pkl')

# Example input data (replace with actual input data)
# NOTE(review): the data-loading cell reported 10 feature columns, but this
# example has 6 values. scaler.transform will raise if 'scaler.pkl' was fitted
# on a different number of features; confirm the example matches the dataset.
input_2 = [210, 1669.0, 182.0, 1452.5, 134.5, 1057.0]
example_input = np.array([input_2])  # shape (1, n_features): a single sample

# Standardize the input data
example_input_standardized = scaler.transform(example_input)

# Convert the standardized input data to a PyTorch tensor
example_input_tensor = torch.tensor(example_input_standardized, dtype=torch.float32)

# Load the model weights into the already-constructed `model` from the training
# cell. weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state dict needs, and avoids the torch.load FutureWarning.
model.load_state_dict(torch.load('nut_classifier_weights.pth', weights_only=True))
model.eval()  # Set the model to evaluation mode

# Make predictions using the loaded model; no gradients are needed for inference
with torch.no_grad():
    output = model(example_input_tensor)
predicted_class = output.argmax(dim=1)
print(f'Predicted class: {predicted_class.item()}')

Predicted class: 2


  model.load_state_dict(torch.load('nut_classifier_weights.pth'))
