In [31]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset


In [32]:
file_path = 'almonds/Almond.csv'
almond_data = pd.read_csv(file_path)

almond_data.info(), print(almond_data.head())

almond_data_cleaned = almond_data.drop(columns=['Unnamed: 0'])

features = almond_data_cleaned.drop(columns=['Type'])
target = almond_data_cleaned['Type']

scaler = MinMaxScaler()
features_normalized = pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

features_normalized_filled = features_normalized.fillna(-1)

encoder = OneHotEncoder(sparse=False)
target_encoded_filled = pd.DataFrame(encoder.fit_transform(target.values.reshape(-1, 1)), columns=encoder.categories_[0])

preprocessed_data_filled = pd.concat([features_normalized_filled, target_encoded_filled], axis=1)

for index, row in preprocessed_data_filled.iterrows():
        for col in preprocessed_data_filled.columns:
            value = row[col]
            if not (0 <= value <= 1 or value == -1):
                print(f"Invalid value found: {value} at row {index}, column '{col}'")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2803 entries, 0 to 2802
Data columns (total 14 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Unnamed: 0                2803 non-null   int64  
 1   Length (major axis)       1946 non-null   float64
 2   Width (minor axis)        1861 non-null   float64
 3   Thickness (depth)         1799 non-null   float64
 4   Area                      2803 non-null   float64
 5   Perimeter                 2803 non-null   float64
 6   Roundness                 1946 non-null   float64
 7   Solidity                  2803 non-null   float64
 8   Compactness               2803 non-null   float64
 9   Aspect Ratio              1004 non-null   float64
 10  Eccentricity              1004 non-null   float64
 11  Extent                    2803 non-null   float64
 12  Convex hull(convex area)  2803 non-null   float64
 13  Type                      2803 non-null   object 
dtypes: float

In [33]:

output_file_path = 'almonds/Almond_Prepped.csv'
preprocessed_data_filled.to_csv(output_file_path, index=False)

In [34]:
Prepped_file_path = 'almonds/Almond_Prepped.csv'
preprocessed_data_filled = pd.read_csv(Prepped_file_path)
print(preprocessed_data_filled.sample(10))


      Length (major axis)  Width (minor axis)  Thickness (depth)      Area  \
837              0.442068           -1.000000           0.456272  0.267524   
1921            -1.000000            0.260692           0.355551  0.056250   
1573             0.326015           -1.000000           0.089350  0.089309   
2724            -1.000000            0.436433           0.685816  0.148147   
1982             0.489893           -1.000000           0.664943  0.353583   
318              0.414994            0.592607          -1.000000  0.451685   
124              0.696864            0.828816          -1.000000  0.761812   
2342            -1.000000            0.287797           0.381158  0.064340   
354              0.124878            0.224931          -1.000000  0.158850   
1454             0.329117           -1.000000           0.226752  0.129683   

      Perimeter  Roundness  Solidity  Compactness  Aspect Ratio  Eccentricity  \
837    0.328293   0.374171  0.920623     0.086263     -1.000

In [35]:
X = preprocessed_data_filled.drop(columns=['MAMRA', 'REGULAR', 'SANORA']).values
Y = preprocessed_data_filled[['MAMRA', 'REGULAR', 'SANORA']].values

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)
X_train, X_eval, Y_train, Y_eval = train_test_split(X_train, Y_train, test_size=0.375, random_state=42)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
Y_train_tensor = torch.tensor(Y_train, dtype=torch.float32)
X_eval_tensor = torch.tensor(X_eval, dtype=torch.float32)
Y_eval_tensor = torch.tensor(Y_eval, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32)

train_dataset = TensorDataset(X_train_tensor, Y_train_tensor)
eval_dataset = TensorDataset(X_eval_tensor, Y_eval_tensor)
test_dataset = TensorDataset(X_test_tensor, Y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
eval_loader = DataLoader(eval_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [36]:
class NN(nn.Module):
    def __init__(self, input_size, hidden_layers, output_size, actFunc = 'ReLU'):
        super(NN, self).__init__()
        self.inputLayer = nn.Linear(input_size, hidden_layers[0])
        self.hiddenLayers = []
        for i in range(len(hidden_layers)-1):
            self.hiddenLayers.append(nn.Linear(hidden_layers[i], hidden_layers[i+1]))
        self.outputLayer = nn.Linear(hidden_layers[-1], output_size)
        self.actFunc = nn.ReLU()
        if (actFunc == 'Sigmoid'):
            self.actFunc = nn.Sigmoid()
        if (actFunc == 'TanH'):
            self.actFunc = nn.Tanh()
        if (actFunc == 'TanH'):
            self.actFunc = nn.Tanh()
        if (actFunc == 'TanH'):
            self.actFunc = nn.Tanh()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        x = self.actFunc(self.inputLayer(x))
        for layer in self.hiddenLayers:
            x = self.actFunc(layer(x))
        x = self.outputLayer(x)
        x = self.softmax(x)
        return x

In [37]:

def train_model(optimizer, model, loader, num_epochs, verbose = 1):
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in loader:
            outputs = model(inputs)
            criterion = nn.CrossEntropyLoss()
            loss = criterion(outputs, torch.max(labels, 1)[1])
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        if(verbose == 1):
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(loader):.4f}')

def test_model(model, loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            labels_class = torch.max(labels, 1)[1]
            total += labels.size(0)
            correct += (predicted == labels_class).sum().item()
    accuracy = 100 * (correct / total)
    print(f'Test Accuracy: {accuracy:.2f}%')

In [38]:
def simulate(hiddenLayers, activationFunction, train_loader, eval_loader, epochs = 20):
    input_size = 12
    output_size = 3
    
    hidden_size = [4]
    for _ in range(hiddenLayers - 1):
        next_number = hidden_size[0] * 2
        hidden_size.insert(0, next_number)
    
    learning_rate = 0.001
    model = NN(input_size, hidden_size, output_size, activationFunction)
    optimizer_adam = optim.Adam(model.parameters(), lr=learning_rate)

    train_model(optimizer_adam, model, train_loader, epochs, verbose=0)
    test_model(model, eval_loader)

    return model


Epoch [1/20], Loss: 1.1040
Epoch [2/20], Loss: 1.1004
Epoch [3/20], Loss: 1.0986
Epoch [4/20], Loss: 1.0962
Epoch [5/20], Loss: 1.0960
Epoch [6/20], Loss: 1.0948
Epoch [7/20], Loss: 1.0914
Epoch [8/20], Loss: 1.0894
Epoch [9/20], Loss: 1.0856
Epoch [10/20], Loss: 1.0783
Epoch [11/20], Loss: 1.0757
Epoch [12/20], Loss: 1.0695
Epoch [13/20], Loss: 1.0628
Epoch [14/20], Loss: 1.0585
Epoch [15/20], Loss: 1.0519
Epoch [16/20], Loss: 1.0468
Epoch [17/20], Loss: 1.0414
Epoch [18/20], Loss: 1.0360
Epoch [19/20], Loss: 1.0322
Epoch [20/20], Loss: 1.0306
Test Accuracy: 50.80%
