In [1]:
#### Imports ####
from itertools import product
import pandas as pd
from sklearn import preprocessing
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import DataLoader, TensorDataset
import torch.nn as nn
import torch.optim as optim

In [2]:
#### Pre-processing ####################################################################################################################################################

#### Load the dataset ####
df = pd.read_csv('almond.csv')
# print(df.head())

#### Drop first column ####
df = df.drop(df.columns[0], axis=1)
# print(df.head())

#### Handle missing values with mean ####
missing_values_columns = ['Length (major axis)', 'Width (minor axis)', 'Thickness (depth)', 'Area', 'Perimeter', 'Roundness', 'Solidity', 'Compactness', 'Extent', 'Convex hull(convex area)']
df[missing_values_columns] = df[missing_values_columns].fillna(df[missing_values_columns].mean())
# print(df.head())

#### Impute the aspect ratio: length/ width & eccentricity: sqrt(1 - ( Width / Length ) **2 ) ####
df['Aspect Ratio'] = df['Length (major axis)'] / df['Width (minor axis)']
df['Eccentricity'] = np.sqrt(1 - (df['Width (minor axis)'] / df['Length (major axis)']) ** 2)
# print(df.head())

#### Convert 'Type' from categorical values to numerical ####
label_encoder = LabelEncoder()
df['Type'] = label_encoder.fit_transform(df['Type'])
# print(df.head())

#### Check for negative values ####
df = df[(df >= 0).all(axis=1)]
# negative_values = df[df < 0]
# print("Rows with negative values in any column:")
# print(df[(df < 0).any(axis=1)])

print(df)

      Length (major axis)  Width (minor axis)  Thickness (depth)     Area  \
0              290.609274          227.940628         127.759132  22619.0   
1              290.609274          234.188126         128.199509  23038.0   
2              290.609274          229.418610         125.796547  22386.5   
3              290.609274          232.763153         125.918808  22578.5   
4              290.609274          230.150742         107.253448  19068.0   
...                   ...                 ...                ...      ...   
2798           290.609274          192.709366         122.356506  18471.5   
2799           290.609274          186.254745         118.708961  17213.5   
2800           290.609274          186.196182         119.147224  17510.5   
2801           290.609274          188.660828         120.634438  17941.0   
2802           269.356903          176.023636         109.705378  36683.5   

       Perimeter  Roundness  Solidity  Compactness  Aspect Ratio  \
0     6

  result = getattr(ufunc, method)(*inputs, **kwargs)


In [3]:
#### Split the dataset #################################################################################################################################################
x = df.drop('Type', axis=1)
y = df['Type']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

print("x_train shape:", x_train.shape)
print("x_test shape:", x_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)

x_train shape: (1953, 12)
x_test shape: (837, 12)
y_train shape: (1953,)
y_test shape: (837,)


In [4]:
#### Standardize the data ##############################################################################################################################################
standardized_scaler = StandardScaler()
x_train = standardized_scaler.fit_transform(x_train)
x_test = standardized_scaler.transform(x_test)

print(df)

      Length (major axis)  Width (minor axis)  Thickness (depth)     Area  \
0              290.609274          227.940628         127.759132  22619.0   
1              290.609274          234.188126         128.199509  23038.0   
2              290.609274          229.418610         125.796547  22386.5   
3              290.609274          232.763153         125.918808  22578.5   
4              290.609274          230.150742         107.253448  19068.0   
...                   ...                 ...                ...      ...   
2798           290.609274          192.709366         122.356506  18471.5   
2799           290.609274          186.254745         118.708961  17213.5   
2800           290.609274          186.196182         119.147224  17510.5   
2801           290.609274          188.660828         120.634438  17941.0   
2802           269.356903          176.023636         109.705378  36683.5   

       Perimeter  Roundness  Solidity  Compactness  Aspect Ratio  \
0     6

In [5]:
#### Convert data for neural network model #############################################################################################################################
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

x_test_tensor = torch.tensor(x_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
test_dataset = TensorDataset(x_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)


In [6]:
#### Neural network model ##############################################################################################################################################

#### Base neural network ####
class NeuralNetwork(nn.Module):
    def __init__(self, size_of_input, size_of_hidden, num_classes):
        super(NeuralNetwork, self).__init__()
        self.layerOne = nn.Linear(size_of_input, size_of_hidden)
        self.relu = nn.ReLU()
        self.layerTwo = nn.Linear(size_of_hidden, num_classes)

    def forward(self, x):
        x = self.layerOne(x)
        x = self.relu(x)
        x = self.layerTwo(x)
        return x

#### Set the parameters for the basic neural network model ####
size_of_input = 12
num_classes = 3

#### R Prop training algorithm####
def training_model_rprop(size_of_hidden, learning_rate, num_epochs, train_loader):
    model = NeuralNetwork(size_of_input, size_of_hidden, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Rprop(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            labels = labels.long()
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

    return loss.item(), model


#### Adam optimiser ####
def training_model_adam(size_of_hidden, learning_rate, num_epochs, train_loader):
    model = NeuralNetwork(size_of_input, size_of_hidden, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    for epoch in range(num_epochs):
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            labels = labels.long()
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
    
    return loss.item(), model 

In [7]:
#### Function to train and evaluate model using K-Fold ####
def evaluate_model_kfold(optimizer_type, size_of_hidden, learning_rate, num_epochs, k_folds=5):
    kfold = KFold(n_splits=k_folds, shuffle=True, random_state=42)

    performance_scores = []
    loss_scores = [] 
    
    for fold, (train_idx, val_idx) in enumerate(kfold.split(x_train_tensor)):
        # Split data into training and validation sets for this fold
        x_train_fold = x_train_tensor[train_idx]
        y_train_fold = y_train_tensor[train_idx]
        x_val_fold = x_train_tensor[val_idx]
        y_val_fold = y_train_tensor[val_idx]
        
        train_dataset_fold = TensorDataset(x_train_fold, y_train_fold)
        train_loader_fold = DataLoader(train_dataset_fold, batch_size=16, shuffle=True)

        if optimizer_type == 'adam':
            loss, model = training_model_adam(size_of_hidden, learning_rate, num_epochs, train_loader_fold)
        elif optimizer_type == 'rprop':
            loss, model = training_model_rprop(size_of_hidden, learning_rate, num_epochs, train_loader_fold)
        else:
            raise ValueError(f"Unknown optimizer type: {optimizer_type}")
            
        loss_scores.append(loss)
        
        # Evaluate the model on the validation fold
        model.eval()
        with torch.no_grad():
            outputs = model(x_val_fold)
            _, predicted = torch.max(outputs, 1)
            accuracy = accuracy_score(y_val_fold.numpy(), predicted.numpy())
        
        performance_scores.append(accuracy)
    
    # Calculate the average performance and standard deviation of the training algorithms
    avg_performance = np.mean(performance_scores)
    std_performance = np.std(performance_scores)
    avg_loss = np.mean(loss_scores)
    
    print(f"Average {optimizer_type} performance across {k_folds} folds: {avg_performance:.4f}, Std: {std_performance:.4f}")
    print(f"Average {optimizer_type} loss across {k_folds} folds: {avg_loss:.4f}")
    
    return avg_performance, std_performance, avg_loss

In [None]:
# Hyperparameter combination 
hidden_sizes = [6, 8, 10, 16]
learning_rates = [0.001, 0.01]
num_epochs = 50

# Iterate through different hyperparameter combinations
for hidden_size, lr in product(hidden_sizes, learning_rates):
    print(f"Evaluating model with hidden size {hidden_size}, learning rate {lr}")
    
    # Evaluate with Adam optimizer
    evaluate_model_kfold(optimizer_type='adam', size_of_hidden=hidden_size, learning_rate=lr, num_epochs=num_epochs, k_folds=5)
    
    # Evaluate with RProp optimizer
    evaluate_model_kfold(optimizer_type='rprop', size_of_hidden=hidden_size, learning_rate=lr, num_epochs=num_epochs, k_folds=5)


Evaluating model with hidden size 6, learning rate 0.001
Average adam performance across 5 folds: 0.6411, Std: 0.0256
Average adam loss across 5 folds: 0.6639
Average rprop performance across 5 folds: 0.5090, Std: 0.0127
Average rprop loss across 5 folds: 1.0266
Evaluating model with hidden size 6, learning rate 0.01
Average adam performance across 5 folds: 0.6810, Std: 0.0389
Average adam loss across 5 folds: 0.6042


In [None]:
### Test rprop algorithm on testing data ####
def evaluate_rprop_on_test(size_of_hidden, learning_rate, num_epochs):
    final_loss, trained_model = training_model_rprop(size_of_hidden, learning_rate, num_epochs, test_loader)
    
    trained_model.eval()
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad(): 
        outputs = trained_model(x_test_tensor) 
        labels = y_test_tensor.long()
        test_loss = criterion(outputs, labels).item()
        _, predicted = torch.max(outputs, 1)
        
        accuracy = accuracy_score(y_test_tensor.numpy(), predicted.numpy())        
        print(f"RProp Model Accuracy on Test Set: {accuracy:.4f}")
        print(f"RProp Model Test Loss: {test_loss:.4f}")
    
    return accuracy, test_loss

size_of_hidden = 16
learning_rate = 0.01
num_epochs = 50
evaluate_rprop_on_test(size_of_hidden, learning_rate, num_epochs)


In [None]:
### Test adam algorithm on testing data ####
def evaluate_adam_on_test(size_of_hidden, learning_rate, num_epochs):
    final_loss, trained_model = training_model_adam(size_of_hidden, learning_rate, num_epochs, test_loader)
    
    trained_model.eval()
    criterion = nn.CrossEntropyLoss()

    with torch.no_grad(): 
        outputs = trained_model(x_test_tensor)
        labels = y_test_tensor.long()
        test_loss = criterion(outputs, labels).item()
        
        _, predicted = torch.max(outputs, 1)
        
        accuracy = accuracy_score(y_test_tensor.numpy(), predicted.numpy())        
        print(f"Adam Model Accuracy on Test Set: {accuracy:.4f}")
        print(f"Adam Model Test Loss: {test_loss:.4f}")

    return accuracy, test_loss

size_of_hidden = 16
learning_rate = 0.01
num_epochs = 50
evaluate_adam_on_test(size_of_hidden, learning_rate, num_epochs)


In [None]:
#### Hybrid model ####
def hybrid_training_model(size_of_hidden, learning_rate_adam, learning_rate_rprop, num_epochs, train_loader):
    model = NeuralNetwork(size_of_input, size_of_hidden, num_classes)
    criterion = nn.CrossEntropyLoss()

    optimizer_adam = optim.Adam(model.parameters(), lr=learning_rate_adam)
    optimizer_rprop = optim.Rprop(model.parameters(), lr=learning_rate_rprop)

    weight_update_correlations = [] # Store the weight update differences between Adam and RProp
    final_loss = 0.0

    for epoch in range(num_epochs):
        epoch_loss = 0.0 
        for inputs, labels in train_loader:
            optimizer_adam.zero_grad()
            optimizer_rprop.zero_grad()

            outputs = model(inputs)
            labels = labels.long()
            loss = criterion(outputs, labels)
            loss.backward()

            adam_grads = [param.grad.clone().detach() for param in model.parameters()]
            rprop_grads = [param.grad.clone().detach() for param in model.parameters()]

            with torch.no_grad():
                for param, adam_grad, rprop_grad in zip(model.parameters(), adam_grads, rprop_grads):
                    avg_update = (adam_grad + rprop_grad) / 2.0
                    param.grad.copy_(avg_update)

            for adam_grad, rprop_grad in zip(adam_grads, rprop_grads):
                if adam_grad.numel() > 0 and rprop_grad.numel() > 0:
                    correlation = torch.corrcoef(torch.stack([adam_grad.flatten(), rprop_grad.flatten()]))
                    weight_update_correlations.append(correlation[0, 1].item()) 

            optimizer_adam.step()
            optimizer_rprop.step()

            epoch_loss += loss.item()
            
    final_loss = epoch_loss / len(train_loader)      
    avg_correlation = np.mean(weight_update_correlations)
    print(f"Average correlation between Adam and RProp updates: {avg_correlation:.4f}")

    return final_loss, model

In [None]:
#### Evaluate hybrid model with testing data ####
def evaluate_hybrid_model_on_test(size_of_hidden, learning_rate_adam, learning_rate_rprop, num_epochs, train_loader, x_test_tensor, y_test_tensor):
    final_loss, trained_model = hybrid_training_model(size_of_hidden=size_of_hidden, learning_rate_adam=learning_rate_adam,
                                                      learning_rate_rprop=learning_rate_rprop, num_epochs=num_epochs, train_loader=test_loader)

    trained_model.eval()
    with torch.no_grad():
        outputs = trained_model(x_test_tensor)
        _, predicted = torch.max(outputs, 1)
        accuracy = accuracy_score(y_test_tensor.numpy(), predicted.numpy())

    print(f"Test set accuracy: {accuracy:.4f}")
    print(f"Final training loss: {final_loss:.4f}")

    return accuracy, final_loss


# Hyperparameters for hybrid model 
size_of_hidden = 16
learning_rate_adam = 0.01
learning_rate_rprop = 0.01
num_epochs = 50
accuracy, final_loss = evaluate_hybrid_model_on_test(size_of_hidden=size_of_hidden, learning_rate_adam=learning_rate_adam, 
                                                     learning_rate_rprop=learning_rate_rprop, num_epochs=num_epochs, train_loader=test_loader, 
                                                     x_test_tensor=x_test_tensor, y_test_tensor=y_test_tensor)

