In [1]:
import itertools
import json
import os
import sys

# NOTE: the remaining imports keep their original relative order on purpose.
# The two star imports can shadow or be shadowed by neighbouring names, so
# moving lines across them could silently change which object a name refers to.
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from Networks.Network import Network
from Utilities.utils import *
from Utilities.experiment_utils import *
from sklearn.metrics import confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from ucimlrepo import fetch_ucirepo, list_available_datasets


In [2]:
# Fetch the 'Wine' dataset through ucimlrepo and list the metadata fields it ships with
wine = fetch_ucirepo(name='Wine')
metadata_fields = wine.metadata.keys()
print(metadata_fields)

dict_keys(['uci_id', 'name', 'repository_url', 'data_url', 'abstract', 'area', 'tasks', 'characteristics', 'num_instances', 'num_features', 'feature_types', 'demographics', 'target_col', 'index_col', 'has_missing_values', 'missing_values_symbol', 'year_of_dataset_creation', 'last_updated', 'dataset_doi', 'creators', 'intro_paper', 'additional_info'])


In [3]:
# Lift pandas' column limit so no column of the variable table is elided
pd.options.display.max_columns = None

# Per-variable description of the dataset (name, role, type, ...)
variable_table = wine.variables
print(variable_table)

                            name     role         type demographic  \
0                          class   Target  Categorical        None   
1                        Alcohol  Feature   Continuous        None   
2                      Malicacid  Feature   Continuous        None   
3                            Ash  Feature   Continuous        None   
4              Alcalinity_of_ash  Feature   Continuous        None   
5                      Magnesium  Feature      Integer        None   
6                  Total_phenols  Feature   Continuous        None   
7                     Flavanoids  Feature   Continuous        None   
8           Nonflavanoid_phenols  Feature   Continuous        None   
9                Proanthocyanins  Feature   Continuous        None   
10               Color_intensity  Feature   Continuous        None   
11                           Hue  Feature   Continuous        None   
12  0D280_0D315_of_diluted_wines  Feature   Continuous        None   
13                  

In [4]:
# Feature table, kept as delivered by ucimlrepo
X = wine.data.features
# Target column collapsed to one dimension and converted to a NumPy array
y = wine.data.targets.squeeze().to_numpy()

for label, shape in (("Dataset", X.shape), ("Target", y.shape)):
    print(f"{label} Shape (rows,cols): {shape}")

Dataset Shape (rows,cols): (178, 13)
Target Shape (rows,cols): (178,)


# Data Preprocessing

In [5]:
# random seed for train-test splits for reproducibility
RANDOM_STATE = 1

# Since all input features are numerical, we can use the StandardScaler to normalize the data
numerical_preprocessing = Pipeline([
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer([
    ('numerical', numerical_preprocessing, X.columns)
])

# Train, test, validation split.
# The first call holds out 20% of the rows; the second call divides that held-out part
# between test and validation.
# NOTE(review): test_size=0.2 in the second call gives the *validation* set 20% of the
# held-out rows (8 samples) and the test set the remaining 80% (28 samples). Confirm that
# such a small validation set is intended; it is left unchanged here so previously saved
# results stay comparable.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)
X_test, X_val, y_test, y_val = train_test_split(X_temp, y_temp, test_size=0.2, random_state=RANDOM_STATE)

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(f"Device: {device}\n")

# Preprocess the data and convert it to tensors.
# The scaler is fitted on the training rows only and then reused for validation and test.
X_train = torch.tensor(preprocessor.fit_transform(X_train), dtype=torch.float32).to(device)
X_val = torch.tensor(preprocessor.transform(X_val), dtype=torch.float32).to(device)
X_test = torch.tensor(preprocessor.transform(X_test), dtype=torch.float32).to(device)

# One-hot encode the target.
# The original `OneHotEncoder(sparse=False)` relies on a keyword that was deprecated in
# scikit-learn 1.2 and removed in 1.4 (replaced by `sparse_output`). Using the default
# sparse output and densifying with `.toarray()` works on every scikit-learn version.
encoder = OneHotEncoder()
encoder.fit(y_train.reshape(-1, 1))


def _one_hot_tensor(labels):
    """Return `labels` (1-D array of class labels) one-hot encoded as a long tensor on `device`."""
    dense = encoder.transform(labels.reshape(-1, 1)).toarray()
    return torch.tensor(dense, dtype=torch.long).to(device)


y_train_encoded = _one_hot_tensor(y_train)
y_val_encoded = _one_hot_tensor(y_val)
y_test_encoded = _one_hot_tensor(y_test)

# Data shapes
print("Number of samples in each dataset:")
print(f"Train shape: {X_train.shape}")
print(f"Validation shape: {X_val.shape}")
print(f"Test shape: {X_test.shape}")
print()

# Target shapes
print("Shape of the target after one-hot encoding:")
print(f"Train shape: {y_train_encoded.shape}")
print(f"Validation shape: {y_val_encoded.shape}")
print(f"Test shape: {y_test_encoded.shape}")
print()

Device: cuda

Number of samples in each dataset:
Train shape: torch.Size([142, 13])
Validation shape: torch.Size([8, 13])
Test shape: torch.Size([28, 13])

Shape of the target after one-hot encoding:
Train shape: torch.Size([142, 3])
Validation shape: torch.Size([8, 3])
Test shape: torch.Size([28, 3])





# Global Parameters

In [6]:
### GLOBAL PARAMETERS ###
# True  -> run the experiments
# False -> load the results from the results file instead
RUN_EXPERIMENTS = False
# Maximum number of epochs to run; set to a large number to run until the early stopping criterion is met
EPOCHS = 400_000

# Set individual experiment parameters.
# Each entry holds the layer count, the input/output widths taken from the prepared
# tensors, and the grid of hidden-layer sizes and learning rates to try.
experiment_params = dict(
    experiment_1=dict(
        NUM_LAYERS=1,
        NUM_INPUTS=X_train.shape[1],
        NUM_OUTPUTS=y_train_encoded.shape[1],
        params=dict(
            hidden_size=[[5], [10], [20]],  # <--- change the hidden layer sizes here for experiment 1
            learning_rate=[1.0, 0.1, 0.01],  # <--- change the learning rates here for experiment 1
        ),
    ),
    experiment_2=dict(
        NUM_LAYERS=2,
        NUM_INPUTS=X_train.shape[1],
        NUM_OUTPUTS=y_train_encoded.shape[1],
        params=dict(
            hidden_size=list(itertools.product([5, 10], repeat=2)),  # <--- change the hidden layer sizes here for experiment 2
            learning_rate=[1.0, 0.1, 0.01],  # <--- change the learning rates here for experiment 2
        ),
    ),
    experiment_3=dict(
        NUM_LAYERS=0,
        NUM_INPUTS=X_train.shape[1],
        NUM_OUTPUTS=y_train_encoded.shape[1],
        params=dict(
            hidden_size=[[]],  # don't change this if you want no hidden layers
            learning_rate=[1.0, 0.1, 0.01],  # <--- change the learning rates here for experiment 3
        ),
    ),
)

# Experiments

In [7]:
if RUN_EXPERIMENTS:
    # Fresh result store: one empty list per experiment, appended to by the experiment cells
    experiments_dict = {f'experiment_{idx}': [] for idx in (1, 2, 3)}

## Experiment 1: Network with One Hidden Layer

In [8]:
# Unpack the experiment-1 settings from the shared configuration into the
# names the training cell reads
NUM_LAYERS, NUM_INPUTS, NUM_OUTPUTS, params = (
    experiment_params['experiment_1'][key]
    for key in ('NUM_LAYERS', 'NUM_INPUTS', 'NUM_OUTPUTS', 'params')
)

In [9]:
if RUN_EXPERIMENTS:
    print("Running Experiments...")

    # Grid search: every hidden-layer shape is combined with every learning rate
    # (hidden sizes vary slowest, exactly like the equivalent nested loops).
    for hs, lr in itertools.product(params['hidden_size'], params['learning_rate']):
        # Key under which this configuration's results are stored
        param_id = f"hs={hs}_lr={lr}"

        # Create the model
        model = Network(NUM_INPUTS, NUM_OUTPUTS, NUM_LAYERS, hs).to(device)

        print("---------------------------------------------")
        print(f"Hidden Layer Shape: {hs}; Learning Rate: {lr}\n")

        # Train the model.
        # The elapsed time is bound to `training_time` rather than `time`, so a `time`
        # module that may be in scope (e.g. via the star imports) is never overwritten.
        model, train_loss, val_loss, train_cm, val_cm, epoch_stop, training_time = train_model(
            model, EPOCHS, lr, X_train, y_train_encoded, X_val, y_val_encoded, verbose=True)
        # Accuracy = correctly classified samples (confusion-matrix diagonal) / all samples
        training_accuracy = train_cm.diag().sum().float() / train_cm.sum().float()
        validation_accuracy = val_cm.diag().sum().float() / val_cm.sum().float()

        print(f"Training stopped at epoch {epoch_stop} after {training_time:.2f} seconds\n")

        # Evaluate the model on the test set
        test_loss, test_cm = evaluate_model(model, X_test, y_test_encoded)
        # Same diagonal-based formula as for train/validation; the previous version
        # hard-coded three classes by summing cells [0,0], [1,1] and [2,2].
        test_accuracy = test_cm.diag().sum().float() / test_cm.sum().float()

        # Tensors are converted to plain Python values so the results can be dumped to JSON
        results = {
            'train_loss': train_loss,
            'val_loss': val_loss,
            'train_cm': train_cm.cpu().numpy().tolist(),
            'val_cm': val_cm.cpu().numpy().tolist(),
            'training_accuracy': training_accuracy.cpu().numpy().tolist(),
            'validation_accuracy': validation_accuracy.cpu().numpy().tolist(),
            'training_time': training_time,
            'epoch_stop': epoch_stop,
            'test_loss': test_loss.cpu().numpy().tolist(),
            'test_cm': test_cm.cpu().numpy().tolist(),
            'test_accuracy': test_accuracy.cpu().numpy().tolist()
        }

        experiments_dict['experiment_1'].append({param_id: results})

        print("Post-training Results:")
        print(f"Train Loss: {train_loss[-1]:.4f}; Validation Loss: {val_loss[-1]:.4f}; Test Loss: {test_loss:.4f}")
        print(f"Training Accuracy: {training_accuracy:.4f}; Test Accuracy: {test_accuracy:.4f}"); print()
        print(f"Train Confusion Matrix:\n{train_cm}"); print()
        print(f"Test Confusion Matrix:\n{test_cm}"); print()

        print("---------------------------------------------")

    print("Experiments Completed")

## Experiment 2: Network with Two Hidden Layers

In [10]:
# Unpack the experiment-2 settings from the shared configuration into the
# names the training cell reads
NUM_LAYERS, NUM_INPUTS, NUM_OUTPUTS, params = (
    experiment_params['experiment_2'][key]
    for key in ('NUM_LAYERS', 'NUM_INPUTS', 'NUM_OUTPUTS', 'params')
)

In [11]:
if RUN_EXPERIMENTS:
    print("Running Experiments...")

    # Grid search: every hidden-layer shape is combined with every learning rate
    # (hidden sizes vary slowest, exactly like the equivalent nested loops).
    for hs, lr in itertools.product(params['hidden_size'], params['learning_rate']):
        # Key under which this configuration's results are stored
        param_id = f"hs={hs}_lr={lr}"

        # Create the model
        model = Network(NUM_INPUTS, NUM_OUTPUTS, NUM_LAYERS, hs).to(device)

        print("---------------------------------------------")
        print(f"Hidden Layer Shape: {hs}; Learning Rate: {lr}\n")

        # Train the model.
        # The elapsed time is bound to `training_time` rather than `time`, so a `time`
        # module that may be in scope (e.g. via the star imports) is never overwritten.
        model, train_loss, val_loss, train_cm, val_cm, epoch_stop, training_time = train_model(
            model, EPOCHS, lr, X_train, y_train_encoded, X_val, y_val_encoded, verbose=True)
        # Accuracy = correctly classified samples (confusion-matrix diagonal) / all samples
        training_accuracy = train_cm.diag().sum().float() / train_cm.sum().float()
        validation_accuracy = val_cm.diag().sum().float() / val_cm.sum().float()

        print(f"Training stopped at epoch {epoch_stop} after {training_time:.2f} seconds\n")

        # Evaluate the model on the test set
        test_loss, test_cm = evaluate_model(model, X_test, y_test_encoded)
        # Same diagonal-based formula as for train/validation; the previous version
        # hard-coded three classes by summing cells [0,0], [1,1] and [2,2].
        test_accuracy = test_cm.diag().sum().float() / test_cm.sum().float()

        # Tensors are converted to plain Python values so the results can be dumped to JSON
        results = {
            'train_loss': train_loss,
            'val_loss': val_loss,
            'train_cm': train_cm.cpu().numpy().tolist(),
            'val_cm': val_cm.cpu().numpy().tolist(),
            'training_accuracy': training_accuracy.cpu().numpy().tolist(),
            'validation_accuracy': validation_accuracy.cpu().numpy().tolist(),
            'training_time': training_time,
            'epoch_stop': epoch_stop,
            'test_loss': test_loss.cpu().numpy().tolist(),
            'test_cm': test_cm.cpu().numpy().tolist(),
            'test_accuracy': test_accuracy.cpu().numpy().tolist()
        }

        experiments_dict['experiment_2'].append({param_id: results})

        print("Post-training Results:")
        print(f"Train Loss: {train_loss[-1]:.4f}; Validation Loss: {val_loss[-1]:.4f}; Test Loss: {test_loss:.4f}")
        print(f"Training Accuracy: {training_accuracy:.4f}; Test Accuracy: {test_accuracy:.4f}"); print()
        print(f"Train Confusion Matrix:\n{train_cm}"); print()
        print(f"Test Confusion Matrix:\n{test_cm}"); print()

        print("---------------------------------------------")

    print("Experiments Completed")

## Experiment 3: Model with 0 Hidden Layers (Linear Model)

In [12]:
# Unpack the experiment-3 settings from the shared configuration into the
# names the training cell reads
NUM_LAYERS, NUM_INPUTS, NUM_OUTPUTS, params = (
    experiment_params['experiment_3'][key]
    for key in ('NUM_LAYERS', 'NUM_INPUTS', 'NUM_OUTPUTS', 'params')
)

In [13]:
if RUN_EXPERIMENTS:
    print("Running Experiments...")

    # Grid search: every hidden-layer shape is combined with every learning rate
    # (hidden sizes vary slowest, exactly like the equivalent nested loops).
    for hs, lr in itertools.product(params['hidden_size'], params['learning_rate']):
        # Key under which this configuration's results are stored
        param_id = f"hs={hs}_lr={lr}"

        # Create the model
        model = Network(NUM_INPUTS, NUM_OUTPUTS, NUM_LAYERS, hs).to(device)

        print("---------------------------------------------")
        print(f"Hidden Layer Shape: {hs}; Learning Rate: {lr}\n")

        # Train the model.
        # The elapsed time is bound to `training_time` rather than `time`, so a `time`
        # module that may be in scope (e.g. via the star imports) is never overwritten.
        model, train_loss, val_loss, train_cm, val_cm, epoch_stop, training_time = train_model(
            model, EPOCHS, lr, X_train, y_train_encoded, X_val, y_val_encoded, verbose=True)
        # Accuracy = correctly classified samples (confusion-matrix diagonal) / all samples
        training_accuracy = train_cm.diag().sum().float() / train_cm.sum().float()
        validation_accuracy = val_cm.diag().sum().float() / val_cm.sum().float()

        print(f"Training stopped at epoch {epoch_stop} after {training_time:.2f} seconds\n")

        # Evaluate the model on the test set
        test_loss, test_cm = evaluate_model(model, X_test, y_test_encoded)
        # Same diagonal-based formula as for train/validation; the previous version
        # hard-coded three classes by summing cells [0,0], [1,1] and [2,2].
        test_accuracy = test_cm.diag().sum().float() / test_cm.sum().float()

        # Tensors are converted to plain Python values so the results can be dumped to JSON
        results = {
            'train_loss': train_loss,
            'val_loss': val_loss,
            'train_cm': train_cm.cpu().numpy().tolist(),
            'val_cm': val_cm.cpu().numpy().tolist(),
            'training_accuracy': training_accuracy.cpu().numpy().tolist(),
            'validation_accuracy': validation_accuracy.cpu().numpy().tolist(),
            'training_time': training_time,
            'epoch_stop': epoch_stop,
            'test_loss': test_loss.cpu().numpy().tolist(),
            'test_cm': test_cm.cpu().numpy().tolist(),
            'test_accuracy': test_accuracy.cpu().numpy().tolist()
        }

        experiments_dict['experiment_3'].append({param_id: results})

        print("Post-training Results:")
        print(f"Train Loss: {train_loss[-1]:.4f}; Validation Loss: {val_loss[-1]:.4f}; Test Loss: {test_loss:.4f}")
        print(f"Training Accuracy: {training_accuracy:.4f}; Test Accuracy: {test_accuracy:.4f}"); print()
        print(f"Train Confusion Matrix:\n{train_cm}"); print()
        print(f"Test Confusion Matrix:\n{test_cm}"); print()

        print("---------------------------------------------")

    print("Experiments Completed")

In [14]:
if RUN_EXPERIMENTS:
    filename = 'Results/experiments.json'

    # Create the output folder first: with a missing 'Results/' directory open() would
    # raise and the results of the (potentially very long) training runs would be lost.
    os.makedirs(os.path.dirname(filename), exist_ok=True)

    # Persist all experiment results so later sessions can skip training
    with open(filename, 'w') as f:
        json.dump(experiments_dict, f)

## Best Model

In [17]:
if not RUN_EXPERIMENTS:
    # Experiments were not run in this session, so read the previously saved results.
    try:
        with open('Results/experiments.json', 'r') as f:
            experiments_dict = json.load(f)
    except FileNotFoundError:
        # Only a missing file is reported as such; any other failure (for example a
        # corrupt JSON file) propagates with its own traceback instead of being
        # mislabelled by a bare `except:`.
        print("No experiments file found. Please ensure experiments.json exists.")
        raise

# The analysis below runs in both modes: on the freshly computed results when
# RUN_EXPERIMENTS is True, otherwise on the results loaded from disk.
# NOTE(review): assumes find_best_model accepts the in-memory dict as well as the loaded
# one (both hold the same keys with plain-Python values) - confirm.
best_experiment, best_param_id, best_accuracy, best_loss, multi_best_acc = find_best_model(experiments_dict)

if multi_best_acc:
    print("There were multiple models with perfect accuracy, showing the one with the lowest test loss of that set")
    print(f"Model with param_id: {best_param_id} with a test accuracy of {best_accuracy:.4f} had the lowest test loss of {best_loss:.4f}\n")
else:
    print(f"Best model had param_id: {best_param_id} with a test accuracy of {best_accuracy:.4f} and test loss of {best_loss:.4f}\n")

# Show the test confusion matrix of the best model.
# Each entry of an experiment's list is a one-item dict {param_id: results}, so the
# first entry containing the winning param_id is the one to display.
for experiment in experiments_dict[best_experiment]:
    if best_param_id in experiment:
        print("Best Model Test Confusion Matrix:")
        print(np.array(experiment[best_param_id]['test_cm']))
        break



There were multiple models with perfect accuracy, showing the one with the lowest test loss of that set
Model with param_id: hs=(10, 5)_lr=1.0 with a test accuracy of 1.0000 had the lowest test loss of 0.0006
Best Model Test Confusion Matrix:
[[11  0  0]
 [ 0  8  0]
 [ 0  0  9]]
