In [1]:
!pip install tabulate



**Importing Libraries**

In [2]:
import os
os.environ['TF_USE_LEGACY_KERAS'] = '1'
import io
import torch
import numpy as np
import pandas as pd
import seaborn as sns
import torch.nn as nn
import tensorflow as tf
import torch.optim as optim
from tabulate import tabulate
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

**Loading Dataset**

In [3]:
from google.colab import files
uploaded = files.upload()

Saving abalone.data.csv to abalone.data.csv


**Converting Dataset to a Dataframe**

In [4]:
df = pd.read_csv(io.BytesIO(uploaded['abalone.data.csv']))
print(df.columns)

Index(['Sex', 'Length', 'Diameter', 'Height', 'Whole_weight', 'Shucked_weight',
       'Viscera_weight', 'Shell_weight', 'Rings'],
      dtype='object')


**Encoding the 'Sex' Variable and standardizing the continuous features**

In [28]:
features = df.drop('Rings', axis=1)
target = df['Rings']

encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')

sex_encoded = encoder.fit_transform(features[['Sex']])
sex_df = pd.DataFrame(sex_encoded, columns=encoder.get_feature_names_out(['Sex']))

continuous_features = features.iloc[:, 1:]

scaler = StandardScaler()
scaler.fit(continuous_features)

features_scaled = scaler.transform(continuous_features)

features = pd.concat([sex_df, pd.DataFrame(features_scaled, columns=continuous_features.columns)], axis=1)
print(features.head())

   Sex_F  Sex_I  Sex_M    Length  Diameter    Height  Whole_weight  \
0    0.0    0.0    1.0 -0.574558 -0.432149 -1.064424     -0.641898   
1    0.0    0.0    1.0 -1.448986 -1.439929 -1.183978     -1.230277   
2    1.0    0.0    0.0  0.050033  0.122130 -0.107991     -0.309469   
3    0.0    0.0    1.0 -0.699476 -0.432149 -0.347099     -0.637819   
4    0.0    1.0    0.0 -1.615544 -1.540707 -1.423087     -1.272086   

   Shucked_weight  Viscera_weight  Shell_weight  
0       -0.607685       -0.726212     -0.638217  
1       -1.170910       -1.205221     -1.212987  
2       -0.463500       -0.356690     -0.207139  
3       -0.648238       -0.607600     -0.602294  
4       -1.215968       -1.287337     -1.320757  




**Converting the Pandas Dataframes into PyTorch Tensors**

In [29]:
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)

# Pandas Dataframe to a Numpy Array
X_train_array = X_train.values
X_test_array = X_test.values
y_train_array = y_train.values
y_test_array = y_test.values

# NumPy arrays to PyTorch tensors
X_train_tensor = torch.tensor(X_train_array, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_array, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train_array, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test_array, dtype=torch.float32)

print("Training set tensors - X:", X_train_tensor.shape, "y:", y_train_tensor.shape)
print("Testing set tensors - X:", X_test_tensor.shape, "y:", y_test_tensor.shape)

Training set tensors - X: torch.Size([3341, 10]) y: torch.Size([3341])
Testing set tensors - X: torch.Size([836, 10]) y: torch.Size([836])


**Defining the Class for Neural Network Regression (Abalone Age Prediction)**

In [30]:
class AbaloneAgePrediction(nn.Module):
    def __init__(self):
        super(AbaloneAgePrediction, self).__init__()

        self.fc1 = nn.Linear(10, 64)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

**Training and Testing the model using SGD Optimizer**

In [32]:
import matplotlib.pyplot as plt

def train_model(model, X_train, y_train, num_epochs=100, learning_rate=0.01):
    criterion = nn.MSELoss()
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)

    train_losses = []

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()

        outputs = model(X_train)
        loss = criterion(outputs, y_train.view(-1, 1))

        if torch.isnan(loss):
            print(f'NaN loss encountered at epoch {epoch+1}')
            break

        loss.backward()
        optimizer.step()

        train_losses.append(loss.item())

        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')

    return train_losses

def evaluate(model, X_test, y_test):
    model.eval()
    criterion = nn.MSELoss()

    with torch.no_grad():
        outputs = model(X_test)
        test_loss = criterion(outputs, y_test.view(-1, 1))

    return test_loss.item()

model = AbaloneAgePrediction()

train_losses = train_model(model, X_train_tensor, y_train_tensor, num_epochs=100, learning_rate=0.01)

test_loss = evaluate(model, X_test_tensor, y_test_tensor)
print(f'Mean Squared Error (MSE) on Test Set: {test_loss}')


Epoch [10/100], Loss: 12.591731071472168
Epoch [20/100], Loss: 6.324141979217529
Epoch [30/100], Loss: 5.457821846008301
Epoch [40/100], Loss: 5.179375171661377
Epoch [50/100], Loss: 5.018242835998535
Epoch [60/100], Loss: 4.908449172973633
Epoch [70/100], Loss: 4.82871150970459
Epoch [80/100], Loss: 4.76861572265625
Epoch [90/100], Loss: 4.721469402313232
Epoch [100/100], Loss: 4.6848907470703125
Mean Squared Error (MSE) on Test Set: 4.847127437591553


**Hyperparameter Tuning (learning rate, batch size, number of hidden nodes, number of layers)**

In [27]:
class AbaloneAgePrediction(nn.Module):
    def __init__(self, input_size, hidden_sizes, num_layers):
        super(AbaloneAgePrediction, self).__init__()
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes
        self.num_layers = num_layers

        layers = []
        layers.append(nn.Linear(input_size, hidden_sizes[0]))
        layers.append(nn.ReLU())

        for i in range(1, num_layers):
            layers.append(nn.Linear(hidden_sizes[i-1], hidden_sizes[i]))
            layers.append(nn.ReLU())

        layers.append(nn.Linear(hidden_sizes[-1], 1))  # Output layer

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        x = x.view(-1, self.input_size)
        return self.model(x)

def train_model(model, criterion, optimizer, X_train, y_train, X_val, y_val, num_epochs=100, batch_size=32):
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()

        for idx in range(0, len(X_train), batch_size):
            batch_X = X_train[idx:idx+batch_size]
            batch_y = y_train[idx:idx+batch_size]

            outputs = model(batch_X)
            loss = criterion(outputs, batch_y.view(-1, 1))

            loss.backward()
            optimizer.step()

        train_loss = evaluate(model, criterion, X_train, y_train)
        train_losses.append(train_loss)

        val_loss = evaluate(model, criterion, X_val, y_val)
        val_losses.append(val_loss)

        if (epoch+1) == 100:
            print(f'Train Loss: {train_loss}, Val Loss: {val_loss}')
            print()

    return train_losses, val_losses

def evaluate(model, criterion, X, y):
    model.eval()
    with torch.no_grad():
        outputs = model(X)
        loss = criterion(outputs, y.view(-1, 1))
    return loss.item()

def hyperparameter_tuning(learning_rates, batch_sizes, hidden_sizes, num_layers, X_train, y_train, X_val, y_val, num_epochs=100):
    results = []

    for lr in learning_rates:
        for batch_size in batch_sizes:
            for hidden_size in hidden_sizes:
                for n_layers in num_layers:
                    print(f'Training model with lr={lr}, batch_size={batch_size}, hidden_size={hidden_size}, num_layers={n_layers}')

                    # Create model with specified hyperparameters
                    model = AbaloneAgePrediction(input_size=10, hidden_sizes=[hidden_size] * n_layers, num_layers=n_layers)
                    criterion = nn.MSELoss()
                    optimizer = optim.SGD(model.parameters(), lr=lr)
                    train_losses, val_losses = train_model(model, criterion, optimizer, X_train, y_train, X_val, y_val, num_epochs=num_epochs, batch_size=batch_size)

                    final_val_loss = val_losses[-1]

                    results.append({
                        'Learning Rate': lr,
                        'Batch Size': batch_size,
                        'Hidden Size': hidden_size,
                        'Num Layers': n_layers,
                        'Final Val Loss': final_val_loss
                    })

    df_results = pd.DataFrame(results)

    df_results_sorted = df_results.sort_index()

    return df_results_sorted

# Define hyperparameters to tune
learning_rates = [0.0001, 0.001, 0.01]
batch_sizes = [32, 64]
hidden_sizes = [16, 32, 64]
num_layers = [2, 5, 10]

X_train, X_val, y_train, y_val = train_test_split(X_train_tensor, y_train_tensor, test_size=0.2, random_state=42)

df_results = hyperparameter_tuning(learning_rates, batch_sizes, hidden_sizes, num_layers, X_train, y_train, X_val, y_val, num_epochs=100)

table_string = df_results.to_string(index=False)
print(tabulate(df_results, headers='keys', tablefmt='github'))


Training model with lr=0.0001, batch_size=32, hidden_size=16, num_layers=2
Train Loss: 4.176601886749268, Val Loss: 4.446679592132568

Training model with lr=0.0001, batch_size=32, hidden_size=16, num_layers=5
Train Loss: 4.436960220336914, Val Loss: 4.468303680419922

Training model with lr=0.0001, batch_size=32, hidden_size=16, num_layers=10
Train Loss: 4.482954978942871, Val Loss: 4.5943427085876465

Training model with lr=0.0001, batch_size=32, hidden_size=32, num_layers=2
Train Loss: 4.408363342285156, Val Loss: 4.796760082244873

Training model with lr=0.0001, batch_size=32, hidden_size=32, num_layers=5
Train Loss: 4.20327615737915, Val Loss: 4.619257926940918

Training model with lr=0.0001, batch_size=32, hidden_size=32, num_layers=10
Train Loss: 10.407540321350098, Val Loss: 9.805367469787598

Training model with lr=0.0001, batch_size=32, hidden_size=64, num_layers=2
Train Loss: 4.050039291381836, Val Loss: 4.6597065925598145

Training model with lr=0.0001, batch_size=32, hidde

**Comparing SGD with Adagrad Optimizer by adding more layers (5)**

In [33]:
class AbaloneAgePrediction(nn.Module):
    def __init__(self, input_size, hidden_sizes):
        super(AbaloneAgePrediction, self).__init__()
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes

        layers = []
        prev_size = input_size
        for size in hidden_sizes:
            layers.append(nn.Linear(prev_size, size))
            layers.append(nn.ReLU())
            prev_size = size

        layers.append(nn.Linear(prev_size, 1))  # Output layer

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        x = x.view(-1, self.input_size)
        return self.model(x)

def train_model(model, criterion, optimizer, X_train, y_train, X_val, y_val, num_epochs=100, batch_size=32):
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()

        for idx in range(0, len(X_train), batch_size):
            batch_X = X_train[idx:idx+batch_size]
            batch_y = y_train[idx:idx+batch_size]

            outputs = model(batch_X)
            loss = criterion(outputs, batch_y.view(-1, 1))

            loss.backward()
            optimizer.step()

        train_loss = evaluate(model, criterion, X_train, y_train)
        train_losses.append(train_loss)

        val_loss = evaluate(model, criterion, X_val, y_val)
        val_losses.append(val_loss)

        if (epoch+1) == 100:
            print(f'Train Loss: {train_loss}, Val Loss: {val_loss}')
            print()

    return train_losses, val_losses

def evaluate(model, criterion, X, y):
    model.eval()
    with torch.no_grad():
        outputs = model(X)
        loss = criterion(outputs, y.view(-1, 1))
    return loss.item()

def compare_optimizers(hidden_sizes, X_train, y_train, X_val, y_val, num_epochs=100, batch_size=32):
    learning_rate = 0.01
    optimizers = {
        'SGD': optim.SGD,
        'Adagrad': optim.Adagrad
    }

    results = []

    for optimizer_name, optimizer_class in optimizers.items():
        print(f'Using optimizer: {optimizer_name}')

        print(f'Training model with optimizer: {optimizer_name}, hidden_sizes: {hidden_sizes}')

        model = AbaloneAgePrediction(input_size=10, hidden_sizes=hidden_sizes)
        criterion = nn.MSELoss()
        optimizer = optimizer_class(model.parameters(), lr=learning_rate)

        train_losses, val_losses = train_model(model, criterion, optimizer, X_train, y_train, X_val, y_val, num_epochs=num_epochs, batch_size=batch_size)

        final_val_loss = val_losses[-1]

        results.append({
            'Optimizer': optimizer_name,
            'Hidden Sizes': hidden_sizes,
            'Final Val Loss': final_val_loss
        })

    df_results = pd.DataFrame(results)

X_train, X_val, y_train, y_val = train_test_split(X_train_tensor, y_train_tensor, test_size=0.2, random_state=42)

# The hidden layer sizes
hidden_sizes = [64, 32, 16, 8, 4]
compare_optimizers(hidden_sizes, X_train, y_train, X_val, y_val, num_epochs=100, batch_size=32)


Using optimizer: SGD
Training model with optimizer: SGD, hidden_sizes: [64, 32, 16, 8, 4]
Train Loss: 27.890647888183594, Val Loss: 29.01724624633789

Using optimizer: Adagrad
Training model with optimizer: Adagrad, hidden_sizes: [64, 32, 16, 8, 4]
Train Loss: 4.049515724182129, Val Loss: 4.605296611785889



**Training and Testing using Sigmoid as activation function, 10 hidden layers with 64 nodes each**

In [34]:
class DeepAbaloneAgePrediction(nn.Module):
    def __init__(self, input_size, hidden_sizes):
        super(DeepAbaloneAgePrediction, self).__init__()
        self.input_size = input_size
        self.hidden_sizes = hidden_sizes

        layers = []
        prev_size = input_size
        for size in hidden_sizes:
            layers.append(nn.Linear(prev_size, size))
            layers.append(nn.Sigmoid())
            prev_size = size

        layers.append(nn.Linear(prev_size, 1))

        self.model = nn.Sequential(*layers)

    def forward(self, x):
        x = x.view(-1, self.input_size)
        return self.model(x)

def train_model(model, criterion, optimizer, X_train, y_train, X_val, y_val, num_epochs=100, batch_size=32):
    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        model.train()
        optimizer.zero_grad()

        for idx in range(0, len(X_train), batch_size):
            batch_X = X_train[idx:idx+batch_size]
            batch_y = y_train[idx:idx+batch_size]

            outputs = model(batch_X)
            loss = criterion(outputs, batch_y.view(-1, 1))

            loss.backward()
            optimizer.step()

        train_loss = evaluate(model, criterion, X_train, y_train)
        train_losses.append(train_loss)

        val_loss = evaluate(model, criterion, X_val, y_val)
        val_losses.append(val_loss)

        if (epoch+1) % 10 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {train_loss}, Val Loss: {val_loss}')

    return train_losses, val_losses

def evaluate(model, criterion, X, y):
    model.eval()
    with torch.no_grad():
        outputs = model(X)
        loss = criterion(outputs, y.view(-1, 1))
    return loss.item()

X_train, X_val, y_train, y_val = train_test_split(X_train_tensor, y_train_tensor, test_size=0.2, random_state=42)

hidden_sizes = [64] * 10

model = DeepAbaloneAgePrediction(input_size=10, hidden_sizes=hidden_sizes)
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

train_losses, val_losses = train_model(model, criterion, optimizer, X_train, y_train, X_val, y_val, num_epochs=100, batch_size=32)

final_val_loss = val_losses[-1]
print(f'Final Validation Loss: {final_val_loss}')


Epoch [10/100], Train Loss: 10.494305610656738, Val Loss: 10.008973121643066
Epoch [20/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Epoch [30/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Epoch [40/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Epoch [50/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Epoch [60/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Epoch [70/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Epoch [80/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Epoch [90/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Epoch [100/100], Train Loss: 10.49427318572998, Val Loss: 10.008919715881348
Final Validation Loss: 10.008919715881348
