In [147]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [148]:
df = pd.read_csv('abalone.data.csv')
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


One-hot encoding Gender

In [149]:
data = pd.get_dummies(df, columns=['Sex'], drop_first=True)

80-20 Split

In [150]:
# Separate features and target
X = data.drop('Rings', axis=1)
y = data['Rings']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

Convert data to PyTorch tensors

In [151]:
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).reshape(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).reshape(-1, 1)

In [152]:
class BPN(nn.Module):
    def __init__(self, input_size):
        super(BPN, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Initialize the model
input_size = X_train_tensor.shape[1]
model = BPN(input_size)
print(model)

BPN(
  (fc1): Linear(in_features=9, out_features=64, bias=True)
  (relu): ReLU()
  (fc2): Linear(in_features=64, out_features=32, bias=True)
  (fc3): Linear(in_features=32, out_features=1, bias=True)
)


In [156]:
# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)
mse = criterion
# Training loop
epochs = 100
for epoch in range(epochs):
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = mse(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')
print("Train Loss:", loss.item())
# Evaluate on test set
with torch.no_grad():
    model.eval()
    predictions = model(X_test_tensor)
    test_loss1 = mse(predictions, y_test_tensor)
    test_loss1
    print(f'Test Loss: {test_loss1.item()}')

Epoch [1/100], Loss: 4.4808526039123535
Epoch [11/100], Loss: 4.4750657081604
Epoch [21/100], Loss: 4.472352027893066
Epoch [31/100], Loss: 4.470798015594482
Epoch [41/100], Loss: 4.469691753387451
Epoch [51/100], Loss: 4.468764781951904
Epoch [61/100], Loss: 4.467916488647461
Epoch [71/100], Loss: 4.467106342315674
Epoch [81/100], Loss: 4.466320037841797
Epoch [91/100], Loss: 4.465551853179932
Train Loss: 4.46487283706665
Test Loss: 4.85244607925415


Using Adagrad optimizer


In [159]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

input_size = X_train.shape[1]
model = NeuralNetwork(input_size)

criterion = nn.MSELoss()
optimizer = optim.Adagrad(model.parameters(), lr=0.01)
epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item()}')
print("Train Loss:", loss.item())
model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_tensor)
    print(f'Test Loss: {test_loss.item()}')

Epoch [1/100], Loss: 105.92761993408203
Epoch [11/100], Loss: 49.41080093383789
Epoch [21/100], Loss: 29.232635498046875
Epoch [31/100], Loss: 23.95831298828125
Epoch [41/100], Loss: 19.78508186340332
Epoch [51/100], Loss: 16.22826385498047
Epoch [61/100], Loss: 13.342865943908691
Epoch [71/100], Loss: 11.140080451965332
Epoch [81/100], Loss: 9.490828514099121
Epoch [91/100], Loss: 8.268624305725098
Train Loss: 7.492022514343262
Test Loss: 6.736841678619385


Define the neural network architecture with more hidden layers and Sigmoid activation

In [155]:
class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.hidden = nn.Linear(input_size, hidden_size)
        self.sigmoid = nn.Sigmoid()
        self.output = nn.Linear(hidden_size, output_size)
    def forward(self, x):
        x = self.sigmoid(self.hidden(x))
        x = self.output(x)
        return x

input_size = X_train.shape[1]
hidden_size = 15
output_size = 1
model = NeuralNetwork(input_size, hidden_size, output_size)

criterion = nn.MSELoss()
optimizer = optim.Adagrad(model.parameters(), lr=0.5)
epochs = 100
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    loss.backward()
    optimizer.step()

model.eval()
with torch.no_grad():
    test_outputs = model(X_test_tensor)
    test_loss = criterion(test_outputs, y_test_tensor)
    print(f'Test Loss: {test_loss.item()}')

Test Loss: 4.88218355178833


As we increase the number of hidden layers from 1 to 100 we first see a rapid decrease in loss function to 5.7 at 10 hidden layers and then gradually decreasing to 5.02 at 100

Also, as we increase our learning rate(lr) from 0 to 1, the value of loss function decreases from 112 to 4.8

**We observe from above runs that, Back-Propagation Neural Network (BPN) with SGD Optimizer gives least error among all the models.**