In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

In [4]:
df = pd.read_csv('abalone.data.csv')

one_hot_encoded = pd.get_dummies(df['Sex'], prefix='Sex').astype(int)
scaler_y = MinMaxScaler();
y_unscaled = df['Rings']
df['Rings'] = scaler_y.fit_transform(df[['Rings']])
df_encoded = pd.concat([df, one_hot_encoded], axis=1)
df_encoded.drop('Sex', axis=1, inplace=True)

df_encoded.head()

Unnamed: 0,Length,Diameter,Height,Whole_weight,Shucked_weight,Viscera_weight,Shell_weight,Rings,Sex_F,Sex_I,Sex_M
0,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,0.5,0,0,1
1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,0.214286,0,0,1
2,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,0.285714,1,0,0
3,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,0.321429,0,0,1
4,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,0.214286,0,1,0


In [10]:
y = df_encoded['Rings']
X = df_encoded.drop(columns='Rings')
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
def convertToTensor(fr):
    tensors = []
    for col in fr.columns:
        # Convert each column to a PyTorch tensor
        tensor = torch.tensor(fr[col].values)
        tensors.append(tensor)

    # Concatenate the tensors along the second dimension
    tensor_2d = torch.stack(tensors, dim=1)
    return tensor_2d


In [12]:
y_train_tensor = torch.tensor(y_train.values).float()
y_test_tensor = torch.tensor(y_test.values).float()


In [14]:
X_train_tensor = convertToTensor(X_train).float()
X_test_tensor = convertToTensor(X_test).float()

In [15]:
class ANN(nn.Module):
    def __init__(self):
        super(ANN, self).__init__()
        self.input_layer = nn.Linear(10, 64)  # Input layer with 10 nodes
        self.hidden_layer1 = nn.Linear(64, 32)  # First hidden layer with 64 nodes
        self.hidden_layer2 = nn.Linear(32, 1)  # Second hidden layer with 32 nodes
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.input_layer(x))
        x = self.relu(self.hidden_layer1(x))
        x = self.hidden_layer2(x)
        return x



In [17]:
learning_rate = 0.01

# Create an instance of the ANN
model = ANN()

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# Training the model
num_epochs = 5000
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 500 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [500/5000], Loss: 0.0134
Epoch [1000/5000], Loss: 0.0133
Epoch [1500/5000], Loss: 0.0132
Epoch [2000/5000], Loss: 0.0132
Epoch [2500/5000], Loss: 0.0132
Epoch [3000/5000], Loss: 0.0132
Epoch [3500/5000], Loss: 0.0132
Epoch [4000/5000], Loss: 0.0132
Epoch [4500/5000], Loss: 0.0131
Epoch [5000/5000], Loss: 0.0131


In [18]:
def evaluate(model):
  model.eval()
  with torch.no_grad():
      outputs = model(X_test_tensor)
      mse = nn.MSELoss()
      loss = mse(outputs, y_test_tensor)


  print(loss)

In [19]:
evaluate(model)

tensor(0.0138)


  return F.mse_loss(input, target, reduction=self.reduction)


In [29]:
## With AdaGrad
learning_rate = 0.1

# Create an instance of the ANN
model = ANN()

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

# Training the model
num_epochs = 200
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [10/200], Loss: 0.0156
Epoch [20/200], Loss: 0.0133
Epoch [30/200], Loss: 0.0132
Epoch [40/200], Loss: 0.0132
Epoch [50/200], Loss: 0.0132
Epoch [60/200], Loss: 0.0132
Epoch [70/200], Loss: 0.0132
Epoch [80/200], Loss: 0.0131
Epoch [90/200], Loss: 0.0131
Epoch [100/200], Loss: 0.0131
Epoch [110/200], Loss: 0.0131
Epoch [120/200], Loss: 0.0131
Epoch [130/200], Loss: 0.0131
Epoch [140/200], Loss: 0.0131
Epoch [150/200], Loss: 0.0131
Epoch [160/200], Loss: 0.0131
Epoch [170/200], Loss: 0.0131
Epoch [180/200], Loss: 0.0131
Epoch [190/200], Loss: 0.0131
Epoch [200/200], Loss: 0.0131


In [21]:
class CustomANN(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(CustomANN, self).__init__()
        self.input_layer = nn.Linear(input_size, hidden_sizes[0])
        self.hidden_layers = nn.ModuleList([nn.Linear(hidden_sizes[i], hidden_sizes[i+1]) for i in range(len(hidden_sizes)-1)])
        self.output_layer = nn.Linear(hidden_sizes[-1], output_size)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.sigmoid(self.input_layer(x))
        for layer in self.hidden_layers:
            x = self.sigmoid(layer(x))
        x = self.output_layer(x)
        return x

In [27]:
## With AdaGrad
learning_rate = 0.01

# Create an instance of the ANN
model_10 = CustomANN(10,[64,64,64,32,32,32,16,16,8,8],1)

# Define loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adagrad(model.parameters(), lr=learning_rate)

# Training the model
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass
    outputs = model_10(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward Propagation and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch [10/100], Loss: 0.3737
Epoch [20/100], Loss: 0.3737
Epoch [30/100], Loss: 0.3737
Epoch [40/100], Loss: 0.3737
Epoch [50/100], Loss: 0.3737
Epoch [60/100], Loss: 0.3737
Epoch [70/100], Loss: 0.3737
Epoch [80/100], Loss: 0.3737
Epoch [90/100], Loss: 0.3737
Epoch [100/100], Loss: 0.3737


In [26]:
evaluate(model_10)

tensor(0.0372)


  return F.mse_loss(input, target, reduction=self.reduction)


###Summary


*   Using sigmoid the loss does not decrease at all, hence we can conclude vanshing gradient problem is occuring.
*   Hence, Relu is better for minimizng losses as compared to Sigmoid.



