**Importing the libraries**

In [22]:
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.autograd as autograd
from torch.autograd import Variable
from collections import deque, namedtuple

**Loading the dataset**

In [23]:
dataset=pd.read_csv('abalone.data.csv')
X=dataset.iloc[:,:-1].values
y=dataset.iloc[:,-1].values

In [24]:
X

array([['M', 0.455, 0.365, ..., 0.2245, 0.101, 0.15],
       ['M', 0.35, 0.265, ..., 0.0995, 0.0485, 0.07],
       ['F', 0.53, 0.42, ..., 0.2565, 0.1415, 0.21],
       ...,
       ['M', 0.6, 0.475, ..., 0.5255, 0.2875, 0.308],
       ['F', 0.625, 0.485, ..., 0.531, 0.261, 0.296],
       ['M', 0.71, 0.555, ..., 0.9455, 0.3765, 0.495]], dtype=object)

**Encoding gender**

In [25]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers=[('encoder', OneHotEncoder(), [0])], remainder='passthrough')
X = np.array(ct.fit_transform(X))

In [26]:
print(X)

[[0.0 0.0 1.0 ... 0.2245 0.101 0.15]
 [0.0 0.0 1.0 ... 0.0995 0.0485 0.07]
 [1.0 0.0 0.0 ... 0.2565 0.1415 0.21]
 ...
 [0.0 0.0 1.0 ... 0.5255 0.2875 0.308]
 [1.0 0.0 0.0 ... 0.531 0.261 0.296]
 [0.0 0.0 1.0 ... 0.9455 0.3765 0.495]]


**Data Splitting**

In [27]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1)

**Converting data into PytorchTensor**

In [28]:
X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.float32)
X_test = X_test.astype(np.float32)
y_test = y_test.astype(np.float32)

X_train = torch.tensor(X_train)
y_train = torch.tensor(y_train)
X_test = torch.tensor(X_test)
y_test = torch.tensor(y_test)

**Creating Arch for Neural Network**

In [29]:
class Network(nn.Module):

  def __init__(self, state_size, action_size, seed = 42):
    super(Network, self).__init__()
    self.seed = torch.manual_seed(seed)
    self.fc1 = nn.Linear(state_size, 64)
    self.fc2 = nn.Linear(64, 32)
    self.fc3 = nn.Linear(32, action_size)

  def forward(self, state):
    x = self.fc1(state)
    x = F.relu(x)
    x = self.fc2(x)
    x = F.relu(x)
    return self.fc3(x)

**Declaring Model and related Things**

In [30]:
# Define the model
model = Network(state_size=10, action_size=1)

# Define the criterion (loss function)
criterion = nn.MSELoss()

# Define the optimizer (Stochastic Gradient Descent)
optimizer = optim.SGD(model.parameters(), lr=0.01)

**Training and testing**

In [31]:
def train_model(model, criterion, optimizer, train_loader, epochs):
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(train_loader)}")

In [32]:
def evaluate(model, criterion, test_loader):
    model.eval()
    test_loss = 0.0
    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            test_loss += loss.item()
    return test_loss / len(test_loader)

In [33]:
from torch.utils.data import DataLoader, TensorDataset

# Create DataLoader for training data
train_dataset = TensorDataset(X_train, y_train)
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Create DataLoader for testing data
test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Train the model
epochs = 100
train_model(model, criterion, optimizer, train_loader, epochs)

# Evaluate the model
test_loss = evaluate(model, criterion, test_loader)
print(f"Test Loss: {test_loss}")

Epoch 1/100, Loss: 21.115905284881592


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 2/100, Loss: 10.932796631219253
Epoch 3/100, Loss: 11.554119559953797
Epoch 4/100, Loss: 11.08854323513103
Epoch 5/100, Loss: 10.946391060667217
Epoch 6/100, Loss: 10.735776253466335
Epoch 7/100, Loss: 11.008050045877132
Epoch 8/100, Loss: 11.011870033336136
Epoch 9/100, Loss: 10.822822111957478
Epoch 10/100, Loss: 10.643092434361296
Epoch 11/100, Loss: 10.91912762623913
Epoch 12/100, Loss: 10.755449582945626
Epoch 13/100, Loss: 10.850874208054453
Epoch 14/100, Loss: 10.708555194566834
Epoch 15/100, Loss: 10.517760515213013
Epoch 16/100, Loss: 10.757109435099476
Epoch 17/100, Loss: 10.734208772767264
Epoch 18/100, Loss: 10.623125193254003
Epoch 19/100, Loss: 10.824467029211656
Epoch 20/100, Loss: 10.764252365760083
Epoch 21/100, Loss: 11.289929201018136
Epoch 22/100, Loss: 10.923855286724162
Epoch 23/100, Loss: 11.14354054432995
Epoch 24/100, Loss: 10.733012136423364
Epoch 25/100, Loss: 10.831173815817204
Epoch 26/100, Loss: 10.6753188349166
Epoch 27/100, Loss: 10.638568842186118

  return F.mse_loss(input, target, reduction=self.reduction)


**Hypertuning**

In [34]:
class Network(nn.Module):

    def __init__(self, state_size, action_size, hidden_nodes, hidden_layers, seed=42):
        super(Network, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.hidden_layers = nn.ModuleList([nn.Linear(state_size, hidden_nodes)])
        self.hidden_layers.extend([nn.Linear(hidden_nodes, hidden_nodes) for _ in range(hidden_layers - 1)])
        self.output = nn.Linear(hidden_nodes, action_size)

    def forward(self, state):
        x = state
        for layer in self.hidden_layers:
            x = F.relu(layer(x))
        return self.output(x)


In [35]:
# Define hyperparameter values to tune
learning_rates = [0.01, 0.05, 0.1]
batch_sizes = [32,64]
hidden_nodes = [32, 128]
hidden_layers = [1, 2]

# Initialize a DataFrame to store observations
results = pd.DataFrame(columns=['Learning Rate', 'Batch Size', 'Hidden Nodes', 'Hidden Layers', 'MSE'])

# Iterate over hyperparameter combinations
for lr in learning_rates:
    for batch_size in batch_sizes:
        for nodes in hidden_nodes:
            for layers in hidden_layers:
                # Define and train the model with current hyperparameters
                model = Network(state_size=10, action_size=1, hidden_nodes=nodes, hidden_layers=layers)
                optimizer = optim.SGD(model.parameters(), lr=lr)
                train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
                train_model(model, criterion, optimizer, train_loader, epochs)

                # Evaluate the model on the validation set
                test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
                mse = evaluate(model, criterion, test_loader)

                # Record the observations
                results = pd.concat([results, pd.DataFrame({'Learning Rate': lr,
                                            'Batch Size': batch_size,
                                            'Hidden Nodes': nodes,
                                            'Hidden Layers': layers,
                                            'MSE': mse}, index=[0])], ignore_index=True)



  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/100, Loss: 15.65377368927002
Epoch 2/100, Loss: 10.720841880071731
Epoch 3/100, Loss: 10.637817155747186
Epoch 4/100, Loss: 10.782126476651147
Epoch 5/100, Loss: 10.613530102230254
Epoch 6/100, Loss: 10.71669027237665
Epoch 7/100, Loss: 10.642958209628151
Epoch 8/100, Loss: 10.785339614323208
Epoch 9/100, Loss: 10.657235774539766
Epoch 10/100, Loss: 10.704569898332869
Epoch 11/100, Loss: 10.670412817455473
Epoch 12/100, Loss: 10.617876588730585
Epoch 13/100, Loss: 10.62967058363415
Epoch 14/100, Loss: 10.644403566632953
Epoch 15/100, Loss: 10.681176480792818
Epoch 16/100, Loss: 10.654634244101388
Epoch 17/100, Loss: 10.702790850684757
Epoch 18/100, Loss: 10.653210603623164
Epoch 19/100, Loss: 10.643854867844354
Epoch 20/100, Loss: 10.727803975059873
Epoch 21/100, Loss: 10.678318446023123
Epoch 22/100, Loss: 10.709434491112118
Epoch 23/100, Loss: 10.69072841462635
Epoch 24/100, Loss: 10.702781248092652
Epoch 25/100, Loss: 10.680910446530296
Epoch 26/100, Loss: 10.636830942971367

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/100, Loss: 16.96027529126122
Epoch 2/100, Loss: 11.13758471806844
Epoch 3/100, Loss: 11.029406238737561
Epoch 4/100, Loss: 11.058195052828108
Epoch 5/100, Loss: 10.981804775056386
Epoch 6/100, Loss: 10.953333414168585
Epoch 7/100, Loss: 10.986280822753907
Epoch 8/100, Loss: 10.732587124052502
Epoch 9/100, Loss: 10.858589665095012
Epoch 10/100, Loss: 10.67996821176438
Epoch 11/100, Loss: 10.818375853129796
Epoch 12/100, Loss: 10.79281568073091
Epoch 13/100, Loss: 10.75046689623878
Epoch 14/100, Loss: 10.834370626722063
Epoch 15/100, Loss: 10.81799312773205
Epoch 16/100, Loss: 10.700234785534086
Epoch 17/100, Loss: 10.725620494570052
Epoch 18/100, Loss: 10.655122620718819
Epoch 19/100, Loss: 10.772741726466588
Epoch 20/100, Loss: 10.64645535378229
Epoch 21/100, Loss: 10.694864625022525
Epoch 22/100, Loss: 10.62953884942191
Epoch 23/100, Loss: 10.59887856074742
Epoch 24/100, Loss: 10.675733137130738
Epoch 25/100, Loss: 10.643858205704461
Epoch 26/100, Loss: 10.62018181482951
Epoch

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/100, Loss: 20.32336672297064
Epoch 2/100, Loss: 10.892962707663482
Epoch 3/100, Loss: 10.610075275852996
Epoch 4/100, Loss: 10.93366021030354
Epoch 5/100, Loss: 10.525208554177913
Epoch 6/100, Loss: 10.683980617883071
Epoch 7/100, Loss: 10.60504548954514
Epoch 8/100, Loss: 10.744359484258688
Epoch 9/100, Loss: 10.510836583263469
Epoch 10/100, Loss: 10.739296751202277
Epoch 11/100, Loss: 10.709341598006914
Epoch 12/100, Loss: 10.581515816022765
Epoch 13/100, Loss: 10.590799457622024
Epoch 14/100, Loss: 10.558506623753962
Epoch 15/100, Loss: 10.649582889844787
Epoch 16/100, Loss: 10.645589729525009
Epoch 17/100, Loss: 10.610521325525248
Epoch 18/100, Loss: 10.58938745282731
Epoch 19/100, Loss: 10.594097983162358
Epoch 20/100, Loss: 10.742177738333648
Epoch 21/100, Loss: 10.58660374947314
Epoch 22/100, Loss: 10.650580037314937
Epoch 23/100, Loss: 10.697195062097514
Epoch 24/100, Loss: 10.750469266243702
Epoch 25/100, Loss: 10.754419758634747
Epoch 26/100, Loss: 10.621043520153693


In [36]:
# Print tabulated results
print(results)

    Learning Rate Batch Size Hidden Nodes Hidden Layers       MSE
0            0.01         32           32             1  9.540209
1            0.01         32           32             2  9.594331
2            0.01         32          128             1  9.767952
3            0.01         32          128             2  9.594599
4            0.01         64           32             1  9.314798
5            0.01         64           32             2  9.454608
6            0.01         64          128             1  9.572047
7            0.01         64          128             2  9.633788
8            0.05         32           32             1  9.542487
9            0.05         32           32             2  9.553374
10           0.05         32          128             1  9.547860
11           0.05         32          128             2  9.543294
12           0.05         64           32             1  9.289015
13           0.05         64           32             2  9.312218
14        

**Compairing Adagrad and SGD**

In [37]:
class Network(nn.Module):

  def __init__(self, state_size, action_size, seed = 42):
    super(Network, self).__init__()
    self.seed = torch.manual_seed(seed)
    self.fc1 = nn.Linear(state_size, 64)
    self.fc2 = nn.Linear(64, 32)
    self.fc3 = nn.Linear(32, action_size)

  def forward(self, state):
    x = self.fc1(state)
    x = F.relu(x)
    x = self.fc2(x)
    x = F.relu(x)
    return self.fc3(x)

In [38]:
model = Network(state_size=10, action_size=1)

criterion = nn.MSELoss()

# Define the Adagrad optimizer
optimizer_adagrad = optim.Adagrad(model.parameters(), lr=0.1)

train_model(model, criterion, optimizer_adagrad, train_loader, epochs)

test_loss_adagrad = evaluate(model, criterion, test_loader)


# Define the SGD optimizer
optimizer_sgd = optim.SGD(model.parameters(), lr=0.1)

train_model(model, criterion, optimizer_sgd, train_loader, epochs)

test_loss_sgd = evaluate(model, criterion, test_loader)


Epoch 1/100, Loss: 16.169122893855256


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 2/100, Loss: 10.94445410314596
Epoch 3/100, Loss: 11.459477487600074
Epoch 4/100, Loss: 10.876520066891077
Epoch 5/100, Loss: 10.8078112962111
Epoch 6/100, Loss: 10.607053900664708
Epoch 7/100, Loss: 10.826662648398921
Epoch 8/100, Loss: 10.73447510881244
Epoch 9/100, Loss: 10.702914192991436
Epoch 10/100, Loss: 10.605248604180678
Epoch 11/100, Loss: 10.80018917119728
Epoch 12/100, Loss: 10.651664823855993
Epoch 13/100, Loss: 10.703673515679702
Epoch 14/100, Loss: 10.652518704252422
Epoch 15/100, Loss: 10.479391161000953
Epoch 16/100, Loss: 10.647518310906753
Epoch 17/100, Loss: 10.582206519144886
Epoch 18/100, Loss: 10.581535870174193
Epoch 19/100, Loss: 10.664650404228354
Epoch 20/100, Loss: 10.591202268060648
Epoch 21/100, Loss: 11.091433660039362
Epoch 22/100, Loss: 10.669152709673035
Epoch 23/100, Loss: 10.942477010331064
Epoch 24/100, Loss: 10.639168631355718
Epoch 25/100, Loss: 10.6801451737026
Epoch 26/100, Loss: 10.587676543109822
Epoch 27/100, Loss: 10.55294541592868
Ep

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 3/100, Loss: 10.665721146565563
Epoch 4/100, Loss: 10.530449048528132
Epoch 5/100, Loss: 10.671738606578899
Epoch 6/100, Loss: 10.563251540345966
Epoch 7/100, Loss: 10.90152905122289
Epoch 8/100, Loss: 10.726599360412022
Epoch 9/100, Loss: 10.58096416041536
Epoch 10/100, Loss: 10.490716142474481
Epoch 11/100, Loss: 10.571684828344381
Epoch 12/100, Loss: 10.49242154607233
Epoch 13/100, Loss: 10.537050670047975
Epoch 14/100, Loss: 10.508311847470841
Epoch 15/100, Loss: 10.604650848316696
Epoch 16/100, Loss: 10.905977474068695
Epoch 17/100, Loss: 10.48210515616075
Epoch 18/100, Loss: 10.616149047635636
Epoch 19/100, Loss: 10.639304089096358
Epoch 20/100, Loss: 10.541236220665699
Epoch 21/100, Loss: 10.515700205317083
Epoch 22/100, Loss: 10.8128638897302
Epoch 23/100, Loss: 10.559402213906342
Epoch 24/100, Loss: 10.492803294703645
Epoch 25/100, Loss: 10.881929766457036
Epoch 26/100, Loss: 10.560062912275207
Epoch 27/100, Loss: 10.437984790442124
Epoch 28/100, Loss: 10.585671280914882

In [39]:
print(f"Test Loss with Adagrad: {test_loss_adagrad}")
print(f"Test Loss with SGD: {test_loss_sgd}")

Test Loss with Adagrad: 9.384152582713536
Test Loss with SGD: 9.278236508369446


**Using Different activation function and layers and comparision**

In [40]:
class Networkx(nn.Module):

    def __init__(self, state_size, action_size, num_hidden_layers, seed=42):
        super(Networkx, self).__init__()
        self.seed = torch.manual_seed(seed)
        self.hidden_layers = nn.ModuleList([nn.Linear(state_size, 64)])
        self.hidden_layers.extend([nn.Linear(64, 64) for _ in range(num_hidden_layers - 1)])
        self.output = nn.Linear(64, action_size)

    def forward(self, state):
        x = state
        for layer in self.hidden_layers:
            x = torch.sigmoid(layer(x))
        return self.output(x)


In [41]:
# Define the model with 10 hidden layers and sigmoid activation function
model_with_sigmoid = Networkx(state_size=10, action_size=1, num_hidden_layers=10)

# Define the criterion (loss function)
criterion = nn.MSELoss()

# Define the optimizer (SGD)
optimizer = optim.SGD(model_with_sigmoid.parameters(), lr=0.1)

# Train the model
train_model(model_with_sigmoid, criterion, optimizer, train_loader, epochs)

# Evaluate the model
test_loss_sigmoid = evaluate(model_with_sigmoid, criterion, test_loader)


# Compare with the previous architecture
# Define the model with previous architecture
model_previous = Network(state_size=10, action_size=1)

# Define the optimizer (SGD)
optimizer = optim.SGD(model_previous.parameters(), lr=0.1)

# Train the model
train_model(model_previous, criterion, optimizer, train_loader, epochs)

# Evaluate the model
test_loss_previous = evaluate(model_previous, criterion, test_loader)


Epoch 1/100, Loss: 34.086685252639484
Epoch 2/100, Loss: 10.470642539690125
Epoch 3/100, Loss: 10.620572791909272
Epoch 4/100, Loss: 10.523494450551159
Epoch 5/100, Loss: 10.671226609427974
Epoch 6/100, Loss: 10.601879209842322
Epoch 7/100, Loss: 10.517224554745656
Epoch 8/100, Loss: 10.512722429239526
Epoch 9/100, Loss: 10.481573973061904
Epoch 10/100, Loss: 10.505812959850958
Epoch 11/100, Loss: 10.672176037194594
Epoch 12/100, Loss: 10.708386196280426
Epoch 13/100, Loss: 10.582567133993473
Epoch 14/100, Loss: 10.52053899585076
Epoch 15/100, Loss: 10.528720477841935
Epoch 16/100, Loss: 10.65225622788915
Epoch 17/100, Loss: 10.473404380510438
Epoch 18/100, Loss: 10.656068801879883
Epoch 19/100, Loss: 10.546274545057765
Epoch 20/100, Loss: 10.514759711499485
Epoch 21/100, Loss: 10.570265832936988
Epoch 22/100, Loss: 10.536612789585906
Epoch 23/100, Loss: 10.655741952500254
Epoch 24/100, Loss: 10.521608586581248
Epoch 25/100, Loss: 10.498464962221542
Epoch 26/100, Loss: 10.6621997941215

In [42]:
print(f"Test Loss with 10 Hidden Layers and Sigmoid Activation: {test_loss_sigmoid}")
print(f"Test Loss with Previous Architecture: {test_loss_previous}")

Test Loss with 10 Hidden Layers and Sigmoid Activation: 9.346244011606489
Test Loss with Previous Architecture: 9.292383296149117
