### Load Data

In [1]:
import pandas as pd

In [2]:
# Loading data
data = pd.read_csv('Prodigy University Dataset.csv')
# Split the data into features (X) and target (y)
data.head()

Unnamed: 0,sat_sum,hs_gpa,fy_gpa
0,508,3.4,3.18
1,488,4.0,3.33
2,464,3.75,3.25
3,380,3.75,2.42
4,428,4.0,2.63


### Data pre-processing

In [3]:
# Converting data to numpy so that we can build tensor
X = data[['sat_sum', 'hs_gpa']].values
# reshape the fy_gpa into a 2D array with [data_size] rows and 1 column
y = data['fy_gpa'].values.reshape(-1, 1)
print(X.shape)
print(y.shape)

(1000, 2)
(1000, 1)


In [4]:
from sklearn.model_selection import train_test_split
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
from sklearn.preprocessing import StandardScaler

# Normalize the features so that it is easier to train the data
scaler = StandardScaler()
X_train= scaler.fit_transform(X_train)
X_test= scaler.fit_transform(X_test)

In [6]:
X_train.shape

(800, 2)

In [7]:
import torch
# Convert numpy to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

### Building the Model

In [8]:
import torch.nn as nn

In [9]:
# Building model with 2 neurons
model = nn.Sequential(
    nn.Linear(2, 2), # 2 input features, 1 hidden layer with 2 neurons
    nn.Sigmoid(), 
    nn.Linear(2, 1) # output layer with single neuron output
)

# We use sigmoid activation function for hidden layer and linear activation function for output layer

In [10]:
# Forward Propagation
preds = model(X_train_tensor)

In [11]:
preds[:5]

tensor([[0.0580],
        [0.0892],
        [0.2837],
        [0.1818],
        [0.2139]], grad_fn=<SliceBackward0>)

In [12]:
from torch.nn import MSELoss

In [13]:
# Calculating Loss
criterion = MSELoss()
loss = criterion(preds, y_train_tensor)
print(loss)
# very high loss

tensor(5.4397, grad_fn=<MseLossBackward0>)


# Optimization and Backpropagation

In [14]:
import torch.optim as optim

optimizer = optim.SGD(model.parameters(), lr = 0.001)

In [15]:
# Backpropagate
loss.backward()

In [16]:
# Apply the updated weights to the model
optimizer.step()

This was done just once but we need to do this several times

In [17]:
from torch.utils.data import TensorDataset, DataLoader

In [18]:
train_data = TensorDataset(X_train_tensor, y_train_tensor)

### Stochastic Gradient Descent

In [19]:
train_loader = DataLoader(train_data, batch_size=1, shuffle=True)
# Execute the training loop
for epoch in range(10): # 10 epochs
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad() #reset gradient set in previous step
        loss.backward()
        optimizer.step()

    train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
    # print(epoch,': ', train_loss)
    test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
    print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 1: Train Loss: 0.7061, Test Loss: 0.7871
Epoch 2: Train Loss: 0.6283, Test Loss: 0.6926
Epoch 3: Train Loss: 0.6004, Test Loss: 0.6606
Epoch 4: Train Loss: 0.5802, Test Loss: 0.6410
Epoch 5: Train Loss: 0.5665, Test Loss: 0.6281
Epoch 6: Train Loss: 0.5560, Test Loss: 0.6153
Epoch 7: Train Loss: 0.5483, Test Loss: 0.6069
Epoch 8: Train Loss: 0.5421, Test Loss: 0.6001
Epoch 9: Train Loss: 0.5367, Test Loss: 0.5935
Epoch 10: Train Loss: 0.5312, Test Loss: 0.5873


We see a reduction in train and test loss

In [20]:
# Looking at predictions
model(X_train_tensor)[:5]

tensor([[2.4711],
        [2.4711],
        [2.4699],
        [2.4758],
        [2.4672]], grad_fn=<SliceBackward0>)

### Batch Gradient Descent

In [21]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.SGD(model.parameters(), lr = 0.001)

In [23]:
len(X_train)
# This is what we will set the batch size as

800

In [24]:
train_loader = DataLoader(train_data, batch_size=800, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(1000): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 100 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 100: Train Loss: 3.4342, Test Loss: 3.6353
Epoch 200: Train Loss: 2.1002, Test Loss: 2.2633
Epoch 300: Train Loss: 1.3390, Test Loss: 1.4731
Epoch 400: Train Loss: 0.9103, Test Loss: 1.0225
Epoch 500: Train Loss: 0.6715, Test Loss: 0.7673
Epoch 600: Train Loss: 0.5394, Test Loss: 0.6231
Epoch 700: Train Loss: 0.4665, Test Loss: 0.5413
Epoch 800: Train Loss: 0.4259, Test Loss: 0.4943
Epoch 900: Train Loss: 0.4030, Test Loss: 0.4667
Epoch 1000: Train Loss: 0.3896, Test Loss: 0.4500


### Mini-batch Gradient Descent

In [25]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.SGD(model.parameters(), lr = 0.001)

In [26]:
train_loader = DataLoader(train_data, batch_size= 64, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(500): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 50 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 50: Train Loss: 1.0300, Test Loss: 1.1382
Epoch 100: Train Loss: 0.7152, Test Loss: 0.7870
Epoch 150: Train Loss: 0.6046, Test Loss: 0.6667
Epoch 200: Train Loss: 0.5249, Test Loss: 0.5825
Epoch 250: Train Loss: 0.4678, Test Loss: 0.5230
Epoch 300: Train Loss: 0.4282, Test Loss: 0.4824
Epoch 350: Train Loss: 0.4011, Test Loss: 0.4551
Epoch 400: Train Loss: 0.3827, Test Loss: 0.4368
Epoch 450: Train Loss: 0.3702, Test Loss: 0.4249
Epoch 500: Train Loss: 0.3616, Test Loss: 0.4169


### Gradient Descent with Momentum

In [27]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum=0.9)

In [28]:
train_loader = DataLoader(train_data, batch_size= 64, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(500): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 50 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 50: Train Loss: 0.3584, Test Loss: 0.4156
Epoch 100: Train Loss: 0.3460, Test Loss: 0.4055
Epoch 150: Train Loss: 0.3442, Test Loss: 0.4040
Epoch 200: Train Loss: 0.3435, Test Loss: 0.4029
Epoch 250: Train Loss: 0.3430, Test Loss: 0.4025
Epoch 300: Train Loss: 0.3426, Test Loss: 0.4022
Epoch 350: Train Loss: 0.3423, Test Loss: 0.4020
Epoch 400: Train Loss: 0.3420, Test Loss: 0.4018
Epoch 450: Train Loss: 0.3418, Test Loss: 0.4017
Epoch 500: Train Loss: 0.3415, Test Loss: 0.4013


### Nesterov Momentum

In [29]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.SGD(model.parameters(), lr = 0.001, momentum=0.9, nesterov=True)

In [30]:
train_loader = DataLoader(train_data, batch_size= 64, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(500): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 50 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 50: Train Loss: 0.3526, Test Loss: 0.4035
Epoch 100: Train Loss: 0.3469, Test Loss: 0.4014
Epoch 150: Train Loss: 0.3458, Test Loss: 0.4016
Epoch 200: Train Loss: 0.3452, Test Loss: 0.4019
Epoch 250: Train Loss: 0.3446, Test Loss: 0.4018
Epoch 300: Train Loss: 0.3441, Test Loss: 0.4009
Epoch 350: Train Loss: 0.3437, Test Loss: 0.4011
Epoch 400: Train Loss: 0.3433, Test Loss: 0.4005
Epoch 450: Train Loss: 0.3429, Test Loss: 0.4008
Epoch 500: Train Loss: 0.3426, Test Loss: 0.4003


### Adagrad

In [31]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.Adagrad(model.parameters())

In [32]:
train_loader = DataLoader(train_data, batch_size= 64, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(500): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 50 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 50: Train Loss: 6.4846, Test Loss: 6.7470
Epoch 100: Train Loss: 4.5405, Test Loss: 4.7661
Epoch 150: Train Loss: 3.2570, Test Loss: 3.4549
Epoch 200: Train Loss: 2.3882, Test Loss: 2.5641
Epoch 250: Train Loss: 1.7883, Test Loss: 1.9460
Epoch 300: Train Loss: 1.3680, Test Loss: 1.5101
Epoch 350: Train Loss: 1.0716, Test Loss: 1.2003
Epoch 400: Train Loss: 0.8614, Test Loss: 0.9784
Epoch 450: Train Loss: 0.7124, Test Loss: 0.8193
Epoch 500: Train Loss: 0.6072, Test Loss: 0.7054


### RMS Prop

In [33]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.RMSprop(model.parameters())

In [34]:
train_loader = DataLoader(train_data, batch_size= 64, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(500): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 50 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 50: Train Loss: 0.3417, Test Loss: 0.4007
Epoch 100: Train Loss: 0.3389, Test Loss: 0.4011
Epoch 150: Train Loss: 0.3389, Test Loss: 0.4017
Epoch 200: Train Loss: 0.3396, Test Loss: 0.3983
Epoch 250: Train Loss: 0.3391, Test Loss: 0.4042
Epoch 300: Train Loss: 0.3372, Test Loss: 0.4005
Epoch 350: Train Loss: 0.3389, Test Loss: 0.4046
Epoch 400: Train Loss: 0.3366, Test Loss: 0.3985
Epoch 450: Train Loss: 0.3367, Test Loss: 0.3987
Epoch 500: Train Loss: 0.3390, Test Loss: 0.3989


### Adam

In [35]:
# Reinitialising model weights
model = nn.Sequential(
    nn.Linear(2, 2),
    nn.Sigmoid(),
    nn.Linear(2, 1)
)
optimizer = optim.Adam(model.parameters())

In [36]:
train_loader = DataLoader(train_data, batch_size= 64, shuffle=True) #800 is the number of samples in train set
# Execute the training loop
for epoch in range(500): # increasing the epochs for effective training
    for X_batch, y_batch in train_loader:
        # Forward pass
        pred = model(X_batch)
        loss = criterion(pred, y_batch)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if (epoch+1) % 50 == 0: # printing after every 100 epochs
        train_loss = criterion(model(X_train_tensor), y_train_tensor).item()
        # print(epoch,': ', train_loss)
        test_loss = criterion(model(X_test_tensor), y_test_tensor).item()
        print(f'Epoch {epoch+1}: Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')

Epoch 50: Train Loss: 1.3292, Test Loss: 1.4643
Epoch 100: Train Loss: 0.4310, Test Loss: 0.4997
Epoch 150: Train Loss: 0.3758, Test Loss: 0.4283
Epoch 200: Train Loss: 0.3711, Test Loss: 0.4220
Epoch 250: Train Loss: 0.3674, Test Loss: 0.4190
Epoch 300: Train Loss: 0.3632, Test Loss: 0.4154
Epoch 350: Train Loss: 0.3589, Test Loss: 0.4125
Epoch 400: Train Loss: 0.3550, Test Loss: 0.4097
Epoch 450: Train Loss: 0.3516, Test Loss: 0.4079
Epoch 500: Train Loss: 0.3490, Test Loss: 0.4066


RMS Prop gave the best results so far