<a href="https://colab.research.google.com/github/Gouthammajjari/Intro_to_ml/blob/main/project/Neural_network_(no_scaling).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
#Use the URL for the raw CSV data
url = 'https://raw.githubusercontent.com/Gouthammajjari/Intro_to_ml/main/project/data_preprocessed.csv'
df = pd.read_csv(url)

# Display the first 5 rows of the DataFrame
df.head()

Unnamed: 0,fare_amount,pickup_longitude,pickup_latitude,dropoff_longitude,dropoff_latitude,passenger_count,distance,day,hour,weekday,month,year
0,7.5,-73.999817,40.738354,-73.999512,40.723217,1,1.683323,7,19,3,5,2015
1,7.7,-73.994355,40.728225,-73.99471,40.750325,1,2.45759,17,20,4,7,2009
2,12.9,-74.005043,40.74077,-73.962565,40.772647,1,5.036377,24,21,0,8,2009
3,5.3,-73.976124,40.790844,-73.965316,40.803349,3,1.661683,26,8,4,6,2009
4,16.0,-73.925023,40.744085,-73.973082,40.761247,5,4.47545,28,17,3,8,2014


In [3]:

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [4]:
X = df.drop('fare_amount', axis = 1)
y = df[['fare_amount']]

In [5]:

X = pd.DataFrame(X, columns=X.columns, index=X.index).values
y = pd.DataFrame(y, columns=y.columns, index=y.index).values


In [6]:

train_X, val_X, train_Y, val_Y = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = torch.tensor(train_X)
y_train =  torch.tensor(train_Y)
X_val =  torch.tensor(val_X)
y_val =  torch.tensor(val_Y)

In [7]:

training_data = TensorDataset(X_train, y_train)
train_loader = DataLoader(training_data, batch_size=32, shuffle=True)

val_data = TensorDataset(X_val, y_val)
val_loader = DataLoader(val_data, batch_size=32)


In [8]:

def model_train(model, criterion, optimizer, epochs, train_loader, X_val, y_val):
    loss_train = []
    loss_val = []
    for epoch in range(epochs + 1):
        # Training
        model.train()
        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets.view(-1,1))
            loss.backward()
            optimizer.step()
        # Validation
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val)
            val_loss = criterion(val_outputs, y_val)
            if epoch % 50 == 0:
              print(f'Epoch {epoch}/{epochs}, Training Loss: {loss.item()}, Validation Loss: {val_loss.item()}')
            loss_val.append(val_loss)
            loss_train.append(loss)
    return loss_train, loss_val

In [9]:
model1 = nn.Sequential(
    nn.Linear(X_train.shape[1], 32),
    nn.Tanh(),
    nn.Linear(32, 1)
).to(torch.float64)
optimizer1 = optim.SGD(model1.parameters(), lr=0.01)


In [10]:
import time

start_time = time.time()
train_loss, val_loss = model_train(model1, nn.MSELoss(), optimizer1, 1000, train_loader, X_val, y_val)
end_time = time.time()
training_time = end_time - start_time
print(f"Training Time: {training_time} seconds")

Epoch 0/1000, Training Loss: 29.044459820690317, Validation Loss: 102.41610124599553
Epoch 50/1000, Training Loss: 437.3737711119014, Validation Loss: 104.32265634196828
Epoch 100/1000, Training Loss: 84.01752100495202, Validation Loss: 103.97240182483982
Epoch 150/1000, Training Loss: 124.06451732118009, Validation Loss: 102.42523602396017
Epoch 200/1000, Training Loss: 122.57191376168933, Validation Loss: 103.06551921774253
Epoch 250/1000, Training Loss: 39.518204832544654, Validation Loss: 102.61301344121165
Epoch 300/1000, Training Loss: 26.389157164501636, Validation Loss: 103.08320852811511
Epoch 350/1000, Training Loss: 70.61036240364022, Validation Loss: 102.91595238275994
Epoch 400/1000, Training Loss: 40.80499785440416, Validation Loss: 104.33877220611112
Epoch 450/1000, Training Loss: 89.80971277404977, Validation Loss: 106.87234811418531
Epoch 500/1000, Training Loss: 37.67221963871547, Validation Loss: 103.34266273687754
Epoch 550/1000, Training Loss: 31.392777278598082, V

In [11]:
torch.save(model1, 'model1.pth')

In [12]:
import pickle

# Save list to a file
with open('train_loss1.pkl', 'wb') as file:
    pickle.dump(train_loss, file)
with open('val_loss1.pkl', 'wb') as file:
    pickle.dump(val_loss, file)

In [13]:
def downsample_losses(train_loss, val_loss, step=50):
    train_loss_batch = [train_loss[i].detach().item() for i in range(0, len(train_loss), step)]
    val_loss_batch = [val_loss[i].detach().item() for i in range(0, len(val_loss), step)]
    return train_loss_batch, val_loss_batch

In [14]:
epochs = range(0, 1001, 50)

In [15]:
model1 = nn.Sequential(
    nn.Linear(X_train.shape[1], 32),
    nn.Tanh(),
    nn.Linear(32, 64),  # Second hidden layer with 64 neurons
    nn.Tanh(),
    nn.Linear(64, 16),  # Third hidden layer with 16 neurons
    nn.Tanh(),
    nn.Linear(16, 1) # Output layer with 1 neuron
).to(torch.float64)
criterion = nn.MSELoss()
optimizer1 = optim.Adam(model1.parameters(), lr=0.01)

In [None]:
import time

start_time = time.time()
train_loss2, val_loss2 = model_train(model1, nn.MSELoss(), optimizer1, 1000, train_loader, X_val, y_val)
end_time = time.time()
training_time = end_time - start_time
print(f"Training Time: {training_time} seconds")

Epoch 0/1000, Training Loss: 23.1641469510835, Validation Loss: 101.80513562625656
Epoch 50/1000, Training Loss: 103.25925641516267, Validation Loss: 101.97855204597286
Epoch 100/1000, Training Loss: 58.236677860357, Validation Loss: 101.84435428186329
Epoch 150/1000, Training Loss: 165.03528316024003, Validation Loss: 101.98914142436509
Epoch 200/1000, Training Loss: 43.2846619855687, Validation Loss: 101.9793767668257
Epoch 250/1000, Training Loss: 131.26770893422426, Validation Loss: 101.80200920867468
Epoch 300/1000, Training Loss: 167.37675403835433, Validation Loss: 102.0124654278065
Epoch 350/1000, Training Loss: 27.96075506238705, Validation Loss: 101.80198027097381
Epoch 400/1000, Training Loss: 49.295231197290455, Validation Loss: 102.16183836021139
Epoch 450/1000, Training Loss: 148.64603481539228, Validation Loss: 101.8153357468049
Epoch 500/1000, Training Loss: 91.62906109850165, Validation Loss: 101.81543997143802
Epoch 550/1000, Training Loss: 48.2844325054651, Validatio

In [None]:

torch.save(model1, 'model2.pth')

In [None]:
import pickle

# Save list to a file
with open('train_loss2.pkl', 'wb') as file:
    pickle.dump(train_loss2, file)
with open('val_loss2.pkl', 'wb') as file:
    pickle.dump(val_loss2, file)

In [None]:
train_loss_batch1, val_loss_batch1 = downsample_losses(train_loss, val_loss, step=50)
train_loss_batch2, val_loss_batch2 = downsample_losses(train_loss2, val_loss2, step=50)

In [None]:
plt.figure(figsize=(10, 8))

plt.subplot(2,2,1)
plt.plot(epochs, train_loss_batch1, color='blue', label = 'training Loss')
plt.plot(epochs, val_loss_batch1, color='red', label = 'validation Loss')
plt.title('Loss with Learning Rate of 0.01 (1 hidden layer)')
plt.legend()

plt.subplot(2,2,2)
plt.plot(epochs, train_loss_batch2, color='blue', label = 'training Loss')
plt.plot(epochs, val_loss_batch2, color='red', label = 'validation Loss')
plt.title('Loss with Learning Rate of 0.01 (3 Hidden Layers)')
plt.legend()

plt.show()

In [None]:
model1 = nn.Sequential(
    nn.Linear(X_train.shape[1], 32),
    nn.Tanh(),
    nn.Linear(32, 1)
).to(torch.float64)
optimizer1 = optim.SGD(model1.parameters(), lr=0.01)

In [None]:
import time

start_time = time.time()
train_loss5k, val_loss5k = model_train(model1, nn.MSELoss(), optimizer1, 5000, train_loader, X_val, y_val)
end_time = time.time()
training_time = end_time - start_time
print(f"Training Time: {training_time} seconds")

In [None]:
torch.save(model1, 'model15k.pth')

In [None]:
import pickle

# Save list to a file
with open('train_loss15k.pkl', 'wb') as file:
    pickle.dump(train_loss5k, file)
with open('val_loss15k.pkl', 'wb') as file:
    pickle.dump(val_loss5k, file)