In [4]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import r2_score, mean_squared_error
import numpy as np

import torch.nn as nn


In [3]:
# Locations of the pre-split training / validation tables.
TRAIN_CSV = "/content/final_train.csv"
VAL_CSV = "/content/final_val.csv"

# Read each split into its own DataFrame.
train = pd.read_csv(TRAIN_CSV)
val = pd.read_csv(VAL_CSV)

In [18]:
# Discard every training row that has at least one missing value.
# Only `train` is filtered here; `val` is left as loaded.
train = train.dropna(axis=0, how="any")


In [7]:
# Column that the model learns to predict; every other column is a feature.
TARGET_COLUMN = 'duration'

X_train, y_train = train.drop(columns=[TARGET_COLUMN]), train[TARGET_COLUMN]
X_val, y_val = val.drop(columns=[TARGET_COLUMN]), val[TARGET_COLUMN]

In [5]:
# Custom dataset class
class TaxiDataset(Dataset):
    """Map-style dataset pairing feature rows with regression targets.

    Args:
        X: Features, one row per sample. A pandas DataFrame, a NumPy array,
            or anything else ``np.asarray`` can turn into a numeric array.
        y: Targets, one entry per sample (pandas Series, NumPy array, ...).

    Raises:
        ValueError: If ``X`` and ``y`` do not contain the same number of samples.

    Both inputs are copied into float32 tensors once, at construction time,
    so later changes to the source objects do not affect the dataset.
    """

    def __init__(self, X, y):
        self.X = self._to_float_tensor(X)
        self.y = self._to_float_tensor(y)
        # Fail fast: a mismatch would otherwise only surface as an IndexError
        # (or silently unused targets) part-way through an epoch.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    @staticmethod
    def _to_float_tensor(data):
        """Copy array-like ``data`` into a new float32 tensor."""
        # np.asarray yields the underlying values for DataFrame/Series inputs and
        # passes ndarrays through; torch.tensor then always makes its own copy.
        return torch.tensor(np.asarray(data), dtype=torch.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [31]:
# Neural Network class
class TaxiNN(nn.Module):
    """Fully connected regressor: input_dim -> 256 -> 128 -> 64 -> 32 -> 1.

    Every hidden layer is followed by ReLU. Batch normalisation is applied to
    the outputs of fc2, fc3 and fc4, and dropout (p=0.3) after the fc2 and fc4
    stages. The final layer is linear and emits one value per sample.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()

        # Stage 1: plain linear projection (no normalisation, no dropout).
        self.fc1 = nn.Linear(in_features=input_dim, out_features=256)

        # Stage 2: linear -> batch norm -> ReLU -> dropout.
        self.fc2 = nn.Linear(in_features=256, out_features=128)
        self.bn1 = nn.BatchNorm1d(num_features=128)
        self.dropout1 = nn.Dropout(p=0.3)

        # Stage 3: linear -> batch norm -> ReLU.
        self.fc3 = nn.Linear(in_features=128, out_features=64)
        self.bn2 = nn.BatchNorm1d(num_features=64)

        # Stage 4: linear -> batch norm -> ReLU -> dropout.
        self.fc4 = nn.Linear(in_features=64, out_features=32)
        self.bn3 = nn.BatchNorm1d(num_features=32)
        self.dropout2 = nn.Dropout(p=0.3)

        # Output head: a single regression value.
        self.fc5 = nn.Linear(in_features=32, out_features=1)

    def forward(self, x):
        """Run the network on ``x`` and return one prediction per row.

        The result keeps a trailing axis of size 1 (the width of ``fc5``).
        """
        hidden = torch.relu(self.fc1(x))

        hidden = self.bn1(self.fc2(hidden))
        hidden = self.dropout1(torch.relu(hidden))

        hidden = self.bn2(self.fc3(hidden))
        hidden = torch.relu(hidden)

        hidden = self.bn3(self.fc4(hidden))
        hidden = self.dropout2(torch.relu(hidden))

        return self.fc5(hidden)

In [32]:
# Wrap each (features, target) pair in a TaxiDataset; the DataLoaders are built from these.
train_dataset = TaxiDataset(X=X_train, y=y_train)
val_dataset = TaxiDataset(X=X_val, y=y_val)

In [33]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# Training batches are reshuffled on every pass; validation keeps dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [34]:
# Initialize model, loss and optimizer

# Seed PyTorch's global RNG before anything random happens. Weight
# initialisation below, and the shuffling / dropout draws made later during
# training, then repeat from run to run in the same environment.
SEED = 42
torch.manual_seed(SEED)

LEARNING_RATE = 0.01

# One input unit per feature column of the training matrix.
model = TaxiNN(X_train.shape[1])
# Mean squared error between predicted and true targets.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

In [35]:
# Training-loop settings and early-stopping bookkeeping
epochs = 30                    # maximum number of passes over the training data
patience = 5                   # non-improving epochs tolerated before stopping
counter = 0                    # consecutive non-improving epochs seen so far
best_val_loss = float('inf')   # lowest validation loss so far; any finite value beats the start

In [36]:
# Train for up to `epochs` passes with early stopping on validation RMSE.
# Uses model, criterion, optimizer, train_loader, val_loader, epochs, patience,
# counter and best_val_loss as defined by the preceding cells.

# Copy of the weights from the best validation epoch so far (None until one exists).
best_state = None

for epoch in range(epochs):
    # ---- Training pass ----
    model.train()
    for X_batch, y_batch in train_loader:
        # Zero-fill NaN features so they do not propagate into the loss.
        if torch.isnan(X_batch).any():
            X_batch = torch.nan_to_num(X_batch, nan=0.0)

        optimizer.zero_grad()
        y_pred = model(X_batch)
        # unsqueeze(1) adds a trailing axis so the targets line up with the
        # model's single-column output instead of broadcasting against it.
        loss = criterion(y_pred, y_batch.unsqueeze(1))
        loss.backward()
        optimizer.step()

    # ---- Validation pass ----
    model.eval()
    val_predictions = []
    val_targets = []

    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            # Same NaN treatment as in training.
            if torch.isnan(X_batch).any():
                X_batch = torch.nan_to_num(X_batch, nan=0.0)

            y_pred = model(X_batch)
            # squeeze(1) removes only the output axis, so a final batch holding a
            # single row still yields a one-element list. A bare squeeze() would
            # collapse it to a scalar, which list.extend() cannot consume.
            val_predictions.extend(y_pred.squeeze(1).tolist())
            val_targets.extend(y_batch.tolist())

    val_predictions = np.array(val_predictions)
    val_targets = np.array(val_targets)

    # Metrics are undefined when NaNs are present. Score such an epoch as
    # "no improvement" rather than reading a val_rmse that was never computed
    # (first epoch) or is left over from an earlier epoch.
    if np.isnan(val_predictions).any() or np.isnan(val_targets).any():
        print("Warning: NaN values found in predictions or targets!")
        val_loss = float('inf')
    else:
        val_r2 = r2_score(val_targets, val_predictions)
        val_rmse = np.sqrt(mean_squared_error(val_targets, val_predictions))
        print(f'Epoch {epoch+1}: Val R2: {val_r2:.4f}, Val RMSE: {val_rmse:.4f}')
        val_loss = val_rmse

    # ---- Early stopping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        counter = 0
        # Snapshot the weights. state_dict() returns references to the live
        # tensors, so clone them or later optimizer steps overwrite the copy.
        best_state = {name: tensor.detach().clone()
                      for name, tensor in model.state_dict().items()}
    else:
        counter += 1
        if counter >= patience:
            print(f'Early stopping at epoch {epoch+1}')
            break

# Leave `model` holding the best-scoring weights rather than those of the last
# epoch that happened to run.
if best_state is not None:
    model.load_state_dict(best_state)

Epoch 1: Val R2: 0.7635, Val RMSE: 4.6257
Epoch 2: Val R2: 0.7409, Val RMSE: 4.8416
Epoch 3: Val R2: 0.7472, Val RMSE: 4.7824
Epoch 4: Val R2: 0.7761, Val RMSE: 4.5010
Epoch 5: Val R2: 0.7672, Val RMSE: 4.5893
Epoch 6: Val R2: 0.7716, Val RMSE: 4.5456
Epoch 7: Val R2: 0.7524, Val RMSE: 4.7328
Epoch 8: Val R2: 0.7812, Val RMSE: 4.4489
Epoch 9: Val R2: 0.7658, Val RMSE: 4.6029
Epoch 10: Val R2: 0.7763, Val RMSE: 4.4982
Epoch 11: Val R2: 0.7606, Val RMSE: 4.6533
Epoch 12: Val R2: 0.7823, Val RMSE: 4.4377
Epoch 13: Val R2: 0.7774, Val RMSE: 4.4879
Epoch 14: Val R2: 0.7675, Val RMSE: 4.5866
Epoch 15: Val R2: 0.7489, Val RMSE: 4.7663
Epoch 16: Val R2: 0.7841, Val RMSE: 4.4196
Epoch 17: Val R2: 0.7791, Val RMSE: 4.4706
Epoch 18: Val R2: 0.7797, Val RMSE: 4.4643
Epoch 19: Val R2: 0.7436, Val RMSE: 4.8164
Epoch 20: Val R2: 0.7683, Val RMSE: 4.5783
Epoch 21: Val R2: 0.7681, Val RMSE: 4.5799
Early stopping at epoch 21


In [39]:
# Persist only the learned parameters (the state dict), not the full module object.
MODEL_PATH = 'neural_network_model.pth'
torch.save(obj=model.state_dict(), f=MODEL_PATH)