# L06 26/03/24

# Uber dataset

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

## Prepare the heat generators (select the GPU if available, otherwise the CPU)

In [2]:
# Prefer CUDA when PyTorch can see a GPU; otherwise run everything on the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

## Override Dataset

The target is the fare amount of the ride (the `fare_amount` column); every other column is used as an input feature.

In [3]:
class Dataset(torch.utils.data.Dataset):
    """In-memory regression dataset read from a whitespace-delimited text file.

    The ``fare_amount`` column becomes the target; all remaining columns are
    stacked into the feature matrix. Both are stored as ``float32`` tensors.
    """

    def __init__(self, csv):
        """Load ``csv`` (path or file-like) and split it into features and target."""
        frame = pd.read_csv(csv, sep=r'\s+')

        feature_frame = frame.drop(columns=['fare_amount'])
        target_series = frame['fare_amount']

        self.data = torch.tensor(feature_frame.to_numpy(), dtype=torch.float32)
        self.target = torch.tensor(target_series.to_numpy(), dtype=torch.float32)

    def __len__(self):
        """Number of rows (samples) held by the dataset."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at position ``idx``."""
        features = self.data[idx]
        label = self.target[idx]
        return features, label


## Create the Model

In [4]:
class Net(nn.Module):
    """Fully connected regressor: in_features -> 128 -> 128 -> 64 -> 16 -> 1.

    ReLU is applied after every layer except the last, so the output is an
    unbounded real value per sample.

    Args:
        in_features: Number of input features per sample (defaults to 50).
    """

    def __init__(self, in_features=50):
        super().__init__()

        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 16)
        self.fc5 = nn.Linear(16, 1)  # 1 output feature

    def forward(self, x):
        """Map features of shape ``(..., in_features)`` to predictions of shape ``(...)``."""
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)

        # Remove only the trailing size-1 output dimension. A bare squeeze()
        # would also drop the batch dimension for a batch of one, producing a
        # 0-dim tensor that no longer lines up with a (1,)-shaped target.
        return x.squeeze(-1)

## Validation

In [5]:
def validate(model, val_loader):
    """Return the mean absolute error (L1) of ``model`` over ``val_loader``.

    Absolute errors are summed over all samples and divided by the number of
    target elements. Averaging the per-batch means instead would give a
    smaller final batch the same weight as a full one.

    The model is switched to eval mode and left there; callers that continue
    training must call ``model.train()`` afterwards.

    Args:
        model: ``nn.Module`` mapping a feature batch to predictions.
        val_loader: Iterable of ``(data, target)`` tensor pairs.

    Returns:
        A 0-dim tensor with the mean absolute error (use ``.item()`` for a float).

    Raises:
        ValueError: If ``val_loader`` yields no samples.
    """
    model.eval()

    # Evaluate on the device the model lives on. A model without parameters
    # gives no hint, so batches are then used where the loader put them.
    first_param = next(model.parameters(), None)
    model_device = None if first_param is None else first_param.device

    abs_error_sum = 0.0
    n_targets = 0
    with torch.no_grad():
        for data, target in val_loader:
            if model_device is not None:
                data, target = data.to(model_device), target.to(model_device)
            output = model(data)
            abs_error_sum = abs_error_sum + F.l1_loss(output, target, reduction='sum')
            n_targets += target.numel()

    if n_targets == 0:
        raise ValueError("val_loader yielded no samples to validate on")
    return abs_error_sum / n_targets

In [9]:
# Seed PyTorch's global RNG before anything random happens so that weight
# initialisation and the shuffle order are repeatable from a fresh kernel.
seed = 42
torch.manual_seed(seed)

model = Net().to(device)

# NOTE(review): the training loop in this notebook calls F.l1_loss directly and
# never reads `criterion`; it is kept unchanged in case other cells rely on it.
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)
batch_size = 5000
epochs = 250
num_workers = 0
n_iter = 0  # global step counter used as the x-axis for TensorBoard scalars

# Single source of truth for the TensorBoard log directory; the
# `%tensorboard --logdir={experiment_name}` cell expands this variable.
experiment_name = 'Uber_experiment'

train_dataset = Dataset('../datasets/Uber/train.csv')
val_dataset = Dataset('../datasets/Uber/val.csv')

# drop_last=True keeps every training batch at exactly `batch_size` samples;
# the validation loader keeps its partial final batch so no sample is skipped.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

writer = SummaryWriter(experiment_name)

In [7]:
# Show TensorBoard inline in the notebook.
# %load_ext tensorboard
%reload_ext tensorboard
# NOTE(review): `{experiment_name}` is expanded from the Python variable of that
# name, so it must be defined before this cell runs and should equal the
# SummaryWriter log directory ('Uber_experiment').
%tensorboard --logdir={experiment_name}

## Train

In [10]:
# Train with the L1 (mean absolute error) objective. Per-step training loss and
# per-epoch validation loss are logged to TensorBoard against the global step.
for epoch in tqdm(range(epochs)):
    writer.add_scalar("epoch", epoch, n_iter)
    # validate() puts the model in eval mode, so switch back before each epoch.
    model.train()
    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.l1_loss(output, target)
        writer.add_scalar("loss", loss.item(), n_iter)
        loss.backward()
        optimizer.step()
        n_iter += 1
    # The "val" curve must come from the validation set, not from the last
    # training batch's loss.
    val_loss = validate(model, val_loader)
    writer.add_scalar("val", val_loss.item(), n_iter)

# Make sure buffered scalars reach disk so TensorBoard shows the full run.
writer.flush()
score = validate(model, val_loader)

print(f'Loss: {score.item()}')

 68%|██████▊   | 170/250 [04:29<02:06,  1.59s/it]


KeyboardInterrupt: 