In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch import Tensor
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
# Seed PyTorch's global random number generator so runs are repeatable
# (the trailing semicolon suppresses the returned Generator repr in the cell output)
torch.manual_seed(0);

# Part (i)

In [None]:
# Import over-35 dataset
df = pd.read_csv('london_data_35+.csv')
# Encode the travel-mode labels as integer codes (one code per distinct label)
y = pd.CategoricalIndex(df.travel_mode).codes
# Every column except the label is used as a feature
X = df.loc[:, df.columns != 'travel_mode'].to_numpy()

# Split first: the leading n_train rows are for training, the rest for validation
n_train = 40000
X_train, y_train = X[:n_train], y[:n_train]
X_val, y_val = X[n_train:], y[n_train:]
assert len(X_val) > 0, "no rows left for validation after the first 40000"

# Standardize: fit the scaler on the training rows only, so that validation
# statistics do not leak into training, then apply the same transform to both splits
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

# Build train and validation dataloaders
batch_size = 128
train_dataset = TensorDataset(Tensor(X_train.copy()), Tensor(y_train.copy()))
val_dataset = TensorDataset(Tensor(X_val.copy()), Tensor(y_val.copy()))
# Reshuffle the training batches every epoch (as Part (iii) does); validation
# accuracy does not depend on batch order, so that loader stays unshuffled
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

## Train Neural Network

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected ReLU classifier that maps feature vectors to class logits.

    With the default arguments the network is 22 -> 64 -> 64 -> 64 -> 64 -> 4:
    four hidden linear layers of width 64, each followed by a ReLU, and a final
    linear layer that emits one raw (un-normalised) logit per class.

    Args:
        in_features: Number of input features per sample.
        hidden_features: Width of every hidden layer.
        n_hidden_layers: Number of hidden (Linear + ReLU) pairs; must be >= 1.
        n_classes: Number of output logits.

    Raises:
        ValueError: If ``n_hidden_layers`` is smaller than 1.
    """

    def __init__(self, in_features=22, hidden_features=64, n_hidden_layers=4, n_classes=4):
        super().__init__()
        if n_hidden_layers < 1:
            raise ValueError("n_hidden_layers must be at least 1")

        layers = []
        width_in = in_features
        for _ in range(n_hidden_layers):
            layers += [nn.Linear(width_in, hidden_features), nn.ReLU()]
            width_in = hidden_features
        layers.append(nn.Linear(width_in, n_classes))

        # Same attribute name and layer order as the hard-coded version, so the
        # state_dict keys (linear_relu_stack.0.weight, ...) are unchanged.
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw class logits for a batch ``x`` of shape (batch, in_features)."""
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
# Part (i) model, loss and optimiser
NN = NeuralNetwork()
# Cross-entropy on raw logits, averaged over the samples of each batch
CrossEntropy = nn.CrossEntropyLoss(reduction='mean')
# Adam over all network parameters with a fixed learning rate
optimizer = optim.Adam(params=NN.parameters(), lr=1e-4)

In [None]:
# Per-epoch accuracies, stored as plain Python floats
train_acc_history = []
val_acc_history = []

for epoch in range(150):

    # Training loop: one optimiser step per mini-batch.
    # The batch variables are deliberately NOT called X / y so the notebook-level
    # arrays of the same name are not overwritten by the loop.
    NN.train()
    n_correct = 0
    for X_batch, y_batch in train_loader:
        y_batch = y_batch.long() # labels are stored as floats; the loss needs integer class indices
        pred = NN(X_batch) # predict logits
        loss = CrossEntropy(pred, y_batch) # compute Cross Entropy loss
        loss.backward() # backward pass
        optimizer.step() # update step
        optimizer.zero_grad()
        # argmax of the logits equals argmax of their softmax, so no softmax is needed
        n_correct += (pred.argmax(dim=1) == y_batch).sum().item()
    # Running accuracy: each batch is scored with the weights in use before its own update
    train_acc = n_correct / max(len(train_dataset), 1)
    train_acc_history.append(train_acc)

    # Validation loop: no gradients, no parameter updates
    NN.eval()
    n_correct = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            pred = NN(X_batch)
            n_correct += (pred.argmax(dim=1) == y_batch.long()).sum().item()
    val_acc = n_correct / max(len(val_dataset), 1)
    val_acc_history.append(val_acc)

    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1} train accuracy:, {round(float(train_acc), 4)}, validation accuracy:, {round(float(val_acc), 4)}")

In [None]:
# Accuracy per epoch for the training and validation splits
fig, ax = plt.subplots(figsize=(6, 4))
for curve, name in ((train_acc_history, 'train'), (val_acc_history, 'validation')):
    ax.plot(curve, label=name)
ax.legend()
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
plt.show()

## Train Logistic Regression Model

In [None]:
# Unpenalised multi-class logistic regression baseline, fitted on the training split
logistic = LogisticRegression(penalty=None, max_iter=5000)
logistic.fit(X_train, y_train)

# Mean accuracy on each split, reported to three decimals
train_score = logistic.score(X_train, y_train)
val_score = logistic.score(X_val, y_val)
print('train accuracy:', round(train_score, 3))
print('validation accuracy:', round(val_score, 3))

# Part (ii)

## Compare out-of-distribution accuracy

In [None]:
# Import under-35 dataset
df = pd.read_csv('london_data_35-.csv')
# NOTE(review): the integer codes are derived from this file's own set of labels.
# They only line up with the codes the models were trained on if both files
# contain the same travel_mode categories — TODO confirm.
y = pd.CategoricalIndex(df.travel_mode).codes
X = df.loc[:, df.columns != 'travel_mode'].to_numpy()
# Reuse the already-fitted scaler (transform only, no refit on this data)
X = scaler.transform(X)

# Pure inference: switch to eval mode and skip building the autograd graph
NN.eval()
with torch.no_grad():
    pred = NN(Tensor(X.copy()))
# argmax of the logits equals argmax of their softmax, so no softmax is needed
acc = (pred.argmax(dim=1) == Tensor(y.copy())).type(torch.float).sum() / X.shape[0]
print('NN accuracy:', round(float(acc), 3))
print('logistic accuracy:', round(logistic.score(X, y), 3))

# Part (iii)

In [None]:
# Combine the over-35 and under-35 datasets into a single frame
# (previously the under-35 file was read twice, which left out the over-35 rows
# and put duplicates of every row on both sides of the train/validation split)
df = pd.concat(
    [pd.read_csv('london_data_35+.csv'), pd.read_csv('london_data_35-.csv')],
    ignore_index=True,
)
y = pd.CategoricalIndex(df.travel_mode).codes
X = df.loc[:, df.columns != 'travel_mode'].to_numpy()

# Random 80/20 train/validation split with a fixed seed
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

# Standardize: fit on the training split only so validation statistics do not
# leak into training. This rebinds `scaler`, replacing the Part (i) scaler.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)

train_dataset = TensorDataset(Tensor(X_train), Tensor(y_train))
val_dataset = TensorDataset(Tensor(X_val), Tensor(y_val))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# Validation accuracy does not depend on batch order, so no shuffling is needed
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

## Tune network architecture

In [None]:
# Tune the following neural network architecture
class NeuralNetwork(nn.Module):
    """Template classifier for Part (iii); the layer stack is left to be filled in.

    This class reuses the name of the Part (i) network, so running this cell
    replaces that definition for every later ``NeuralNetwork()`` call. While the
    stack is empty, the module returns its input unchanged.
    """

    def __init__(self):
        super().__init__()
        self.linear_relu_stack = nn.Sequential(
            # ADD LAYERS
        )

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result (the logits)."""
        return self.linear_relu_stack(x)

## Tune learning hyperparameters

In [None]:
# Learning hyperparameters for Part (iii). The two assignments below are
# placeholders: as written the cell is not valid Python until a value is
# supplied on each line.
lr = # SET LEARNING RATE
epochs = # SET NUMBER OF EPOCHS

# Fresh model, Adam optimiser using `lr`, and mean-reduced cross-entropy loss.
# These rebind NN / optimizer / CrossEntropy, replacing the Part (i) objects.
NN = NeuralNetwork()
optimizer = torch.optim.Adam(NN.parameters(), lr=lr)
CrossEntropy = nn.CrossEntropyLoss(reduction='mean')

In [None]:
# Per-epoch accuracies, stored as plain Python floats
train_acc_history = []
val_acc_history = []

# Honour the `epochs` hyperparameter set in the tuning cell (was hard-coded to 150)
for epoch in range(epochs):

    # Training loop: one optimiser step per mini-batch.
    # The batch variables are deliberately NOT called X / y so the notebook-level
    # arrays of the same name are not overwritten by the loop.
    NN.train()
    n_correct = 0
    for X_batch, y_batch in train_loader:
        y_batch = y_batch.long() # labels are stored as floats; the loss needs integer class indices
        pred = NN(X_batch) # predict logits
        loss = CrossEntropy(pred, y_batch) # compute Cross Entropy loss
        loss.backward() # backward pass
        optimizer.step() # update step
        optimizer.zero_grad()
        # argmax of the logits equals argmax of their softmax, so no softmax is needed
        n_correct += (pred.argmax(dim=1) == y_batch).sum().item()
    # Running accuracy: each batch is scored with the weights in use before its own update
    train_acc = n_correct / max(len(train_dataset), 1)
    train_acc_history.append(train_acc)

    # Validation loop: no gradients, no parameter updates
    NN.eval()
    n_correct = 0
    with torch.no_grad():
        for X_batch, y_batch in val_loader:
            pred = NN(X_batch)
            n_correct += (pred.argmax(dim=1) == y_batch.long()).sum().item()
    val_acc = n_correct / max(len(val_dataset), 1)
    val_acc_history.append(val_acc)

    if (epoch + 1) % 5 == 0:
        print(f"Epoch {epoch+1} train accuracy:, {round(float(train_acc), 4)}, validation accuracy:, {round(float(val_acc), 4)}")

In [None]:
# Learning curves for the tuned network: accuracy against epoch index
fig = plt.figure(figsize=(6, 4))
axes = fig.add_subplot()
axes.plot(train_acc_history, label='train')
axes.plot(val_acc_history, label='validation')
axes.legend()
axes.set(xlabel='Epochs', ylabel='Accuracy')
plt.show()