# AADT Training

## Imports

In [None]:
!pip install torchmetrics

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torchmetrics
  Downloading torchmetrics-0.11.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torchmetrics
Successfully installed torchmetrics-0.11.4


In [None]:
import pandas as pd
import numpy as np
import os
import torch
import random
import cv2
from tqdm import tqdm
from matplotlib import pyplot as plt
import albumentations as album
from PIL import Image
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import random_split
from sklearn.metrics import accuracy_score
import plotly.express as px
import torchmetrics
from torchmetrics import MeanAbsolutePercentageError
from glob import glob
import plotly.graph_objs as go

%matplotlib inline

## Global Variables

In [None]:
# True: mount Google Drive and read/write the project folder stored there.
# False: use the parent of the current working directory as the project root.
COLAB = True

In [None]:
# Default project root: the parent of the current working directory.
ROOT_DIR_PATH = os.path.abspath('..')

if COLAB:
  from google.colab import drive

  # Anchor the project root to the absolute mount point so the paths below stay
  # valid even if the working directory is not /content when this cell runs.
  DRIVE_MOUNT_POINT = '/content/drive'
  drive.mount(DRIVE_MOUNT_POINT)

  ROOT_DIR_PATH = os.path.join(DRIVE_MOUNT_POINT, 'MyDrive', 'Spatial_Finance_Transport')

# Input data locations (relative to the project root).
VEHICLE_DETECTION_COUNT_PATH = os.path.join(ROOT_DIR_PATH, 'data/vehicle_counts_detection.csv')
AADT_PROCESSED_PATH = os.path.join(ROOT_DIR_PATH, 'data/ground_truth_data/aadt/processed/')

# Output directory for the trained per-site model weights.
NN_MODEL_PATH = os.path.join(ROOT_DIR_PATH, "models/aadt_models/")

# Selects the '*_normalised' vehicle-count columns when building datasets.
NORMALISED = True

Mounted at /content/drive


## Helper Functions

In [None]:
def zip_lists_by_name_attribute(list1, list2):
    """
    Zips two lists together based on the 'name' attribute of the elements.

    Pairs are returned in the order in which the names appear in list1, so the
    result is deterministic from run to run. If a name occurs more than once in
    a list, the last element carrying that name is the one that gets paired.

    Args:
        list1 (list): The first list to be zipped.
        list2 (list): The second list to be zipped.

    Returns:
        list: The zipped list of tuples, where each tuple contains elements from list1 and list2
              that have the same 'name' attribute value. Elements whose name is missing
              from the other list are dropped.
    """
    # Index both lists by name; dicts keep insertion order, which fixes the output order
    dict1 = {elem.name: elem for elem in list1}
    dict2 = {elem.name: elem for elem in list2}

    # Walk list1's names (not a set) so the pairing order is reproducible
    return [(elem, dict2[name]) for name, elem in dict1.items() if name in dict2]

## True Count Data
- Traffic monitoring stations for long-term traffic count data
    - Extract at same time as Satellite Image!
- How to use permanent and temporary traffic count stations

## Vehicle detection number
From vehicle detection model

## Road characteristics
From the road characteristics pipeline

Includes:
- Road width
- Live speed data
- Directionality

## Data Loading

In [None]:
# Year whose files are held out as the test set; every other year is used for training.
TEST_YEAR = 2018

pattern = os.path.join(AADT_PROCESSED_PATH, 'aadt_*.csv')

# glob() returns files in arbitrary order; sort so the per-site lists built from
# these frames come out in the same order on every run.
processed_aadt_file_paths = sorted(glob(pattern))
print("Processed aadt files: {}".format(processed_aadt_file_paths))

processed_aadt_df_list = []
processed_aadt_df_test_list = []

for file_path in processed_aadt_file_paths:
    raw_aadt_df = pd.read_csv(file_path)

    # An empty file has no first row to take the site name / year from
    if raw_aadt_df.empty:
        print("Skipping empty aadt file: {}".format(file_path))
        continue

    # Drop the index columns that a CSV round-trip leaves behind ('Unnamed: 0', ...).
    # .copy() makes this an independent frame, so the column assignment below
    # no longer triggers pandas' SettingWithCopyWarning.
    processed_aadt_df = raw_aadt_df.loc[:, ~raw_aadt_df.columns.str.contains('^Unnamed')].copy()

    processed_aadt_df['site_name'] = processed_aadt_df['site_name'].astype(str)

    # Site identifier '<Local Authority>_<site_name>', taken from the first row,
    # with '/' replaced by '_'. It is attached as a plain attribute and later
    # used to pair datasets with models.
    first_row = processed_aadt_df.iloc[0]
    processed_aadt_df.name = first_row['Local Authority']+'_'+first_row['site_name'].replace('/', '_')

    # The year of the first row decides which split the whole file belongs to
    if first_row['year'] == TEST_YEAR:
        processed_aadt_df_test_list.append(processed_aadt_df)
    else:
        processed_aadt_df_list.append(processed_aadt_df)

Processed aadt files: ['/content/drive/MyDrive/Spatial_Finance_Transport/data/ground_truth_data/aadt/processed/aadt_Blackburn with Darwen_30361033_2018.csv', '/content/drive/MyDrive/Spatial_Finance_Transport/data/ground_truth_data/aadt/processed/aadt_Havering_M25_5790A_2017.csv', '/content/drive/MyDrive/Spatial_Finance_Transport/data/ground_truth_data/aadt/processed/aadt_Havering_M25_5790A_2018.csv', '/content/drive/MyDrive/Spatial_Finance_Transport/data/ground_truth_data/aadt/processed/aadt_Blackburn with Darwen_30361033_2017.csv', '/content/drive/MyDrive/Spatial_Finance_Transport/data/ground_truth_data/aadt/processed/aadt_Luton_M1_2557A_2017.csv', '/content/drive/MyDrive/Spatial_Finance_Transport/data/ground_truth_data/aadt/processed/aadt_Havering_M25_5790B_2017.csv', '/content/drive/MyDrive/Spatial_Finance_Transport/data/ground_truth_data/aadt/processed/aadt_Havering_M25_5790B_2018.csv', '/content/drive/MyDrive/Spatial_Finance_Transport/data/ground_truth_data/aadt/processed/aadt_Bla

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  processed_aadt_df['site_name'] = processed_aadt_df['site_name'].astype(str)


## Custom Dataset

In [None]:
class CustomDataset(Dataset):
    """Tabular dataset built from one site's processed AADT DataFrame.

    Each item is a ``(name, features, label)`` triple, where ``name`` is the
    frame's ``name`` attribute, ``label`` is the 'all_motor_vehicles' value and
    ``features`` is a 9-element float32 vector ordered as:
    total volume, the four vehicle-length classes (shortest to longest),
    avg_mph, day, month, hour.

    Args:
        df: DataFrame carrying a ``name`` attribute and the columns read below.
        normalised: when truthy, the '*_normalised' vehicle-count columns are
            used instead of the raw count columns.
    """

    def __init__(self, df, normalised=True):

        def column(col_name):
            # One DataFrame column as a float32 tensor of shape (len(df), 1)
            values = df[col_name].values.astype('float32')
            return torch.tensor(values).unsqueeze(1)

        self.name = df.name

        # Targets stay 1-D: one value per row
        self.labels = torch.tensor(df['all_motor_vehicles'].values.astype('float32'))

        self.hour = column('hour')
        self.avg_mph = column('avg_mph')
        self.day = column('day')
        self.month = column('month')

        vehicle_types = (
            ['0-520cm_normalised', '521-660cm_normalised', '661-1160cm_normalised', '1160+cm_normalised', 'total_volume_normalised']
            if normalised
            else ['0-520cm', '521-660cm', '661-1160cm', '1160+cm', 'total_volume']
        )
        small_col, mid_col, large_col, very_large_col, total_col = vehicle_types

        self.small_vehicle = column(small_col)
        self.mid_vehicle = column(mid_col)
        self.large_vehicle = column(large_col)
        self.very_large_vehicle = column(very_large_col)
        self.vehicle_count = column(total_col)

        # Feature matrix: one row per sample, columns in the order listed here
        feature_columns = (
            self.vehicle_count,
            self.small_vehicle,
            self.mid_vehicle,
            self.large_vehicle,
            self.very_large_vehicle,
            self.avg_mph,
            self.day,
            self.month,
            self.hour,
        )
        self.x = torch.concat(feature_columns, dim=-1)
        self.y = self.labels

    def __len__(self):
        """Number of rows (samples) in the dataset."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(dataset name, feature vector, label)`` for row ``idx``."""
        return self.name, self.x[idx], self.y[idx]

In [None]:
# One CustomDataset per training frame, in the same order as processed_aadt_df_list
dataset_list = [
    CustomDataset(site_df, normalised=NORMALISED)
    for site_df in processed_aadt_df_list
]

## Neural Network Models

In [None]:
class NeuralNetwork(nn.Module):
    """Small fully-connected regressor mapping 9 input features to 1 output.

    Layers, in order: Linear(9, 9), Linear(9, 5), LeakyReLU(0.2), Linear(5, 1),
    ReLU. The closing ReLU means the output is never negative.

    Args:
        name: identifier stored on the instance as ``self.name``.
    """

    def __init__(self, name):
        super().__init__()

        self.name = name

        # The positions in this list become the Sequential indices used in state_dict keys
        layers = [
            nn.Linear(9, 9),
            nn.Linear(9, 5),
            nn.LeakyReLU(negative_slope=0.2),
            nn.Linear(5, 1),
            nn.ReLU(),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the result."""
        return self.linear_relu_stack(x)

In [None]:
# One model per training frame, named after that frame so it can be paired with its dataset
nn_model_list = [NeuralNetwork(name=site_df.name) for site_df in processed_aadt_df_list]

# Display the first model's architecture
nn_model_list[0]

NeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=9, out_features=9, bias=True)
    (1): Linear(in_features=9, out_features=5, bias=True)
    (2): LeakyReLU(negative_slope=0.2)
    (3): Linear(in_features=5, out_features=1, bias=True)
    (4): ReLU()
  )
)

## EarlyStopping

In [None]:
class EarlyStopper:
    """Tracks the best validation loss and signals when it has stopped improving.

    Args:
        patience: number of "worse" observations needed before a stop is signalled.
        min_delta: margin above the best loss a value must exceed to count as worse.
    """

    def __init__(self, patience=1, min_delta=0):
        self.patience, self.min_delta = patience, min_delta
        self.counter = 0
        self.min_validation_loss = np.inf

    def early_stop(self, validation_loss):
        """Record one validation loss; return True once patience is exhausted.

        A new minimum resets the counter. A loss above ``best + min_delta``
        increments it. Anything in between leaves the counter unchanged.
        """
        if validation_loss < self.min_validation_loss:
            # New best: remember it and start counting from scratch
            self.min_validation_loss = validation_loss
            self.counter = 0
            return False

        threshold = self.min_validation_loss + self.min_delta
        if not validation_loss > threshold:
            # Within the tolerated margin: neither an improvement nor a strike
            return False

        self.counter += 1
        return self.counter >= self.patience

## Weights Initialisation

In [None]:
def init_weights(m):
    """Initialise an ``nn.Linear`` module in place; leave any other module alone.

    Every weight is set to 1 and every bias to 0.01. Intended to be passed to
    ``nn.Module.apply`` so that it visits each sub-module of a model.
    """
    if not isinstance(m, nn.Linear):
        return

    # In-place fills outside autograd, so the parameters stay trainable leaves
    with torch.no_grad():
        m.weight.fill_(1.0)
        m.bias.fill_(0.01)

# Module.apply calls init_weights on every sub-module, so each Linear layer of
# each per-site model is re-initialised here.
for nn_model in nn_model_list:
    nn_model.apply(init_weights)

## HyperParameters

In [None]:
# Optimiser step size; read by main() when it builds one Adam optimiser per model
learning_rate = 0.1
# Samples per batch for the train/validation DataLoaders
batch_size = 256
# Passes over the training data, per model
epochs = 2
# Training criterion (mean squared error)
loss_fn = nn.MSELoss()
# NOTE(review): `nn_model` is not defined in this cell; it is whatever the name
# was last bound to by an earlier cell's loop, so this optimiser only covers a
# single model's parameters. main() creates its own optimiser and loss per model.
optimizer = torch.optim.Adam(nn_model.parameters(), lr=learning_rate)

# Metric object shared by the training, validation and test loops
MAPE = MeanAbsolutePercentageError()

## DataLoaders

In [None]:
# Fraction of each site's data used for training; the remainder is validation.
train_split = 0.8
# Fixed seed so the train/validation split and the shuffling order are reproducible.
split_seed = 42

train_dataloader_list = []
val_dataloader_list = []

split_generator = torch.Generator().manual_seed(split_seed)
shuffle_generator = torch.Generator().manual_seed(split_seed)

for dataset in dataset_list:
    # Fractional lengths: random_split sizes the two subsets from the dataset length
    train_data, val_data = random_split(dataset, [train_split, 1-train_split], generator=split_generator)

    # Only non-default options are passed. In particular prefetch_factor is left
    # alone: passing it explicitly with num_workers=0 is rejected by PyTorch 1.x.
    # Training batches are reshuffled every epoch; validation order stays fixed.
    train_dataloader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True,
                                                   generator=shuffle_generator)

    val_dataloader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, shuffle=False)

    train_dataloader_list.append(train_dataloader)
    val_dataloader_list.append(val_dataloader)

## Training & Validation

In [None]:
def run_epoch(ep_id, action, loader, model, optimizer, criterion, early_stopper):
    """Run one pass over `loader`, either training or evaluating `model`.

    Args:
        ep_id: zero-based epoch index (only used in the progress message).
        action: "train" to update the weights; any other value evaluates only.
        loader: iterable of `(names, x, y)` batches.
        model: network to run; `model.name` must equal the batch's dataset name.
        optimizer: optimiser stepped after each training batch.
        criterion: loss that is back-propagated (and printed).
        early_stopper: object with an `early_stop(value)` method. It is consulted
            once per non-training pass with the mean batch MAPE of that pass.
            Its verdict is only reported here; ending the epoch loop is the
            caller's responsibility.

    Returns:
        list: the per-batch MAPE values (0-d numpy arrays), one per batch in `loader`.
    """
    losses = []  # per-batch MAPE values
    is_training = action == "train"

    # Put the model in the matching mode for this pass
    model.train(is_training)

    for batch_idx, batch in enumerate(loader):
        dl_name, x, y = batch

        # Assert we are training the correct model with the correct dataset!
        assert dl_name[0] == model.name

        # Gradients are only accumulated (and so only need clearing) when training
        if is_training:
            optimizer.zero_grad()

        # Autograd is switched off entirely for evaluation passes
        with torch.set_grad_enabled(is_training):

            # Model output has a trailing dimension of size 1; drop it to match y
            logits = model(x).squeeze(1)

            # `criterion` drives the optimisation; MAPE is what gets tracked
            loss = criterion(logits, y)
            mape_loss = MAPE(logits, y)

            if is_training:
                loss.backward()
                optimizer.step()

        losses.append(mape_loss.detach().numpy())

        # Print some stats every 50th batch
        if batch_idx % 50 == 0:
            print(f"{action.capitalize()}, Epoch: {ep_id+1}, Batch {batch_idx}: Loss = {loss.item()}")

    # Early stopping looks at the whole validation pass rather than at single
    # batches, so every validation batch is evaluated and one noisy batch
    # cannot trigger (or reset) the stopper on its own.
    if not is_training and losses:
        epoch_mape = float(np.mean(losses))
        if early_stopper.early_stop(epoch_mape):
            print("Entered Early Stopping")

    return losses

In [None]:
def train(epochs, train_dl, val_dl, model, optimizer, criterion, early_stopper):
    """Train `model` for up to `epochs` epochs, validating after each one.

    Training ends early once `early_stopper` has run out of patience.

    Args:
        epochs: maximum number of epochs.
        train_dl: loader passed to `run_epoch` for the training pass.
        val_dl: loader passed to `run_epoch` for the validation pass.
        model, optimizer, criterion: forwarded to `run_epoch` unchanged.
        early_stopper: stopper exposing `counter` and `patience`; it is updated
            inside the validation pass.

    Returns:
        tuple: `(train_losses, val_losses)`, the values returned by `run_epoch`
        concatenated over all epochs that ran.
    """
    train_losses = []
    val_losses = []

    for epoch in range(epochs):

        # One optimisation pass over the training data
        train_losses += run_epoch(epoch, "train", train_dl, model, optimizer, criterion, early_stopper=early_stopper)

        # One evaluation pass over the validation data (this updates the stopper)
        val_losses += run_epoch(epoch, "val", val_dl, model, optimizer, criterion, early_stopper=early_stopper)

        # The stopper's counter is only incremented when it is about to be compared
        # with `patience`, so counter >= patience means it has signalled a stop.
        # max(..., 1) keeps patience=0 from ending training before any strike.
        if early_stopper.counter >= max(early_stopper.patience, 1):
            print(f"Early stopping after epoch {epoch+1}")
            break

    return train_losses, val_losses

In [None]:
def main(epochs, train_dl_list, val_dl_list, model_list, lr=None, patience=3, min_delta=10):
    """Train every model on its own train/validation loaders.

    The three lists are matched by position, so they must have equal length.
    Each model gets a fresh Adam optimiser, MSE loss and EarlyStopper.

    Args:
        epochs: maximum number of epochs per model.
        train_dl_list: training loaders, one per model.
        val_dl_list: validation loaders, one per model.
        model_list: models to train; each must expose `name`.
        lr: Adam learning rate; defaults to the notebook-level `learning_rate`.
        patience: EarlyStopper patience.
        min_delta: EarlyStopper margin. NOTE(review): the stopper is fed MAPE
            values, so the default of 10 looks very large — confirm it is intended.

    Returns:
        tuple: `(all_train_losses, all_val_losses)`, each a list of
        `(model.name, losses)` pairs in `model_list` order.

    Raises:
        ValueError: if the three lists differ in length.
    """
    model_list = list(model_list)
    train_dl_list = list(train_dl_list)
    val_dl_list = list(val_dl_list)

    # zip() would silently drop the unmatched tail and leave some models untrained
    if not (len(model_list) == len(train_dl_list) == len(val_dl_list)):
        raise ValueError(
            "model_list, train_dl_list and val_dl_list must have the same length "
            "(got {}, {} and {})".format(len(model_list), len(train_dl_list), len(val_dl_list))
        )

    if lr is None:
        lr = learning_rate

    all_train_losses = []
    all_val_losses = []

    for (model, train_dl, val_dl) in zip(model_list, train_dl_list, val_dl_list):

        # Fresh optimiser / loss / stopper so no state leaks between sites
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)
        loss_fn = nn.MSELoss()
        early_stopper = EarlyStopper(patience=patience, min_delta=min_delta)

        train_losses, val_losses = train(epochs, train_dl, val_dl, model, optimizer=optimizer, criterion=loss_fn, early_stopper=early_stopper)

        all_train_losses.append((model.name, train_losses))
        all_val_losses.append((model.name, val_losses))

    return all_train_losses, all_val_losses

In [None]:
# Train every per-site model; each result is a list of (model name, per-batch MAPE values)
all_train_losses, all_val_losses = main(epochs=epochs, train_dl_list=train_dataloader_list, val_dl_list=val_dataloader_list, model_list=nn_model_list)

Train, Epoch: 1, Batch 0: Loss = 3756370944.0
Train, Epoch: 1, Batch 50: Loss = 82681696.0
Val, Epoch: 1, Batch 0: Loss = 82004256.0
Train, Epoch: 2, Batch 0: Loss = 66521488.0
Train, Epoch: 2, Batch 50: Loss = 73308576.0
Val, Epoch: 2, Batch 0: Loss = 71377728.0
Train, Epoch: 1, Batch 0: Loss = 692839488.0
Train, Epoch: 1, Batch 50: Loss = 11545230.0
Val, Epoch: 1, Batch 0: Loss = 13582371.0
Train, Epoch: 2, Batch 0: Loss = 10948454.0
Train, Epoch: 2, Batch 50: Loss = 5006808.0
Val, Epoch: 2, Batch 0: Loss = 8002607.0
Train, Epoch: 1, Batch 0: Loss = 4073209088.0
Train, Epoch: 1, Batch 50: Loss = 115157120.0
Val, Epoch: 1, Batch 0: Loss = 115397352.0
Train, Epoch: 2, Batch 0: Loss = 169623360.0
Train, Epoch: 2, Batch 50: Loss = 82706240.0
Val, Epoch: 2, Batch 0: Loss = 101010544.0
Train, Epoch: 1, Batch 0: Loss = 3758083072.0
Train, Epoch: 1, Batch 50: Loss = 76517464.0
Val, Epoch: 1, Batch 0: Loss = 78973496.0
Train, Epoch: 2, Batch 0: Loss = 94342360.0
Train, Epoch: 2, Batch 50: Los

### Loss Curves Plot

In [None]:
# One line chart per model: per-batch training MAPE in the order the batches were run
for name, train_losses in all_train_losses:
    fig = px.line(train_losses, title="Train MAPE Loss Curve: "+name)
    fig.update_layout(autosize=False, width=1200, height=750, font={'size': 20})
    fig.show()

In [None]:
# One line chart per model: per-batch validation MAPE, drawn in red
for name, val_losses in all_val_losses:
    fig = px.line(val_losses, title="Validation MAPE Loss Curve: "+name, color_discrete_sequence=['red'])
    fig.update_layout(autosize=False, width=1200, height=750, font={'size': 20})
    fig.show()

### Random val samples

In [None]:
# Number of random samples drawn per site for the scatter plot
N_VAL_SAMPLES = 10

for nn_model, val_dl in zip(nn_model_list, val_dataloader_list):

    # Sample from the held-out validation subset, not from the full dataset
    # (which also contains the rows the model was trained on).
    val_data = val_dl.dataset
    if len(val_data) == 0:
        print("Skipping {}: empty validation split".format(nn_model.name))
        continue

    preds = []
    ground_truth = []
    vehicle_counts = []

    # Inference only: no autograd graph needed
    with torch.no_grad():
        for _ in range(N_VAL_SAMPLES):
            random_idx = np.random.randint(0, len(val_data))
            dataset_name, x, y = val_data[random_idx]

            assert dataset_name == nn_model.name

            # Feature 0 is the total vehicle count; store plain floats so the
            # DataFrame columns are numeric rather than tensor objects.
            vehicle_counts.append(float(x[0]))
            ground_truth.append(float(y))
            preds.append(float(nn_model(x)[0]))

    df = pd.DataFrame({'vehicle_count': vehicle_counts, 'ground_truth': ground_truth, 'predictions': preds})

    fig = px.scatter(df, x='vehicle_count', y=['ground_truth', 'predictions'], title=nn_model.name)

    fig.show()

## Test Set

In [None]:
# One CustomDataset per held-out (test-year) frame
test_dataset_list = [
    CustomDataset(site_df, normalised=NORMALISED)
    for site_df in processed_aadt_df_test_list
]

In [None]:
test_dataloader_list = []

for test_data in test_dataset_list:
    # batch_size=1 gives one MAPE value per test sample. Only non-default options
    # are passed: an explicit prefetch_factor with num_workers=0 is rejected by
    # PyTorch 1.x.
    test_dataloader = torch.utils.data.DataLoader(test_data, batch_size=1, shuffle=False)

    test_dataloader_list.append(test_dataloader)

In [None]:
def run_epoch_test(loader, model_list):
    """Evaluate the model that matches `loader`'s dataset and collect per-batch MAPE.

    Args:
        loader: iterable of `(names, x, y)` batches; the first entry of `names`
            identifies the dataset the batch came from.
        model_list: candidate models, each exposing `name`.

    Returns:
        tuple: `(model, losses)` where `model` is the model used for the last
        batch (None if `loader` yields nothing) and `losses` is the list of
        per-batch MAPE values (0-d numpy arrays).

    Raises:
        ValueError: if no model in `model_list` is named after a batch's dataset.
    """
    losses = []  # per-batch MAPE values

    correct_model = None

    with torch.no_grad():
        for batch in loader:
            dl_name, x, y = batch

            # Pick the first model named after this batch's dataset. A missing
            # model is reported explicitly rather than surfacing later as an
            # attribute error on None or as a stale model from a previous batch.
            correct_model = next((model for model in model_list if model.name == dl_name[0]), None)
            if correct_model is None:
                raise ValueError("No trained model found for dataset '{}'".format(dl_name[0]))

            # Setting model to evaluation mode
            correct_model.eval()

            # Model output has a trailing dimension of size 1; drop it to match y
            logits = correct_model(x).squeeze(1)

            mape_loss = MAPE(logits, y)

            losses.append(mape_loss.detach().numpy())

    return correct_model, losses


In [None]:
def main_test(test_dl_list, model_list):
    """Evaluate every test loader and gather the per-batch losses.

    Args:
        test_dl_list: test loaders, each handed to `run_epoch_test`.
        model_list: candidate models, forwarded to `run_epoch_test` for matching.

    Returns:
        list: `(model name, losses)` pairs, one per loader, in loader order.
    """
    # Lazily evaluate each loader, then keep only the model's name with its losses
    results = (run_epoch_test(test_dl, model_list) for test_dl in test_dl_list)
    return [(model.name, test_loss) for model, test_loss in results]

In [None]:
# Evaluate each test loader with its matching model: a list of (model name, per-batch MAPE values)
all_test_losses = main_test(test_dataloader_list, nn_model_list)

In [None]:
# One line chart per model: MAPE of each test batch, in loader order
for name, test_losses in all_test_losses:
    fig = px.line(test_losses, title="Test MAPE Loss Curve: "+name)
    fig.update_layout(autosize=False, width=1200, height=750, font={'size': 20})
    fig.show()

### Visualise Random Test Samples

In [None]:
# Number of random samples drawn per test site for the scatter plot
N_TEST_SAMPLES = 10

for test_data in test_dataset_list:

    # First model named after this test dataset. A test site may have no
    # training-year file and therefore no model, so handle that explicitly.
    model = next((candidate for candidate in nn_model_list if candidate.name == test_data.name), None)

    if model is None:
        print("Skipping {}: no trained model for this site".format(test_data.name))
        continue
    if len(test_data) == 0:
        print("Skipping {}: test dataset is empty".format(test_data.name))
        continue

    preds = []
    ground_truth = []
    vehicle_counts = []

    # Inference only: no autograd graph needed
    with torch.no_grad():
        for _ in range(N_TEST_SAMPLES):
            random_idx = np.random.randint(0, len(test_data))
            dataset_name, x, y = test_data[random_idx]

            assert dataset_name == model.name

            # Feature 0 is the total vehicle count; store plain floats so the
            # DataFrame columns are numeric rather than tensor objects.
            vehicle_counts.append(float(x[0]))
            ground_truth.append(float(y))
            preds.append(float(model(x)[0]))

    df = pd.DataFrame({'vehicle_count': vehicle_counts, 'ground_truth': ground_truth, 'predictions': preds})

    fig = px.scatter(df, x='vehicle_count', y=['ground_truth', 'predictions'], title=model.name)

    fig.show()

## Save Model

In [None]:
# torch.save does not create missing directories
os.makedirs(NN_MODEL_PATH, exist_ok=True)

for nn_model in nn_model_list:

  print(nn_model.name)

  # Build the file stem without renaming the model itself: the model's name is
  # what pairs it with its datasets, so it must survive a re-run of earlier cells.
  # 'Blackburn with Darwen' is shortened to 'blackburn' and the stem is lower-cased.
  file_stem = nn_model.name.replace('Blackburn with Darwen', 'blackburn').lower()

  torch.save(nn_model.state_dict(), os.path.join(NN_MODEL_PATH, "nn_model_{}.pth".format(file_stem)))

Havering_M25_5790A
Blackburn with Darwen_30361033
Luton_M1_2557A
Havering_M25_5790B
Blackburn with Darwen_30361032
Luton_M1_2557B
Hounslow_M4_2188A
Hounslow_M4_2188B
Trafford_M60_9083A
Trafford_M60_9086B
