In [7]:
import os
import sys
sys.path.append('..')

import pandas as pd

import config as cfg

In [8]:
df_train = pd.read_csv(os.path.join(cfg.DATA_PATH, 'cleaned_train.csv'))
df_test = pd.read_csv(os.path.join(cfg.DATA_PATH, 'cleaned_test.csv'))

df_train

Unnamed: 0,Age_band_of_driver_18-30,Age_band_of_driver_31-50,Age_band_of_driver_Over 51,Age_band_of_driver_Under 18,Cause_of_accident_Changing lane,Cause_of_accident_Driving at high speed,Cause_of_accident_Driving carelessly,Cause_of_accident_Driving to the left,Cause_of_accident_Driving under the influence,Cause_of_accident_Getting off the vehicle improperly,...,Vehicle_movement_Turnover,Vehicle_movement_U-Turn,Vehicle_movement_Waiting to go,Weather_conditions_Cloudy,Weather_conditions_Fog or mist,Weather_conditions_Normal,Weather_conditions_Other,Weather_conditions_Raining and/or Windy,Weather_conditions_Snow,Accident_severity
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,2
1,0,1,0,0,0,1,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
2,1,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,1,0,0,0,1
4,1,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,1,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
211,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
212,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,2
213,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
214,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1


In [9]:
df_test

Unnamed: 0,Age_band_of_driver_18-30,Age_band_of_driver_31-50,Age_band_of_driver_Over 51,Age_band_of_driver_Under 18,Cause_of_accident_Changing lane,Cause_of_accident_Driving at high speed,Cause_of_accident_Driving carelessly,Cause_of_accident_Driving to the left,Cause_of_accident_Driving under the influence,Cause_of_accident_Getting off the vehicle improperly,...,Vehicle_movement_Turnover,Vehicle_movement_U-Turn,Vehicle_movement_Waiting to go,Weather_conditions_Cloudy,Weather_conditions_Fog or mist,Weather_conditions_Normal,Weather_conditions_Other,Weather_conditions_Raining and/or Windy,Weather_conditions_Snow,Accident_severity
0,0,0,1,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
3,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
4,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
88,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
89,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,2
90,0,0,1,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
91,0,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,1,0,0,0,2


# Base Model

In [55]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score

from tqdm import tqdm

In [88]:
class BaseModel(nn.Module):
    def __init__(self, input_dim, output_dim):
        super().__init__()
        
        self.input_1 = nn.Linear(input_dim, 40)
        self.input_2 = nn.Linear(40, 25)
        self.input_3 = nn.Linear(25, 10)
        self.output = nn.Linear(10, output_dim)

    def forward(self, x):
        x = F.relu(self.input_1(x))
        x = F.relu(self.input_2(x))
        x = F.relu(self.input_3(x))
        y = torch.sigmoid(self.output(x))
        y = F.softmax(y, dim=-1)

        return y

class AccidentSeverityDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y

        self.n_samples = len(x)

    def __getitem__(self, index):
        return self.x[index], self.y[index]

    def __len__(self):
        return self.n_samples

In [89]:
class AccidentSeverityModelTrainer():
    def __init__(self, model_type, input_dim, output_dim, batch_size=5):
        self.model_type = model_type
        self.input_dim = input_dim
        self.output_dim = output_dim
        
        self.device = 'cpu'

        X_training = df_train.drop(columns=['Accident_severity']).values
        y_training = AccidentSeverityModelTrainer._one_hot_encoding(df_train['Accident_severity'].values)

        X_training = torch.tensor(X_training).float().to(self.device)
        y_training = torch.tensor(y_training).float().to(self.device)

        train_dataset = AccidentSeverityDataset(X_training, y_training)

        X_validation = df_test.drop(columns=['Accident_severity']).values
        y_validation = AccidentSeverityModelTrainer._one_hot_encoding(df_test['Accident_severity'].values)

        self.y_test = y_validation

        X_validation = torch.tensor(X_validation).float().to(self.device)
        y_validation = torch.tensor(y_validation).float().to(self.device)

        self.x_test = X_validation

        self.train_loader = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=False,
            drop_last=False
        )

    @staticmethod
    def _one_hot_encoding(nd_array):
        ohe = []
        classes = max(nd_array) + 1
        for n in nd_array:
            encoding = [0] * classes
            encoding[n] = 1
            ohe.append(encoding)
        return np.array(ohe)

    @staticmethod
    def _convert_prob_to_deterministic(nd_array):
        one_hot_encoding_predictions = nd_array

        for i in range(len(nd_array)):
            max_pred = max(nd_array[i])

            for j in range(len(nd_array[i])):
                one_hot_encoding_predictions[i][j] = 1 if nd_array[i][j] == max_pred else 0

        return one_hot_encoding_predictions


    def train_new_model(self, epochs=10):
        model = self.model_type(self.input_dim, self.output_dim)

        optimizer = optim.Adam(model.parameters(), lr=0.001)
        criterion = nn.CrossEntropyLoss().to(self.device)

        for epoch in range(epochs):
            # Training Step
            losses = []
            accuracies = []
            roc_auc = []
            for batch_idx, (data, targets) in enumerate(self.train_loader):
                data = data.to(self.device)
                targets = targets.to(self.device)

                predictions = model.forward(data)
                loss = criterion(predictions, targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                prob_pred = model.forward(self.x_test).detach().cpu().numpy()
                det_pred = AccidentSeverityModelTrainer._convert_prob_to_deterministic(
                    prob_pred
                )

                losses.append(loss.item())
                accuracies.append(accuracy_score(self.y_test, det_pred))
                roc_auc.append(roc_auc_score(self.y_test, prob_pred))

                avg_loss = sum(losses) / len(losses)
                avg_acc = sum(accuracies) / len(accuracies)
                avg_ra = sum(roc_auc) / len(roc_auc)

                print(f'Epoch: {epoch}\t-\tLoss: {avg_loss:.4f}\t-\tAccuracy: {avg_acc:.4f}\t-\tROC-AUC: {avg_ra:.4f}', end='\r')
            

        return model



In [90]:
mt = AccidentSeverityModelTrainer(BaseModel, 88, 3)
model = mt.train_new_model(epochs=100)

Epoch: 99	-	Loss: 0.9383	-	Accuracy: 0.3935	-	ROC-AUC: 0.5497

# Tunable Model

In [None]:
class TuningConfiguration():
    def __init__(
        self, input_dim, output_dim,
        decay_rate=1,
        num_hidden_layers=0, 
    ):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.decay_rate = decay_rate
        self.num_hidden_layers = num_hidden_layers


class TunableModelFactory():
    def __init__(self, cfg: TuningConfiguration):
        self.cfg = cfg

    def



class TunableModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = []

    def add_layer(self, name, layer, activation):
        self.layers.append({
            'name': name,
            'layer': layer,
            'activation': activation
        })

    def forward(self, x):
        for layer in self.layers:
            x = layer['activation'](layer['layer'])
        return x

