# Modeling <a id='Modeling'></a>

### 1 Table of Contents<a id='Contents'></a>
* [Modeling](#Modeling)
  * [1 Contents](#Contents)
  * [2 Introduction](#2_Introduction)
  * [3 Imports](#3_Imports)
  * [4 Datasets and Dataloaders](#4_Datasets_and_Dataloaders)
  * [5 SpatioTemporal Model](#5_SpatioTemporal_Model)
  * [6 Spatial Model with Weather Features](#6_Spatial_Model_with_Weather_Features)
  * [8 Best Model](#8_Best_Model)
  * [Conclusion](#Conclusion)

### 2 Introduction <a id='2_Introduction'></a>

In the last notebook, we created the tensor, split the data, and stored it in an HDF5 file (written with h5py). In this notebook, we'll be building our models.

### 3 Imports <a id='3_Imports'></a>

In [1]:
import warnings
warnings.simplefilter('ignore')
import numpy as np
import torch as t
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric_temporal.nn.recurrent import GConvGRU
from torch_geometric_temporal.signal import StaticGraphTemporalSignal
from torch_geometric_temporal.nn.attention import stgcn
from torch_geometric.data import Dataset, Data
from torch_geometric.loader import DataLoader
from torch_geometric.utils import grid
from sklearn.metrics import classification_report
import torch.nn as nn
import torch.optim as optim
import gc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import requests
import h5py
import googlemaps
import os.path as osp
import math
import csv

### 4 Datasets and Dataloaders <a id='4_Datasets_and_Dataloaders'></a>

In [2]:
class H5GeometricTemporal(object):
    """Build a ``StaticGraphTemporalSignal`` from windowed data in an HDF5 file.

    Every entry of the selected HDF5 dataset is treated as one window whose
    leading axis indexes slices (presumably time steps -- confirm against the
    notebook that wrote the file). All slices but the last become the node
    features and the last slice becomes the node targets. Nodes are connected
    by the edges of a regular ``height`` x ``width`` grid.

    Args:
        path: Path of the HDF5 file to read.
        set_name: Name of the dataset inside the file.
        height: Grid height used to build the edge index (default 50).
        width: Grid width used to build the edge index (default 30).
    """

    def __init__(self, path, set_name, height=50, width=30):
        self.set_name = set_name
        self.path = path
        self.height = height
        self.width = width
        super().__init__()

    def _get_edges(self):
        """Store the edge index of the grid graph in ``self.edges``."""
        edge_index, _ = grid(height=self.height, width=self.width)
        self.edges = edge_index

    @staticmethod
    def _split_window(window):
        """Split one window into a node-feature matrix and a target column.

        Args:
            window: Array of shape (steps, rows, cols).

        Returns:
            tuple: ``(x, y)`` where ``x`` has shape (rows * cols, steps - 1)
            and ``y`` has shape (rows * cols, 1).
        """
        window = np.asarray(window)
        history = window[:-1]
        # Flatten the grid so each row of x is one node, each column one slice.
        flat_history = np.reshape(
            history, (history.shape[0], history.shape[1] * history.shape[2])
        )
        x = np.transpose(flat_history)
        last = window[-1]
        y = np.reshape(last, (last.shape[0] * last.shape[1], 1))
        return x, y

    def _generate_task(self):
        """Read every window from the file into ``self.features`` / ``self.targets``.

        The file is opened only for the duration of the read, so no HDF5
        handle is left open once the arrays are in memory.
        """
        features = []
        targets = []
        with h5py.File(self.path, 'r') as h5_file:
            windows = h5_file[self.set_name]
            for idx in range(windows.shape[0]):
                x, y = self._split_window(windows[idx])
                features.append(x)
                targets.append(y)
        self.features = features
        self.targets = targets

    def get_dataset(self):
        """Return the temporal signal built from the grid edges and the file.

        Edge weights are passed as ``None``.
        """
        self._get_edges()
        self._generate_task()
        dataset = StaticGraphTemporalSignal(self.edges, None, self.features, self.targets)
        return dataset

In [None]:
# Both windowed splits live in the same HDF5 file; create one reader per
# split, then materialise each split as a temporal signal.
train_temporal_loader = H5GeometricTemporal(path='../Data/LA_windowed_split.hdf5', set_name='train_windowed')
test_temporal_loader = H5GeometricTemporal(path='../Data/LA_windowed_split.hdf5', set_name='test_windowed')
train_temporal = train_temporal_loader.get_dataset()
test_temporal = test_temporal_loader.get_dataset()

In [None]:
# Run a full garbage-collection pass (presumably to release temporaries
# left over from loading the temporal datasets -- confirm it is still needed).
gc.collect()

In [None]:
class H5Geometric(Dataset):
    """On-disk PyG dataset holding one ``Data`` graph per sample of an HDF5 split.

    ``process`` reads the split ``set_name`` from the raw file
    ``LA_data_split_random.hdf5`` and writes one ``data_<set_name>_<idx>.pt``
    file per sample. In each sample, channel 0 of the last axis is used as the
    node target and the remaining channels as node features; nodes are
    connected by the edges of a 50 x 30 grid.

    Args:
        root: Dataset root directory (forwarded to ``Dataset``).
        set_name: Split to expose. ``'train'`` expects 78888 samples; any
            other name expects 26296 samples.
        transform: Forwarded to ``Dataset``.
        pre_transform: Forwarded to ``Dataset``.
    """

    # Expected number of samples per split (hard-coded for this data file).
    _TRAIN_SIZE = 78888
    _TEST_SIZE = 26296

    def __init__(self, root, set_name, transform = None, pre_transform = None):
        # Both attributes must exist before Dataset.__init__ runs, because it
        # consults processed_file_names and may call process().
        self.set_name = set_name
        n_samples = self._TRAIN_SIZE if set_name == 'train' else self._TEST_SIZE
        # Use the same naming scheme as process() and get() so the three can
        # never disagree about which files make up the split.
        self._processed_file_names = [
            f'data_{set_name}_{idx}.pt' for idx in range(n_samples)
        ]
        super().__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        """Name of the raw HDF5 file expected in the raw directory."""
        return 'LA_data_split_random.hdf5'

    @property
    def processed_file_names(self):
        """List of processed file names that make up this split."""
        return self._processed_file_names

    def process(self):
        """Convert every sample of the split into a saved ``Data`` object."""
        # Build the edge index on the CPU so processing needs no GPU and the
        # saved graphs do not mix CPU and CUDA tensors; callers move batches
        # to the GPU themselves.
        edge_index, _ = grid(height = 50, width = 30)
        with h5py.File(self.raw_paths[0], 'r') as h5_file:
            samples = h5_file[self.set_name]
            for idx in range(samples.shape[0]):
                sample = samples[idx]
                # Channels 1.. are the node features, flattened to one row per node.
                x = t.tensor(sample[:, :, 1:])
                x = x.reshape(x.shape[0] * x.shape[1], x.shape[2])
                # Channel 0 is the target, stored as a column vector.
                y = t.tensor(sample[:, :, 0])
                y = y.reshape(y.shape[0] * y.shape[1])[:, None]

                data = Data(x = x, edge_index = edge_index, y = y)
                t.save(data, osp.join(self.processed_dir, f'data_{self.set_name}_{idx}.pt'))

    def len(self):
        """Return the number of samples in the split."""
        return len(self.processed_file_names)

    def get(self, idx):
        """Load and return the processed graph with index ``idx``."""
        data = t.load(osp.join(self.processed_dir, f'data_{self.set_name}_{idx}.pt'))
        return data

In [None]:
# Random (non-windowed) splits, served in mini-batches of 8 graphs.
# Only the training loader shuffles.
train_random = H5Geometric(root='../Data/', set_name='train')
test_random = H5Geometric(root='../Data/', set_name='test')
train_random_loader = DataLoader(train_random, batch_size=8, shuffle=True)
test_random_loader = DataLoader(test_random, batch_size=8, shuffle=False)

In [None]:
# Run a full garbage-collection pass (presumably to release temporaries
# left over from building the random-split datasets -- confirm it is still needed).
gc.collect()

In [None]:
def ACC(pred, actual):
    """Count the actual positives and how many of them were predicted positive.

    An element is positive when it equals 1. The comparison is vectorised, so
    a tensor is reduced in two operations instead of one Python-level
    comparison per element.

    Args:
        pred: Array or tensor of predicted labels (1 marks a positive).
        actual: Array or tensor of true labels with the same number of
            elements as ``pred``.

    Returns:
        tuple[int, int]: ``(actual_positives, correct_positives)`` -- the
        number of elements where ``actual`` is 1, and the number of those
        where ``pred`` is 1 as well.
    """
    # Flatten both so (N,) and (N, 1) inputs line up instead of broadcasting.
    positive_mask = actual.reshape(-1) == 1
    hit_mask = positive_mask & (pred.reshape(-1) == 1)
    return int(positive_mask.sum()), int(hit_mask.sum())

### 5 SpatioTemporal Model <a id='5_SpatioTemporal_Model'></a>

In [None]:
class RecurrentGCN(nn.Module):
    """Two stacked ``GConvGRU`` layers followed by a per-node sigmoid output.

    Args:
        node_features: Number of input features per node.
        num_classes: Accepted for interface compatibility; the head always
            emits a single probability per node, so this value is unused.
    """

    def __init__(self, node_features, num_classes):
        super(RecurrentGCN, self).__init__()
        self.recurrent_1 = GConvGRU(node_features, 32, 5)
        self.recurrent_2 = GConvGRU(32, 16, 5)
        self.linear = nn.Linear(16, 1)

    def forward(self, x, edge_index, edge_weight):
        """Return one positive-class probability per node, shape (num_nodes, 1).

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity.
            edge_weight: Edge weights (the training code in this notebook
                passes the snapshot's ``edge_weight`` unchanged).
        """
        # NOTE(review): no hidden state is passed between calls, so each
        # snapshot is presumably processed from a fresh state -- confirm
        # this is intended.
        x = self.recurrent_1(x, edge_index, edge_weight)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.recurrent_2(x, edge_index, edge_weight)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.linear(x)
        # Return the sigmoid probabilities directly: BCELoss expects values in
        # [0, 1], and a log_softmax over a single output column is always 0,
        # which would make the output constant and stop all learning.
        return t.sigmoid(x)

In [None]:
# Spatio-temporal network on the GPU, trained with Adam on a binary
# cross-entropy objective.
model = RecurrentGCN(node_features=5, num_classes=2).cuda()
loss_fn = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
def _evaluate_temporal(net, snapshots, criterion):
    """Evaluate ``net`` on every snapshot and summarise loss and positive hit rate.

    Args:
        net: Model called as ``net(x, edge_index, edge_weight)``.
        snapshots: Iterable of snapshots exposing ``x``, ``edge_index``,
            ``edge_weight`` and ``y``.
        criterion: Loss called as ``criterion(y_pred, y)``.

    Returns:
        tuple[float, float]: Mean loss over the snapshots, and the percentage
        of actual positives that were predicted positive. Either value is
        ``nan`` when it is undefined (no snapshots / no positives).
    """
    losses = []
    total_positives = 0
    total_correct_positives = 0
    for snapshot in snapshots:
        x = snapshot.x.cuda()
        edge_index = snapshot.edge_index.cuda()
        edge_weight = snapshot.edge_weight
        y = snapshot.y.cuda()
        y_pred = model_output = net(x, edge_index, edge_weight)
        # Keep plain Python floats so the mean never has to convert tensors.
        losses.append(criterion(model_output, y).item())
        # ACC counts predictions equal to exactly 1, so binarise the
        # probabilities at 0.5 before counting.
        y_label = (y_pred >= 0.5).float()
        actual_positives, correct_positives = ACC(y_label, y)
        total_positives += actual_positives
        total_correct_positives += correct_positives
    mean_loss = float(np.mean(losses)) if losses else float('nan')
    if total_positives:
        hit_rate = (total_correct_positives / total_positives) * 100
    else:
        # No positive targets at all: the ratio is undefined, not an error.
        hit_rate = float('nan')
    return mean_loss, hit_rate


# Train the spatio-temporal model, recording per-epoch metrics on both splits.
n_epochs = 10
train_BCE_totals = []
train_ACC_totals = []
test_BCE_totals = []
test_ACC_totals = []
for epoch in range(n_epochs):
    # Training: one optimizer step per snapshot.
    model.train()
    for snapshot in train_temporal:
        x = snapshot.x.cuda()
        edge_index = snapshot.edge_index.cuda()
        edge_weight = snapshot.edge_weight
        y = snapshot.y.cuda()
        y_pred = model(x, edge_index, edge_weight)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    gc.collect()
    # Validation: dropout disabled and no gradients tracked.
    model.eval()
    with t.no_grad():
        train_BCE_avg, train_ACC_avg = _evaluate_temporal(model, train_temporal, loss_fn)
        test_BCE_avg, test_ACC_avg = _evaluate_temporal(model, test_temporal, loss_fn)
    train_BCE_totals.append(train_BCE_avg)
    train_ACC_totals.append(train_ACC_avg)
    test_BCE_totals.append(test_BCE_avg)
    test_ACC_totals.append(test_ACC_avg)
    print("Epoch %d: train BCE %.4f, test BCE %.4f" % (epoch, train_BCE_avg, test_BCE_avg))
    print("Epoch %d: train ACC %.4f, test ACC %.4f" % (epoch, train_ACC_avg, test_ACC_avg))
    gc.collect()

In [None]:
# One panel per split: per-epoch accuracy plotted against per-epoch BCE.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 12))
panel_specs = (
    ('Train', train_BCE_totals, train_ACC_totals),
    ('Test', test_BCE_totals, test_ACC_totals),
)
for panel_ax, (panel_title, bce_history, acc_history) in zip(ax, panel_specs):
    panel_ax.plot(bce_history, acc_history)
    panel_ax.set_xlabel('BCE')
    panel_ax.set_ylabel('Accuracy')
    panel_ax.title.set_text(panel_title)

fig.suptitle('Accuracy vs Binary Cross Entropy Loss: Temporal GNN')

plt.show()

In [None]:
# Run a full garbage-collection pass (presumably to release memory from the
# temporal experiment before the next model -- confirm it is still needed).
gc.collect()

### 6 Spatial Model with Weather Features <a id='6_Spatial_Model_with_Weather_Features'></a>

In [None]:
class GCN(nn.Module):
    """Two-layer graph convolutional network with a per-node sigmoid output.

    The first layer maps 11 input features per node to 16 hidden channels;
    the second maps those to a single output channel.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(11, 16)
        self.conv2 = GCNConv(16, 1)

    def forward(self, x, edge_index, train):
        """Return one positive-class probability per node, shape (num_nodes, 1).

        Args:
            x: Node feature matrix.
            edge_index: Graph connectivity.
            train: Whether dropout is active. This flag, not the module's
                own ``training`` state, controls the dropout layer.
        """
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=train)
        x = self.conv2(x, edge_index)
        # Return the sigmoid probabilities directly: BCELoss expects values in
        # [0, 1], and a log_softmax over a single output column is always 0,
        # which would make the output constant and stop all learning.
        return t.sigmoid(x)

In [None]:
# Spatial network on the GPU. The optimizer and loss names are rebound here
# for this model.
model2 = GCN().cuda()
loss_fn = nn.BCELoss()
optimizer = optim.Adam(params=model2.parameters(), lr=1e-3)

In [None]:
def _evaluate_spatial(net, loader, criterion):
    """Evaluate ``net`` on every batch and summarise loss and positive hit rate.

    Args:
        net: Model called as ``net(x, edge_index, False)`` (dropout off).
        loader: Iterable of batches exposing ``x``, ``edge_index`` and ``y``.
        criterion: Loss called as ``criterion(y_pred, y)``.

    Returns:
        tuple[float, float]: Mean loss over the batches, and the percentage
        of actual positives that were predicted positive. Either value is
        ``nan`` when it is undefined (no batches / no positives).
    """
    losses = []
    total_positives = 0
    total_correct_positives = 0
    for data in loader:
        x = data.x.type(t.cuda.FloatTensor)
        edge_index = data.edge_index.type(t.cuda.LongTensor)
        y = data.y.type(t.cuda.FloatTensor)
        y_pred = net(x, edge_index, False)
        # Keep plain Python floats so the mean never has to convert tensors.
        losses.append(criterion(y_pred, y).item())
        # ACC counts predictions equal to exactly 1, so binarise the
        # probabilities at 0.5 before counting.
        y_label = (y_pred >= 0.5).float()
        actual_positives, correct_positives = ACC(y_label, y)
        total_positives += actual_positives
        total_correct_positives += correct_positives
    mean_loss = float(np.mean(losses)) if losses else float('nan')
    if total_positives:
        hit_rate = (total_correct_positives / total_positives) * 100
    else:
        # No positive targets at all: the ratio is undefined, not an error.
        hit_rate = float('nan')
    return mean_loss, hit_rate


# Train the spatial model, recording per-epoch metrics on both splits.
n_epochs = 10
train_BCE_totals = []
train_ACC_totals = []
test_BCE_totals = []
test_ACC_totals = []
for epoch in range(n_epochs):
    # Training: one optimizer step per mini-batch, dropout enabled.
    model2.train()
    for data in train_random_loader:
        x = data.x.type(t.cuda.FloatTensor)
        edge_index = data.edge_index.type(t.cuda.LongTensor)
        y = data.y.type(t.cuda.FloatTensor)
        y_pred = model2(x, edge_index, True)
        loss = loss_fn(y_pred, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Validation: dropout disabled and no gradients tracked.
    model2.eval()
    with t.no_grad():
        train_BCE_avg, train_ACC_avg = _evaluate_spatial(model2, train_random_loader, loss_fn)
        test_BCE_avg, test_ACC_avg = _evaluate_spatial(model2, test_random_loader, loss_fn)
    train_BCE_totals.append(train_BCE_avg)
    train_ACC_totals.append(train_ACC_avg)
    test_BCE_totals.append(test_BCE_avg)
    test_ACC_totals.append(test_ACC_avg)
    print("Epoch %d: train BCE %.4f, test BCE %.4f" % (epoch, train_BCE_avg, test_BCE_avg))
    print("Epoch %d: train ACC %.4f, test ACC %.4f" % (epoch, train_ACC_avg, test_ACC_avg))
    gc.collect()

In [None]:
# One panel per split: per-epoch accuracy plotted against per-epoch BCE.
fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(14, 12))
panel_specs = (
    ('Train', train_BCE_totals, train_ACC_totals),
    ('Test', test_BCE_totals, test_ACC_totals),
)
for panel_ax, (panel_title, bce_history, acc_history) in zip(ax, panel_specs):
    panel_ax.plot(bce_history, acc_history)
    panel_ax.set_xlabel('BCE')
    panel_ax.set_ylabel('Accuracy')
    panel_ax.title.set_text(panel_title)

fig.suptitle('Accuracy vs Binary Cross Entropy Loss: GNN with Weather Features')

plt.show()

### 8 Best Model <a id='8_Best_Model'></a>

### Conclusion <a id='Conclusion'></a>

The final model...