# Modeling <a id='Modeling'></a>

### 1 Table of Contents<a id='Contents'></a>
* [Modeling](#Modeling)
  * [1 Contents](#Contents)
  * [2 Introduction](#2_Introduction)
  * [3 Imports](#3_Imports)
  * [4 Datasets and Dataloaders](#4_Datasets_and_Dataloaders)
  * [5 Model1](#5_Model1)
  * [6 Model2](#6_Model2)
  * [7 Model3](#7_Model3)
  * [8 Best Model](#8_Best_Model)
  * [Conclusion](#Conclusion)

### 2 Introduction <a id='2_Introduction'></a>

In the last notebook, we created the tensor, split the data, and stored it in an HDF5 file (written with h5py). In this notebook, we'll be building our models.

### 3 Imports <a id='3_Imports'></a>

In [2]:
import warnings
warnings.simplefilter('ignore')
import numpy as np
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import GConvGRU
from torch_geometric_temporal.nn.temporal import STConv
from torch_geometric_temporal.signal import StaticGraphTemporalSignal
import torch.utils.data as dt
import torch.nn as nn
import torch.optim as optim
import gc
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import requests
import h5py
import googlemaps
import os
import math
import csv

ModuleNotFoundError: No module named 'torch_geometric_temporal'

### 4 Datasets and Dataloaders <a id='4_Datasets_and_Dataloaders'></a>

In [None]:
class H5dataset(dt.Dataset):
    """Map-style dataset that serves samples from one dataset of an HDF5 file.

    The file is opened read-only in ``__init__`` and stays open until
    ``CLOSE`` is called, so call ``CLOSE`` when the dataset is no longer needed.

    Args:
        h5_path: Path of the HDF5 file to open.
        set_name: Name of the dataset inside the file to read samples from.
        device: Device the returned tensors are created on. Defaults to
            ``'cuda:0'``, the device that was previously hard-coded.
    """

    # Number of samples per chunk along the first axis; __getitem__ uses it to
    # map a sample index to (chunk number, offset inside that chunk).
    # NOTE(review): assumes the HDF5 dataset is chunked only along axis 0 in
    # blocks of 32 samples — confirm against the notebook that wrote the file.
    ROWS_PER_CHUNK = 32

    def __init__(self, h5_path, set_name, device='cuda:0'):
        self.h5 = h5py.File(h5_path, 'r')
        self.set = self.h5[set_name]
        self.device = device
        # Chunk number -> tuple of slices selecting that chunk in the dataset.
        self.chunk_dict = dict(enumerate(self.set.iter_chunks()))

    def __len__(self):
        """Return the number of samples (size of the first axis)."""
        return self.set.shape[0]

    def __getitem__(self, idx):
        """Return a 1-tuple holding sample ``idx`` as a tensor on ``self.device``.

        NOTE(review): only the features tensor is returned; no target is
        produced yet, so code that unpacks ``X, y`` from a batch will not work
        until a second element is added here.
        """
        chunk, chunk_idx = divmod(idx, self.ROWS_PER_CHUNK)
        # Read the whole chunk that contains the sample, then pick the row.
        sample = self.set[self.chunk_dict[chunk]][chunk_idx]
        return (torch.tensor(sample[:, :, :], device=self.device),)

    def CLOSE(self):
        """Close the underlying HDF5 file handle."""
        self.h5.close()

In [None]:
# Open the train and test splits; both live in the same HDF5 file.
train = H5dataset(
    h5_path='../Data/LA_windowed_split_collisions_only.hdf5',
    set_name='train_windowed',
)
test = H5dataset(
    h5_path='../Data/LA_windowed_split_collisions_only.hdf5',
    set_name='test_windowed',
)

### 5 Model1 <a id='5_Model1'></a>

In [None]:
class STGCN(nn.Module):
    """Two stacked ``STConv`` layers followed by a linear classification head.

    Args:
        node_features: Passed as the first argument of the first ``STConv``.
        num_classes: Output size of the final linear layer.

    NOTE(review): ``STConv`` is built here with three positional arguments.
    Its constructor looks like it expects more (number of nodes, in/hidden/out
    channels, temporal kernel size, Chebyshev order K) — confirm against the
    torch_geometric_temporal documentation before training.
    """

    def __init__(self, node_features, num_classes):
        # Zero-argument super() so the call cannot name the wrong class.
        super().__init__()
        self.temporal_1 = STConv(node_features, 32, 5)
        self.temporal_2 = STConv(32, 16, 5)
        self.linear = torch.nn.Linear(16, num_classes)

    def forward(self, x, edge_index, edge_weight):
        """Run both ``STConv`` layers with ReLU, then return log-probabilities.

        ``log_softmax`` is taken over dimension 1 of the linear layer's output.
        """
        x = self.temporal_1(x, edge_index, edge_weight)
        x = F.relu(x)
        x = self.temporal_2(x, edge_index, edge_weight)
        x = F.relu(x)
        x = self.linear(x)
        return F.log_softmax(x, dim=1)

In [None]:
# Instantiate the spatio-temporal model; STGCN is the class defined in this
# notebook (no class named RecurrentGCN exists here).
model = STGCN(node_features=1500, num_classes=2)

In [None]:
class LA_Traffic_Model(nn.Module):
    """Single-layer LSTM followed by a linear layer with two outputs.

    The LSTM is batch-first, reads 2 input features per step and has a hidden
    size of 11; the linear layer maps each hidden state to 2 values.
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=2,
            hidden_size=11,
            num_layers=1,
            batch_first=True,
        )
        self.linear = nn.Linear(in_features=11, out_features=2)

    def forward(self, x):
        """Apply the LSTM to ``x`` and project every output step to 2 values."""
        # TODO: needs work. Make x work with lstm and linear — turn the 50x30
        # into a single dimension? It needs to pack and unpack nicely.
        # TODO: document the expected shape of x.
        hidden_states = self.lstm(x)[0]  # discard the final (h_n, c_n) state
        return self.linear(hidden_states)

In [None]:
# Build the LSTM model together with everything needed to train it.
model = LA_Traffic_Model()
classes = (0, 1)
loss_fn = nn.CrossEntropyLoss()
train_loader = dt.DataLoader(dataset=train, batch_size=8, shuffle=True)
optimizer = optim.SGD(params=model.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Pull one batch from the loader to inspect it by hand.
# NOTE(review): H5dataset.__getitem__ returns a 1-tuple, so this two-way
# unpack looks like it will fail until the dataset also yields a target —
# confirm.
X_batch1, y_batch1 = next(iter(train_loader))

In [None]:
# Show the shape of the feature batch as it comes out of the loader.
X_batch1.shape

In [None]:
# Show the shape of the target batch as it comes out of the loader.
y_batch1.shape

In [None]:
# Show which device the feature batch lives on.
X_batch1.device

In [None]:
# Flatten each sample to a (16500, 2) sequence so the last axis matches the
# LSTM's input_size of 2. `torch` is the imported module name; `t` is not
# defined in this notebook.
X_batch1 = torch.reshape(X_batch1, (8, 16500, 2))

In [None]:
# Confirm the feature batch's shape after the reshape.
X_batch1.shape

In [None]:
# Reshape the target batch to (8, 1500, 2). `torch` is the imported module
# name; `t` is not defined in this notebook.
y_batch1 = torch.reshape(y_batch1, (8, 1500, 2))

In [None]:
# Confirm the target batch's shape after the reshape.
y_batch1.shape

In [None]:
# Move the model's parameters to the GPU, then run one forward pass on the
# hand-built batch (cast to float32 first) as a smoke test.
model = model.cuda()
y_pred = model(X_batch1.to(torch.float32))

In [None]:
# Training
model.train()
for X_batch, y_batch in train_loader:
    y_pred = model(X_batch)
    loss = loss_fn(y_pred, y_batch)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

In [None]:
# Validation
model.eval()
with torch.no_grad():
    y_pred = model(X_train)
    train_CEL = loss_fn(y_pred, y_train)
    y_pred = model(test[:,:,:,:,1:])
    test_CEL = loss_fn(y_pred, test[:,:,:,:,0])
print("Epoch %d: train CEL %.4f, test CEL %.4f" % (epoch, train_CEL, test_CEL))

In [None]:
# Force a garbage-collection pass to release unreferenced objects.
gc.collect()

In [None]:
# Full training run: optimise every epoch, evaluate every 100th epoch.
# NOTE(review): X_train and y_train are not defined in the visible notebook,
# and `test` is sliced like a tensor although it is an H5dataset — confirm.
n_epochs = 2000
for epoch in range(n_epochs):
    # Optimisation pass over all training batches.
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        loss.backward()
        optimizer.step()
    # Validation
    if epoch % 100 == 0:
        model.eval()
        with torch.no_grad():
            y_pred = model(X_train)
            train_CEL = loss_fn(y_pred, y_train)
            y_pred = model(test[:, :, :, :, 1:])
            test_CEL = loss_fn(y_pred, test[:, :, :, :, 0])
        print("Epoch %d: train CEL %.4f, test CEL %.4f" % (epoch, train_CEL, test_CEL))
        gc.collect()

### 6 Model2 <a id='6_Model2'></a>

In [None]:
class Model2(nn.Module):
    """Placeholder for the second candidate model (not implemented yet)."""

### 7 Model3 <a id='7_Model3'></a>

In [None]:
class Model3(nn.Module):
    """Placeholder for the third candidate model (not implemented yet)."""

### 8 Best Model <a id='8_Best_Model'></a>

In [None]:
class Model1(nn.Module):
    """Placeholder for the best-performing model (not implemented yet)."""

### Conclusion <a id='Conclusion'></a>

The final model...