In [None]:
import pickle
import numpy as np
import torch_geometric as tg
from scipy.sparse import csr_matrix 
from sklearn.model_selection import train_test_split
import torch
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, ChebConv, GATConv
from torch.nn import functional as F
from torch_geometric.nn import GCNConv, GAE, VGAE, GATConv, AGNNConv
from torch_geometric.utils import dropout_adj
import torch_geometric.transforms as T
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from collections import Counter
import torch.nn as nn
from torch_geometric_temporal.nn.recurrent import DCRNN, GConvGRU, GConvLSTM
from torch_geometric_temporal.data.splitter import discrete_train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt


In [None]:
# Load the pickled raw graph snapshots (consumed later via data_in.items() / .values()).
data_path = 'graph_data.pkl'
# A context manager closes the file handle deterministically instead of leaking it.
# NOTE(review): pickle.load can execute arbitrary code; only load files you trust.
with open(data_path, 'rb') as data_file:
    data_in = pickle.load(data_file)

In [None]:
# Open the saved trade arrays archive. `npzfile` is not referenced again in the cells shown here.
# NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can execute
# arbitrary code — only use it with trusted files.
npzfile = np.load('trade_savez_files.npz', allow_pickle = True)

In [None]:
# NOTE(review): `trade_edge_attr` is never assigned in the visible cells, so this raises
# NameError on a fresh kernel — presumably it was meant to be read from `npzfile`; confirm or delete.
trade_edge_attr.shape

In [None]:
# Select the compute device used by every `.to(device)` call below.
# NOTE(review): both branches of the conditional are 'cpu', so the CPU is always selected
# even when CUDA is available — confirm whether that is intentional.
device = torch.device('cpu' if torch.cuda.is_available() else 'cpu')
print('Device: '.ljust(32), device)
#print('Model Name: '.ljust(32), str(model_name.__name__))
#print('Model params:{:19} lr: {:.4f}     weight_decay: {:.4f}'.format('',lr, weight_decay))    
#print('Total number of epochs to run: '.ljust(32), epochs)
print('*' * 65)
# Input / output feature counts handed to the model constructors in later cells.
infeat = 1
outfeat = 1

In [None]:
def make_graph_torch_data(graph_rawdata):
    """Attach a torch_geometric ``Data`` object to one raw graph snapshot.

    Parameters
    ----------
    graph_rawdata : dict
        Must provide ``'edges'`` (an iterable of records whose items 0, 1 and 2 are
        read as source node, target node and edge feature) and ``'nodes'`` (a mapping
        whose values are dicts with ``'NetWeight'`` and ``'L1_soybean'`` entries).

    Returns
    -------
    dict
        The *same* ``graph_rawdata`` object, mutated in place: a ``'torch'`` key is
        added holding the ``Data`` object moved to the notebook-level ``device``.
    """
    edges = graph_rawdata['edges']
    sources = [edge[0] for edge in edges]
    targets = [edge[1] for edge in edges]
    edge_features = np.array([edge[2] for edge in edges])

    # Stack into ONE (2, num_edges) ndarray first: building a tensor from a Python
    # list of ndarrays is the slow path that PyTorch warns about.
    edge_index = torch.tensor(np.array([sources, targets]), dtype = torch.long)
    edge_attr = torch.tensor(edge_features, dtype = torch.float32)

    node_records = list(graph_rawdata['nodes'].values())
    # [:, np.newaxis] turns each per-node value into a single-column matrix.
    x = np.array([node['NetWeight'] for node in node_records])[:, np.newaxis]
    y = np.array([node['L1_soybean'] for node in node_records])[:, np.newaxis]
    node_x = torch.tensor(x, dtype = torch.float32)
    node_y = torch.tensor(y, dtype = torch.float32)

    torch_data = Data(x = node_x, y = node_y, edge_index = edge_index, edge_attr = edge_attr)
    # The tensors above are freshly created copies, so no extra clone() is needed.
    graph_rawdata['torch'] = torch_data.to(device)
    return graph_rawdata

In [None]:
# Convert every raw snapshot, in the iteration order of `data_in`, into its torch form.
temporal_data = [make_graph_torch_data(raw_snapshot) for raw_snapshot in data_in.values()]

In [None]:
# Sanity check: show which device the edge index of the third snapshot lives on.
temporal_data[2]['torch'].edge_index.device

In [None]:
# Hold out the last six snapshots for testing; everything before them is training data.
train_dataset = temporal_data[:-6]
test_dataset = temporal_data[-6:]

In [None]:
# n = len(y)
# test_size = int(n * 0.2)
# train_idx, test_idx = train_test_split(range(n), test_size=test_size, random_state=1651516)
# torch_data.train_idx = torch.tensor(train_idx, dtype = torch.long)
# torch_data.test_idx = torch.tensor(test_idx, dtype = torch.long)

In [None]:
def RMSELoss(yhat,y):
    """Return the root-mean-squared error between `yhat` and `y` as a scalar tensor."""
    squared_error = (yhat - y).pow(2)
    return squared_error.mean().sqrt()


In [None]:
class cheb_net(nn.Module):
    """One Chebyshev graph convolution followed by a linear read-out layer."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Single ChebConv (K = 5) producing one hidden channel per node.
        self.conv1 = ChebConv(in_channels, 1, K = 5)
        self.linear_out = nn.Linear(1, out_channels)

    def forward(self, x, edge_index, edge_weight):
        """Apply conv -> ReLU -> dropout(p = 0.2) -> linear and return the result."""
        hidden = self.conv1(x, edge_index, edge_weight)
        hidden = F.dropout(F.relu(hidden), p = 0.2, training = self.training)
        return self.linear_out(hidden)
# Instantiate the Chebyshev model with the notebook-level feature sizes and move it to `device`.
model = cheb_net(infeat, outfeat).to(device)

In [None]:
class GCNet(nn.Module):
    """Three stacked GCNConv layers (20 -> 5 -> 3 channels) with a linear read-out.

    `forward` takes a single graph object and reads `x`, `edge_index` and
    `edge_attr` from it; `edge_attr` is passed to the convolutions as edge weight.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, 20, add_self_loops = False)
        self.conv2 = GCNConv(20, 5, add_self_loops = False)
        self.conv3 = GCNConv(5, 3, add_self_loops = False)
        self.linear_out = nn.Linear(3, out_channels)

    def forward(self, data):
        edge_index, edge_weight = data.edge_index, data.edge_attr
        hidden = data.x
        # The first two convolutions are followed by ReLU and dropout(p = 0.2) ...
        for conv in (self.conv1, self.conv2):
            hidden = F.relu(conv(hidden, edge_index, edge_weight))
            hidden = F.dropout(hidden, p = 0.2, training = self.training)
        # ... the third feeds the linear head directly, with no activation.
        hidden = self.conv3(hidden, edge_index, edge_weight)
        return self.linear_out(hidden)
# Rebinds `model`: whatever instance the name held before is replaced by this GCNet.
model = GCNet(infeat, outfeat).to(device)

In [None]:
class RecurrentGCN(torch.nn.Module):
    """Three stacked GConvGRU layers (64 -> 32 -> 16 channels) with a linear head.

    Parameters
    ----------
    node_features : int
        Number of input features per node.
    dropout : float
        Dropout probability applied after every recurrent layer.
    """

    def __init__(self, node_features, dropout):
        super().__init__()
        self.dropout = dropout
        self.recurrent_1 = GConvGRU(node_features, 64, 10)
        self.recurrent_2 = GConvGRU(64, 32, 5)
        self.recurrent_3 = GConvGRU(32, 16, 5)
        self.linear = torch.nn.Linear(16, 1)

    def forward(self, x, edge_index, edge_weight):
        """Run one snapshot through the stack and return one value per node."""
        for layer in (self.recurrent_1, self.recurrent_2, self.recurrent_3):
            x = F.relu(layer(x, edge_index, edge_weight))
            # Use the configured probability; it was previously hard-coded to 0.3,
            # which silently ignored the constructor argument.
            x = F.dropout(x, p=self.dropout, training=self.training)
        return self.linear(x)

In [None]:
class RecurrentGCN_Small(torch.nn.Module):
    """A single GConvGRU layer (16 channels) followed by a linear head with one output."""

    def __init__(self, node_features, K, dropout):
        super().__init__()
        self.dropout = dropout
        self.recurrent_1 = GConvGRU(node_features, 16,K)
        self.linear = torch.nn.Linear(16, 1)

    def forward(self, x, edge_index, edge_weight):
        """GRU -> ReLU -> dropout -> linear for one snapshot."""
        hidden = F.relu(self.recurrent_1(x, edge_index, edge_weight))
        hidden = F.dropout(hidden, p=self.dropout, training=self.training)
        return self.linear(hidden)

In [None]:
class RecurrentGCN_Large(torch.nn.Module):
    """Five stacked GConvGRU layers (16 -> 32 -> 64 -> 32 -> 16) with a linear head."""

    def __init__(self, node_features, K, dropout):
        super().__init__()
        self.dropout = dropout
        self.recurrent_1 = GConvGRU(node_features, 16,K)
        self.recurrent_2 = GConvGRU(16, 32, K)
        self.recurrent_3 = GConvGRU(32, 64, K)
        self.recurrent_4 = GConvGRU(64, 32, K)
        self.recurrent_5 = GConvGRU(32, 16, K)
        self.linear = torch.nn.Linear(16, 1)

    def forward(self, x, edge_index, edge_weight):
        """Apply every recurrent layer, each followed by ReLU and dropout, then the head."""
        stack = (
            self.recurrent_1,
            self.recurrent_2,
            self.recurrent_3,
            self.recurrent_4,
            self.recurrent_5,
        )
        for layer in stack:
            x = F.relu(layer(x, edge_index, edge_weight))
            x = F.dropout(x, p=self.dropout, training=self.training)
        return self.linear(x)

In [None]:
class LSTM_Mod_Small(torch.nn.Module):
    """A single GConvLSTM layer (16 channels) followed by a linear head.

    Parameters
    ----------
    in_channels, out_channels : int
        Input features per node and size of the per-node output.
    K : int
        Passed as the third positional argument of ``GConvLSTM``.
    dropout : float
        Dropout probability applied to the hidden state before the head.
    """

    def __init__(self, in_channels, out_channels, K, dropout):
        super().__init__()
        self.dropout = dropout
        self.recurrent_1 =  GConvLSTM(in_channels, 16, K)
        self.linear = torch.nn.Linear(16, out_channels)

    def forward(self, x_in, edge_index, edge_weight):
        """Process one snapshot; the returned cell state is discarded."""
        h, _ = self.recurrent_1(x_in, edge_index, edge_weight)
        h = F.relu(h)
        h = F.dropout(h, p=self.dropout, training=self.training)
        # The head must consume the hidden state `h`; the previous code passed an
        # unassigned local `x`, which raised UnboundLocalError on every call.
        return self.linear(h)

In [None]:
class LSTM_Mod_Large(torch.nn.Module):
    """Five stacked GConvLSTM layers followed by a linear head.

    Parameters
    ----------
    in_channels, out_channels : int
        Input features per node and size of the per-node output.
    K : int
        Passed as the third positional argument of every ``GConvLSTM`` layer.
    dropout : float, optional
        Dropout probability applied after every recurrent layer. It is now an
        explicit argument; the constructor used to read a module-level name
        `dropout` that is not one of its parameters.
    """

    def __init__(self, in_channels, out_channels, K, dropout = 0.3):
        super().__init__()
        self.dropout = dropout
        # forward() uses recurrent_1 .. recurrent_5, but only recurrent_1 used to be
        # created. The widths mirror RecurrentGCN_Large (16 -> 32 -> 64 -> 32 -> 16);
        # the last one must be 16 to match the linear head.
        self.recurrent_1 =  GConvLSTM(in_channels, 16, K)
        self.recurrent_2 =  GConvLSTM(16, 32, K)
        self.recurrent_3 =  GConvLSTM(32, 64, K)
        self.recurrent_4 =  GConvLSTM(64, 32, K)
        self.recurrent_5 =  GConvLSTM(32, 16, K)
        self.linear = torch.nn.Linear(16, out_channels)

    def forward(self, x_in, edge_index, edge_weight):
        """Run one snapshot through all layers; each layer's cell state is discarded."""
        h = x_in
        stack = (
            self.recurrent_1,
            self.recurrent_2,
            self.recurrent_3,
            self.recurrent_4,
            self.recurrent_5,
        )
        for layer in stack:
            h, _ = layer(h, edge_index, edge_weight)
            h = F.relu(h)
            h = F.dropout(h, p=self.dropout, training=self.training)
        return self.linear(h)

In [None]:
class DCRNN_Mod(torch.nn.Module):
    """A single DCRNN layer (16 channels) followed by a linear head.

    Parameters
    ----------
    in_channels, out_channels : int
        Input features per node and size of the per-node output.
    K : int
        Passed as the third positional argument of ``DCRNN``.
    dropout : float, optional
        Dropout probability applied to the hidden state. It is now an explicit
        argument; the constructor used to read a module-level name `dropout`
        that is not one of its parameters.
    """

    def __init__(self, in_channels, out_channels, K, dropout = 0.3):
        super().__init__()
        self.recurrent_1 =  DCRNN(in_channels, 16, K)
        self.linear = torch.nn.Linear(16, out_channels)
        self.dropout = dropout

    def forward(self, x_in, edge_index, edge_weight):
        """DCRNN -> ReLU -> dropout -> linear for one snapshot."""
        h = F.relu(self.recurrent_1(x_in, edge_index, edge_weight))
        h = F.dropout(h, p=self.dropout, training=self.training)
        return self.linear(h)

In [None]:
class GCNet(nn.Module):
    """Three stacked GCNConv layers (20 -> 5 -> 3 channels) with a linear read-out.

    This definition takes a `dropout` probability and a
    `forward(x, edge_index, edge_weight)` signature; it replaces any earlier
    class bound to the name `GCNet`.
    """

    def __init__(self, in_channels, out_channels, dropout):
        super().__init__()
        self.dropout = dropout
        self.conv1 = GCNConv(in_channels, 20, add_self_loops = False)
        self.conv2 = GCNConv(20, 5, add_self_loops = False)
        self.conv3 = GCNConv(5, 3, add_self_loops = False)
        self.linear_out = nn.Linear(3, out_channels)

    def forward(self, x, edge_index, edge_weight):
        # The first two convolutions are followed by ReLU and dropout ...
        for conv in (self.conv1, self.conv2):
            x = F.relu(conv(x, edge_index, edge_weight))
            x = F.dropout(x, p = self.dropout, training = self.training)
        # ... the third feeds the linear head directly, with no activation.
        x = self.conv3(x, edge_index, edge_weight)
        return self.linear_out(x)

In [None]:
import itertools

# Candidate values for the `K` and `dropout` constructor arguments.
K = [5,3,2]
dropout = [0.1,0.3,0.5]
# Every (dropout, K) pair, with dropout varying slowest.
model_options = list(itertools.product(dropout, K))

In [None]:
# Display the option grid built so far.
model_options

In [None]:
# Hyper-parameter grid for the sweep.
lr = [10,1,0.5,0.1]
weight_decay = [0.5,0.1]
epochs = [100]

# Values tried for each recurrent model's `K` argument, and the dropout
# probability passed to every constructor that requires one.
model_K = [5, 3, 2]
model_dropout = 0.3

# The previous list omitted the required `dropout` argument, repeated
# RecurrentGCN_Small where RecurrentGCN_Large was intended, and passed `K` to
# GCNet, which has no such parameter.
models = (
    [LSTM_Mod_Small(in_channels = 1, out_channels = 1, K = k, dropout = model_dropout).to(device) for k in model_K]
    # LSTM_Mod_Large is constructed without an explicit dropout argument.
    + [LSTM_Mod_Large(in_channels = 1, out_channels = 1, K = k).to(device) for k in model_K]
    + [RecurrentGCN_Small(node_features = 1, K = k, dropout = model_dropout).to(device) for k in model_K]
    + [RecurrentGCN_Large(node_features = 1, K = k, dropout = model_dropout).to(device) for k in model_K]
    # GCNet has no K to vary, so a single instance is enough.
    + [GCNet(in_channels = 1, out_channels = 1, dropout = model_dropout).to(device)]
)
# Slice positions used to split the snapshot list into train / test parts.
split_location = [-6, -1, -24]

In [None]:
import itertools

# Full sweep: every (model, lr, weight_decay, epochs, split_location) combination.
model_options = list(itertools.product(models, lr, weight_decay, epochs, split_location))

In [None]:
import random

# Shuffle both lists in place with a dedicated, seeded generator so that
# Restart & Run All reproduces the same order (the bare random.shuffle calls
# gave a different order on every run).
# NOTE(review): shuffling destroys the chronological order of the snapshots,
# which later cells feed to recurrent models as a sequence — confirm this is intended.
shuffle_rng = random.Random(0)
shuffle_rng.shuffle(train_dataset)
shuffle_rng.shuffle(test_dataset)

In [None]:
# Reverse both snapshot lists in place.
# NOTE(review): this mutates the lists, so re-running the cell flips the order back —
# the result depends on how many times it has been executed.
train_dataset.reverse()
test_dataset.reverse()

In [None]:
def model_execution(settings):
    """Train and evaluate one model for one hyper-parameter setting.

    Parameters
    ----------
    settings : tuple
        ``(model, lr, weight_decay, epochs, split_location)``. ``model`` is used as
        an untrained template; ``split_location`` slices the notebook-level
        ``temporal_data`` into ``[:split_location]`` (train) and
        ``[split_location:]`` (test).

    Returns
    -------
    tuple
        ``(train_performance, test_performance, model)``: two lists holding one
        float per epoch (mean of the per-snapshot MSE) and the trained model.

    Raises
    ------
    ValueError
        If the split leaves the train or the test part empty.
    """
    import copy  # local import keeps this cell self-contained

    model, lr, weight_decay, epochs, split_location = settings
    print( lr, weight_decay, epochs, split_location)
    train_dataset, test_dataset = temporal_data[:split_location], temporal_data[split_location:]
    if len(train_dataset) == 0 or len(test_dataset) == 0:
        raise ValueError('split_location {} leaves an empty train or test set'.format(split_location))

    # Train a private copy: the same model object appears in many settings, so
    # training it directly would start every later run from already-trained weights.
    model = copy.deepcopy(model)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay = weight_decay)

    train_performance = []
    test_performance = []
    # max(1, ...) avoids a modulo-by-zero when fewer than five epochs are requested.
    report_every = max(1, epochs // 5)

    for epoch in tqdm(range(epochs)):
        model.train()
        optimizer.zero_grad()
        cost = 0
        for snapshot in train_dataset:
            graph = snapshot['torch']
            y_hat = model(graph.x, graph.edge_index, graph.edge_attr)
            cost = cost + torch.mean((y_hat - graph.y)**2)
        cost = cost / len(train_dataset)
        cost.backward()
        optimizer.step()
        train_performance.append(float(cost))

        model.eval()
        test_cost = 0
        # No gradients are needed for evaluation.
        with torch.no_grad():
            for snapshot in test_dataset:
                graph = snapshot['torch']
                y_hat = model(graph.x, graph.edge_index, graph.edge_attr)
                test_cost = test_cost + torch.mean((y_hat - graph.y)**2)
        test_cost = test_cost / len(test_dataset)
        # Record the TEST cost here (the train cost was appended by mistake before).
        test_performance.append(float(test_cost))

        # NOTE(review): the label says RMSE but the printed value is a mean squared error.
        if epoch % report_every == 0:
            print('Epoch: {}           Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, cost, test_cost))
        if (epoch == epochs - 1):
            print('-'*65,'\nFinal epoch: {}     Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, cost, test_cost))
    print('-'*65)

    return (train_performance, test_performance, model)

In [None]:
# Field names for one sweep entry: the five settings followed by the three results.
RESULT_FIELDS = (
    'model', 'lr', 'weight_decay', 'epochs', 'split_location',
    'train_loss_trace', 'test_loss_trace', 'trained_model',
)

# Run every setting and store its settings and outcome under its position in the sweep.
model_results = {}
for i, element in enumerate(model_options):
    results = model_execution(element)
    values = tuple(element[:5]) + tuple(results[:3])
    model_results[i] = dict(zip(RESULT_FIELDS, values))

In [None]:
import pickle

# Persist the sweep results; the context manager flushes and closes the file.
with open('model_results.pkl', 'wb') as results_file:
    pickle.dump(model_results, results_file)

In [None]:
class LSTM_Mod_Small(torch.nn.Module):
    """Two parallel GConvLSTM branches (24 channels each) unrolled over a snapshot list.

    `forward` takes a list of `(x, edge_index, edge_weight)` triples, carries each
    branch's hidden and cell state from one step to the next, and returns the
    per-step outputs concatenated along dimension 1.
    """

    def __init__(self, in_channels, out_channels, K, dropout):
        super().__init__()
        self.dropout = dropout
        self.recurrent_1a =  GConvLSTM(in_channels, 24, K)
        self.recurrent_1b =  GConvLSTM(in_channels, 24, K)
        self.linear1 = torch.nn.Linear(48,12)
        self.linear2 = torch.nn.Linear(12, out_channels)

    def forward(self, snapshot_list):
        # Both branches receive None as state on the first step.
        ha = ca = hb = cb = None
        for i, (x_in, edge_index, edge_weight) in enumerate(snapshot_list):
            ha, ca = self.recurrent_1a(x_in, edge_index, edge_weight, ha, ca)
            hb, cb = self.recurrent_1b(x_in, edge_index, edge_weight, hb, cb)

            # ReLU + dropout on each branch, branch a first, then join the two.
            branches = [
                F.dropout(F.relu(state), p=self.dropout, training=self.training)
                for state in (ha, hb)
            ]
            x = F.relu(self.linear1(torch.cat(branches, 1)))
            x = F.dropout(x, p=self.dropout, training=self.training)
            x = self.linear2(x)
            x_out = x if i == 0 else torch.cat((x_out, x),1)
        return x_out

In [None]:
class LSTM_Mod_Medium(torch.nn.Module):
    """Two parallel GConvLSTM branches (24 channels each) unrolled over a snapshot list.

    Parameters
    ----------
    in_channels, out_channels : int
        Input features per node and size of the per-node, per-step output.
    K : int
        Passed as the third positional argument of both ``GConvLSTM`` branches.
    dropout : float
        Dropout probability used after each branch and after the first linear layer.
    """

    def __init__(self, in_channels, out_channels, K, dropout):
        # Initialise via this class. The previous super(LSTM_Mod_Small, self) call
        # named a different class, which fails because `self` is not an instance of it.
        super().__init__()
        self.dropout = dropout
        self.recurrent_1a =  GConvLSTM(in_channels, 24, K)
        self.recurrent_1b =  GConvLSTM(in_channels, 24, K)
        self.linear1 = torch.nn.Linear(48,12)
        self.linear2 = torch.nn.Linear(12, out_channels)

    def forward(self, snapshot_list):
        """Return the per-step outputs concatenated along dimension 1.

        `snapshot_list` is a sequence of `(x, edge_index, edge_weight)` triples.
        Raises ValueError when it is empty.
        """
        if len(snapshot_list) == 0:
            raise ValueError('snapshot_list must contain at least one snapshot')

        ha = ca = hb = cb = None  # None state on the first step
        outputs = []
        for x_in, edge_index, edge_weight in snapshot_list:
            ha, ca = self.recurrent_1a(x_in, edge_index, edge_weight, ha, ca)
            hb, cb = self.recurrent_1b(x_in, edge_index, edge_weight, hb, cb)

            xa = F.dropout(F.relu(ha), p=self.dropout, training=self.training)
            xb = F.dropout(F.relu(hb), p=self.dropout, training=self.training)
            x = F.relu(self.linear1(torch.cat((xa,xb),1)))
            x = F.dropout(x, p=self.dropout, training=self.training)
            outputs.append(self.linear2(x))
        # One concatenation at the end instead of re-copying the result every step.
        return torch.cat(outputs, 1)

In [None]:
class LSTM_Mod_Small(torch.nn.Module):
    """One GConvLSTM branch (24 channels) with a two-layer head, unrolled over a snapshot list.

    Parameters
    ----------
    in_channels, out_channels : int
        Input features per node and size of the per-node, per-step output.
    K : int
        Passed as the third positional argument of ``GConvLSTM``.
    dropout : float
        Dropout probability used after the recurrent layer and after the first linear layer.
    """

    def __init__(self, in_channels, out_channels, K, dropout):
        super().__init__()
        self.dropout = dropout
        self.recurrent_1a =  GConvLSTM(in_channels, 24, K)
        # The head now matches the 24-wide hidden state: 24 -> 12 -> out_channels.
        # Before, the 24-wide state went straight into linear2 (which expects 12
        # inputs), a shape error, while a second branch and linear1 were created
        # but never used.
        self.linear1 = torch.nn.Linear(24,12)
        self.linear2 = torch.nn.Linear(12, out_channels)

    def forward(self, snapshot_list):
        """Return the per-step outputs concatenated along dimension 1.

        `snapshot_list` is a sequence of `(x, edge_index, edge_weight)` triples.
        Raises ValueError when it is empty.
        """
        if len(snapshot_list) == 0:
            raise ValueError('snapshot_list must contain at least one snapshot')

        h = c = None  # None state on the first step
        outputs = []
        for x_in, edge_index, edge_weight in snapshot_list:
            h, c = self.recurrent_1a(x_in, edge_index, edge_weight, h, c)

            x = F.relu(h)
            x = F.dropout(x, p=self.dropout, training=self.training)
            x = F.relu(self.linear1(x))
            x = F.dropout(x, p=self.dropout, training=self.training)
            outputs.append(self.linear2(x))
        return torch.cat(outputs, 1)

In [None]:
class LSTM_Mod_Small(torch.nn.Module):
    """One GConvLSTM layer (128 channels) with a linear head, unrolled over a snapshot list.

    `forward` takes a list of `(x, edge_index, edge_weight)` triples, carries the
    hidden and cell state from one step to the next, and returns the per-step
    outputs concatenated along dimension 1.
    """

    def __init__(self, in_channels, out_channels, K, dropout):
        super().__init__()
        self.dropout = dropout
        self.recurrent_1 =  GConvLSTM(in_channels, 128, K)
        self.linear = torch.nn.Linear(128, out_channels)

    def forward(self, snapshot_list):
        # The layer receives None as state on the first step.
        h = c = None
        for i, (x_in, edge_index, edge_weight) in enumerate(snapshot_list):
            h, c = self.recurrent_1(x_in, edge_index, edge_weight, h, c)

            activated = F.dropout(F.relu(h), p=self.dropout, training=self.training)
            x = self.linear(activated)
            x_out = x if i == 0 else torch.cat((x_out, x),1)
        return x_out

In [None]:
class RecurrentGCN_Small(torch.nn.Module):
    """One GConvGRU layer (16 channels) with a linear head, unrolled over a snapshot list.

    Parameters
    ----------
    in_channels, out_channels : int
        Input features per node and size of the per-node, per-step output.
    K : int
        Passed as the third positional argument of ``GConvGRU``.
    dropout : float
        Dropout probability applied to the hidden state before the head.
    """

    def __init__(self, in_channels, out_channels, K, dropout):
        super().__init__()
        self.dropout = dropout
        # `K` and `out_channels` are honoured now; they used to be ignored in favour
        # of hard-coded values. Two further GRU layers that forward() never called
        # are no longer created.
        self.recurrent_1 = GConvGRU(in_channels, 16, K)
        self.linear = torch.nn.Linear(16, out_channels)

    def forward(self, snapshot_list):
        """Return the per-step outputs concatenated along dimension 1.

        `snapshot_list` is a sequence of `(x, edge_index, edge_weight)` triples.
        Raises ValueError when it is empty.
        """
        if len(snapshot_list) == 0:
            raise ValueError('snapshot_list must contain at least one snapshot')

        h = None  # None state on the first step
        outputs = []
        for x_in, edge_index, edge_weight in snapshot_list:
            h = self.recurrent_1(x_in, edge_index, edge_weight, h)
            x = F.dropout(F.relu(h), p=self.dropout, training=self.training)
            outputs.append(self.linear(x))
        return torch.cat(outputs, 1)

In [None]:
# Stack the per-snapshot targets of the training set side by side (one column block per snapshot).
for i, snapshot in enumerate(train_dataset):
    target = snapshot['torch'].y
    y_out = target if i == 0 else torch.cat((y_out, target),1)
    

In [None]:
# Display whatever `model` currently refers to; a new model is only built in the next cell.
model

In [None]:
from tqdm import tqdm

# Build a fresh sequence model from the most recently defined LSTM_Mod_Small class.
model = LSTM_Mod_Small(in_channels = 1,out_channels = 1, K = 3, dropout = 0.15).to(device)

# Stack the per-snapshot targets of the test set side by side.
for i, snapshot in enumerate(test_dataset):
    target = snapshot['torch'].y
    y_test = target if i == 0 else torch.cat((y_test, target),1)

In [None]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.08, weight_decay = 0.01)
epochs = 500
train_performance = []
test_performance = []

# Model inputs and targets do not change between epochs, so build them once.
train_inputs = [[s['torch'].x, s['torch'].edge_index, s['torch'].edge_attr] for s in train_dataset]
test_inputs = [[s['torch'].x, s['torch'].edge_index, s['torch'].edge_attr] for s in test_dataset]
# Targets come straight from the datasets rather than from the `y_out` / `y_test`
# globals, which other cells rebind.
train_targets = torch.cat([s['torch'].y for s in train_dataset], 1)
test_targets = torch.cat([s['torch'].y for s in test_dataset], 1)
size_test = len(test_dataset)
# max(1, ...) avoids a modulo-by-zero when fewer than 100 epochs are requested.
report_every = max(1, epochs // 100)

for epoch in tqdm(range(epochs)):
    model.train()
    optimizer.zero_grad()
    y_hat = model(train_inputs)
    cost = torch.sqrt(torch.mean((y_hat - train_targets)**2))
    cost.backward()
    optimizer.step()
    # Store a detached scalar so the trace does not keep every epoch's autograd graph alive.
    train_performance.append(cost.detach())

    model.eval()
    # Unroll over train + test so the recurrent state is warmed up, then score
    # only the trailing test columns. No gradients are needed here.
    with torch.no_grad():
        y_hat = model(train_inputs + test_inputs)
        y_hat = y_hat[:,-size_test:]
        test_cost = torch.sqrt(torch.mean((y_hat - test_targets)**2))
    test_performance.append(test_cost)

    if epoch % report_every == 0:
        print('Epoch: {}           Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, cost, test_cost))
    if (epoch == epochs - 1):
        print('-'*65,'\nFinal epoch: {}     Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, cost, test_cost))
print('-'*65)

In [None]:
# Convert to plain floats first: the traces may hold tensors that still require
# grad, which matplotlib cannot convert to arrays.
fig, ax = plt.subplots()
ax.plot([float(v) for v in test_performance], label = 'test')
ax.plot([float(v) for v in train_performance], label = 'train')
ax.set(xlabel = 'epoch', ylabel = 'RMSE', title = 'Training and test RMSE per epoch')
ax.legend();

In [None]:
import pickle

# Bundle the model object, its loss traces and its weights for later inspection.
model_save = {
        'model' : model,
        'train_loss_trace' : train_performance,
        'test_loss_trace' : test_performance,
        'trained_model' : model.state_dict(),
    }
# The context manager flushes and closes the file instead of leaking the handle.
with open('model_lstm_recurrent.pkl', 'wb') as save_file:
    pickle.dump(model_save, save_file)

In [None]:
import pandas as pd

# One row per epoch: the positional index becomes the `epoch` column.
test_rmse = [x.detach().numpy() for x in test_performance]
train_rmse = [x.detach().numpy() for x in train_performance]
df = pd.DataFrame({'test_rmse' : test_rmse, 'train_rmse' : train_rmse})
df = df.reset_index().rename(columns = {'index' : 'epoch'})

df.to_csv('model_train_performance-gc-lstm.csv')

In [None]:
# Collect actual vs. predicted values, one pair of rows per (snapshot, node).
list_perf = []
model.eval()

# Training period: predictions come from unrolling the model over the training snapshots only.
train_inputs = [[s['torch'].x, s['torch'].edge_index, s['torch'].edge_attr] for s in train_dataset]
y_hat = model(train_inputs)
for time, snapshot in enumerate(train_dataset):
    y = snapshot['torch'].y.cpu().detach().numpy()
    last_prediction = y_hat[:,time].cpu().detach().numpy()
    for i, val in enumerate(last_prediction):
        row_id = {'country' : snapshot['country_dict'][i], 'date' : snapshot['date']}
        list_perf.append({**row_id, 'val' : y[i][0], 'type' : 'actual'})
        list_perf.append({**row_id, 'val' : val, 'type' : 'train-predict'})

# Test period: unroll over train + test and read the columns that follow the training ones.
full_inputs = [[s['torch'].x, s['torch'].edge_index, s['torch'].edge_attr] for s in train_dataset + test_dataset]
y_hat = model(full_inputs)
offset = len(train_dataset)
for time, snapshot in enumerate(test_dataset):
    y = snapshot['torch'].y.cpu().detach().numpy()
    last_prediction = y_hat[:,time + offset].cpu().detach().numpy()
    for i, val in enumerate(last_prediction):
        row_id = {'country' : snapshot['country_dict'][i], 'date' : snapshot['date']}
        list_perf.append({**row_id, 'val' : y[i][0], 'type' : 'actual'})
        list_perf.append({**row_id, 'val' : val, 'type' : 'test-predict'})

In [None]:
import pandas as pd

# Tabulate the collected rows and show the ones for the country labelled 'USA'.
df_perf = pd.DataFrame(list_perf)
df_perf.loc[df_perf['country'] == 'USA']

In [None]:
# plotly.express is imported here for the plotting cell that follows.
import plotly.express as px
# Write the actual / predicted table to disk (the DataFrame index is included as the first column).
df_perf.to_csv('model_prediction-s-gc-lstm.csv')

In [None]:
# Colour by country and dash by row type: with colour alone, the actual and
# predicted values of a country were joined into a single zig-zag line.
fig = px.line(df_perf, x = 'date', y = 'val', color = 'country', line_dash = 'type')
fig.write_html('plot4.html')

In [None]:
# Checks whether the `snapshot` left over from an earlier loop falls in `period_list`.
# NOTE(review): `period_list` is only assigned in the next cell, so on a fresh
# top-to-bottom run this raises NameError — move or delete this probe.
snapshot['period'] in period_list

In [None]:
import copy

# Scenario definition: remove the direct edges between these two countries,
# in both directions, for every snapshot whose period is listed below.
reporter = 'China'
partner = 'United States of America'
period_list = [201901,201902,201903,201904,201905,201906,201907,201908,201909,201910,201911,201912]

mod_dataset = {}
for time, snapshot in enumerate(data_in.values()):
    if time == 0:
        # Resolve the node numbers once, from the first snapshot's country dictionary.
        reporter_num = [k for k,v in snapshot['country_dict'].items() if v == reporter][0]
        partner_num = [k for k,v in snapshot['country_dict'].items() if v == partner][0]
    # Work on a deep copy so the raw snapshot in `data_in` is left untouched.
    snapshot_mod = copy.deepcopy(snapshot)
    if snapshot['period'] in period_list:
        kept_edges = []
        for edge in snapshot_mod['edges']:
            src, dst = edge[0], edge[1]
            if (src == partner_num and dst == reporter_num) or (src == reporter_num and dst == partner_num):
                continue
            kept_edges.append(edge)
        snapshot_mod['edges'] = kept_edges
        if len(snapshot_mod['edges']) != len(snapshot['edges']):
            print('dropped edge',snapshot['period'])
        print('mod period',snapshot['period'])
    mod_dataset[time] = snapshot_mod
    
    

In [None]:
# Build the torch form of every modified snapshot, in insertion order of `mod_dataset`.
alt_data = [make_graph_torch_data(snapshot_mod) for snapshot_mod in mod_dataset.values()]

In [None]:
def _scenario_rows(snapshots, predict_label):
    """Run `model` over `snapshots` and return actual / predicted rows.

    Only snapshots whose 'period' is in `period_list` contribute rows. Column
    `time` of the model output belongs to `snapshots[time]`, so rows are read by
    iterating over the very list that produced the predictions.
    """
    with torch.no_grad():  # inference only
        y_hat = model([[s['torch'].x, s['torch'].edge_index, s['torch'].edge_attr] for s in snapshots])
    rows = []
    for time, snapshot in enumerate(snapshots):
        if snapshot['period'] not in period_list:
            continue
        y = snapshot['torch'].y.cpu().detach().numpy()
        prediction = y_hat[:,time].cpu().detach().numpy()
        for i, val in enumerate(prediction):
            rows.append({'country' : snapshot['country_dict'][i], 'date' : snapshot['date'], 'val' : y[i][0], 'type' : 'actual'})
            rows.append({'country' : snapshot['country_dict'][i], 'date' : snapshot['date'], 'val' : val, 'type' : predict_label})
    return rows


# The previous loops indexed the prediction columns through `train_dataset`,
# which earlier cells shuffle and reverse in place, so columns and snapshots
# could be mismatched.
# NOTE(review): every snapshot in `period_list` is now reported, including any
# that fall in the held-out tail — confirm that is the intended scope.
list_perf = []
model.eval()
list_perf += _scenario_rows(alt_data, 'alt-predict')
list_perf += _scenario_rows(temporal_data, 'base-predict')

In [None]:
# Tabulate the scenario rows collected in `list_perf` and write them to disk
# (the DataFrame index is included as the first column).
df_alt = pd.DataFrame(list_perf)
df_alt.to_csv('model_prediction_scenario.csv')

In [None]:
class LSTM_Mod_Small(torch.nn.Module):
    """One GConvLSTM layer (64 channels) unrolled over a snapshot list; predicts the last step only.

    Parameters
    ----------
    in_channels, out_channels : int
        Input features per node and size of the per-node output.
    K : int
        Passed as the third positional argument of ``GConvLSTM``.
    dropout : float
        Dropout probability applied to the final hidden state before the head.
    """

    def __init__(self, in_channels, out_channels, K, dropout):
        super().__init__()
        self.dropout = dropout
        self.recurrent_1 =  GConvLSTM(in_channels, 64, K)
        self.linear = torch.nn.Linear(64, out_channels)

    def forward(self, snapshot_list):
        """Return the prediction for the last snapshot of `snapshot_list`.

        `snapshot_list` is a sequence of `(x, edge_index, edge_weight)` triples.
        Raises ValueError when it is empty.
        """
        if len(snapshot_list) == 0:
            raise ValueError('snapshot_list must contain at least one snapshot')

        h = c = None  # None state on the first step
        for x_in, edge_index, edge_weight in snapshot_list:
            h, c = self.recurrent_1(x_in, edge_index, edge_weight, h, c)

        # Only the final step is returned, so the head is evaluated once here
        # instead of on every step with all but the last result discarded.
        # torch.sigmoid computes the same function as F.sigmoid.
        x = torch.sigmoid(h)
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.linear(x)

In [None]:
from tqdm import tqdm

model = LSTM_Mod_Small(in_channels = 1,out_channels = 1, K = 3, dropout = 0.1).to(device)

# Stack the per-snapshot targets of the test set side by side.
for i, snapshot in enumerate(test_dataset):
    if i == 0:
        y_test = snapshot['torch'].y
    else:
        y_test = torch.cat((y_test, snapshot['torch'].y),1)

optimizer = torch.optim.Adam(model.parameters(), lr=0.08, weight_decay = 0.01)
epochs = 100
train_performance = []
test_performance = []

# Model inputs do not change between epochs, so build them once.
train_inputs = [[s['torch'].x, s['torch'].edge_index, s['torch'].edge_attr] for s in train_dataset]
test_inputs = [[s['torch'].x, s['torch'].edge_index, s['torch'].edge_attr] for s in test_dataset]
size_test = len(test_dataset)
# max(1, ...) avoids a modulo-by-zero when fewer than 100 epochs are requested.
report_every = max(1, epochs // 100)

for epoch in tqdm(range(epochs)):
    model.train()
    optimizer.zero_grad()
    cost = 0
    # The model is called on the growing prefix ending at each snapshot and its
    # output is scored against that snapshot's target; this re-runs every prefix
    # each epoch.
    for time, snapshot in enumerate(train_dataset):
        y_hat = model(train_inputs[:time+1])
        cost = cost + torch.sqrt(torch.mean((y_hat - snapshot['torch'].y)**2))
    cost = cost / len(train_dataset)
    cost.backward()
    optimizer.step()
    # Store a detached scalar so the trace does not keep every epoch's autograd graph alive.
    train_performance.append(cost.detach())

    model.eval()
    test_cost = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for time, snapshot in enumerate(test_dataset):
            y_hat = model(train_inputs + test_inputs[:time+1])
            test_cost = test_cost + torch.sqrt(torch.mean((y_hat - snapshot['torch'].y)**2))
    test_cost = test_cost / len(test_dataset)
    test_performance.append(test_cost)

    if epoch % report_every == 0:
        print('Epoch: {}           Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, cost, test_cost))
    if (epoch == epochs - 1):
        print('-'*65,'\nFinal epoch: {}     Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, cost, test_cost))
print('-'*65)

In [None]:
# Show (up to) the last six columns of the most recent `y_hat`, a variable left
# over from whichever loop assigned it last.
y_hat[:,-6:]

In [None]:
# Rebind `y_out` to the side-by-side targets of the TEST set.
for i, snapshot in enumerate(test_dataset):
    target = snapshot['torch'].y
    y_out = target if i == 0 else torch.cat((y_out, target),1)
    

In [None]:
import pandas as pd

In [None]:
# Rebuild `df_perf` from the current contents of `list_perf`, replacing the earlier frame of that name.
df_perf = pd.DataFrame(list_perf)

In [None]:
# Single-graph training loop with train / test node indices.
# NOTE(review): this cell relies on names the visible cells do not provide in a
# usable form — `data` (with `train_idx` / `test_idx`) is never defined, `lr` and
# `weight_decay` are lists after the sweep cell, and `model` must accept a single
# graph argument. Confirm the intended inputs before running.
optimizer = torch.optim.Adam(model.parameters(), lr = lr, weight_decay = weight_decay)
train_losses = []
accs = []
# max(1, ...) avoids a modulo-by-zero when fewer than ten epochs are requested.
report_every = max(1, epochs // 10)
for epoch in range(1, epochs + 1):
    # Switch back to training mode every epoch; calling train() once before the
    # loop left the model in eval mode (dropout disabled) after the first evaluation.
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.mse_loss(out[data.train_idx], data.y[data.train_idx])
    train_losses.append(loss.item())
    loss.backward()
    optimizer.step()

    model.eval()
    # No gradients are needed for evaluation; keep a plain float in the trace.
    with torch.no_grad():
        pred = model(data)
        RMSE = RMSELoss(pred[data.test_idx], data.y[data.test_idx]).item()

    accs.append(RMSE)
    if epoch % report_every == 0:
        print('Epoch: {}           Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, loss, RMSE))
    if (epoch == epochs):
        print('-'*65,'\nFinal epoch: {}     Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, loss, RMSE))
print('-'*65)
# "Best" is the epoch with the lowest test RMSE.
best_index = accs.index(min(accs))
print('\033[1mBest Accuracy\nEpoch: {}     Train loss: {:.4f}   Test RMSE: {:.4f}\n'
      .format(best_index + 1, train_losses[best_index], accs[best_index]))