In [None]:
import pickle
import numpy as np
import torch_geometric as tg
from scipy.sparse import csr_matrix 
from sklearn.model_selection import train_test_split
import torch
import matplotlib.pyplot as plt
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv, ChebConv, GATConv
from torch.nn import functional as F
from torch_geometric.nn import GCNConv, GAE, VGAE, GATConv, AGNNConv
from torch_geometric.utils import dropout_adj
import torch_geometric.transforms as T
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from collections import Counter
import torch.nn as nn
from torch_geometric_temporal.nn.recurrent import DCRNN, GConvGRU, GConvLSTM
from torch_geometric_temporal.data.splitter import discrete_train_test_split
from tqdm import tqdm

In [None]:
data_path = 'graph_data.pkl'
# NOTE(review): unpickling can execute arbitrary code, so this file must come
# from a trusted pipeline.
# The context manager closes the file handle, which the bare open() call
# previously left open until garbage collection.
with open(data_path, 'rb') as data_file:
    data_in = pickle.load(data_file)

In [None]:
# Open the .npz archive. allow_pickle=True lets NumPy unpickle object arrays,
# so the file must come from a trusted source.
# NOTE(review): `npzfile` is not read again in the cells visible here — confirm
# it is still needed.
npzfile = np.load('trade_savez_files.npz', allow_pickle = True)

In [None]:
# NOTE(review): `trade_edge_attr` is not assigned anywhere in the cells visible
# here, so this cell raises NameError after "Restart Kernel -> Run All".
# Presumably it was meant to come from `npzfile` — confirm and define it, or
# delete this cell.
trade_edge_attr.shape

In [None]:
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Device: '.ljust(32), device)
print('*' * 65)

# Channel sizes handed to GCNet as in_channels / out_channels.
infeat, outfeat = 1, 1

In [None]:
def make_graph_torch_data(graph_rawdata):
    """Attach a torch_geometric ``Data`` object to one raw graph snapshot.

    Parameters
    ----------
    graph_rawdata : dict
        Must provide ``'edges'``, a sequence whose items are indexed as
        ``(source, target, feature)``, and ``'nodes'``, a mapping whose values
        expose the ``'NetWeight'`` (node input) and ``'L1_soybean'`` (node
        target) keys.

    Returns
    -------
    dict
        The very same ``graph_rawdata`` object, mutated in place: a ``'torch'``
        entry holding the ``Data`` object moved to the notebook-level
        ``device`` is added (or overwritten).
    """
    edges = graph_rawdata['edges']
    # Assemble one (2, num_edges) array before converting: giving torch.tensor
    # a Python list of ndarrays takes a slow path and emits a UserWarning.
    endpoints = np.array([[edge[0] for edge in edges],
                          [edge[1] for edge in edges]])
    edge_index = torch.tensor(endpoints, dtype = torch.long)
    edge_attr = torch.tensor(np.array([edge[2] for edge in edges]), dtype = torch.float32)

    # One row per node, one column: input feature and regression target.
    node_records = list(graph_rawdata['nodes'].values())
    node_inputs = np.array([record['NetWeight'] for record in node_records])[:, np.newaxis]
    node_targets = np.array([record['L1_soybean'] for record in node_records])[:, np.newaxis]
    node_x = torch.tensor(node_inputs, dtype = torch.float32)
    node_y = torch.tensor(node_targets, dtype = torch.float32)

    torch_data = Data(x = node_x, y = node_y, edge_index = edge_index, edge_attr = edge_attr)
    # torch_data is a fresh local object, so no defensive clone is needed
    # before moving it to the target device.
    graph_rawdata['torch'] = torch_data.to(device)
    return graph_rawdata

In [None]:
# One entry per snapshot of data_in, in the dict's iteration order; each entry
# is the raw snapshot dict with its 'torch' Data object attached.
temporal_data = [make_graph_torch_data(graph) for graph in data_in.values()]

In [None]:
# Sanity check: show which device the tensors of the third snapshot live on.
temporal_data[2]['torch'].edge_index.device

In [None]:
class GCNet(nn.Module):
    """Three stacked GCNConv layers followed by a linear read-out.

    Self-loops are disabled on every convolution, and the ``edge_attr`` of the
    input object is forwarded to each convolution as its edge weight.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Hidden widths narrow 20 -> 5 -> 3 before the linear head.
        self.conv1 = GCNConv(in_channels, 20, add_self_loops=False)
        self.conv2 = GCNConv(20, 5, add_self_loops=False)
        self.conv3 = GCNConv(5, 3, add_self_loops=False)
        self.linear_out = nn.Linear(3, out_channels)

    def forward(self, data):
        """Run the network on ``data`` (needs ``x``, ``edge_index``, ``edge_attr``)."""
        edge_index, edge_weight = data.edge_index, data.edge_attr
        hidden = data.x
        # The first two convolutions are followed by ReLU and dropout (p=0.2,
        # active in training mode only); the third one feeds the head directly.
        for conv in (self.conv1, self.conv2):
            hidden = F.relu(conv(hidden, edge_index, edge_weight))
            hidden = F.dropout(hidden, p=0.2, training=self.training)
        hidden = self.conv3(hidden, edge_index, edge_weight)
        return self.linear_out(hidden)
# Instantiate the plain GCN baseline on the selected device.
# NOTE(review): `model` is rebound by a later cell
# (`model = model_results[i]['model_obj']`); this instance is not used in the
# cells visible here — confirm whether it is still needed.
model = GCNet(infeat, outfeat).to(device)

In [None]:
class RecurrentGCN_Small(torch.nn.Module):
    """One GConvGRU layer (16 hidden channels) followed by a linear head.

    Parameters
    ----------
    in_channels : int
        Width of the node-feature matrix given to the recurrent layer.
    out_channels : int
        Width of the prediction produced by the linear head.
    K : int
        Forwarded to ``GConvGRU`` as its third positional argument
        (presumably the Chebyshev filter size — confirm against the
        torch_geometric_temporal docs).
    dropout : float
        Dropout probability applied after the activation (training mode only).
    act_f : callable
        Activation applied to the recurrent layer's output; may be a plain
        function or an ``nn.Module``.
    """

    def __init__(self, in_channels, out_channels, K, dropout, act_f):
        # nn.Module must be initialised before any attribute is assigned;
        # otherwise passing an nn.Module activation raises AttributeError.
        super().__init__()
        self.dropout = dropout
        self.act_f = act_f
        self.recurrent_1 = GConvGRU(in_channels, 16, K)
        # Honour out_channels (the head width used to be hard-coded to 1).
        self.linear = torch.nn.Linear(16, out_channels)

    def forward(self, x, edge_index, edge_weight):
        """Return the head's output for one snapshot.

        No hidden state is passed to or kept from the recurrent layer, so this
        module carries nothing over between calls.
        """
        x = self.recurrent_1(x, edge_index, edge_weight)
        x = self.act_f(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        return self.linear(x)

In [None]:
class RecurrentGCN_Large(torch.nn.Module):
    """Five stacked GConvGRU layers (16-32-64-32-16) with a linear head.

    Parameters
    ----------
    in_channels : int
        Width of the node-feature matrix given to the first recurrent layer.
    out_channels : int
        Width of the prediction produced by the linear head.
    K : int
        Forwarded to every ``GConvGRU`` as its third positional argument
        (presumably the Chebyshev filter size — confirm against the
        torch_geometric_temporal docs).
    dropout : float
        Dropout probability applied after each activation (training mode only).
    act_f : callable
        Activation applied after every recurrent layer; may be a plain
        function or an ``nn.Module``.
    """

    def __init__(self, in_channels, out_channels, K, dropout, act_f):
        # nn.Module must be initialised before any attribute is assigned;
        # otherwise passing an nn.Module activation raises AttributeError.
        super().__init__()
        self.dropout = dropout
        self.act_f = act_f
        # Attribute names are kept as-is so saved state_dicts stay loadable.
        self.recurrent_1 = GConvGRU(in_channels, 16, K)
        self.recurrent_2 = GConvGRU(16, 32, K)
        self.recurrent_3 = GConvGRU(32, 64, K)
        self.recurrent_4 = GConvGRU(64, 32, K)
        self.recurrent_5 = GConvGRU(32, 16, K)
        # Honour out_channels (the head width used to be hard-coded to 1).
        self.linear = torch.nn.Linear(16, out_channels)

    def forward(self, x, edge_index, edge_weight):
        """Return the head's output for one snapshot.

        Every recurrent layer is called without a hidden state, so this module
        carries nothing over between calls.
        """
        stack = (
            self.recurrent_1,
            self.recurrent_2,
            self.recurrent_3,
            self.recurrent_4,
            self.recurrent_5,
        )
        # Same recipe after every layer: activation, then dropout.
        for layer in stack:
            x = layer(x, edge_index, edge_weight)
            x = self.act_f(x)
            x = F.dropout(x, p=self.dropout, training=self.training)
        return self.linear(x)

In [None]:
class LSTM_Mod_Small(torch.nn.Module):
    """One GConvLSTM layer (16 hidden channels) followed by a linear head.

    Parameters
    ----------
    in_channels : int
        Width of the node-feature matrix given to the recurrent layer.
    out_channels : int
        Width of the prediction produced by the linear head.
    K : int
        Forwarded to ``GConvLSTM`` as its third positional argument
        (presumably the Chebyshev filter size — confirm against the
        torch_geometric_temporal docs).
    dropout : float
        Dropout probability applied after the activation (training mode only).
    act_f : callable
        Activation applied to the hidden state; may be a plain function or an
        ``nn.Module``.
    """

    def __init__(self, in_channels, out_channels, K, dropout, act_f):
        # nn.Module must be initialised before any attribute is assigned;
        # otherwise passing an nn.Module activation raises AttributeError.
        super().__init__()
        self.dropout = dropout
        self.act_f = act_f
        self.recurrent_1 = GConvLSTM(in_channels, 16, K)
        self.linear = torch.nn.Linear(16, out_channels)

    def forward(self, x_in, edge_index, edge_weight):
        """Return the head's output for one snapshot.

        Only the first element of the layer's returned pair is used; neither
        element is kept, so nothing is carried over between calls.
        """
        h, _ = self.recurrent_1(x_in, edge_index, edge_weight)
        h = self.act_f(h)
        h = F.dropout(h, p=self.dropout, training=self.training)
        return self.linear(h)

In [None]:
class LSTM_Mod_Large(torch.nn.Module):
    """Five stacked GConvLSTM layers (16-32-64-32-16) with a linear head.

    Parameters
    ----------
    in_channels : int
        Width of the node-feature matrix given to the first recurrent layer.
    out_channels : int
        Width of the prediction produced by the linear head.
    K : int
        Forwarded to every ``GConvLSTM`` as its third positional argument
        (presumably the Chebyshev filter size — confirm against the
        torch_geometric_temporal docs).
    act_f : callable
        Activation applied after every recurrent layer; may be a plain
        function or an ``nn.Module``.
    dropout : float
        Dropout probability applied after each activation (training mode only).

    Notes
    -----
    ``act_f`` comes before ``dropout`` here, unlike the sibling model classes;
    the order is kept for backward compatibility, so prefer keyword arguments.
    """

    def __init__(self, in_channels, out_channels, K, act_f, dropout):
        # nn.Module must be initialised before any attribute is assigned;
        # otherwise passing an nn.Module activation raises AttributeError.
        super().__init__()
        self.dropout = dropout
        self.act_f = act_f
        # Attribute names are kept as-is so saved state_dicts stay loadable.
        self.recurrent_1 = GConvLSTM(in_channels, 16, K)
        self.recurrent_2 = GConvLSTM(16, 32, K)
        self.recurrent_3 = GConvLSTM(32, 64, K)
        self.recurrent_4 = GConvLSTM(64, 32, K)
        self.recurrent_5 = GConvLSTM(32, 16, K)
        self.linear = torch.nn.Linear(16, out_channels)

    def forward(self, x_in, edge_index, edge_weight):
        """Return the head's output for one snapshot.

        Only the first element of each layer's returned pair is propagated;
        nothing is kept, so no state is carried over between calls.
        """
        stack = (
            self.recurrent_1,
            self.recurrent_2,
            self.recurrent_3,
            self.recurrent_4,
            self.recurrent_5,
        )
        h = x_in
        # Same recipe after every layer: activation, then dropout.
        for layer in stack:
            h, _ = layer(h, edge_index, edge_weight)
            h = self.act_f(h)
            h = F.dropout(h, p=self.dropout, training=self.training)
        return self.linear(h)

In [None]:
import itertools

# Hyper-parameter search space. Every list is one axis of the grid.
K = [3, 5]
dropout = [0.1, 0, 0.25, 0.5]
activation_functions = [F.tanh, F.relu, F.leaky_relu, F.sigmoid]
lr = [0.1, 0.01, 1]
weight_decay = [0, 0.05]
epochs = [100]
# Negative slice positions into temporal_data: everything before the position
# is used for training, the remainder for testing.
split_location = [-6, -24, -1]

# Full Cartesian product; each tuple is ordered
# (dropout, K, act_f, lr, weight_decay, epochs, split_location).
model_options = list(itertools.product(dropout, K, activation_functions, lr, weight_decay, epochs, split_location))

In [None]:
# Split every grid tuple into (model constructor kwargs, training settings).
# Key order inside each dict is kept exactly as before.
model_param_options = [
    (
        dict(dropout=drop_p, K=cheb_k, act_f=activation),
        dict(lr=step_size, weight_decay=decay, epochs=n_epochs, split_location=split_at),
    )
    for drop_p, cheb_k, activation, step_size, decay, n_epochs, split_at in model_options
]

In [None]:
# Pair every hyper-parameter set with a fresh instance of each architecture.
# NOTE(review): all instances are created and moved to `device` up front, so
# memory grows with len(model_param_options) * 4.
model_exec_options = []
for model_param_i in model_param_options:
    # The class list previously contained a stray ", ," (a SyntaxError).
    # The loop variable is `model_cls` rather than `model` so it does not
    # overwrite the notebook-level `model`.
    for model_cls in [LSTM_Mod_Large, RecurrentGCN_Small, LSTM_Mod_Small, RecurrentGCN_Large]:
        model_exec_options.append((model_cls(in_channels = 1, out_channels = 1, **model_param_i[0]).to(device), model_param_i))

In [None]:
def _snapshot_rmse(model, dataset):
    """Return ``(rmse, last_y_hat)`` for ``model`` over a non-empty ``dataset``.

    ``rmse`` is the square root of the mean of the per-snapshot mean squared
    errors (a 0-dim tensor); ``last_y_hat`` is the prediction for the final
    snapshot in ``dataset``.
    """
    total = 0
    y_hat = None
    for snapshot in dataset:
        graph = snapshot['torch']
        y_hat = model(graph.x, graph.edge_index, graph.edge_attr)
        total = total + torch.mean((y_hat - graph.y) ** 2)
    return torch.sqrt(total / len(dataset)), y_hat


def model_execution(settings):
    """Train and evaluate one model configuration on ``temporal_data``.

    Parameters
    ----------
    settings : tuple
        ``(model, (model_params, fit_params))`` where ``fit_params`` provides
        the keys ``'lr'``, ``'weight_decay'``, ``'epochs'`` and
        ``'split_location'`` (slice position separating train from test
        snapshots in the notebook-level ``temporal_data``).

    Returns
    -------
    tuple
        ``(train_performance, test_performance, model, model_params)``; the
        two performance lists hold one RMSE float per epoch.

    Raises
    ------
    ValueError
        If ``split_location`` leaves the train or the test split empty.
    """
    model, (model_params, fit_params) = settings
    # Read the settings by key instead of relying on dict insertion order.
    lr = fit_params['lr']
    weight_decay = fit_params['weight_decay']
    epochs = fit_params['epochs']
    split_location = fit_params['split_location']
    print(model)
    print(model_params)
    print( lr, weight_decay, epochs, split_location)

    train_dataset, test_dataset = temporal_data[:split_location], temporal_data[split_location:]
    if not train_dataset or not test_dataset:
        raise ValueError(
            'split_location={} leaves {} train and {} test snapshots'.format(
                split_location, len(train_dataset), len(test_dataset)))

    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay = weight_decay)

    train_performance = []
    test_performance = []
    y_hat = None

    # Log roughly ten times per run; max() avoids a modulo-by-zero when
    # epochs < 10.
    log_every = max(1, epochs // 10)

    for epoch in tqdm(range(epochs)):
        # One full-batch step: the loss spans every training snapshot.
        model.train()
        cost, _ = _snapshot_rmse(model, train_dataset)
        cost.backward()
        optimizer.step()
        optimizer.zero_grad()
        train_performance.append(float(cost))

        # Evaluation needs no autograd graph.
        model.eval()
        with torch.no_grad():
            test_cost, y_hat = _snapshot_rmse(model, test_dataset)
        test_performance.append(float(test_cost))

        if epoch % log_every == 0:
            print('Epoch: {}           Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, train_performance[-1], test_performance[-1]))
        if epoch == epochs - 1:
            print('-'*65,'\nFinal epoch: {}     Train loss: {:.4f}   Test RMSE: {:.4f}'.format(epoch, train_performance[-1], test_performance[-1]))
    print('-'*65)
    # Prediction for the last test snapshot of the final epoch.
    print(y_hat)
    return (train_performance, test_performance, model, model_params)

In [None]:
# Train every prepared configuration and keep its settings and loss traces,
# keyed by its position in model_exec_options.
model_results = {}
for run_idx, run_settings in enumerate(model_exec_options):
    train_trace, test_trace, trained_model, arch_params = model_execution(run_settings)
    fit_params = run_settings[1][1]
    model_results[run_idx] = {
        'model' : run_settings[0],
        'lr' : fit_params['lr'],
        'weight_decay' : fit_params['weight_decay'],
        'epochs' : fit_params['epochs'],
        'split_location' : fit_params['split_location'],
        'train_loss_trace' : train_trace,
        'test_loss_trace' : test_trace,
        'model_obj' : trained_model,
        # state_dict() of the returned model object, taken right after training.
        'trained_model' : trained_model.state_dict(),
        'model_params' : arch_params
    }

In [None]:
# Loss curves of the run stored at index 0 of model_results. The curves are
# labelled so train and test can be told apart when the notebook is skimmed.
fig, ax = plt.subplots()
ax.plot(model_results[0]['test_loss_trace'], label='test RMSE')
ax.plot(model_results[0]['train_loss_trace'], label='train RMSE')
ax.set_xlabel('epoch')
ax.set_ylabel('RMSE')
ax.set_title('Loss traces for model_results[0]')
ax.legend();

In [None]:
# Show which fields are stored for each run.
model_results[0].keys()

In [None]:
import pandas as pd

In [None]:
# Export the per-epoch loss traces of the run at index 0.
# NOTE(review): index 0 is simply the first entry of model_results; nothing in
# the cells visible here ranks the runs, so treating it as "the best model"
# should be confirmed.
i = 0
selected = model_results[i]
loss_columns = {
    'test_rmse' : list(selected['test_loss_trace']),
    'train_rmse' : list(selected['train_loss_trace']),
}
# The positional index becomes an explicit 'epoch' column.
df = pd.DataFrame(loss_columns).reset_index().rename(columns = {'index' : 'epoch'})
print(i, df.test_rmse.min(), selected['model_params'], selected['lr'], selected['weight_decay'], selected['split_location'])
df.to_csv('model_train_performance.csv')

In [None]:
# Pick the run at index 0 and rebuild its train / test split of temporal_data.
i = 0
chosen_run = model_results[i]
model = chosen_run['model_obj']
split_at = chosen_run['split_location']
train_dataset = temporal_data[:split_at]
test_dataset = temporal_data[split_at:]

In [None]:
def _prediction_rows(model, dataset, predict_label):
    """Build long-format rows (actual + predicted value per node) for ``dataset``.

    Each snapshot contributes, per node index, one ``'actual'`` row and one row
    typed ``predict_label``; countries are looked up by node index in the
    snapshot's ``'country_dict'``.
    """
    rows = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for snapshot in dataset:
            graph = snapshot['torch']
            y_hat = model(graph.x, graph.edge_index, graph.edge_attr)
            actual = graph.y.cpu().numpy()
            predicted = y_hat.cpu().numpy()
            for node_idx in range(len(predicted)):
                country = snapshot['country_dict'][node_idx]
                rows.append({'country' : country, 'date' : snapshot['date'], 'val' : actual[node_idx][0], 'type' : 'actual'})
                rows.append({'country' : country, 'date' : snapshot['date'], 'val' : predicted[node_idx][0], 'type' : predict_label})
    return rows


# Make the inference mode explicit instead of relying on the state the
# training loop happened to leave the model in.
model.eval()
list_perf = (
    _prediction_rows(model, train_dataset, 'train-predict')
    + _prediction_rows(model, test_dataset, 'test-predict')
)
df_perf = pd.DataFrame(list_perf)
df_perf.to_csv('model_prediction.csv')

### Alternative Scenario

In [None]:
import copy

reporter = 'China'
partner = 'United States of America'
period_list = [201901,201902,201903,201904,201905,201906,201907,201908,201909,201910,201911,201912]


def _country_index(country_dict, name):
    """Return the first key of ``country_dict`` whose value equals ``name``.

    Raises KeyError with the missing name instead of an anonymous IndexError.
    """
    for idx, country in country_dict.items():
        if country == name:
            return idx
    raise KeyError('{!r} not found in country_dict'.format(name))


def _links(edge, node_a, node_b):
    """True when ``edge`` runs between ``node_a`` and ``node_b`` in either direction."""
    return ((edge[0] == node_a and edge[1] == node_b)
            or (edge[0] == node_b and edge[1] == node_a))


# Copy every snapshot; inside the scenario periods, drop the edges between
# reporter and partner (both directions).
mod_dataset = {}
for time, snapshot in enumerate(data_in.values()):
    if time == 0:
        # Node numbers are resolved once, from the first snapshot.
        # NOTE(review): assumes country_dict is identical in every snapshot — confirm.
        reporter_num = _country_index(snapshot['country_dict'], reporter)
        partner_num = _country_index(snapshot['country_dict'], partner)
    snapshot_mod = copy.deepcopy(snapshot)
    if snapshot['period'] in period_list:
        snapshot_mod['edges'] = [x for x in snapshot_mod['edges'] if not _links(x, reporter_num, partner_num)]
        if len(snapshot_mod['edges']) != len(snapshot['edges']):
            print('dropped edge',snapshot['period'])
        print('mod period',snapshot['period'])
    mod_dataset[time] = snapshot_mod
    
    

In [None]:
# Rebuild the 'torch' Data object of every modified snapshot so the edited
# edge lists take effect.
alt_data = [make_graph_torch_data(graph) for graph in mod_dataset.values()]

In [None]:
def _scenario_rows(model, dataset, periods, predict_label):
    """Build long-format rows for the snapshots of ``dataset`` whose period is in ``periods``.

    Per node index, one ``'actual'`` row and one row typed ``predict_label``
    are produced; predicted values are stored as scalars.
    """
    rows = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        for snapshot in dataset:
            if snapshot['period'] not in periods:
                continue
            graph = snapshot['torch']
            y_hat = model(graph.x, graph.edge_index, graph.edge_attr)
            actual = graph.y.cpu().numpy()
            # Take the first output column. The former `y_hat[:, time]` indexed
            # the output by snapshot position, which is out of range for a
            # single-column prediction once time >= 1.
            predicted = y_hat[:, 0].cpu().numpy()
            for node_idx in range(len(predicted)):
                country = snapshot['country_dict'][node_idx]
                rows.append({'country' : country, 'date' : snapshot['date'], 'val' : actual[node_idx][0], 'type' : 'actual'})
                rows.append({'country' : country, 'date' : snapshot['date'], 'val' : predicted[node_idx], 'type' : predict_label})
    return rows


model.eval()

# 'alt-predict' must be computed on the modified graphs (alt_data); it used to
# iterate the unmodified train_dataset, so the scenario never reached the model.
# NOTE(review): both passes now cover the full timelines (alt_data /
# temporal_data) filtered by period_list, so scenario periods that fall in the
# test split are included — confirm this is the intended coverage.
# 'actual' rows are still emitted once per pass, as before.
list_perf = (
    _scenario_rows(model, alt_data, period_list, 'alt-predict')
    + _scenario_rows(model, temporal_data, period_list, 'base-predict')
)

In [None]:
# Persist the scenario rows collected in list_perf as CSV.
df_alt = pd.DataFrame(list_perf)
df_alt.to_csv('model_prediction_scenario.csv')