# In [None]:  (notebook cell marker left over from the export)
import dgl
import dgl.function as fn
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
from torch.utils.data import Dataset, DataLoader
import glob
import json
from tqdm.notebook import tqdm
import torch.optim as optim
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import time
from torch.optim import lr_scheduler
import os
import copy

# Dataset locations. The smaller "LessData" copies can be swapped in for quick runs:
# path_to_ds = 'Data/LessData/training_data/training_data/'
# path_to_test_ds = 'Data/LessData/test_data/'
path_to_ds = 'Data/training_data/training_data/'
path_to_test_ds = 'Data/test_data/'

# Model / training hyper-parameters.
node_hidden1 = 12  # width of the per-node hidden state
edge_hidden1 = 12  # width of the per-edge message produced by EdgeNetwork
num_hops = 15      # number of message-passing rounds in Classifier.forward
batch_size = 400   # graphs per DataLoader batch

# Weight given to positive targets by BCEWithLogitsLoss (earlier trials: 22, 7.35).
# The tensor is placed on the GPU when one is available and on the CPU
# otherwise, so that evaluating this cell does not fail on CUDA-less machines.
pos_weights = torch.tensor([[7.5]]).to(
    torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

class CustomDataset(Dataset):
    """In-memory dataset of DGL graphs built from node-link JSON files.

    Every ``*.json`` file matched under ``path`` is parsed with
    ``networkx.node_link_graph`` and converted into a ``dgl.DGLGraph`` that
    carries the ``node_features`` node attribute and the ``distance`` and
    ``on_path`` edge attributes. All graphs are loaded eagerly in ``__init__``.
    """

    def __init__(self, path):
        """Read and convert every JSON graph found under ``path``."""
        self.graphs = []
        json_paths = glob.glob(path + '/*.json')
        for json_path in tqdm(json_paths):
            with open(json_path) as handle:
                nx_graph = nx.node_link_graph(json.load(handle))
                dgl_graph = dgl.DGLGraph()
                dgl_graph.from_networkx(
                    nx_graph,
                    node_attrs=['node_features'],
                    edge_attrs=['distance', 'on_path'],
                )
                self.graphs.append(dgl_graph)

    def __len__(self):
        """Return the number of graphs held in memory."""
        return len(self.graphs)

    def __getitem__(self, idx):
        """Return ``(graph, labels)``; labels are the graph's ``on_path`` edge data."""
        graph = self.graphs[idx]
        return graph, graph.edata['on_path']
    
    
# Eagerly load every training graph into memory (CustomDataset reads all
# matching JSON files in its constructor).
train_ds = CustomDataset(path_to_ds)
# Held-out graphs; the epoch loop below uses them for validation.
test_ds = CustomDataset(path_to_test_ds)

def collate(samples):
    """Merge a list of ``(graph, labels)`` dataset items into a single batch.

    The graphs are combined with ``dgl.batch`` keeping only the
    ``node_features`` node attribute and the ``distance`` edge attribute.
    The per-graph label tensors are concatenated, given an extra dimension at
    index 1 and cast to float.

    Returns:
        ``(batched_graph, targets)``.
    """
    graphs = []
    edge_labels = []
    for sample in samples:
        graphs.append(sample[0])
        edge_labels.append(sample[1])

    merged = dgl.batch(graphs, node_attrs=['node_features'], edge_attrs=['distance'])
    stacked = torch.cat(edge_labels)
    return merged, stacked.unsqueeze(1).float()



# The EdgeNetwork module below is the edge update (message) function.

class EdgeNetwork(nn.Module):
    """Edge update (message) function used during message passing.

    For each edge it concatenates the features and hidden state of both
    endpoints with the edge's ``distance`` and maps the result through a small
    MLP to a message of width ``edge_hidden1``.
    """

    def __init__(self):
        super().__init__()
        # Per endpoint: node_features (assumed 2 columns, matching the
        # Linear(2, ...) used to initialise hidden states) + hidden state.
        endpoint_width = 2 + node_hidden1
        # Two endpoints plus the scalar edge distance.
        in_features = 2 * endpoint_width + 1
        bottleneck = edge_hidden1 - 2
        self.layer1 = nn.Sequential(
            nn.Dropout(0.1),
            nn.Linear(in_features, bottleneck),
            nn.LeakyReLU(0.1),
            nn.Linear(bottleneck, edge_hidden1),
            nn.ReLU(),
        )

    def forward(self, x):
        """Compute the message sent along each edge.

        ``x`` must expose ``x.dst`` / ``x.src`` (each with ``node_features``
        and ``node_hidden_state``) and ``x.data['distance']``.

        Returns:
            A dict whose single entry is delivered to the receiving node.
        """
        receiver = x.dst
        sender = x.src
        pieces = [
            receiver['node_features'],
            receiver['node_hidden_state'],
            sender['node_features'],
            sender['node_hidden_state'],
            # distance is stored without a feature axis; add one for cat.
            x.data['distance'].unsqueeze(1),
        ]
        message = self.layer1(torch.cat(pieces, dim=1))
        return {'edge hidden represetation': message}

    
class NodeNetwork(nn.Module):
    """Node update (reduce) function used during message passing.

    Combines the sum of the incoming edge messages with the node's own
    features and current hidden state, and produces the next hidden state.
    """

    def __init__(self):
        super().__init__()
        # summed message + node_features (assumed 2 columns) + hidden state
        in_features = edge_hidden1 + 2 + node_hidden1
        self.layer1 = nn.Sequential(
            nn.Linear(in_features, node_hidden1),
            nn.LeakyReLU(0.1),
        )

    def forward(self, x):
        """Return the new ``node_hidden_state`` for every node in ``x``.

        ``x.mailbox['edge hidden represetation']`` holds what EdgeNetwork
        sent; it is summed over dim 1 (presumably the incoming-message axis —
        confirm against the DGL mailbox layout). ``x.data`` supplies the
        node's existing features and hidden state.
        """
        aggregated = x.mailbox['edge hidden represetation'].sum(1)
        node_state = x.data
        stacked = torch.cat(
            [aggregated, node_state['node_features'], node_state['node_hidden_state']],
            dim=1,
        )
        return {'node_hidden_state': self.layer1(stacked)}


class EdgeClassifier(nn.Module):
    """Final per-edge scorer applied after message passing.

    Takes the same concatenated input as the edge update function (both
    endpoints' features and hidden states plus the edge distance) and emits a
    single raw score per edge. No sigmoid is applied here.
    """

    def __init__(self):
        super().__init__()
        # Two endpoints of (node_features + hidden state) plus the distance.
        width = 2 * (2 + node_hidden1) + 1
        self.layer1 = nn.Sequential(
            nn.Linear(width, width),
            nn.LeakyReLU(0.1),
            nn.Linear(width, width),
            nn.LeakyReLU(),
            nn.Linear(width, 1),
        )

    def forward(self, x):
        """Return ``{'edge_class_prediction': score}`` for every edge in ``x``."""
        receiver = x.dst
        sender = x.src
        edge_input = torch.cat(
            [
                receiver['node_features'],
                receiver['node_hidden_state'],
                sender['node_features'],
                sender['node_hidden_state'],
                # distance has no feature axis; add one so it can be concatenated.
                x.data['distance'].unsqueeze(1),
            ],
            dim=1,
        )
        score = self.layer1(edge_input)
        return {'edge_class_prediction': score}


class Classifier(nn.Module):
    """Message-passing network that predicts one score per graph edge.

    Pipeline: initialise node hidden states from the raw node features, run
    ``num_hops`` rounds of edge-message / node-update passing, then score
    every edge with a separate edge classifier.
    """

    def __init__(self):
        super().__init__()

        # Builds the initial hidden state from the node features alone.
        self.node_init = nn.Sequential(
            nn.Linear(2, node_hidden1),
        )

        # Message and reduce functions shared by all message-passing rounds.
        self.edge_network = EdgeNetwork()
        self.node_network = NodeNetwork()

        # Also an edge function, but it outputs a single value per edge.
        self.edge_classifier = EdgeClassifier()

    def forward(self, g):
        """Run the network on graph ``g`` and return its per-edge predictions.

        Side effect: writes ``node_hidden_state`` into ``g.ndata`` and
        ``edge_class_prediction`` into ``g.edata``.
        """
        initial_state = self.node_init(g.ndata['node_features'])
        g.ndata['node_hidden_state'] = initial_state

        for _ in range(num_hops):
            g.update_all(self.edge_network, self.node_network)

        # Classify the edges using the final node states.
        g.apply_edges(self.edge_classifier)
        return g.edata['edge_class_prediction']
    

# Run on the GPU when one is present; otherwise fall back to the CPU instead
# of failing on the hard-coded 'cuda' device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

net = Classifier()
net = net.to(device)

data_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, collate_fn=collate)
# Validation gains nothing from shuffling. A fixed order keeps the batch
# composition, and therefore the summed per-batch validation loss, comparable
# from one epoch to the next.
test_data_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, collate_fn=collate)

# Earlier trials used Adam (lr=1e-2 and lr=0.005); SGD with momentum is the
# current choice.
optimizer = optim.SGD(net.parameters(), lr=0.0001, momentum=0.9)
# Multiply the learning rate by 0.1 every 10 scheduler steps.
scheduler = lr_scheduler.StepLR(optimizer, 10, gamma=0.1)
# pos_weight must live on the same device as the model's outputs.
loss_func = nn.BCEWithLogitsLoss(pos_weight=pos_weights.to(device), reduction='mean')





def _confusion_counts(logits, targets):
    """Return ``(TP, TN, FP, FN)`` counts for raw scores thresholded at sigmoid >= 0.5."""
    with torch.no_grad():
        hard = (torch.sigmoid(logits) >= 0.5).to(targets.dtype)
        is_pos = targets == 1
        is_neg = targets == 0
        right = hard == targets
        wrong = hard != targets
        return (
            int((right & is_pos).sum()),
            int((right & is_neg).sum()),
            int((wrong & is_neg).sum()),
            int((wrong & is_pos).sum()),
        )


def _rates(tp, tn, fp, fn):
    """Normalise counts: TP/FN by the number of positives, TN/FP by the number of negatives."""
    # max(..., 1) avoids a ZeroDivisionError when a split has no positive
    # (or no negative) edges; the corresponding rates are then reported as 0.
    positives = max(tp + fn, 1)
    negatives = max(tn + fp, 1)
    return tp / positives, tn / negatives, fp / negatives, fn / positives


num_epochs = 40
# Follow whatever device the model already lives on.
device = next(net.parameters()).device

# Best-model bookkeeping must live OUTSIDE the epoch loop; otherwise it is
# reset every epoch and the "best" model is always the most recent one.
since = time.time()
val_loss_history = []
best_model_wts = copy.deepcopy(net.state_dict())
best_loss = float('inf')
best_loss_TP = 0
best_loss_TN = 0
best_loss_FP = 0
best_loss_FN = 0

for epoch in range(num_epochs):

    # ---- training pass ----
    net.train()
    true_positive = false_positive = true_negative = false_negative = 0
    epoch_loss = 0
    for x, y in data_loader:
        x = x.to(device)
        y = y.to(device)
        predicted = net(x)
        batch_loss = loss_func(predicted, y)
        epoch_loss += batch_loss.item()

        optimizer.zero_grad()
        # Each forward pass builds a fresh autograd graph, so there is no
        # need to retain it after backward.
        batch_loss.backward()
        optimizer.step()

        tp, tn, fp, fn = _confusion_counts(predicted, y)
        true_positive += tp
        true_negative += tn
        false_positive += fp
        false_negative += fn

    # StepLR counts scheduler steps; stepping once per epoch (not per batch)
    # gives the intended "decay every 10 epochs" schedule.
    scheduler.step()

    TP, TN, FP, FN = _rates(true_positive, true_negative, false_positive, false_negative)
    print("Epoch ", epoch)
    # Show the last training batch: raw score, probability, rounded prediction, target.
    with torch.no_grad():
        probabilities = torch.sigmoid(predicted)
        sample_data = torch.cat((predicted, probabilities, torch.round(probabilities), y), 1)
    print("   Sample data (pred  |  Sigmoid(pred)  |  1/0 pred  |  y): ")
    print("  ", sample_data)
    print("   Training:   loss: {:.6f}  |  TP: {:.6f} | TN: {:.6f} | FP: {:.6f} | FN: {:.6f}".format(
                epoch_loss, TP, TN, FP, FN))

    # ---- validation pass ----
    net.eval()
    true_positive = false_positive = true_negative = false_negative = 0
    epoch_val_loss = 0
    with torch.no_grad():  # no gradients are needed for evaluation
        for x, y in test_data_loader:
            x = x.to(device)
            y = y.to(device)
            predicted = net(x)
            batch_val_loss = loss_func(predicted, y)
            # Accumulate the VALIDATION batch loss (not the last training loss).
            epoch_val_loss += batch_val_loss.item()

            tp, tn, fp, fn = _confusion_counts(predicted, y)
            true_positive += tp
            true_negative += tn
            false_positive += fp
            false_negative += fn

    TP, TN, FP, FN = _rates(true_positive, true_negative, false_positive, false_negative)
    print("   Validation: loss: {:.6f}  |  TP: {:.6f} | TN: {:.6f} | FP: {:.6f} | FN: {:.6f}".format(
                epoch_val_loss, TP, TN, FP, FN))
    val_loss_history.append(epoch_val_loss)

    # Keep a deep copy of the weights with the lowest validation loss so far.
    if epoch_val_loss < best_loss:
        best_loss = epoch_val_loss
        best_model_wts = copy.deepcopy(net.state_dict())
        best_loss_TP = TP
        best_loss_TN = TN
        best_loss_FP = FP
        best_loss_FN = FN

print()

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Loss: {:4f}'.format(best_loss))
print('With TP: {:.6f}% | TN: {:.6f}% | FP: {:.6f}% | FN: {:.6f}%'.format(
                best_loss_TP*100, best_loss_TN*100, best_loss_FP*100, best_loss_FN*100))

# Restore the best weights before saving.
net.load_state_dict(best_model_wts)

torch.save(net.state_dict(), 'hw3_graphs_final.pt')
