In [None]:
from time import time
import networkx as nx
import pickle
import time
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.manifold import TSNE
import os
import torch
from collections import defaultdict
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils import clip_grad_norm
from torch.utils.data import DataLoader
import torch.utils.data as Data
import numpy as np
%matplotlib inline
from sklearn.manifold import TSNE

# Prefer the GPU when CUDA is available; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

class args(object):
    """Static configuration namespace; every setting is read as a class attribute."""

    # ---------------- Data ----------------
    node_size = 21  # number of nodes + 1 (index 0 is kept for zero padding)
    seq_len = 5     # length of every node sequence

    # -------------- Training --------------
    batch_size = 32
    lr = 0.0002           # Adam learning rate
    beta1 = 0.9           # Adam betas
    beta2 = 0.999
    LSTM_maxnorm = 0.25   # max gradient norm for the RNN parameters (None disables clipping)
    acc_threshold = 0.5   # a prediction above this value counts as a positive
    try_valid = 20        # NOTE(review): not referenced anywhere in this notebook
    running_loss = True   # when False, the per-step history is cleared after each epoch
    n_epoch = 50
    weight_reg = 0        # Adam weight decay

    # ---------------- Model ----------------
    RNN_model = 'gru'          # 'lstm' or 'gru'
    hidden_size = 50
    num_layers = 5
    embedding_dim = 50
    embedding_maxnorm = None   # passed to nn.Embedding as max_norm
    bidirectional = True
    model_name = 'Siamese'     # used in checkpoint file names and as the checkpoint key
    model_path = './{}/Model/'.format(model_name)  # directory that receives the checkpoints
    
# Create the checkpoint directory if it is missing. exist_ok=True replaces the
# separate existence check, so there is no window between "check" and "create".
os.makedirs(args.model_path, exist_ok=True)

'''
TODO:

1. Add GRU
2. Add Algo
'''   

# NOTE(review): pickle.load can execute arbitrary code -- only open trusted files here.
with open("Planar10thData.txt", "rb") as fp:   # Unpickling
    df = pickle.load(fp)
X, Y = df[['left', 'right']], df['target']
del df

# Separate into training, validation, and test sets (fixed random_state on both splits)
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=64)
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X_train, Y_train, test_size=0.05, random_state=64)

# Keep only the underlying arrays of the label series
Y_test, Y_train, Y_validation = Y_test.values, Y_train.values, Y_validation.values

# Check shape: both sides of a pair and the labels must line up
assert X_train['left'].shape == X_train['right'].shape
assert len(X_train['left']) == len(Y_train)

def padding(data):
    """Stack the 'left' and 'right' columns of ``data`` into a single tensor.

    No padding is actually added here. Row ``i`` of the result holds the pair
    taken from row ``i`` of ``data``; along the second dimension index 0 is
    the 'right' sequence and index 1 is the 'left' sequence.
    """
    n_rows = data.shape[0]
    left_seqs = [data.iloc[row]['left'] for row in range(n_rows)]
    right_seqs = [data.iloc[row]['right'] for row in range(n_rows)]
    stacked = np.array([right_seqs, left_seqs])
    # Swap the first two axes so that the pair index comes first
    return torch.tensor(stacked).transpose(1, 0)


def plot_train_hist(train_hist, step=None):
    """Plot every series of ``train_hist`` on a two-panel figure.

    Series whose key contains 'Loss' are drawn in the top panel; all other
    series are drawn in the bottom panel, which is labelled 'Accuracy'.
    When ``step`` is not None the figure is also saved as
    "Train_Hist<step>.png".
    """
    fig = plt.figure(figsize=(20, 10))
    for name, series in train_hist.items():
        # Pick the panel and its caption from the series name
        panel, caption = (211, 'Loss') if 'Loss' in name else (212, 'Accuracy')
        plt.subplot(panel)
        plt.plot(series, marker='o', label=name)
        plt.ylabel(caption, fontsize=15)
        plt.xlabel('Number of epochs', fontsize=15)
        plt.title(caption, fontsize=20, fontweight="bold")
        plt.legend(loc='upper left')

    plt.tight_layout()
    plt.show()
    if step is not None:
        fig.savefig("Train_Hist" + str(step) + ".png")


# Stack every split into tensors and wrap each one in a TensorDataset
X_train, Y_train = padding(X_train), torch.FloatTensor(np.array(Y_train))
train_dataset = Data.TensorDataset(X_train, Y_train)

X_validation, Y_validation = padding(X_validation), torch.FloatTensor(np.array(Y_validation))
val_dataset = Data.TensorDataset(X_validation, Y_validation)

X_test, Y_test = padding(X_test), torch.FloatTensor(np.array(Y_test))
test_dataset = Data.TensorDataset(X_test, Y_test)


In [None]:
# Display the shape of the stacked validation tensor produced by padding()
X_validation.shape

In [None]:
class RNN_Encoder(nn.Module):
    """Embed a batch of node-index sequences, run an LSTM or GRU over them and
    project the recurrent output to a 2-dimensional vector.

    All sizes are read from the module-level ``args`` configuration.
    """

    def __init__(self, model = 'lstm'):
        super(RNN_Encoder, self).__init__()

        self.model = model
        # padding_idx=0 keeps embedding row 0 for padding, which is why
        # args.node_size is the number of nodes plus one.
        self.embedding = nn.Embedding(args.node_size, embedding_dim=args.embedding_dim,
                                      padding_idx=0, max_norm=args.embedding_maxnorm)

        # Bidirectional: the head consumes every time step of both directions,
        # flattened. Unidirectional: the head consumes only the last time step.
        num_dir = 2 if args.bidirectional else 1
        if args.bidirectional:
            fc_in, fc_hidden = args.hidden_size * args.seq_len * 2, 128
        else:
            fc_in, fc_hidden = args.hidden_size, 16

        self.fc = nn.Sequential(
            nn.Linear(fc_in, fc_hidden),
            nn.BatchNorm1d(fc_hidden),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(fc_hidden, 2),
        )

        # Learned initial hidden state, stored for a batch of one and repeated
        # across the batch in forward().
        state_shape = (args.num_layers * num_dir, 1, args.hidden_size)
        self.h0 = nn.Parameter(torch.randn(*state_shape))

        rnn_kwargs = dict(input_size=args.embedding_dim, hidden_size=args.hidden_size,
                          num_layers=args.num_layers, bidirectional=args.bidirectional,
                          batch_first=True)
        if self.model == 'lstm':
            self.rnn = nn.LSTM(**rnn_kwargs)
            # The LSTM additionally needs a learned initial cell state
            self.c0 = nn.Parameter(torch.ones(*state_shape))
            print('Using LSTM')
        elif self.model == 'gru':
            self.rnn = nn.GRU(**rnn_kwargs)
            print('Using GRU')
        else:
            raise NotImplementedError

    def forward(self, x):
        """Encode ``x`` (first dimension is the batch) and return the output of
        the fully connected head, one 2-dimensional row per sequence."""
        batch = x.size(0)
        embedded = self.embedding(x)

        # Repeat the learned initial state once per sequence in the batch
        if self.model == 'lstm':
            init_state = (self.h0.repeat(1, batch, 1), self.c0.repeat(1, batch, 1))
        elif self.model == 'gru':
            init_state = self.h0.repeat(1, batch, 1)
        else:
            raise NotImplementedError

        rnn_out, _ = self.rnn(embedded, init_state)

        if args.bidirectional:
            features = rnn_out.contiguous().view(batch, -1)
        else:
            features = rnn_out[:, -1, :]
        return self.fc(features)
    
    

class SiameseNet(nn.Module):
    """Siamese model: one shared RNN_Encoder encodes both sequences of a pair.

    The similarity of a pair is exp(-L1 distance) between the two encodings
    and is trained with an MSE loss against the label. The model owns its
    Adam optimizer: calling the model (``forward``) performs one complete
    training step, while ``test_step`` only evaluates.
    """

    def __init__(self,):
        super(SiameseNet, self).__init__()

        self.encoder = RNN_Encoder(args.RNN_model)
        self.mse = nn.MSELoss()
        # Per-step metrics: 'Loss'/'Accuracy' from forward(),
        # 'V_Loss'/'V_Accuracy' from test_step().
        self.train_hist = defaultdict(list)
        self.acc_hist = []
        self.apply(self.weight_init)

        self.optim = optim.Adam(self.encoder.parameters(), lr = args.lr, betas= (args.beta1, args.beta2),
                                weight_decay = args.weight_reg)

    def _pair_similarity(self, x):
        """Encode both members of every pair in ``x``.

        ``x[:, 0, :]`` and ``x[:, 1, :]`` are the two sequences of each pair.
        Returns ``(similarity, difference)`` where ``difference`` is the raw
        difference of the two encodings and ``similarity`` is
        exp(-L1 norm of that difference).
        """
        left_out = self.encoder(x[:, 0, :])
        right_out = self.encoder(x[:, 1, :])
        diff = left_out - right_out
        return torch.exp(-torch.norm(diff, 1, -1)), diff

    def _accuracy(self, prediction, y):
        """Fraction of predictions that land on the same side of
        args.acc_threshold as the label."""
        return torch.mean(((prediction > args.acc_threshold) == (y.byte())).float())

    def forward(self, x, y):
        """Run one full optimisation step on the batch ``(x, y)``.

        Nothing is returned; the results are stored on ``self.prediction``,
        ``self.take`` (encoding difference), ``self.loss`` and ``self.acc``,
        and the loss/accuracy are appended to ``self.train_hist``.
        """
        self.optim.zero_grad()

        self.prediction, self.take = self._pair_similarity(x)
        self.loss = self.mse(self.prediction, y)
        self.acc = self._accuracy(self.prediction, y)
        self.train_hist['Loss'].append(self.loss.item())
        self.train_hist['Accuracy'].append(self.acc.item())

        self.loss.backward()
        # Clip AFTER backward(): before it the gradients are still the zeros
        # left by zero_grad(), so clipping there has no effect at all.
        if args.LSTM_maxnorm is not None:
            nn.utils.clip_grad_norm_(self.encoder.rnn.parameters(), args.LSTM_maxnorm)
        self.optim.step()

    def weight_init(self,m):
        """Initialiser applied to every sub-module through ``self.apply``.

        Conv/linear layers get Kaiming-normal weights; LSTM layers get
        Xavier-normal weights and normally distributed biases.
        NOTE(review): nn.GRU is not listed, so a GRU encoder keeps PyTorch's
        default initialisation -- confirm whether that is intended.
        """
        if type(m) in [nn.Conv2d, nn.ConvTranspose2d, nn.Linear]:
            nn.init.kaiming_normal_(m.weight,0.2,nonlinearity='leaky_relu')
        elif type(m) in [nn.LSTM]:
            for name, value in m.named_parameters():
                if 'weight' in name :
                    nn.init.xavier_normal_(value.data)
                if 'bias'in name:
                    value.data.normal_()

    def model_save(self,step):
        """Save the state dict to '<model_path><model_name>_Step_<step>.pth'."""
        path = args.model_path + args.model_name+'_Step_' + str(step) + '.pth'
        torch.save({args.model_name:self.state_dict()}, path)
        print('Model Saved')

    def load_step_dict(self, step):
        """Load the state dict written by ``model_save`` for ``step``."""
        path = args.model_path + args.model_name +'_Step_' + str(step) + '.pth'
        # map_location keeps every tensor on the storage it is deserialised to
        self.load_state_dict(torch.load(path, map_location = lambda storage, loc: storage)[args.model_name])
        print('Model Loaded')

    def plot_all_loss(self, step):
        """Plot every history series whose name does not contain 'V' (one
        subplot each) and save the figure as "Train_Hist<step>.png"."""
        fig = plt.figure(figsize=(20, 10))
        num_loss = 2  # number of subplot rows; matches the two training series
        i = 0
        for name in self.train_hist.keys():
            if 'V' not in name:
                i+= 1
                fig.add_subplot(num_loss,1,i)
                plt.plot(self.train_hist[name], label = name)
                plt.xlabel('Number of Steps',fontsize=15)
                plt.ylabel( name, fontsize=15)
                plt.title(name, fontsize=30, fontweight ="bold")
                plt.legend(loc = 'upper left')
        plt.tight_layout()
        plt.show()
        fig.savefig("Train_Hist"+str(step)+".png")

    def test_step(self, x, y):
        """Evaluate the batch ``(x, y)`` without updating any weights.

        Results are stored on ``self.v_prediction``, ``self.v_loss`` and
        ``self.v_acc`` and appended to ``self.train_hist``. The caller is
        responsible for switching the module to eval mode.
        """
        # Both members of the pair are encoded (the second one is x[:, 1, :]);
        # comparing a sequence with itself would always give similarity 1.
        # no_grad avoids building an autograd graph during evaluation.
        with torch.no_grad():
            self.v_prediction, _ = self._pair_similarity(x)
            self.v_loss = self.mse(self.v_prediction, y)
            self.v_acc = self._accuracy(self.v_prediction, y)
        self.train_hist['V_Loss'].append(self.v_loss.item())
        self.train_hist['V_Accuracy'].append(self.v_acc.item())

In [None]:
# Both loaders reshuffle on every pass and drop the last incomplete batch
train_loader = DataLoader(
    train_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=True,
)
valid_loader = DataLoader(
    val_dataset,
    batch_size=args.batch_size,
    shuffle=True,
    drop_last=True,
)

In [None]:
# Build the model without weight decay and put it in training mode
args.weight_reg = 0
siamese = SiameseNet().to(device)
siamese.train()

# Learning-rate schedule: multiply the rate by 0.5 at each milestone
scheduler = optim.lr_scheduler.MultiStepLR(
    siamese.optim, milestones=[20, 40, 60, 80], gamma=0.5)

# Explicitly set the learning rate of the first parameter group
siamese.optim.param_groups[0].update(lr=0.0002)

In [None]:
# Per-epoch history plus the counters that the training cell keeps updating
train_hist = defaultdict(list)
args.running_loss, args.n_epoch = True, 50
all_step = epoch = 0

In [None]:
# Training cell. `epoch` and `all_step` live outside the loop, so running this
# cell again continues from where the previous run stopped.
while epoch < args.n_epoch:
    siamese.train()
    for i, (data, label) in enumerate(train_loader):

        start_t = time.time()
        data = data.to(device)
        label = label.to(device)
        siamese(data, label)  # forward() also runs backward() and the optimizer step
        end_t = time.time()
        all_step += 1
        print('| Epoch [%d] | Step [%d] | lr [%.6f] | Loss: [%.4f] | Acc: [%.4f] | Time: %.1fs' %\
              ( epoch, all_step, siamese.optim.param_groups[0]['lr'], siamese.loss.item() , siamese.acc.item() ,
                end_t - start_t))

    # Validate once after every training epoch
    siamese.eval()
    for j, (v_data, v_label) in enumerate(valid_loader):

        start_t = time.time()
        v_data = v_data.to(device)
        v_label = v_label.to(device)
        siamese.test_step(v_data, v_label)
        end_t = time.time()
        print('| Epoch [%d] | Validation | Step [%d] |  Loss: [%.4f] | Acc: [%.4f] | Time: %.1fs' %\
              ( epoch, j, siamese.v_loss.item() , siamese.v_acc.item() ,end_t - start_t))

    siamese.plot_all_loss('Training_0reg')

    # The per-epoch history only has entries once an earlier epoch has finished
    if epoch >= 1:
        plot_train_hist(train_hist, 'Epoch_0reg')

    # Store the mean of every per-step series as this epoch's summary value
    for name in siamese.train_hist.keys():
        train_hist[name].append(sum(siamese.train_hist[name])/len(siamese.train_hist[name]))

    if not args.running_loss:
        for name in siamese.train_hist.keys():
            siamese.train_hist[name] = []

    # Advance the learning-rate schedule once per epoch; without this call the
    # MultiStepLR milestones are never reached and the rate never decays.
    scheduler.step()
    epoch += 1

    siamese.model_save(epoch)

    # Stop after a single epoch per execution of this cell. `break` leaves the
    # loop cleanly instead of aborting the cell with a StopIteration traceback.
    # Raise the threshold to train several epochs in one run.
    if epoch >= 0:
        break

In [None]:
##### Graph baseline: builds a graph from the matching pairs of each training batch and classifies a pair by looking its paths up in that graph #####

class Graph_Alg(object):
    """Graph-based baseline: remembers the paths of the positive training
    pairs in an undirected graph and classifies a pair by looking it up."""

    def __init__(self,):
        self.G = nx.Graph()

    def forward(self, x, y):
        """Add every path of the pairs selected by the mask ``y`` to the graph.

        Each node of a path becomes a graph node and consecutive nodes of a
        path are connected by an edge.
        """
        paired_data = np.array(x[y.byte()])
        for pair in paired_data:
            for path in pair:
                for pos, node in enumerate(path):
                    if node not in self.G.nodes:
                        self.G.add_node(node)
                    if pos >= 1 and (node, path[pos - 1]) not in self.G.edges:
                        self.G.add_edge(node, path[pos - 1])

    def draw(self,):
        """Draw the current graph with node labels."""
        nx.draw(self.G, with_labels=True, font_weight='bold')
        plt.show()

    def test(self, x, y):
        """Classify every pair in ``x`` and store the fraction that agrees
        with ``y`` (cast to int) in ``self.acc``."""
        predictions = [self.check_step(pair) for pair in np.array(x)]
        match = list(np.array(predictions) == np.array(y).astype(int))
        self.acc = sum(match) / len(match)

    def check_step(self, i):
        """Return 1 when both paths of the pair ``i`` lie in the graph and
        their first nodes share at least one neighbour, otherwise 0."""
        # Every node and every consecutive step of both paths must be known
        for path in i:
            for pos, node in enumerate(path):
                if node not in self.G.nodes:
                    return 0
                if pos >= 1 and (node, path[pos - 1]) not in self.G.edges:
                    return 0

        # The two start nodes must have a common neighbour
        return int(any(
            node in list(self.G.adj[i[1][0].item()])
            for node in list(self.G.adj[i[0][0].item()])
        ))


In [None]:
# Grow the graph baseline from a few random training batches and track its
# accuracy on a random validation batch after every step.
graph_algo = Graph_Alg()
algo_train_hist = defaultdict(list)
num_steps = 20
for i in range(num_steps):
    # A fresh iterator over a shuffling loader yields a random batch. The
    # built-in next() works with any iterator, whereas the .next() method is
    # not part of the Python 3 iterator protocol.
    data, label = next(iter(train_loader))

    # Training
    graph_algo.forward(data, label)

    # Testing
    v_data, v_label = next(iter(valid_loader))
    graph_algo.test(v_data, v_label)
    algo_train_hist['Accuracy'].append(graph_algo.acc)

plot_train_hist(algo_train_hist, 'Graph_algo')
graph_algo.draw()

In [None]:
# Display the shape of the encoder's learned initial hidden state parameter
siamese.encoder.h0.shape