# -*- coding: utf-8 -*-
"""
Created on Tue May 24 16:01:42 2022

@author: Pasula
"""
import time
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.loader import DataLoader
from torch_geometric.data import Data
from torch_geometric.nn import Linear, GINConv
from sklearn.metrics import confusion_matrix  # used in the commented diagnostic below
import matplotlib.pyplot as plt

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
batch_size = 32
t = time.time()

#%% model definition
class VGAE(nn.Module):
    """Variational graph autoencoder built from GINConv layers with MLP
    message functions. The encoder produces node-level mu and log-variance;
    the decoder maps sampled latents back to per-node adjacency logits.
    (Note: this local class replaces the torch_geometric.nn.VGAE import,
    which was shadowed in the original and has been removed.)"""

    def __init__(self):
        super(VGAE, self).__init__()
        dim1 = 30   # input node-feature dimension
        dim2 = 45
        dim3 = 60
        dim4 = 40
        dim5 = 20   # latent dimension
        dim6 = 15
        dim7 = 9    # one logit per possible neighbour (9 nodes per graph)
        self.mlp1 = nn.Sequential(Linear(dim1, dim2, weight_initializer='glorot'),
                                  nn.LeakyReLU(), Linear(dim2, dim3))
        self.mlp2 = nn.Sequential(Linear(dim3, dim4), nn.LeakyReLU(), Linear(dim4, dim5))
        self.mlp3 = nn.Sequential(Linear(dim1, dim2, weight_initializer='glorot'),
                                  nn.LeakyReLU(), Linear(dim2, dim3))
        self.mlp4 = nn.Sequential(Linear(dim3, dim4), nn.LeakyReLU(), Linear(dim4, dim5))
        self.mlp5 = nn.Sequential(Linear(dim5, dim4), nn.LeakyReLU(), Linear(dim4, dim2))
        self.mlp6 = nn.Sequential(Linear(dim2, dim1), nn.LeakyReLU(), Linear(dim1, dim5))
        self.mlp7 = nn.Sequential(Linear(dim5, dim6), nn.LeakyReLU(), Linear(dim6, dim7))
        self.conv1 = GINConv(self.mlp1)
        self.conv2 = GINConv(self.mlp2)
        self.conv3 = GINConv(self.mlp3)
        self.conv4 = GINConv(self.mlp4)
        self.conv5 = GINConv(self.mlp5)
        self.conv6 = GINConv(self.mlp6)
        self.conv7 = GINConv(self.mlp7)

    def encode(self, x, edge_index):
        x = x.float()
        mu = F.relu(self.conv1(x, edge_index))
        mu = self.conv2(mu, edge_index)
        log_var = F.relu(self.conv3(x, edge_index))
        log_var = self.conv4(log_var, edge_index)
        # reparameterization trick: z = mu + eps * std, std = exp(0.5 * log_var)
        std = torch.exp(0.5 * log_var)
        epsilon = torch.randn_like(std)
        z = mu + epsilon * std
        return z, mu, log_var

    def decode(self, z, edge_index):
        adj_pred = self.conv5(z, edge_index)
        adj_pred = self.conv6(adj_pred, edge_index)
        adj_pred = self.conv7(adj_pred, edge_index)
        return adj_pred

    def total_loss(self, adj_pred, adj, mu, log_var):
        ns_loss = nn.BCEWithLogitsLoss()
        recons_loss = ns_loss(adj_pred, adj)
        # KL divergence between N(mu, exp(log_var)) and N(0, I).
        # The original used the log-std form (1 + 2*log_var - mu^2 - exp(log_var)^2),
        # which is inconsistent with std = exp(0.5*log_var) in encode();
        # the log-variance form is used here instead.
        kld_loss = -0.5 * torch.mean(torch.sum(1 + log_var - mu**2 - log_var.exp(),
                                               dim=1))
        total_loss = recons_loss + kld_loss
        return total_loss, recons_loss, kld_loss

#%% data loading
total_states = torch.load('states.pt').to(device)

#%% data processing
states = total_states[:256]
x_o = states[:, :270].reshape(-1, 9, 30).to(device)       # node features: 9 nodes x 30 features
edge_index_o = states[:, 391:431].reshape(len(x_o), 2, -1).to(torch.long).to(device)
prev_job_o = states[:, 270:390].reshape(-1, 4, 30).to(device)   # not consumed by the autoencoder
cur_mach = states[:, 390].reshape(len(x_o), 1).to(device)       # not consumed by the autoencoder
active_edges_o = states[:, 431].reshape(len(x_o), 1).to(device)  # number of valid edges per graph

dataset = []
for _x, _edg, _active_edges in zip(x_o, edge_index_o, active_edges_o):
    # build the dense adjacency matrix from the first `_active_edges` edge indices
    adj = torch.zeros(9, 9).to(device)
    edges = _edg[:, :int(_active_edges)]
    for i in edges.t():
        adj[i[0]][i[1]] = 1
    dataset.append(Data(x=_x, edge_index=_edg[:, :int(_active_edges)], adj=adj))

#%% train/test split
random.shuffle(dataset)
split = int(len(dataset) * 0.9)
train_dataset = dataset[:split]
test_dataset = dataset[split:]
test_loader = DataLoader(test_dataset, batch_size=batch_size, drop_last=True,
                         shuffle=True)
# only the test loader is built here; the train loader is rebuilt each epoch
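#%% sanity check (optional)
# A minimal sketch, not part of the original pipeline: push one graph through
# an untrained model to confirm the tensor shapes line up before training
# (x: [9, 30] -> z: [9, 20] -> adj_pred: [9, 9], matching the dense adj).
_check_model = VGAE().to(device)
_sample = dataset[0]
with torch.no_grad():
    _z, _mu, _log_var = _check_model.encode(_sample.x, _sample.edge_index)
    _adj_pred = _check_model.decode(_z, _sample.edge_index)
print('z:', _z.shape, 'adj_pred:', _adj_pred.shape, 'adj:', _sample.adj.shape)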
#%% training
model = VGAE().to(device)
model_optim = optim.Adamax(model.parameters(), lr=8e-04, weight_decay=1e-06)
lr_scheduler = optim.lr_scheduler.StepLR(model_optim, step_size=1000, gamma=0.01)
model.train()
torch.autograd.set_detect_anomaly(True)   # slows training; useful only while debugging

epochs = 5000
tot_loss = []
tot_r_loss = []
tot_kl_loss = []
link_pred_acc = []

for ep in range(epochs):
    train_loader = DataLoader(train_dataset, batch_size=batch_size, drop_last=True,
                              shuffle=True)
    ep_loss = 0
    ep_r_loss = 0
    ep_kl_loss = 0
    count = 0
    for data in train_loader:
        count += 1
        x, edge_index, adj = data.x, data.edge_index, data.adj
        z, mu, log_var = model.encode(x, edge_index)
        adj_pred = model.decode(z, edge_index)
        loss, r_loss, kl_loss = model.total_loss(adj_pred, adj, mu, log_var)
        ep_loss += loss.item()
        ep_r_loss += r_loss.item()
        ep_kl_loss += kl_loss.item()
        model_optim.zero_grad()
        loss.backward()
        model_optim.step()
        # fraction of true links recovered by the rounded sigmoid predictions
        # (link recall rather than accuracy: only existing edges are counted)
        rounded_pred = torch.round(torch.sigmoid(adj_pred)).detach()
        link = torch.mul(rounded_pred, adj)
        link_prob = (link.sum() / adj.sum()).cpu().numpy()
        link_pred_acc.append(link_prob)
    tot_loss.append(ep_loss)
    tot_r_loss.append(ep_r_loss)
    tot_kl_loss.append(ep_kl_loss)
    lr_scheduler.step()
    if ep % 100 == 0:
        print("Epoch:", '%04d' % (ep + 1), "train_loss=", "{:.5f}".format(ep_loss),
              "recon_loss=", "{:.5f}".format(ep_r_loss),
              "time=", "{:.5f}".format(time.time() - t))
        # optional diagnostic on the last batch:
        # confusion_matrix(adj.view(-1).long().cpu(),
        #                  adj_pred.view(-1).detach().long().cpu(), labels=['0', '1'])

#%% training curves
plt.title('tot_loss')
plt.plot(tot_loss)
plt.show()
plt.title('recons_loss')
plt.plot(tot_r_loss)
plt.show()
plt.title('kl_loss')
plt.plot(tot_kl_loss)
plt.show()
plt.title('link_prob')
plt.plot(link_pred_acc)
plt.show()
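#%% held-out evaluation (sketch)
# The test_loader built above is never consumed in the original script; this
# is a minimal sketch, reusing the same loss and link-recall definitions as
# the training loop, of how the trained model could be scored on held-out graphs.
model.eval()
test_loss = 0.0
test_link_recall = 0.0
n_batches = 0
with torch.no_grad():
    for data in test_loader:
        z, mu, log_var = model.encode(data.x, data.edge_index)
        adj_pred = model.decode(z, data.edge_index)
        loss, _, _ = model.total_loss(adj_pred, data.adj, mu, log_var)
        test_loss += loss.item()
        rounded_pred = torch.round(torch.sigmoid(adj_pred))
        test_link_recall += (torch.mul(rounded_pred, data.adj).sum() /
                             data.adj.sum()).item()
        n_batches += 1
print('test_loss=', '{:.5f}'.format(test_loss / n_batches),
      'link_recall=', '{:.5f}'.format(test_link_recall / n_batches))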