In [2]:
import numpy as np
import pandas as pd

In [3]:
import torch
import torch.nn as nn
from torch.nn import init
from torch.autograd import Variable

import numpy as np
import time
import random
from sklearn.metrics import f1_score,roc_auc_score
from collections import defaultdict

from encoders import Encoder
from aggregators import MeanAggregator
from torch.optim.lr_scheduler import OneCycleLR


In [4]:
class SupervisedGraphSage(nn.Module):

    def __init__(self, num_classes,num_nodes,num_fts,adj_lists,hidden_dim=256,use_gpu=False):
        super(SupervisedGraphSage, self).__init__()
        self.features = nn.Embedding(num_nodes, num_fts)
        # features.cuda()

        self.agg1 = MeanAggregator(self.features, cuda=True)
        self.enc1 = Encoder(self.features, num_fts, 512, adj_lists, self.agg1, gcn=True, cuda=use_gpu)
        self.agg2 = MeanAggregator(lambda nodes : self.enc1(nodes).t(), cuda=use_gpu)
        self.enc2 = Encoder(lambda nodes : self.enc1(nodes).t(), self.enc1.embed_dim, 512, adj_lists, self.agg2,
                base_model=self.enc1, gcn=True, cuda=use_gpu)
        self.enc1.num_samples = 5
        self.enc2.num_samples = 5
        
        self.fc1 = nn.Linear(self.enc2.embed_dim*2,hidden_dim)
        self.fc2 = nn.Linear(hidden_dim,hidden_dim)
        self.fc3 = nn.Linear(hidden_dim,hidden_dim)
        self.fc4 = nn.Linear(hidden_dim,hidden_dim)
        self.fc5 = nn.Linear(hidden_dim,hidden_dim)
        self.fc6 = nn.Linear(hidden_dim,num_classes)



    def forward(self, nodes_u,nodes_v):
        embeds_u = self.enc2(nodes_u).t()
        embeds_v = self.enc2(nodes_v).t()
        
        embeds = torch.cat((embeds_u,embeds_v),1)
        
        embeds = self.fc1(embeds)
        embeds = self.fc2(F.relu(embeds))
        embeds = self.fc3(F.relu(embeds))
        embeds = self.fc4(F.relu(embeds))
        embeds = self.fc5(F.relu(embeds))
        scores = self.fc6(F.relu(embeds))
        
        return scores

    

In [5]:
import networkx as nx
import pandas as pd
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F
import itertools
import pandas as pd

In [6]:
from collections import defaultdict


In [7]:
train_pos_u = np.load("train_pos_u.npy")
train_pos_v = np.load("train_pos_v.npy")
train_neg_u = np.load("train_neg_u.npy")
train_neg_v = np.load("train_neg_v.npy")


In [8]:
train_graph = pd.DataFrame()
train_graph["src"] = train_pos_u
train_graph["dst"] = train_pos_v

In [9]:
G_main = nx.from_pandas_edgelist(train_graph, source='src', target='dst') 


In [10]:
test_pos_u = np.load("test_pos_u.npy")
test_pos_v = np.load("test_pos_v.npy")
test_neg_u = np.load("test_neg_u.npy")
test_neg_v = np.load("test_neg_v.npy")
train_pos_df = pd.DataFrame()
train_pos_df["src"] = train_pos_u
train_pos_df["dst"] = train_pos_v
train_pos_df["labels"] = [1]*train_pos_u.shape[0]
train_neg_df = pd.DataFrame()
train_neg_df["src"] = train_neg_u
train_neg_df["dst"] = train_neg_v
train_neg_df["labels"] = [0]*train_neg_u.shape[0]
train_df = train_pos_df.append(train_neg_df)
test_pos_df = pd.DataFrame()
test_pos_df["src"] = test_pos_u
test_pos_df["dst"] = test_pos_v
test_pos_df["labels"] = [1]*test_pos_u.shape[0]
test_neg_df = pd.DataFrame()
test_neg_df["src"] = test_neg_u
test_neg_df["dst"] = test_neg_v
test_neg_df["labels"] = [0]*test_neg_u.shape[0]
test_df = test_pos_df.append(test_neg_df)
test_df = test_df.sample(frac=1,replace=False)
train_df = train_df.sample(frac=1,replace=False)
test_df = test_df.reset_index(drop=True)
train_df = train_df.reset_index(drop=True)


In [11]:
d = defaultdict(set)
for s, nbrs in G_main.adjacency():
    d[s] = set()
    for t, data in nbrs.items():
            d[s].add(t)


In [12]:
num_nodes = G_main.number_of_nodes()
num_classes=1
num_ftrs = 2048
hidden_dim = 512
num_epochs =100
bsz = train_df.shape[0]
bs =4096
use_gpu=True

In [13]:
if use_gpu:
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
import warnings
warnings.filterwarnings('ignore')

In [14]:
#    graphsage.cuda()

model= SupervisedGraphSage(num_classes,num_nodes,num_ftrs,d,hidden_dim,use_gpu).to(device)
rand_indices = np.random.permutation(num_nodes)

train = list(rand_indices)
learning_rate = 0.001

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=0.001)
scheduler= OneCycleLR(optimizer, max_lr=learning_rate, 
                                            steps_per_epoch=int(bsz/bs),
                                            epochs=num_epochs,final_div_factor = 1,
                                            anneal_strategy='linear')

In [15]:
test_bsz = test_df.shape[0]

In [None]:
save_path = "./"
loss_fn = nn.BCEWithLogitsLoss()
best_auc = 0
wait = 0
for ep in range(1,num_epochs+1):
    print("strating epoch :",ep)
    train_df = train_df.sample(frac=1,replace=False)
    labels_train = []
    predictions = []
    loss_train =[]
    for inds in range(int(bsz/bs)):
        nodes_u = torch.tensor(np.array(train_df["src"])[inds*bs:(inds+1)*bs]).to(device)
        nodes_v = torch.tensor(np.array(train_df["dst"])[inds*bs:(inds+1)*bs]).to(device)
        labels = torch.tensor(np.array(train_df["labels"])[inds*bs:(inds+1)*bs]).to(device)

        scores = model(nodes_u,nodes_v)
        labels = labels.type_as(scores)
        
        optimizer.zero_grad()
        labels_train.extend(list(labels.cpu().numpy()))
        predictions.extend(list(torch.sigmoid(scores).squeeze().detach().cpu().numpy()))
        
        
        loss = loss_fn(scores.squeeze(),torch.tensor(labels).squeeze())
        loss.backward()
        optimizer.step()
        scheduler.step()
        
        loss_train.append(loss.item())
    print("Train Loss: ",np.mean(loss_train))
    print("Train AUC: ",roc_auc_score(labels_train,predictions))
    labels_test = []
    predictions = []
    loss_test =[]
    
    for inds in range(int(test_bsz/bs)):
        nodes_u = torch.tensor(np.array(test_df["src"])[inds*bs:(inds+1)*bs]).to(device)
        nodes_v = torch.tensor(np.array(test_df["dst"])[inds*bs:(inds+1)*bs]).to(device)
        labels = torch.tensor(np.array(test_df["labels"])[inds*bs:(inds+1)*bs]).to(device)

        scores = model(nodes_u,nodes_v)
        labels = labels.type_as(scores)
        
        labels_test.extend(list(labels.cpu().numpy()))
        predictions.extend(list(torch.sigmoid(scores).squeeze().detach().cpu().numpy()))
        
        
        loss = loss_fn(scores.squeeze(),torch.tensor(labels).squeeze())
        
        loss_test.append(loss.item())
    test_auc = roc_auc_score(labels_test,predictions)
    print("test Loss: ",np.mean(loss_test))
    print("test AUC: ",roc_auc_score(labels_test,predictions))
    
    if test_auc > best_auc:
        best_auc = test_auc
        torch.save(model.state_dict(), save_path+"best_auc_model_4.prm")
        wait = 0
    else:
        wait+=1
    print("Waiting for ",wait, "best AUC is ",best_auc)
    
    

strating epoch : 1
Train Loss:  0.6918777615190989
Train AUC:  0.6293569107762017
test Loss:  0.6875803917646408
test AUC:  0.7612928764670616
Waiting for  0 best AUC is  0.7612928764670616
strating epoch : 2
Train Loss:  0.4823405602610255
Train AUC:  0.8880271208935899
test Loss:  0.5954254865646362
test AUC:  0.8016492607172923
Waiting for  0 best AUC is  0.8016492607172923
strating epoch : 3
Train Loss:  0.3116117934864688
Train AUC:  0.9420546044343987
test Loss:  0.6457246690988541
test AUC:  0.8128090130630261
Waiting for  0 best AUC is  0.8128090130630261
strating epoch : 4
Train Loss:  0.27739345560590906
Train AUC:  0.9531424963724657
test Loss:  0.6425265446305275
test AUC:  0.8338793685947696
Waiting for  0 best AUC is  0.8338793685947696
strating epoch : 5
Train Loss:  0.24493296067398715
Train AUC:  0.9633031520176325
test Loss:  0.5437791123986244
test AUC:  0.8674367184010807
Waiting for  0 best AUC is  0.8674367184010807
strating epoch : 6
Train Loss:  0.23280211582959

Train Loss:  0.15599570665732923
Train AUC:  0.9848154821468484
test Loss:  0.5831589847803116
test AUC:  0.8866270807157306
Waiting for  0 best AUC is  0.8866270807157306
strating epoch : 45
Train Loss:  0.1515853146472609
Train AUC:  0.985643866397619
test Loss:  0.5933141484856606
test AUC:  0.8855983059839945
Waiting for  1 best AUC is  0.8866270807157306
strating epoch : 46
Train Loss:  0.15684825684650835
Train AUC:  0.9846145374124238
test Loss:  0.6222728416323662
test AUC:  0.8813046903083692
Waiting for  2 best AUC is  0.8866270807157306
strating epoch : 47
Train Loss:  0.14855189962559437
Train AUC:  0.9861734182433025
test Loss:  0.6753022074699402
test AUC:  0.884586764123986
Waiting for  3 best AUC is  0.8866270807157306
strating epoch : 48
Train Loss:  0.1500744345676468
Train AUC:  0.9858671406300906
test Loss:  0.5302377790212631
test AUC:  0.8801036374121107
Waiting for  4 best AUC is  0.8866270807157306
strating epoch : 49
Train Loss:  0.14679017896393695
Train AUC: 