In [10]:
import torch
from torch import nn

from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_curve,auc

from sklearn.model_selection import train_test_split

from gcn-model import BotGCN_Single
from utils import accuracy, init_weights

In [11]:
device = 'cpu'
embedding_size = 32
dropout = 0.1
lr = 1e-2
weight_decay = 5e-2

In [12]:
path = "/Dataset/"
#Re calculate Des Tensor
des_tensor = torch.load(path + "filtered_des_tensor.pt").t().to(device)

num_prop = torch.load(path + "filtered_num_properties_tensor.pt").t().to(device)
category_prop = torch.load(path + "filtered_cat_properties_tensor.pt").t().to(device)
labels = torch.load(path + "filtered_label.pt").t().to(device)

tweets_tensor = torch.load("/Users/ketanjadhav/Documents/BotRGCN/processed_data/tweets_tensor.pt").to(device)

train_idx = torch.load(path + "filtered_train_idx.pt").t().to(device)
val_idx = torch.load(path + "filtered_val_idx.pt").t().to(device)
test_idx = torch.load(path + "filtered_test_idx.pt").t().to(device)

In [13]:
followers_edge_index = torch.load("./dataset/New Dataset/filtered_followers_edge_index.pt").to(device)
following_edge_index = torch.load("./dataset/New Dataset/filtered_following_edge_index.pt").to(device)
interactions_edge_index = torch.load("./dataset/New Dataset/filtered_interaction_edge_index.pt").to(device)

In [14]:
gcn_model=BotGCN_Single(cat_prop_size=3,embedding_dimension=30).to(device)
loss=nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(gcn_model.parameters(),
                    lr=lr,weight_decay=weight_decay)

gcn_model.apply(init_weights)

BotGCN_Single(
  (linear_relu_des): Sequential(
    (0): Linear(in_features=768, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_num_prop): Sequential(
    (0): Linear(in_features=4, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_cat_prop): Sequential(
    (0): Linear(in_features=3, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_input): Sequential(
    (0): Linear(in_features=30, out_features=30, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (rgcn1): GCNConv(30, 30)
  (rgcn2): GCNConv(30, 30)
  (linear_relu_output1): Sequential(
    (0): Linear(in_features=30, out_features=30, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_output2): Linear(in_features=30, out_features=2, bias=True)
)

In [15]:
def train_gcn(epoch, optimizer, train_edge_index):
    gcn_model.train()
    output = gcn_model(des_tensor, tweets_tensor, num_prop, category_prop, train_edge_index)
    loss_train = loss(output[train_idx], labels[train_idx])
    acc_train = accuracy(output[train_idx], labels[train_idx])
    acc_val = accuracy(output[val_idx], labels[val_idx])
    
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()
    
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'acc_val: {:.4f}'.format(acc_val.item()))
    
    return acc_train, loss_train

In [16]:
import numpy as np

def test_gcn(test_edge_index):
    gcn_model.eval()
    output = gcn_model(des_tensor, tweets_tensor, num_prop, category_prop, test_edge_index)
    loss_test = loss(output[test_idx], labels[test_idx])
    
    acc_test = accuracy(output[test_idx], labels[test_idx])
    
    output = output.max(1)[1].to('cpu').detach().numpy()

    label = labels.to('cpu').detach().numpy()
    
    f1 = f1_score(label[test_idx], output[test_idx])
    precision = precision_score(label[test_idx], output[test_idx])
    recall = recall_score(label[test_idx], output[test_idx])
    
    fpr, tpr, thresholds = roc_curve(label[test_idx], output[test_idx], pos_label=1)
    auc_val = auc(fpr, tpr)
    
    print("Test set results:",
          "test_loss= {:.4f}".format(loss_test.item()),
          "test_accuracy= {:.4f}".format(acc_test.item()),
          "precision= {:.4f}".format(precision.item()),
          "recall= {:.4f}".format(recall.item()),
          "f1_score= {:.4f}".format(f1.item()),
          "auc= {:.4f}".format(auc_val.item()))

In [17]:
#In case we want to split edges

# num_edges = followers_edge_index.size(1)
# indices = torch.arange(num_edges)

# train_indices, test_indices = train_test_split(indices.numpy(), test_size=0.2, random_state=42)

# train_edge_index = followers_edge_index[:, train_indices]
# test_edge_index = followers_edge_index[:, test_indices]

FOLLOWERS

In [18]:
epochs = 50

for epoch in range(epochs):
    train_gcn(epoch, optimizer, followers_edge_index)
    
test_gcn(followers_edge_index)

Epoch: 0001 loss_train: 0.6809 acc_train: 0.6589 acc_val: 0.0430
Epoch: 0002 loss_train: 0.9107 acc_train: 0.4229 acc_val: 0.9507
Epoch: 0003 loss_train: 0.6180 acc_train: 0.6840 acc_val: 0.0682
Epoch: 0004 loss_train: 0.6748 acc_train: 0.6718 acc_val: 0.0089
Epoch: 0005 loss_train: 0.6457 acc_train: 0.6717 acc_val: 0.0084
Epoch: 0006 loss_train: 0.6068 acc_train: 0.6764 acc_val: 0.0315
Epoch: 0007 loss_train: 0.6122 acc_train: 0.6856 acc_val: 0.1748
Epoch: 0008 loss_train: 0.6213 acc_train: 0.6845 acc_val: 0.2491
Epoch: 0009 loss_train: 0.6014 acc_train: 0.6975 acc_val: 0.1508
Epoch: 0010 loss_train: 0.5893 acc_train: 0.6902 acc_val: 0.0682
Epoch: 0011 loss_train: 0.5920 acc_train: 0.6804 acc_val: 0.0349
Epoch: 0012 loss_train: 0.5944 acc_train: 0.6775 acc_val: 0.0237
Epoch: 0013 loss_train: 0.5886 acc_train: 0.6784 acc_val: 0.0274
Epoch: 0014 loss_train: 0.5819 acc_train: 0.6838 acc_val: 0.0435
Epoch: 0015 loss_train: 0.5776 acc_train: 0.6935 acc_val: 0.0751
Epoch: 0016 loss_train: 0

FOLLOWING

In [19]:
epochs = 50

for epoch in range(epochs):
    train_gcn(epoch, optimizer, following_edge_index)
    
test_gcn(following_edge_index)

Epoch: 0001 loss_train: 0.5318 acc_train: 0.7489 acc_val: 0.3062
Epoch: 0002 loss_train: 0.5282 acc_train: 0.7501 acc_val: 0.3144
Epoch: 0003 loss_train: 0.5261 acc_train: 0.7503 acc_val: 0.3235
Epoch: 0004 loss_train: 0.5245 acc_train: 0.7512 acc_val: 0.3183
Epoch: 0005 loss_train: 0.5228 acc_train: 0.7505 acc_val: 0.3023
Epoch: 0006 loss_train: 0.5232 acc_train: 0.7502 acc_val: 0.2978
Epoch: 0007 loss_train: 0.5227 acc_train: 0.7510 acc_val: 0.3148
Epoch: 0008 loss_train: 0.5220 acc_train: 0.7517 acc_val: 0.3188
Epoch: 0009 loss_train: 0.5199 acc_train: 0.7512 acc_val: 0.2997
Epoch: 0010 loss_train: 0.5200 acc_train: 0.7510 acc_val: 0.2852
Epoch: 0011 loss_train: 0.5174 acc_train: 0.7514 acc_val: 0.3049
Epoch: 0012 loss_train: 0.5170 acc_train: 0.7523 acc_val: 0.3131
Epoch: 0013 loss_train: 0.5153 acc_train: 0.7523 acc_val: 0.2937
Epoch: 0014 loss_train: 0.5146 acc_train: 0.7515 acc_val: 0.2867
Epoch: 0015 loss_train: 0.5131 acc_train: 0.7528 acc_val: 0.3043
Epoch: 0016 loss_train: 0

INTERACTIONS

In [20]:
epochs = 50

for epoch in range(epochs):
    train_gcn(epoch, optimizer, interactions_edge_index)
    
test_gcn(interactions_edge_index)

#Cross Validation


Epoch: 0001 loss_train: 0.3128 acc_train: 0.8692 acc_val: 0.6489
Epoch: 0002 loss_train: 0.3124 acc_train: 0.8674 acc_val: 0.6936
Epoch: 0003 loss_train: 0.3249 acc_train: 0.8657 acc_val: 0.5878
Epoch: 0004 loss_train: 0.3375 acc_train: 0.8595 acc_val: 0.7573
Epoch: 0005 loss_train: 0.3140 acc_train: 0.8643 acc_val: 0.5800
Epoch: 0006 loss_train: 0.3254 acc_train: 0.8579 acc_val: 0.5306
Epoch: 0007 loss_train: 0.3113 acc_train: 0.8681 acc_val: 0.6353
Epoch: 0008 loss_train: 0.3205 acc_train: 0.8653 acc_val: 0.7054
Epoch: 0009 loss_train: 0.3109 acc_train: 0.8682 acc_val: 0.6224
Epoch: 0010 loss_train: 0.3133 acc_train: 0.8680 acc_val: 0.6055
Epoch: 0011 loss_train: 0.3076 acc_train: 0.8694 acc_val: 0.6726
Epoch: 0012 loss_train: 0.3130 acc_train: 0.8694 acc_val: 0.6920
Epoch: 0013 loss_train: 0.3046 acc_train: 0.8706 acc_val: 0.6336
Epoch: 0014 loss_train: 0.3092 acc_train: 0.8701 acc_val: 0.6112
Epoch: 0015 loss_train: 0.3030 acc_train: 0.8708 acc_val: 0.6672
Epoch: 0016 loss_train: 0