In [1]:
import torch
from torch import nn

from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_curve,auc

from sklearn.model_selection import train_test_split

from model import BotGAT
from utils import accuracy, init_weights

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
device = 'cpu'
embedding_size = 32
dropout = 0.1
lr = 1e-2
weight_decay = 5e-2

In [3]:
#Re calculate Des Tensor
path = "/Dataset/"
des_tensor = torch.load(path + "filtered_des_tensor.pt").t().to(device)

num_prop = torch.load(path + "filtered_num_properties_tensor.pt").t().to(device)
category_prop = torch.load(path + "filtered_cat_properties_tensor.pt").t().to(device)
labels = torch.load(path + "filtered_label.pt").t().to(device)

tweets_tensor = torch.load("/Users/ketanjadhav/Documents/BotRGCN/processed_data/tweets_tensor.pt").t().to(device)

train_idx = torch.load(path + "filtered_train_idx.pt").to(device)
val_idx = torch.load(path + "filtered_val_idx.pt").to(device)
test_idx = torch.load(path + "filtered_test_idx.pt").to(device)

In [4]:
follower_edge_index = torch.load(path + "filtered_followers_edge_index.pt").to(device)
following_edge_index = torch.load(path + "filtered_following_edge_index.pt").to(device)
interaction_edge_index = torch.load(path + "filtered_interaction_edge_index.pt").to(device)

combined_edge_index = torch.load(path + "filtered_edge_index.pt").to(device)
combined_edge_type = torch.load(path + "filtered_edge_type.pt").to(device)

all_edge_index = torch.load(path + "all_combined_edge_index.pt").to(device)
all_edge_type = torch.load(path + "all_combined_edge_type.pt").to(device)

In [5]:
gat_model=BotGAT(cat_prop_size=3,embedding_dimension=30).to(device)
loss=nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(gat_model.parameters(),
                    lr=lr,weight_decay=weight_decay)

gat_model.apply(init_weights)

BotGAT(
  (linear_relu_des): Sequential(
    (0): Linear(in_features=768, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_num_prop): Sequential(
    (0): Linear(in_features=4, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_cat_prop): Sequential(
    (0): Linear(in_features=3, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_input): Sequential(
    (0): Linear(in_features=30, out_features=30, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_output1): Sequential(
    (0): Linear(in_features=30, out_features=30, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_output2): Linear(in_features=30, out_features=2, bias=True)
  (gat1): GATConv(30, 10, heads=3)
  (gat2): GATConv(30, 30, heads=1)
)

In [6]:
def train_gat(epoch, optimizer, train_edge_index):
    gat_model.train()
    output = gat_model(des_tensor, tweets_tensor, num_prop, category_prop, train_edge_index)
    loss_train = loss(output[train_idx], labels[train_idx])
    acc_train = accuracy(output[train_idx], labels[train_idx])
    acc_val = accuracy(output[val_idx], labels[val_idx])
    
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()
    
    print('Epoch: {:04d}'.format(epoch+1),
          'loss_train: {:.4f}'.format(loss_train.item()),
          'acc_train: {:.4f}'.format(acc_train.item()),
          'acc_val: {:.4f}'.format(acc_val.item()))
    
    return acc_train, loss_train

In [7]:
import numpy as np

def test_gat(test_edge_index):
    gat_model.eval()
    output = gat_model(des_tensor, tweets_tensor, num_prop, category_prop, test_edge_index)
    loss_test = loss(output[test_idx], labels[test_idx])
    acc_test = accuracy(output[test_idx], labels[test_idx])
    
    output = output.max(1)[1].to('cpu').detach().numpy()
    label = labels.to('cpu').detach().numpy()
    
    f1 = f1_score(label[test_idx], output[test_idx])
    precision = precision_score(label[test_idx], output[test_idx])
    recall = recall_score(label[test_idx], output[test_idx])
    
    fpr, tpr, thresholds = roc_curve(label[test_idx], output[test_idx], pos_label=1)
    auc_val = auc(fpr, tpr)
    
    print("Test set results:",
          "test_loss= {:.4f}".format(loss_test.item()),
          "test_accuracy= {:.4f}".format(acc_test.item()),
          "precision= {:.4f}".format(precision.item()),
          "recall= {:.4f}".format(recall.item()),
          "f1_score= {:.4f}".format(f1.item()),
          "auc= {:.4f}".format(auc_val.item()))

FOLLOWER

In [8]:
epochs = 50

print(len(follower_edge_index[1]))
for epoch in range(epochs):
    train_gat(epoch, optimizer, follower_edge_index)
    
test_gat(follower_edge_index)

508539
Epoch: 0001 loss_train: 0.9720 acc_train: 0.4015 acc_val: 0.7006
Epoch: 0002 loss_train: 0.8029 acc_train: 0.6653 acc_val: 0.1331
Epoch: 0003 loss_train: 0.6505 acc_train: 0.6756 acc_val: 0.0310
Epoch: 0004 loss_train: 0.6069 acc_train: 0.6891 acc_val: 0.0867
Epoch: 0005 loss_train: 0.5870 acc_train: 0.6864 acc_val: 0.3201
Epoch: 0006 loss_train: 0.5925 acc_train: 0.6719 acc_val: 0.4912
Epoch: 0007 loss_train: 0.5670 acc_train: 0.7050 acc_val: 0.4007
Epoch: 0008 loss_train: 0.5635 acc_train: 0.7224 acc_val: 0.2936
Epoch: 0009 loss_train: 0.5555 acc_train: 0.7278 acc_val: 0.2934
Epoch: 0010 loss_train: 0.5498 acc_train: 0.7303 acc_val: 0.3674
Epoch: 0011 loss_train: 0.5420 acc_train: 0.7191 acc_val: 0.5009
Epoch: 0012 loss_train: 0.5416 acc_train: 0.7100 acc_val: 0.5578
Epoch: 0013 loss_train: 0.5238 acc_train: 0.7318 acc_val: 0.4358
Epoch: 0014 loss_train: 0.5221 acc_train: 0.7390 acc_val: 0.3364
Epoch: 0015 loss_train: 0.5227 acc_train: 0.7413 acc_val: 0.3194
Epoch: 0016 loss_t

FOLLOWING

In [9]:
epochs = 50
#(279886x4 and 6x10)
for epoch in range(epochs):
    train_gat(epoch, optimizer, following_edge_index)
    
test_gat(following_edge_index)

Epoch: 0001 loss_train: 0.4775 acc_train: 0.7678 acc_val: 0.3695
Epoch: 0002 loss_train: 0.4758 acc_train: 0.7689 acc_val: 0.3833
Epoch: 0003 loss_train: 0.4748 acc_train: 0.7693 acc_val: 0.3887
Epoch: 0004 loss_train: 0.4721 acc_train: 0.7708 acc_val: 0.3917
Epoch: 0005 loss_train: 0.4700 acc_train: 0.7720 acc_val: 0.4015
Epoch: 0006 loss_train: 0.4672 acc_train: 0.7731 acc_val: 0.4118
Epoch: 0007 loss_train: 0.4651 acc_train: 0.7746 acc_val: 0.4234
Epoch: 0008 loss_train: 0.4622 acc_train: 0.7747 acc_val: 0.4373
Epoch: 0009 loss_train: 0.4594 acc_train: 0.7752 acc_val: 0.4373
Epoch: 0010 loss_train: 0.4562 acc_train: 0.7751 acc_val: 0.4372
Epoch: 0011 loss_train: 0.4532 acc_train: 0.7745 acc_val: 0.4521
Epoch: 0012 loss_train: 0.4497 acc_train: 0.7760 acc_val: 0.4464
Epoch: 0013 loss_train: 0.4465 acc_train: 0.7754 acc_val: 0.4564
Epoch: 0014 loss_train: 0.4433 acc_train: 0.7770 acc_val: 0.4743
Epoch: 0015 loss_train: 0.4395 acc_train: 0.7782 acc_val: 0.4627
Epoch: 0016 loss_train: 0

INTERACTIONS

In [10]:
epochs = 50

for epoch in range(epochs):
    train_gat(epoch, optimizer, interaction_edge_index)
    
test_gat(interaction_edge_index)

Epoch: 0001 loss_train: 0.2772 acc_train: 0.8868 acc_val: 0.7508
Epoch: 0002 loss_train: 0.2760 acc_train: 0.8883 acc_val: 0.7338
Epoch: 0003 loss_train: 0.2769 acc_train: 0.8861 acc_val: 0.7671
Epoch: 0004 loss_train: 0.2760 acc_train: 0.8885 acc_val: 0.6883
Epoch: 0005 loss_train: 0.2753 acc_train: 0.8875 acc_val: 0.7646
Epoch: 0006 loss_train: 0.2687 acc_train: 0.8900 acc_val: 0.7263
Epoch: 0007 loss_train: 0.2733 acc_train: 0.8882 acc_val: 0.6640
Epoch: 0008 loss_train: 0.2677 acc_train: 0.8901 acc_val: 0.7426
Epoch: 0009 loss_train: 0.2671 acc_train: 0.8904 acc_val: 0.7419
Epoch: 0010 loss_train: 0.2657 acc_train: 0.8914 acc_val: 0.6842
Epoch: 0011 loss_train: 0.2648 acc_train: 0.8915 acc_val: 0.6870
Epoch: 0012 loss_train: 0.2632 acc_train: 0.8909 acc_val: 0.7485
Epoch: 0013 loss_train: 0.2631 acc_train: 0.8909 acc_val: 0.7611
Epoch: 0014 loss_train: 0.2610 acc_train: 0.8936 acc_val: 0.7183
Epoch: 0015 loss_train: 0.2615 acc_train: 0.8935 acc_val: 0.7052
Epoch: 0016 loss_train: 0

COMBINED FOLLOWER_FOLLOWING

In [11]:
epochs = 50

for epoch in range(epochs):
    train_gat(epoch, optimizer, combined_edge_index)
    
test_gat(combined_edge_index)

Epoch: 0001 loss_train: 0.4256 acc_train: 0.8210 acc_val: 0.5611
Epoch: 0002 loss_train: 0.5415 acc_train: 0.7221 acc_val: 0.8891
Epoch: 0003 loss_train: 0.4024 acc_train: 0.8322 acc_val: 0.5817
Epoch: 0004 loss_train: 0.4811 acc_train: 0.7839 acc_val: 0.3616
Epoch: 0005 loss_train: 0.4040 acc_train: 0.8198 acc_val: 0.7097
Epoch: 0006 loss_train: 0.4658 acc_train: 0.7882 acc_val: 0.8478
Epoch: 0007 loss_train: 0.4087 acc_train: 0.8273 acc_val: 0.7269
Epoch: 0008 loss_train: 0.4120 acc_train: 0.8065 acc_val: 0.4635
Epoch: 0009 loss_train: 0.4266 acc_train: 0.8042 acc_val: 0.4221
Epoch: 0010 loss_train: 0.4008 acc_train: 0.8504 acc_val: 0.6126
Epoch: 0011 loss_train: 0.4002 acc_train: 0.8488 acc_val: 0.7168
Epoch: 0012 loss_train: 0.3929 acc_train: 0.8346 acc_val: 0.6697
Epoch: 0013 loss_train: 0.4004 acc_train: 0.8282 acc_val: 0.6419
Epoch: 0014 loss_train: 0.3757 acc_train: 0.8452 acc_val: 0.6923
Epoch: 0015 loss_train: 0.3846 acc_train: 0.8461 acc_val: 0.6867
Epoch: 0016 loss_train: 0

COMBINED ALL

In [12]:
epochs = 50

for epoch in range(epochs):
    train_gat(epoch, optimizer, all_edge_index)
    
test_gat(all_edge_index)

Epoch: 0001 loss_train: 0.3272 acc_train: 0.8768 acc_val: 0.7474
Epoch: 0002 loss_train: 0.3307 acc_train: 0.8787 acc_val: 0.7673
Epoch: 0003 loss_train: 0.3300 acc_train: 0.8774 acc_val: 0.7184
Epoch: 0004 loss_train: 0.3254 acc_train: 0.8794 acc_val: 0.7559
Epoch: 0005 loss_train: 0.3259 acc_train: 0.8793 acc_val: 0.7634
Epoch: 0006 loss_train: 0.3256 acc_train: 0.8790 acc_val: 0.7318
Epoch: 0007 loss_train: 0.3251 acc_train: 0.8810 acc_val: 0.7704
Epoch: 0008 loss_train: 0.3233 acc_train: 0.8799 acc_val: 0.7365
Epoch: 0009 loss_train: 0.3213 acc_train: 0.8816 acc_val: 0.7533
Epoch: 0010 loss_train: 0.3219 acc_train: 0.8807 acc_val: 0.7637
Epoch: 0011 loss_train: 0.3236 acc_train: 0.8797 acc_val: 0.7233
Epoch: 0012 loss_train: 0.3247 acc_train: 0.8833 acc_val: 0.7726
Epoch: 0013 loss_train: 0.3272 acc_train: 0.8782 acc_val: 0.7162
Epoch: 0014 loss_train: 0.3216 acc_train: 0.8823 acc_val: 0.7724
Epoch: 0015 loss_train: 0.3186 acc_train: 0.8839 acc_val: 0.7401
Epoch: 0016 loss_train: 0