In [1]:
# Standard library.
import importlib

# Third-party dependencies.
import torch
from torch import nn

from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_curve,auc

from sklearn.model_selection import train_test_split

# Project-local modules.
# "rgcn-model" contains a hyphen, which is not a valid Python identifier, so
# `from rgcn-model import BotRGCN` is a SyntaxError. Load the module by its
# string name instead.
# NOTE(review): assumes the file on disk really is rgcn-model.py; if it is
# rgcn_model.py, replace this with `from rgcn_model import BotRGCN`.
BotRGCN = importlib.import_module("rgcn-model").BotRGCN
from utils import accuracy, init_weights

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Seed the torch RNG so that weight initialisation (and any other torch-level
# randomness) is repeatable across Restart & Run All.
seed = 42
torch.manual_seed(seed)

# Run configuration shared by the cells below.
device = 'cpu'         # device every tensor and the model are moved to
# NOTE(review): embedding_size and dropout are not referenced by any cell
# visible in this notebook — confirm whether they are still needed.
embedding_size = 32
dropout = 0.1
lr = 1e-2              # learning rate passed to the optimizer
weight_decay = 5e-2    # weight decay passed to the optimizer

In [3]:
# Load the pre-computed node tensors, labels and split indices from disk.
path = "/Dataset/"
# NOTE(review): machine-specific absolute path that does not live under `path`;
# it should be moved next to the other dataset files or made configurable.
tweets_tensor_path = "/Users/ketanjadhav/Documents/BotRGCN/processed_data/tweets_tensor.pt"


def _load_tensor(file_path, transpose=False):
    """Load a saved tensor onto `device`, optionally transposing it first.

    `map_location` makes deserialisation succeed even when the file was saved
    from a different device than the one this notebook runs on.
    """
    tensor = torch.load(file_path, map_location=device)
    if transpose:
        tensor = tensor.t()
    return tensor.to(device)


# Per-node tensors; each file is transposed after loading.
des_tensor = _load_tensor(path + "filtered_des_tensor.pt", transpose=True)

num_prop = _load_tensor(path + "filtered_num_properties_tensor.pt", transpose=True)
category_prop = _load_tensor(path + "filtered_cat_properties_tensor.pt", transpose=True)
labels = _load_tensor(path + "filtered_label.pt", transpose=True)

tweets_tensor = _load_tensor(tweets_tensor_path, transpose=True)

# Index tensors selecting the train / validation / test nodes.
train_idx = _load_tensor(path + "filtered_train_idx.pt")
val_idx = _load_tensor(path + "filtered_val_idx.pt")
test_idx = _load_tensor(path + "filtered_test_idx.pt")

In [4]:
# Load the graph structure. `map_location` lets the files deserialise on this
# machine's device even if they were saved from a different one.
edge_index = torch.load(path + "filtered_edge_index.pt", map_location=device).to(device)
edge_type = torch.load(path + "filtered_edge_type.pt", map_location=device).to(device)

# Optional (disabled): shuffle edge_index and edge_type with one shared
# permutation so that every edge keeps its type.
# perm = torch.randperm(edge_index.size(1))
# shuffled_edge_index = edge_index[:, perm]
# shuffled_edge_type = edge_type[perm]

In [5]:
# Quick visual sanity check of the loaded tensors, one per line.
print(
    num_prop,
    category_prop,
    edge_index,
    edge_type,
    des_tensor,
    sep="\n",
)
# Disabled together with the optional edge shuffle:
# print(shuffled_edge_index)
# print(shuffled_edge_type)

tensor([[-0.0563, -1.1613, -0.1290, -0.1546],
        [-0.0683,  0.0780, -0.0735, -0.1652],
        [-0.0685, -1.2439, -0.1387, -0.1798],
        ...,
        [-0.0645,  0.5017, -0.1154,  2.0183],
        [-0.0666,  1.2083, -0.0832, -0.1526],
        [-0.0682,  0.4337, -0.1212,  3.6935]])
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        ...,
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
tensor([[204015, 129076, 129076,  ..., 164201, 167506, 181147],
        [ 53479, 129501, 120774,  ...,  31536, 257550, 159618]])
tensor([0, 0, 0,  ..., 1, 1, 1])
tensor([[-0.0212,  0.1255, -0.0082,  ..., -0.0461, -0.0250, -0.1196],
        [-0.0107, -0.0601,  0.0522,  ..., -0.0407, -0.0450, -0.1029],
        [-0.0375,  0.0321,  0.0392,  ..., -0.1060, -0.0376, -0.0408],
        ...,
        [-0.0551,  0.0950,  0.0320,  ..., -0.1207,  0.0556, -0.0173],
        [ 0.0541, -0.0424,  0.0753,  ..., -0.3016,  0.1198, -0.0377],
        [-0.0848,  0.2171, -0.0835,  ..

In [6]:
# Fail fast on inconsistent inputs instead of deep inside the model's forward
# pass: every per-node tensor must have one row per node, there must be exactly
# one edge type per edge, and every edge endpoint must be a valid node id.
num_nodes = des_tensor.shape[0]
assert num_prop.shape[0] == num_nodes, "num_prop and des_tensor disagree on the number of nodes"
assert category_prop.shape[0] == num_nodes, "category_prop and des_tensor disagree on the number of nodes"
assert edge_index.shape[0] == 2, "edge_index must have exactly two rows"
assert edge_index.shape[1] == edge_type.shape[0], "edge_index and edge_type disagree on the number of edges"
assert edge_index.numel() == 0 or int(edge_index.max()) < num_nodes, "edge_index references a node id outside the feature tensors"

print(num_prop.shape)
print(category_prop.shape)
print(edge_index.shape)
print(edge_type.shape)
print(des_tensor.shape)
# print(shuffled_edge_index.shape)
# print(shuffled_edge_type.shape)

torch.Size([279886, 4])
torch.Size([279886, 3])
torch.Size([2, 2298309])
torch.Size([2298309])
torch.Size([279886, 768])


In [12]:
# Build the classifier and place it on the configured device.
rgcn_model = BotRGCN(
    cat_prop_size=3,
    embedding_dimension=30,
    num_relations=3,
).to(device)

# Training objective and optimizer (hyper-parameters come from the config cell).
loss = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(
    rgcn_model.parameters(),
    lr=lr,
    weight_decay=weight_decay,
)

# Apply the project's weight initialiser to every sub-module; as the last
# expression of the cell this also displays the model.
rgcn_model.apply(init_weights)

BotRGCN(
  (linear_relu_des): Sequential(
    (0): Linear(in_features=768, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_num_prop): Sequential(
    (0): Linear(in_features=4, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_cat_prop): Sequential(
    (0): Linear(in_features=3, out_features=10, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_relu_input): Sequential(
    (0): Linear(in_features=30, out_features=30, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (rgcn): RGCNConv(30, 30, num_relations=3)
  (linear_relu_output1): Sequential(
    (0): Linear(in_features=30, out_features=30, bias=True)
    (1): LeakyReLU(negative_slope=0.01)
  )
  (linear_output2): Linear(in_features=30, out_features=2, bias=True)
)

In [8]:
def train_rgcn(epoch, optimizer, train_edge_index, edge_type):
    """Run one full-graph optimisation step and print the epoch's metrics.

    Args:
        epoch: Zero-based epoch counter; it is printed as ``epoch + 1``.
        optimizer: Optimizer whose ``zero_grad`` and ``step`` are invoked.
        train_edge_index: Edge index forwarded to ``rgcn_model``.
        edge_type: Edge types forwarded alongside ``train_edge_index``.

    Returns:
        The pair ``(acc_train, loss_train)`` computed on ``train_idx``.
    """
    rgcn_model.train()

    # A single forward pass over the whole graph feeds both metrics below.
    out = rgcn_model(
        des_tensor, tweets_tensor, num_prop, category_prop,
        train_edge_index, edge_type,
    )
    train_out, train_labels = out[train_idx], labels[train_idx]
    loss_train = loss(train_out, train_labels)
    acc_train = accuracy(train_out, train_labels)
    # Validation accuracy is read from the same (train-mode) forward pass.
    acc_val = accuracy(out[val_idx], labels[val_idx])

    # Standard update: clear stale gradients, back-propagate, step.
    optimizer.zero_grad()
    loss_train.backward()
    optimizer.step()

    print(f'Epoch: {epoch + 1:04d}',
          f'loss_train: {loss_train.item():.4f}',
          f'acc_train: {acc_train.item():.4f}',
          f'acc_val: {acc_val.item():.4f}')

    return acc_train, loss_train

In [9]:
import numpy as np

def test_rgcn(test_edge_index, edge_type):
    """Evaluate ``rgcn_model`` on the ``test_idx`` nodes and print the metrics.

    Prints the test loss, accuracy, precision, recall, F1 and ROC-AUC on a
    single line. Nothing is returned.

    Args:
        test_edge_index: Edge index forwarded to ``rgcn_model``.
        edge_type: Edge types forwarded alongside ``test_edge_index``.
    """
    rgcn_model.eval()

    # Evaluation needs no gradients; disabling autograd avoids building a
    # graph for the full forward pass.
    with torch.no_grad():
        output = rgcn_model(des_tensor, tweets_tensor, num_prop, category_prop, test_edge_index, edge_type)
        test_output = output[test_idx]
        test_labels = labels[test_idx]
        loss_test = loss(test_output, test_labels)
        acc_test = accuracy(test_output, test_labels)
        # Continuous score for class 1. ROC/AUC must be computed from scores:
        # feeding hard 0/1 predictions collapses the curve to a single
        # operating point.
        # Assumes the model emits one column per class (two columns here).
        positive_score = torch.softmax(test_output, dim=1)[:, 1]

    # Index in torch first and convert afterwards, so that no NumPy array is
    # ever indexed with a torch tensor (which breaks off-CPU).
    y_true = test_labels.to('cpu').detach().numpy()
    y_pred = test_output.max(1)[1].to('cpu').detach().numpy()
    y_score = positive_score.to('cpu').detach().numpy()

    f1 = f1_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)

    fpr, tpr, thresholds = roc_curve(y_true, y_score, pos_label=1)
    auc_val = auc(fpr, tpr)

    print("Test set results:",
          "test_loss= {:.4f}".format(loss_test.item()),
          "test_accuracy= {:.4f}".format(acc_test.item()),
          "precision= {:.4f}".format(precision.item()),
          "recall= {:.4f}".format(recall.item()),
          "f1_score= {:.4f}".format(f1.item()),
          "auc= {:.4f}".format(auc_val.item()))

In [10]:
# Optional (disabled): split the edges into train and test subsets.

# num_edges = edge_index.size(1)
# indices = torch.arange(num_edges)

# train_indices, test_indices = train_test_split(indices.numpy(), test_size=0.2, random_state=42)

# train_edge_index = edge_index[:, train_indices]
# test_edge_index = edge_index[:, test_indices]

FOLLOWER-FOLLOWING

In [11]:
# Experiment on the filtered edge set loaded above: train for a fixed number of
# epochs, then report the test metrics once.
epochs = 50

for epoch in range(epochs):
    train_rgcn(
        epoch,
        optimizer=optimizer,
        train_edge_index=edge_index,
        edge_type=edge_type,
    )

test_rgcn(test_edge_index=edge_index, edge_type=edge_type)

Epoch: 0001 loss_train: 1.5536 acc_train: 0.4790 acc_val: 0.4852
Epoch: 0002 loss_train: 1.4190 acc_train: 0.6676 acc_val: 0.0097
Epoch: 0003 loss_train: 0.8478 acc_train: 0.6708 acc_val: 0.0304
Epoch: 0004 loss_train: 0.6673 acc_train: 0.6546 acc_val: 0.2120
Epoch: 0005 loss_train: 0.6580 acc_train: 0.6288 acc_val: 0.4957
Epoch: 0006 loss_train: 0.6364 acc_train: 0.6501 acc_val: 0.5496
Epoch: 0007 loss_train: 0.5994 acc_train: 0.6980 acc_val: 0.4374
Epoch: 0008 loss_train: 0.5664 acc_train: 0.7202 acc_val: 0.3138
Epoch: 0009 loss_train: 0.5492 acc_train: 0.7276 acc_val: 0.2489
Epoch: 0010 loss_train: 0.5372 acc_train: 0.7371 acc_val: 0.2651
Epoch: 0011 loss_train: 0.5186 acc_train: 0.7511 acc_val: 0.3615
Epoch: 0012 loss_train: 0.5052 acc_train: 0.7568 acc_val: 0.4932
Epoch: 0013 loss_train: 0.4943 acc_train: 0.7631 acc_val: 0.5583
Epoch: 0014 loss_train: 0.4737 acc_train: 0.7807 acc_val: 0.5688
Epoch: 0015 loss_train: 0.4580 acc_train: 0.7952 acc_val: 0.5585
Epoch: 0016 loss_train: 0

ALL

In [13]:
# Load the combined edge set. `map_location` lets the files deserialise on this
# machine's device even if they were saved from a different one.
all_edge_index = torch.load(path + "all_combined_edge_index.pt", map_location=device).to(device)
all_edge_type = torch.load(path + "all_combined_edge_type.pt", map_location=device).to(device)


In [14]:
# Start this experiment from a freshly initialised model and optimizer.
# Without this, a top-to-bottom run would keep training the model (and reuse
# the optimizer state) left over from the previous experiment, so the two
# results would not be comparable. train_rgcn/test_rgcn read the global
# `rgcn_model`, so rebinding the name here is sufficient.
rgcn_model = BotRGCN(cat_prop_size=3, embedding_dimension=30, num_relations=3).to(device)
rgcn_model.apply(init_weights)
optimizer = torch.optim.AdamW(rgcn_model.parameters(),
                    lr=lr, weight_decay=weight_decay)

epochs = 50

for epoch in range(epochs):
    train_rgcn(epoch, optimizer, all_edge_index, all_edge_type)

test_rgcn(all_edge_index, all_edge_type)

Epoch: 0001 loss_train: 0.8601 acc_train: 0.6176 acc_val: 0.1929
Epoch: 0002 loss_train: 1.0340 acc_train: 0.4519 acc_val: 0.8656
Epoch: 0003 loss_train: 0.6664 acc_train: 0.6601 acc_val: 0.2434
Epoch: 0004 loss_train: 0.6968 acc_train: 0.6734 acc_val: 0.1209
Epoch: 0005 loss_train: 0.6157 acc_train: 0.6852 acc_val: 0.1539
Epoch: 0006 loss_train: 0.5631 acc_train: 0.7067 acc_val: 0.4353
Epoch: 0007 loss_train: 0.5507 acc_train: 0.7230 acc_val: 0.5624
Epoch: 0008 loss_train: 0.5009 acc_train: 0.7631 acc_val: 0.4811
Epoch: 0009 loss_train: 0.4837 acc_train: 0.7803 acc_val: 0.4060
Epoch: 0010 loss_train: 0.4569 acc_train: 0.7936 acc_val: 0.4699
Epoch: 0011 loss_train: 0.4372 acc_train: 0.8017 acc_val: 0.5795
Epoch: 0012 loss_train: 0.4246 acc_train: 0.8078 acc_val: 0.6336
Epoch: 0013 loss_train: 0.4026 acc_train: 0.8222 acc_val: 0.6199
Epoch: 0014 loss_train: 0.3883 acc_train: 0.8318 acc_val: 0.5863
Epoch: 0015 loss_train: 0.3788 acc_train: 0.8374 acc_val: 0.5891
Epoch: 0016 loss_train: 0