In [5]:
# import torch

# # Load data
# features = torch.load('MGTAB/features.pt')
# labels = torch.load('MGTAB/labels_bot.pt')
# edge_index = torch.load('MGTAB/edge_index.pt')

# print(f"User Feature Matrix: {features.shape}") # (Number of users, Feature dimension)
# print(f"Bot Labels: {labels.shape}")           # 0 for human, 1 for bot

In [6]:
import torch
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE


In [7]:
import torch

# Load the MGTAB tensors onto the CPU regardless of the device they were saved
# from. Without map_location, tensors serialized on a GPU fail to load on a
# CPU-only machine (or come back as CUDA tensors, on which .numpy() raises).
# NOTE(review): torch.load unpickles its input; only point this at trusted
# files (consider weights_only=True if the installed torch version supports it).
features = torch.load("./MGTAB/features.pt", map_location="cpu")        # shape: [num_users, feature_dim]
edge_index = torch.load("./MGTAB/edge_index.pt", map_location="cpu")    # shape: [2, num_edges]
edge_weight = torch.load("./MGTAB/edge_weight.pt", map_location="cpu")  # shape: [num_edges]
labels = torch.load("./MGTAB/labels_bot.pt", map_location="cpu")        # 0 = human, 1 = bot

print(features.shape, labels.shape)


torch.Size([10199, 788]) torch.Size([10199])


In [8]:
from sklearn.preprocessing import StandardScaler

# Convert the tensors to NumPy arrays. detach() + cpu() make the conversion
# safe even if a tensor tracks gradients or lives on an accelerator; a bare
# .numpy() raises in both of those cases.
X_raw = features.detach().cpu().numpy()
y = labels.detach().cpu().numpy()

# Standardize every feature column to zero mean / unit variance.
# NOTE(review): the scaler is fit on all rows. If this matrix is later split
# into train/test sets, fit the scaler on the training rows only to avoid
# leaking test statistics -- confirm against the downstream training code.
scaler = StandardScaler()
X = scaler.fit_transform(X_raw)

In [9]:
num_users = X.shape[0]

G = nx.Graph()

# Add every user as a node before any edge is inserted, so users that never
# appear in edge_index are still present in the graph (as isolated nodes).
G.add_nodes_from(range(num_users))

edges = edge_index.numpy().T  # one (source, target) row per edge

# Convert whole columns to plain Python scalars in one pass instead of calling
# int(...) / .item() once per edge inside a Python-level loop.
edge_sources = edges[:, 0].tolist()
edge_targets = edges[:, 1].tolist()
if edge_weight is not None:
    edge_weights = edge_weight.tolist()
    if len(edge_weights) < len(edge_sources):
        # zip() below would silently drop the unmatched edges; fail loudly instead.
        raise ValueError(
            f"edge_weight has {len(edge_weights)} entries but edge_index "
            f"describes {len(edge_sources)} edges"
        )
else:
    # No weights supplied: every edge gets unit weight.
    edge_weights = [1.0] * len(edge_sources)

# Bulk insertion. As with repeated add_edge calls, a pair that occurs more
# than once (including as (u, v) and (v, u), since the graph is undirected)
# keeps the weight of its last occurrence.
G.add_weighted_edges_from(zip(edge_sources, edge_targets, edge_weights), weight="weight")

print("Graph nodes:", G.number_of_nodes())
print("Graph edges:", G.number_of_edges())


Graph nodes: 10199
Graph edges: 908460


In [10]:
def graph_features(G, num_nodes):
    """Build a per-node matrix of simple structural graph features.

    Parameters
    ----------
    G : graph object
        Must provide ``G.degree()`` yielding ``(node, degree)`` pairs and be
        accepted by ``nx.clustering``.
    num_nodes : int
        Number of rows to produce; row ``i`` describes node id ``i``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(num_nodes, 2)``: column 0 is the node degree,
        column 1 the clustering coefficient. Node ids missing from the graph
        get 0 in both columns.
    """
    degree_of = dict(G.degree())
    clustering_of = nx.clustering(G)

    node_ids = range(num_nodes)
    degree_column = np.array(
        [degree_of.get(node_id, 0) for node_id in node_ids], dtype=float
    )
    clustering_column = np.array(
        [clustering_of.get(node_id, 0.0) for node_id in node_ids], dtype=float
    )

    return np.column_stack((degree_column, clustering_column))


In [11]:
# One row of structural features (degree, clustering coefficient) per row of X.
X_graph = graph_features(G, num_nodes=X.shape[0])
print(X_graph.shape)


(10199, 2)


In [12]:
# Append the graph-derived columns to the right of the standardized features.
# NOTE(review): X was standardized but X_graph is appended as-is, so the two
# graph columns are on a different scale from the rest -- confirm this is
# intended for the downstream model.
X_final = np.concatenate((X, X_graph), axis=1)

# Quick sanity summary: matrix/label shapes and the fraction of label-1 rows.
print("X_final shape:", X_final.shape)
print("y shape:", y.shape)
print("Bot ratio:", y.mean())

X_final shape: (10199, 790)
y shape: (10199,)
Bot ratio: 0.26943818021374644


In [16]:
# Output files for the assembled feature matrix and its label vector
# (written relative to the current working directory).
OUT_X = "X_mgtab_large.npy"
OUT_Y = "y_mgtab_large.npy"

# Persist both arrays in NumPy's .npy format, features first, then labels.
for out_path, array in ((OUT_X, X_final), (OUT_Y, y)):
    np.save(out_path, array)

In [13]:
# idx = np.random.choice(len(X_final), size=3000, replace=False)
# X_sub = X_final[idx]
# y_sub = y[idx]

# tsne = TSNE(n_components=2, perplexity=30, random_state=42)
# X_tsne = tsne.fit_transform(X_sub)

# plt.figure(figsize=(7,5))
# plt.scatter(X_tsne[y_sub==0,0], X_tsne[y_sub==0,1], s=8, alpha=0.5, label="Human")
# plt.scatter(X_tsne[y_sub==1,0], X_tsne[y_sub==1,1], s=8, alpha=0.5, label="Bot")
# plt.legend()
# plt.title("t-SNE Visualization of MGTAB Features")
# plt.show()


In [14]:
# import random
# import networkx as nx

# bot_nodes = np.where(y == 1)[0]
# center = random.choice(bot_nodes)

# ego = nx.ego_graph(G, center, radius=2)

# plt.figure(figsize=(6,6))
# nx.draw(
#     ego,
#     node_size=40,
#     node_color=["red" if y[n]==1 else "blue" for n in ego.nodes()],
#     with_labels=False
# )
# plt.title("Bot-Centered  Network")
# plt.show()

In [15]:
# import seaborn as sns
# import pandas as pd

# df_plot = pd.DataFrame({
#     "degree": X_graph[:,0],
#     "label": y
# })

# plt.figure(figsize=(6,4))
# sns.boxplot(x="label", y="degree", data=df_plot)
# plt.xticks([0,1], ["Human", "Bot"])
# plt.title("Network Degree Comparison")
# plt.show()
