In [2]:
!pip install torch-geometric

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting torch-geometric
  Downloading torch_geometric-2.3.1.tar.gz (661 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/661.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m661.6/661.6 kB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: torch-geometric
  Building wheel for torch-geometric (pyproject.toml) ... [?25l[?25hdone
  Created wheel for torch-geometric: filename=torch_geometric-2.3.1-py3-none-any.whl size=910476 sha256=c3b79163614f4a85373ac81f163f2162ceac1fbd401298cb4a5e7e6dd6c7dde7
  Stored in directory: /root/.cache/pip/wheels/ac/dc/30/e2874821ff308ee67dcd7a66dbde912411e19e35a1addda028
Su

In [4]:
import os

from google.colab import drive

# Mount Google Drive into the Colab filesystem, then make the project folder
# the working directory so later relative paths resolve inside it.
drive.mount('/content/drive')
os.chdir('/content/drive/My Drive/social_network')

Mounted at /content/drive


In [28]:
import pandas as pd
import torch
from torch_geometric.data import Data
from torch_geometric.utils import from_networkx

In [29]:
# Load the three CSV inputs from the current working directory in one pass.
# Later cells read 'source'/'dest' from edges_df and the karma/upvote feature
# columns plus 'username' from df.
users_df, edges_df, df = (
    pd.read_csv(csv_name)
    for csv_name in ('users_1.csv', 'edges_1.csv', 'df_1.csv')
)

In [30]:
# Per-node input features: one row per row of df, one column per listed field.
feature_columns = ['comment_karma', 'link_karma', 'avg_upvotes']
node_features = df[feature_columns].to_numpy()

In [32]:
# Map every username to the row label of df. The same labels are used as node
# ids when the graph is built and for the df.loc[...] lookups when results are
# printed, so all three stay aligned.
# NOTE(review): assumes df['username'] is unique and that every value in
# edges_df['source'] / edges_df['dest'] appears in it -- confirm against the data.
username_to_index = {
    username: node_id for node_id, username in zip(df.index, df['username'])
}

# Translate the (source, dest) username pairs into pairs of node ids.
edges = [(username_to_index[source], username_to_index[dest]) for source, dest in zip(edges_df['source'], edges_df['dest'])]

In [33]:
import networkx as nx

# Undirected graph whose node ids are the row labels of df. Each node carries
# the row's fields except the first and the last column as attributes.
graph = nx.Graph()
graph.add_nodes_from(
    (node_id, record[1:-1].to_dict()) for node_id, record in df.iterrows()
)

# Connect the nodes using the id pairs computed earlier
graph.add_edges_from(edges)

# Convert to a PyTorch Geometric Data object and attach the feature matrix
data = from_networkx(graph)
data.x = torch.tensor(node_features, dtype=torch.float)

In [35]:
from torch_geometric.utils import train_test_split_edges

# Seed torch before the split: the held-out edges are drawn at random, so
# without a fixed seed every run trains and evaluates on a different partition.
torch.manual_seed(42)

# Hold out 5% of the edges for validation and 10% for testing. Later cells
# read train_pos_edge_index and test_pos_edge_index from the returned object.
# NOTE: this rebinds `data`, so the cell is not idempotent -- rebuild `data`
# from the graph before running it a second time.
data = train_test_split_edges(data, val_ratio=0.05, test_ratio=0.1)



In [36]:
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    """Two stacked GCNConv layers with a ReLU between them.

    Args:
        in_channels: Input size of the first convolution.
        hidden_channels: Output size of the first convolution and input size
            of the second.
        out_channels: Output size of the second convolution.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Apply conv1, ReLU, then conv2 over ``edge_index`` and return the result."""
        hidden = torch.relu(self.conv1(x, edge_index))
        return self.conv2(hidden, edge_index)

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Input width follows the number of feature columns; the hidden and output
# widths are fixed at 128 and 64.
model = Net(
    in_channels=node_features.shape[1],
    hidden_channels=128,
    out_channels=64,
).to(device)
data = data.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [37]:
def train():
    """Run one optimisation step of link prediction and return the loss.

    Node embeddings are computed from the training edges only. Every training
    edge is a positive example, and an equal number of node pairs drawn by
    negative sampling serve as negative examples. A pair is scored by the dot
    product of its two node embeddings, and the scores are trained with
    binary cross-entropy on logits.

    Returns:
        float: The loss value for this step.
    """
    # Imported here so this cell does not depend on edits to the import cell.
    from torch_geometric.utils import negative_sampling

    model.train()
    optimizer.zero_grad()

    pos_edge_index = data.train_pos_edge_index
    z = model(data.x, pos_edge_index)

    # Fresh negatives on every step; the sampler may return fewer pairs than
    # requested, so the label vector below is sized from what actually came back.
    neg_edge_index = negative_sampling(
        edge_index=pos_edge_index,
        num_nodes=z.size(0),
        num_neg_samples=pos_edge_index.size(1),
    )

    edge_label_index = torch.cat([pos_edge_index, neg_edge_index], dim=1)
    # One logit per candidate edge: dot product of the two endpoint embeddings.
    logits = (z[edge_label_index[0]] * z[edge_label_index[1]]).sum(dim=-1)
    labels = torch.cat([
        torch.ones(pos_edge_index.size(1), device=logits.device),
        torch.zeros(neg_edge_index.size(1), device=logits.device),
    ])

    loss = torch.nn.functional.binary_cross_entropy_with_logits(logits, labels)
    loss.backward()
    optimizer.step()
    return loss.item()

@torch.no_grad()
def test():
    """Return link-prediction accuracy on the held-out test edges.

    Embeddings are computed from the training edges only, so the test edges
    never take part in message passing. The held-out positive edges and the
    held-out negative pairs are then scored by the dot product of their
    endpoint embeddings and thresholded at a probability of 0.5.

    Returns:
        float: Fraction of test pairs (positive and negative) classified
        correctly.
    """
    model.eval()
    z = model(data.x, data.train_pos_edge_index)

    pos_edge_index = data.test_pos_edge_index
    neg_edge_index = data.test_neg_edge_index
    edge_label_index = torch.cat([pos_edge_index, neg_edge_index], dim=1)

    logits = (z[edge_label_index[0]] * z[edge_label_index[1]]).sum(dim=-1)
    y_true = torch.cat([
        torch.ones(pos_edge_index.size(1), device=logits.device),
        torch.zeros(neg_edge_index.size(1), device=logits.device),
    ])
    y_pred = (torch.sigmoid(logits) > 0.5).float()
    accuracy = (y_pred == y_true).sum().item() / y_true.size(0)
    return accuracy

In [None]:
# 100 epochs, numbered from 1. Each epoch takes one training step and then
# evaluates on the test split, reporting both numbers on a single line.
for epoch in range(1, 101):
    loss, accuracy = train(), test()
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}')

In [None]:
# Rank node pairs that are NOT connected in the input graph by how likely the
# trained model thinks a link between them is.
model.eval()
with torch.no_grad():
    # Encode with the training edges only, exactly as during evaluation.
    z = model(data.x, data.train_pos_edge_index)

    # Candidates are the held-out negative pairs of the test split. The
    # held-out positive pairs are real edges, so they cannot be "new" links.
    candidate_edge_index = data.test_neg_edge_index
    logits = (z[candidate_edge_index[0]] * z[candidate_edge_index[1]]).sum(dim=-1)
    probabilities = torch.sigmoid(logits)

# Plain Python (src, dst) pairs and scores, aligned by position
candidate_pairs = candidate_edge_index.t().cpu().tolist()
candidate_probs = probabilities.cpu().tolist()

# Safety net: drop any candidate that is already an edge. The graph is
# undirected, so pairs are compared without regard to direction, and a set
# keeps every lookup constant-time.
existing_edges = {frozenset(edge) for edge in edges}
predicted_edges = [
    (src, dst, prob)
    for (src, dst), prob in zip(candidate_pairs, candidate_probs)
    if frozenset((src, dst)) not in existing_edges
]

# Sort the predicted edges based on the probabilities in descending order
predicted_edges.sort(key=lambda item: item[2], reverse=True)

# Print the top 10 predicted edges
print("Top 10 Predicted Edges:")
for src, dst, prob in predicted_edges[:10]:
    username_src = df.loc[src, 'username']
    username_dst = df.loc[dst, 'username']
    print(f"{username_src} --> {username_dst} (Probability: {prob:.4f})")