In [None]:
# Install PyTorch for this notebook; `%pip` (rather than `!pip`) installs into the running kernel's environment.
%pip install torch

In [8]:
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F

In [4]:
class Encoder(nn.Module):
    """Two-layer MLP that lifts raw node features into the latent space.

    Args:
        in_dim: Width of each input feature vector.
        hidden_dim: Width of the single hidden layer.
        out_dim: Width of the embedding produced for every node.
    """

    def __init__(self, in_dim, hidden_dim=64, out_dim=32):
        super().__init__()
        project_in = nn.Linear(in_dim, hidden_dim)
        project_out = nn.Linear(hidden_dim, out_dim)
        # Stored as a Sequential called ``net`` so parameters are named
        # ``net.0.*`` and ``net.2.*``.
        self.net = nn.Sequential(project_in, nn.ReLU(), project_out)

    def forward(self, x):
        """Embed a batch of feature rows: (N, in_dim) -> (N, out_dim)."""
        embeddings = self.net(x)
        return embeddings


In [24]:
class MPNNLayer(nn.Module):
    """One round of sum-aggregated message passing over a directed edge list.

    For every edge ``src -> dst`` the source embedding is transformed by the
    message MLP, the messages are summed per destination node, and each node's
    new embedding is produced by the update MLP from its previous embedding
    concatenated with its aggregated message.

    Args:
        dim: Width of the node embeddings; input and output widths are equal.
    """

    def __init__(self, dim):
        super().__init__()
        # Message function φ: R^dim -> R^dim
        self.msg_mlp = nn.Sequential(
            nn.Linear(dim, dim),
            nn.ReLU(),
            nn.Linear(dim, dim)
        )
        # Update function ψ: R^(2*dim) -> R^dim
        self.update_mlp = nn.Sequential(
            nn.Linear(2 * dim, dim),
            nn.ReLU(),
            nn.Linear(dim, dim)
        )

    def forward(self, H, edge_index):
        """Compute the next-layer node embeddings.

        Args:
            H: (N, dim) node embeddings at the current layer.
            edge_index: (2, E) integer tensor laid out as [src; dst], or a
                ``(src, dst)`` pair of (E,) index tensors.

        Returns:
            (N, dim) tensor of updated node embeddings.

        Raises:
            ValueError: If ``H`` is not 2-D, or ``edge_index`` is a tensor
                whose shape is not (2, E).
        """
        # Fail early with a readable message instead of an opaque unpacking or
        # indexing error further down.
        if H.dim() != 2:
            raise ValueError(f"H must have shape (N, dim), got {tuple(H.shape)}")
        if isinstance(edge_index, torch.Tensor) and (
            edge_index.dim() != 2 or edge_index.size(0) != 2
        ):
            raise ValueError(
                f"edge_index must have shape (2, E), got {tuple(edge_index.shape)}"
            )
        src, dst = edge_index  # each: (E,)

        # 1. Messages from src nodes along edges
        m = self.msg_mlp(H[src])    # (E, dim)

        # 2. Aggregate messages at dst nodes by summation. Nodes with no
        #    incoming edge keep an all-zero aggregate.
        agg = torch.zeros_like(H)   # (N, dim)
        agg.index_add_(0, dst, m)

        # 3. Update node states using previous state + aggregated message
        h_cat = torch.cat([H, agg], dim=-1)  # (N, 2*dim)
        return self.update_mlp(h_cat)        # (N, dim)


In [29]:
class Decoder(nn.Module):
    """MLP head that maps each node embedding to a single raw logit.

    Args:
        in_dim: Width of the incoming node embeddings.
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, in_dim, hidden_dim=32):
        super().__init__()
        stages = [
            nn.Linear(in_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1),  # one scalar score per node
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, H):
        """Score every node: (N, in_dim) embeddings -> (N,) logits."""
        scores = self.net(H)        # (N, 1)
        return scores.squeeze(-1)   # drop the trailing singleton axis


In [30]:
class SocialGNN(nn.Module):
    """Encode -> message-pass -> decode model producing one logit per node.

    Args:
        in_dim: Number of raw features per node.
        hidden_dim: Hidden width shared by the encoder and decoder MLPs.
        emb_dim: Width of the latent node embeddings.
        num_layers: How many MPNNLayer rounds are stacked.
    """

    def __init__(self, in_dim, hidden_dim=64, emb_dim=32, num_layers=3):
        super().__init__()
        self.encoder = Encoder(in_dim, hidden_dim, emb_dim)
        self.layers = nn.ModuleList(MPNNLayer(emb_dim) for _ in range(num_layers))
        self.decoder = Decoder(emb_dim, hidden_dim=hidden_dim)

    def forward(self, x, edge_index):
        """Run the full pipeline.

        Args:
            x: (N, in_dim) node features.
            edge_index: (2, E) edge list.

        Returns:
            (N,) tensor of per-node logits.
        """
        # Raw features -> latent embeddings, (N, emb_dim).
        H = self.encoder(x)

        # Every message-passing round consumes the previous round's output.
        for message_pass in self.layers:
            H = message_pass(H, edge_index)

        # Latent embeddings -> one logit per node.
        return self.decoder(H)


In [19]:
nodes_path = "./data/train/000/nodes.csv"
edges_path = "./data/train/000/edges.csv"

# 1) Load node data
nodes_df = pd.read_csv(nodes_path)

# Columns of nodes.csv that are fed to the model as input features.
feature_names = [
    'normalized_degree',
    'clustering_coefficient',
    'posts_per_day',
    'likes_per_post',
    'follower_ratio',
    'age',
    'years_riding',
    'miles_ridden',
    'bikes_owned',
    'avg_displacement',
    'avg_msrp',
]


# Separate features and labels
X_np = nodes_df[feature_names].values        # shape (N, len(feature_names))
y_np = nodes_df['label'].values             # shape (N,)

# Convert to torch tensors
X = torch.tensor(X_np, dtype=torch.float32)  # (N, len(feature_names))
y = torch.tensor(y_np, dtype=torch.float32)  # (N,)
print("X shape:", X.shape)
print("y shape:", y.shape)

# 2) Load edge data
edges_df = pd.read_csv(edges_path)          # expects 'src', 'dst' columns

src = torch.tensor(edges_df['src'].values, dtype=torch.long)
dst = torch.tensor(edges_df['dst'].values, dtype=torch.long)

edge_index = torch.stack([src, dst], dim=0)  # shape (2, E)

# The model uses edge endpoints as row indices into the node tensor, so they
# must be valid row positions. A negative id would silently wrap around when
# indexing, and an id >= N would only fail deep inside the forward pass.
# NOTE(review): this assumes node ids in edges.csv equal row positions in
# nodes.csv — confirm against the dataset description.
assert edge_index.numel() == 0 or (
    int(edge_index.min()) >= 0 and int(edge_index.max()) < X.shape[0]
), f"edge endpoints must lie in [0, {X.shape[0]})"

print("edge_index shape:", edge_index.shape)
print("First 5 edges:\n", edge_index[:, :5])

X shape: torch.Size([100, 11])
y shape: torch.Size([100])
edge_index shape: torch.Size([2, 291])
First 5 edges:
 tensor([[0, 0, 0, 0, 0],
        [1, 2, 3, 4, 5]])


In [None]:
# Seed torch's global RNG before building the model: the layer weights are
# randomly initialised, so without a seed the outputs of the untrained model
# change on every kernel restart.
SEED = 0
torch.manual_seed(SEED)

in_dim = X.shape[1]      # number of feature columns in X
model = SocialGNN(in_dim=in_dim, hidden_dim=64, emb_dim=32, num_layers=3)

print(model)

SocialGNN(
  (encoder): Encoder(
    (net): Sequential(
      (0): Linear(in_features=11, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=32, bias=True)
    )
  )
  (layers): ModuleList(
    (0-2): 3 x MPNNLayer(
      (msg_mlp): Sequential(
        (0): Linear(in_features=32, out_features=32, bias=True)
        (1): ReLU()
        (2): Linear(in_features=32, out_features=32, bias=True)
      )
      (update_mlp): Sequential(
        (0): Linear(in_features=64, out_features=32, bias=True)
        (1): ReLU()
        (2): Linear(in_features=32, out_features=32, bias=True)
      )
    )
  )
  (decoder): Decoder(
    (net): Sequential(
      (0): Linear(in_features=32, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=1, bias=True)
    )
  )
)


In [32]:
# Inspection only: put the model in eval mode and skip gradient tracking.
model.eval()

with torch.no_grad():
    logits = model(x=X, edge_index=edge_index)   # one raw score per node, (N,)

print("Logits shape:", logits.size())
print("First 5 logits:", logits[0:5])


Logits shape: torch.Size([100])
First 5 logits: tensor([-0.5848, -1.0858, -1.6992, -1.0711, -2.4428])


In [33]:
# Squash logits into (0, 1), then threshold at 0.5 to get hard 0/1 predictions.
probs = logits.sigmoid()                        # (N,)
pred_labels = probs.ge(0.5).to(torch.float32)   # (N,)

print("First 5 probabilities:", probs[0:5])
print("First 5 predicted labels:", pred_labels[0:5])
print("First 5 true labels:", y[0:5])


First 5 probabilities: tensor([0.3578, 0.2524, 0.1546, 0.2552, 0.0800])
First 5 predicted labels: tensor([0., 0., 0., 0., 0.])
First 5 true labels: tensor([1., 1., 1., 0., 1.])
