In [13]:
 
# Temporal Graph Convolutional Network (TGCN)
# Depression Detection using BERT embeddings  
 

import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from tqdm import tqdm

# ----------------------------------------------------------
# Step 1. Load BERT embeddings
# --------------------------------- 
INPUT_DIR = ""  

bert_train = np.load(os.path.join(INPUT_DIR, "BERT_X_train_emb.npy"))
bert_test  = np.load(os.path.join(INPUT_DIR, "BERT_X_test_emb.npy"))
y_train = np.load(os.path.join(INPUT_DIR, "BERT_y_train.npy"))
y_test  = np.load(os.path.join(INPUT_DIR, "BERT_y_test.npy"))

print("\n===== Data Info =====")
print(f"BERT train: {bert_train.shape}")
print(f"BERT test : {bert_test.shape}")

# Convert to torch tensors
x_train = torch.from_numpy(bert_train.astype(np.float32))
x_test  = torch.from_numpy(bert_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.int64))
y_test  = torch.from_numpy(y_test.astype(np.int64))

# Create validation split (10% of training)
val_size = int(0.1 * len(x_train))
x_val, y_val = x_train[-val_size:], y_train[-val_size:]
x_train, y_train = x_train[:-val_size], y_train[:-val_size]

# ----------------------------------------------------------
# Step 2. Create temporal sliding windows
# ----------------------------- 
WINDOW_SIZE = 10

def make_sequences(x_all, y_all, window=WINDOW_SIZE):
    seqs, labels = [], []
    for i in range(len(x_all) - window + 1):
        seqs.append(x_all[i:i+window])       
        labels.append(y_all[i+window-1])    
    return torch.stack(seqs), torch.stack(labels)

train_seq, train_y = make_sequences(x_train, y_train)
val_seq, val_y     = make_sequences(x_val, y_val)
test_seq, test_y   = make_sequences(x_test, y_test)

print(f"\nTrain seq: {train_seq.shape}, Val seq: {val_seq.shape}, Test seq: {test_seq.shape}")

# ----------------------------------------------------------
# Step 3. TemporalGCN model (auto node detection)
# ----------------------------------------------------------
class TemporalGCN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, window_size, n_nodes=None):
        super().__init__()
        self.gcn = GCNConv(in_channels, hidden_channels)
        self.gru = nn.GRU(hidden_channels, hidden_channels, batch_first=True)
        self.fc = nn.Linear(hidden_channels, out_channels)
        self.window_size = window_size
        self.n_nodes = n_nodes
        self.edge_nodes = None   

    def _build_edges(self, n_nodes, device):
        src = torch.arange(n_nodes - 1)
        dst = torch.arange(1, n_nodes)
        edge_index = torch.stack([torch.cat([src, dst]), torch.cat([dst, src])]).to(device)
        return edge_index

    def forward(self, x_seq):
        """
        x_seq: [B, W, N, F]
        - GCN operates on N nodes per timestep
        - GRU models temporal sequence across W steps
        """
        B, W, N, Fdim = x_seq.shape
        device = x_seq.device

        # Build edges dynamically if needed
        if (self.edge_nodes is None) or (self.n_nodes != N):
            self.n_nodes = N
            self.edge_nodes = self._build_edges(N, device)

        outputs = []
        for b in range(B):
            temporal_feats = []
            for t in range(W):
                h_nodes = F.relu(self.gcn(x_seq[b, t], self.edge_nodes))   
                h_mean = h_nodes.mean(dim=0)   
                temporal_feats.append(h_mean.unsqueeze(0))
            temporal_feats = torch.cat(temporal_feats, dim=0).unsqueeze(0)   
            _, h_n = self.gru(temporal_feats)   
            out = self.fc(h_n.squeeze(0).squeeze(0))
            outputs.append(out)
        return torch.stack(outputs, dim=0)

# ----------------------------------------------------------
# Step 4. Training setup 



device = "cuda" if torch.cuda.is_available() else "cpu"
IN_DIM = bert_train.shape[2]   # 768

model = TemporalGCN(
    in_channels=IN_DIM,
    hidden_channels=128,
    out_channels=2,
    window_size=WINDOW_SIZE
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

def batched_iter(X, y, batch_size=8):
    for i in range(0, len(X), batch_size):
        yield X[i:i+batch_size], y[i:i+batch_size]

# ----------------------------------------------------------
# Step 5. Train loop
# ---------------- 
EPOCHS = 10
best_val_f1 = 0
best_state = None

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for xb, yb in batched_iter(train_seq, train_y):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(xb)
        loss = F.cross_entropy(out, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * len(xb)

    # Validation
    model.eval()
    preds = []
    with torch.no_grad():
        for xb, yb in batched_iter(val_seq, val_y):
            logits = model(xb.to(device))
            preds.append(logits.argmax(dim=1).cpu())
    val_pred = torch.cat(preds)
    val_acc = accuracy_score(val_y, val_pred)
    _, _, val_f1, _ = precision_recall_fscore_support(val_y, val_pred, average="binary")

    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        best_state = model.state_dict()

    print(f"Epoch {epoch+1:02d}/{EPOCHS} | Loss: {total_loss/len(train_seq):.4f} | Val F1: {val_f1:.4f}")

# ----------------------------- ------------ 
# Step 6. Final testing
# ----------------------------------- 
model.load_state_dict(best_state)
model.eval()
with torch.no_grad():
    preds = []
    for xb, yb in batched_iter(test_seq, test_y):
        logits = model(xb.to(device))
        preds.append(logits.argmax(dim=1).cpu())
    y_pred = torch.cat(preds)
    y_true = test_y

acc = accuracy_score(y_true, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary")
cm = confusion_matrix(y_true, y_pred)

print("\n===== Final Test Evaluation =====")
print(f"Accuracy : {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall   : {rec:.4f}")
print(f"F1 Score : {f1:.4f}")
print("Confusion Matrix:\n", cm)



===== Data Info =====
BERT train: (6405, 64, 768)
BERT test : (1602, 64, 768)

Train seq: torch.Size([5756, 10, 64, 768]), Val seq: torch.Size([631, 10, 64, 768]), Test seq: torch.Size([1593, 10, 64, 768])
Epoch 01/10 | Loss: 0.6150 | Val F1: 0.6824
Epoch 02/10 | Loss: 0.5574 | Val F1: 0.6612
Epoch 03/10 | Loss: 0.5077 | Val F1: 0.6699
Epoch 04/10 | Loss: 0.4453 | Val F1: 0.6805
Epoch 05/10 | Loss: 0.3760 | Val F1: 0.6994
Epoch 06/10 | Loss: 0.3013 | Val F1: 0.7072
Epoch 07/10 | Loss: 0.2374 | Val F1: 0.7011
Epoch 08/10 | Loss: 0.1928 | Val F1: 0.7380
Epoch 09/10 | Loss: 0.1750 | Val F1: 0.7061
Epoch 10/10 | Loss: 0.1394 | Val F1: 0.7240

===== Final Test Evaluation =====
Accuracy : 0.7433
Precision: 0.7117
Recall   : 0.7456
F1 Score : 0.7282
Confusion Matrix:
 [[636 222]
 [187 548]]


In [14]:
# ==========================================================
# Temporal Graph Convolutional Network (TGCN)
# Depression Detection using T5 embeddings  

import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from tqdm import tqdm

# -------------------------------------- -
# Step 1. Load T5 embeddings
# -------------------------------------------------- 
INPUT_DIR = ""  

t5_train = np.load(os.path.join(INPUT_DIR, "T5_X_train_emb.npy"))
t5_test  = np.load(os.path.join(INPUT_DIR, "T5_X_test_emb.npy"))
y_train  = np.load(os.path.join(INPUT_DIR, "BERT_y_train.npy"))
y_test   = np.load(os.path.join(INPUT_DIR, "BERT_y_test.npy"))

print("\n===== Data Info =====")
print(f"T5 train: {t5_train.shape}")
print(f"T5 test : {t5_test.shape}")

# Convert to torch tensors
x_train = torch.from_numpy(t5_train.astype(np.float32))
x_test  = torch.from_numpy(t5_test.astype(np.float32))
y_train = torch.from_numpy(y_train.astype(np.int64))
y_test  = torch.from_numpy(y_test.astype(np.int64))

# Create validation split (10%)
val_size = int(0.1 * len(x_train))
x_val, y_val = x_train[-val_size:], y_train[-val_size:]
x_train, y_train = x_train[:-val_size], y_train[:-val_size]

# ----------------------------- 
# Step 2. Create temporal sliding windows
# ------------------------------------------- 
WINDOW_SIZE = 10

def make_sequences(x_all, y_all, window=WINDOW_SIZE):
    seqs, labels = [], []
    for i in range(len(x_all) - window + 1):
        seqs.append(x_all[i:i+window])       
        labels.append(y_all[i+window-1])    
    return torch.stack(seqs), torch.stack(labels)

train_seq, train_y = make_sequences(x_train, y_train)
val_seq, val_y     = make_sequences(x_val, y_val)
test_seq, test_y   = make_sequences(x_test, y_test)

print(f"\nTrain seq: {train_seq.shape}, Val seq: {val_seq.shape}, Test seq: {test_seq.shape}")

# ----------------------------------------------------------
# Step 3. TemporalGCN model (auto node detection 


class TemporalGCN(nn.Module):
    def __init__(self, in_channels, hidden_channels, out_channels, window_size, n_nodes=None):
        super().__init__()
        self.gcn = GCNConv(in_channels, hidden_channels)
        self.gru = nn.GRU(hidden_channels, hidden_channels, batch_first=True)
        self.fc = nn.Linear(hidden_channels, out_channels)
        self.window_size = window_size
        self.n_nodes = n_nodes
        self.edge_nodes = None   

    def _build_edges(self, n_nodes, device):
        src = torch.arange(n_nodes - 1)
        dst = torch.arange(1, n_nodes)
        edge_index = torch.stack([torch.cat([src, dst]), torch.cat([dst, src])]).to(device)
        return edge_index

    def forward(self, x_seq):
        """
        x_seq: [B, W, N, F]
        - GCN operates on N nodes per timestep
        - GRU models temporal sequence across W steps
        """
        B, W, N, Fdim = x_seq.shape
        device = x_seq.device

        # Build edges dynamically if needed
        if (self.edge_nodes is None) or (self.n_nodes != N):
            self.n_nodes = N
            self.edge_nodes = self._build_edges(N, device)

        outputs = []
        for b in range(B):
            temporal_feats = []
            for t in range(W):
                h_nodes = F.relu(self.gcn(x_seq[b, t], self.edge_nodes))   
                h_mean = h_nodes.mean(dim=0)   
                temporal_feats.append(h_mean.unsqueeze(0))
            temporal_feats = torch.cat(temporal_feats, dim=0).unsqueeze(0)   
            _, h_n = self.gru(temporal_feats)   
            out = self.fc(h_n.squeeze(0).squeeze(0))
            outputs.append(out)
        return torch.stack(outputs, dim=0)

# --------------------------- 
# Step 4. Training setup
# ------------------------- 
device = "cuda" if torch.cuda.is_available() else "cpu"
IN_DIM = t5_train.shape[2]   # should be 768

model = TemporalGCN(
    in_channels=IN_DIM,
    hidden_channels=128,
    out_channels=2,
    window_size=WINDOW_SIZE
).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

def batched_iter(X, y, batch_size=8):
    for i in range(0, len(X), batch_size):
        yield X[i:i+batch_size], y[i:i+batch_size]

# -------------------------------------- 
# Step 5. Training loop
# -------------------- 
EPOCHS = 10
best_val_f1 = 0
best_state = None

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0
    for xb, yb in batched_iter(train_seq, train_y):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        out = model(xb)
        loss = F.cross_entropy(out, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * len(xb)

    # Validation
    model.eval()
    preds = []
    with torch.no_grad():
        for xb, yb in batched_iter(val_seq, val_y):
            logits = model(xb.to(device))
            preds.append(logits.argmax(dim=1).cpu())
    val_pred = torch.cat(preds)
    val_acc = accuracy_score(val_y, val_pred)
    _, _, val_f1, _ = precision_recall_fscore_support(val_y, val_pred, average="binary")

    if val_f1 > best_val_f1:
        best_val_f1 = val_f1
        best_state = model.state_dict()

    print(f"Epoch {epoch+1:02d}/{EPOCHS} | Loss: {total_loss/len(train_seq):.4f} | Val F1: {val_f1:.4f}")

# --------------------------------- 
# Step 6. Final testing
# ------------------------------ 
model.load_state_dict(best_state)
model.eval()
with torch.no_grad():
    preds = []
    for xb, yb in batched_iter(test_seq, test_y):
        logits = model(xb.to(device))
        preds.append(logits.argmax(dim=1).cpu())
    y_pred = torch.cat(preds)
    y_true = test_y

acc = accuracy_score(y_true, y_pred)
prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average="binary")
cm = confusion_matrix(y_true, y_pred)

print("\n===== Final Test Evaluation =====")
print(f"Accuracy : {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall   : {rec:.4f}")
print(f"F1 Score : {f1:.4f}")
print("Confusion Matrix:\n", cm)



===== Data Info =====
T5 train: (6405, 64, 768)
T5 test : (1602, 64, 768)

Train seq: torch.Size([5756, 10, 64, 768]), Val seq: torch.Size([631, 10, 64, 768]), Test seq: torch.Size([1593, 10, 64, 768])
Epoch 01/10 | Loss: 0.6202 | Val F1: 0.6615
Epoch 02/10 | Loss: 0.5440 | Val F1: 0.6826
Epoch 03/10 | Loss: 0.4696 | Val F1: 0.7158
Epoch 04/10 | Loss: 0.3877 | Val F1: 0.7172
Epoch 05/10 | Loss: 0.3074 | Val F1: 0.7249
Epoch 06/10 | Loss: 0.2270 | Val F1: 0.7474
Epoch 07/10 | Loss: 0.1875 | Val F1: 0.7561
Epoch 08/10 | Loss: 0.1599 | Val F1: 0.7710
Epoch 09/10 | Loss: 0.1078 | Val F1: 0.7752
Epoch 10/10 | Loss: 0.1067 | Val F1: 0.7690

===== Final Test Evaluation =====
Accuracy : 0.7784
Precision: 0.7553
Recall   : 0.7687
F1 Score : 0.7620
Confusion Matrix:
 [[675 183]
 [170 565]]
