In [6]:

import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix, csr_matrix
from sklearn.preprocessing import StandardScaler

# === HGNN Layer ===
class HGNNConv(nn.Module):
    """Single hypergraph convolution layer.

    Applies a learned affine map to the node features, propagates the result
    through the propagation matrix ``G`` and finishes with a ReLU.
    """

    def __init__(self, in_features, out_features):
        """Create the layer.

        Args:
            in_features: Size of each input node feature vector.
            out_features: Size of each output node feature vector.
        """
        super().__init__()
        # Attribute name is part of the state_dict keys; keep it as ``linear``.
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x, G):
        """Return ``relu(G @ linear(x))``.

        Args:
            x: Node feature matrix fed to the linear projection.
            G: Propagation matrix multiplied (via ``torch.spmm``) with the
                projected features.
        """
        projected = self.linear(x)
        propagated = torch.spmm(G, projected)
        return F.relu(propagated)

# === HGNN Model ===
class HGNN(nn.Module):
    """Two-layer hypergraph neural network built from ``HGNNConv`` layers.

    Both layers are ``HGNNConv`` instances, so the output of the second layer
    has also passed through that layer's activation.
    """

    def __init__(self, in_features, hidden_dim, out_features):
        """Create the two stacked layers.

        Args:
            in_features: Size of each input node feature vector.
            hidden_dim: Width of the intermediate representation.
            out_features: Size of each output embedding.
        """
        super().__init__()
        self.layer1 = HGNNConv(in_features, hidden_dim)
        self.layer2 = HGNNConv(hidden_dim, out_features)

    def forward(self, x, G):
        """Run ``x`` through both layers, sharing the same matrix ``G``."""
        hidden = self.layer1(x, G)
        return self.layer2(hidden, G)

# === Build Hypergraph Incidence Matrix ===
def build_incidence_matrix(num_nodes, edge_index):
    """Build a sparse node-by-edge incidence matrix from pairwise edges.

    Every edge ``(u, v)`` becomes one column with a 1.0 stored for ``u`` and a
    1.0 stored for ``v``. A self-loop ``(u, u)`` therefore stores two entries
    at the same position, which add up to 2.0 when the matrix is densified
    or summed.

    Args:
        num_nodes: Number of rows (nodes) of the result.
        edge_index: Sized sequence of 2-element pairs of node indices.

    Returns:
        A ``scipy.sparse.coo_matrix`` of shape ``(num_nodes, len(edge_index))``.
    """
    num_edges = len(edge_index)

    node_ids = []
    for u, v in edge_index:
        node_ids.extend((u, v))

    # Each edge owns one column and contributes two consecutive entries to it.
    edge_ids = [edge for edge in range(num_edges) for _ in range(2)]

    values = np.ones(len(node_ids))
    return coo_matrix((values, (node_ids, edge_ids)), shape=(num_nodes, num_edges))

def build_incidence_matrix_from_adjacency(adj_matrix):
    """Turn a square adjacency matrix into a node-by-edge incidence matrix.

    The adjacency matrix is symmetrised with an element-wise maximum of the
    matrix and its transpose. Every ordered pair ``(i, j)`` with ``i != j``
    and a non-zero entry then becomes one edge, so each undirected connection
    yields two edges, ``(i, j)`` and ``(j, i)``. Nodes with no off-diagonal
    connection receive a single self-loop edge ``(i, i)``, appended after all
    regular edges.

    Args:
        adj_matrix: Square 2-D array-like of connection weights; any non-zero
            off-diagonal entry counts as a connection.

    Returns:
        The incidence matrix produced by ``build_incidence_matrix`` for the
        edge list described above.

    Raises:
        ValueError: If ``adj_matrix`` is not a square 2-D matrix.
    """
    adj_matrix = np.asarray(adj_matrix)
    if adj_matrix.ndim != 2 or adj_matrix.shape[0] != adj_matrix.shape[1]:
        raise ValueError(
            f"adj_matrix must be a square 2-D matrix, got shape {adj_matrix.shape}"
        )

    # Ensure symmetry
    adj_matrix = np.maximum(adj_matrix, adj_matrix.T)
    n = adj_matrix.shape[0]

    # Off-diagonal connectivity mask; the diagonal never creates an edge.
    connected = adj_matrix != 0
    connected &= ~np.eye(n, dtype=bool)

    # np.nonzero walks the mask in row-major order, i.e. the same (i, j)
    # order a nested ``for i: for j:`` loop would produce.
    src, dst = np.nonzero(connected)
    edge_index = np.column_stack((src, dst)).tolist()

    # Add self-loops for isolated nodes so every node is in at least one edge.
    isolated_nodes = np.flatnonzero(~connected.any(axis=1)).tolist()
    edge_index.extend([node, node] for node in isolated_nodes)

    print(f"Added self-loops for {len(isolated_nodes)} isolated nodes.")
    return build_incidence_matrix(n, edge_index)

def normalize_incidence_matrix(H):
    """Compute the dense propagation matrix from an incidence matrix.

    With ``Dv`` the row sums and ``De`` the column sums of ``H``, the result is
    ``Dv^-1/2 @ H @ De^-1 @ H^T @ Dv^-1/2``. A small constant (1e-6) is added
    to every degree before inversion so that zero degrees do not divide by
    zero.

    Args:
        H: Sparse incidence matrix (rows are nodes, columns are edges).

    Returns:
        A dense ``torch.FloatTensor`` with one row and one column per node.
    """
    def _diagonal(values):
        # Sparse square matrix with ``values`` on its main diagonal.
        size = len(values)
        positions = np.arange(size)
        return coo_matrix((values, (positions, positions)), shape=(size, size))

    H = H.astype(np.float32)

    node_degree = np.array(H.sum(1)).flatten()
    edge_degree = np.array(H.sum(0)).flatten()

    node_scale = _diagonal(1.0 / np.sqrt(node_degree + 1e-6))
    edge_scale = _diagonal(1.0 / (edge_degree + 1e-6))

    # Evaluated left to right, exactly in this order.
    G = node_scale @ H @ edge_scale @ H.transpose() @ node_scale
    return torch.FloatTensor(G.todense())

# === Load Preprocessed Data ===
def load_data(features_file, adjacency_file):
    """Read the feature table and adjacency table from CSV.

    In both files the first column is skipped when extracting numeric values;
    in the feature file that first column is returned as the item names.

    Args:
        features_file: CSV whose first column holds names and whose remaining
            columns hold the raw features.
        adjacency_file: CSV whose columns after the first hold the adjacency
            values.

    Returns:
        A tuple ``(features, adj_matrix, drug_names)``: the standardised
        features as a ``torch.FloatTensor``, the adjacency values as a
        float32 NumPy array, and the names taken from the feature file.
    """
    feature_table = pd.read_csv(features_file)
    drug_names = feature_table.iloc[:, 0].values

    # Feature normalization: zero mean / unit variance per column.
    standardized = StandardScaler().fit_transform(feature_table.iloc[:, 1:].values)
    features = torch.FloatTensor(standardized)

    adjacency_table = pd.read_csv(adjacency_file)
    adj_matrix = adjacency_table.iloc[:, 1:].values.astype(np.float32)

    return features, adj_matrix, drug_names

# === Training Function ===
def train_hgnn(features_csv='/content/all_drug_data_processed.csv',
               adjacency_csv='/content/DDR-HC-new.csv',
               hidden_dim=32, embedding_dim=16, epochs=5000,
               lr=0.001, log_every=20, output_csv="drug_embeddings.csv"):
    """Train an HGNN on the given data and save the node embeddings to CSV.

    The model is trained to reproduce the first ``embedding_dim`` standardised
    input features of every node (MSE loss). After training, the embeddings
    are recomputed with the final weights and written to ``output_csv`` with
    a leading ``drug_name`` column.

    Args:
        features_csv: Path of the feature CSV passed to ``load_data``.
        adjacency_csv: Path of the adjacency CSV passed to ``load_data``.
        hidden_dim: Width of the hidden HGNN layer.
        embedding_dim: Size of the output embedding; must not exceed the
            number of feature columns because the loss target is
            ``X[:, :embedding_dim]``.
        epochs: Number of full-batch optimisation steps.
        lr: Learning rate of the Adam optimiser.
        log_every: Print the loss every this many epochs; a falsy value
            disables progress printing.
        output_csv: Destination path of the embeddings CSV.

    Raises:
        ValueError: If ``embedding_dim`` is larger than the number of features.
    """
    print("Loading data...")
    X, adj_matrix, drug_names = load_data(features_csv, adjacency_csv)

    if embedding_dim > X.shape[1]:
        raise ValueError(
            f"embedding_dim ({embedding_dim}) exceeds the number of input "
            f"features ({X.shape[1]}); the loss target X[:, :embedding_dim] "
            "would not match the model output."
        )

    print("Building hypergraph...")
    H = build_incidence_matrix_from_adjacency(adj_matrix)
    G = normalize_incidence_matrix(H)

    print("Initializing HGNN...")
    model = HGNN(X.shape[1], hidden_dim, embedding_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # NOTE(review): the last HGNN layer ends in a ReLU, so outputs are
    # non-negative while the standardised targets can be negative; the loss
    # therefore cannot reach zero. Confirm this is intended.
    target = X[:, :embedding_dim]

    print("Training HGNN...")
    model.train()
    for epoch in range(epochs):
        optimizer.zero_grad()
        out = model(X, G)

        # MSE loss between the embedding_dim-wide output and the matching
        # leading columns of the input.
        loss = F.mse_loss(out, target)
        loss.backward()
        optimizer.step()

        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch}, Loss: {loss.item():.4f}")

    # Recompute the embeddings with the final weights: the ``out`` from the
    # loop predates the last optimizer step and does not exist at all when
    # ``epochs`` is 0.
    model.eval()
    with torch.no_grad():
        embeddings = model(X, G).numpy()

    # Save embeddings with drug names
    df_embed = pd.DataFrame(embeddings, columns=[f'embedding_{i}' for i in range(embedding_dim)])
    df_embed.insert(0, 'drug_name', drug_names)
    df_embed.to_csv(output_csv, index=False)
    print(f"Embeddings saved to {output_csv}")

# Script entry point: when executed directly (not imported), run the whole
# pipeline with the default arguments of train_hgnn.
if __name__ == '__main__':
    train_hgnn()


Loading data...
Building hypergraph...
Added self-loops for 67 isolated nodes.
Initializing HGNN...
Training HGNN...
Epoch 0, Loss: 1.0053
Epoch 20, Loss: 0.9483
Epoch 40, Loss: 0.8916
Epoch 60, Loss: 0.8290
Epoch 80, Loss: 0.7604
Epoch 100, Loss: 0.6925
Epoch 120, Loss: 0.6298
Epoch 140, Loss: 0.5675
Epoch 160, Loss: 0.5081
Epoch 180, Loss: 0.4508
Epoch 200, Loss: 0.4061
Epoch 220, Loss: 0.3716
Epoch 240, Loss: 0.3454
Epoch 260, Loss: 0.3253
Epoch 280, Loss: 0.3101
Epoch 300, Loss: 0.2983
Epoch 320, Loss: 0.2888
Epoch 340, Loss: 0.2808
Epoch 360, Loss: 0.2739
Epoch 380, Loss: 0.2679
Epoch 400, Loss: 0.2628
Epoch 420, Loss: 0.2583
Epoch 440, Loss: 0.2540
Epoch 460, Loss: 0.2503
Epoch 480, Loss: 0.2470
Epoch 500, Loss: 0.2441
Epoch 520, Loss: 0.2416
Epoch 540, Loss: 0.2394
Epoch 560, Loss: 0.2373
Epoch 580, Loss: 0.2355
Epoch 600, Loss: 0.2339
Epoch 620, Loss: 0.2324
Epoch 640, Loss: 0.2310
Epoch 660, Loss: 0.2297
Epoch 680, Loss: 0.2285
Epoch 700, Loss: 0.2273
Epoch 720, Loss: 0.2262
E