# `Comparing GCN and GIN separately, before improving the GNN with advanced feature engineering and an improved architecture`

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, GINConv, global_mean_pool
from torch_geometric.loader import DataLoader
from sklearn.metrics import roc_auc_score
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class ToxGCN(nn.Module):
    """Two-layer GCN graph classifier that emits one raw logit per task.

    Node features go through two ``GCNConv`` layers (each followed by ReLU),
    are mean-pooled per graph with ``global_mean_pool`` and mapped to
    ``num_tasks`` outputs by a linear head.

    Args:
        num_tasks: Number of output logits produced for every graph.
        in_channels: Size of each node feature vector. Defaults to 1, the
            value that used to be hard-coded.
        hidden_channels: Width of both convolution layers. Defaults to 64,
            the value that used to be hard-coded.
    """

    def __init__(self, num_tasks: int, in_channels: int = 1,
                 hidden_channels: int = 64):
        super().__init__()
        # Layers are created in the same order as before so that, with the
        # default widths, seeded initialisation is unchanged.
        self.conv1 = GCNConv(in_channels, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.lin = nn.Linear(hidden_channels, num_tasks)

    def forward(self, data):
        """Return the logits for a batch of graphs.

        Args:
            data: Batch object exposing ``x``, ``edge_index`` and ``batch``.

        Returns:
            The output of the linear head applied to the mean-pooled node
            embeddings (no sigmoid is applied here).
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        # Collapse node embeddings into one vector per graph.
        x = global_mean_pool(x, batch)
        return self.lin(x)

class ToxGIN(nn.Module):
    """Two-layer GIN graph classifier that emits one raw logit per task.

    Each ``GINConv`` wraps a two-layer MLP (Linear -> ReLU -> Linear). The
    node embeddings are mean-pooled per graph with ``global_mean_pool`` and
    mapped to ``num_tasks`` outputs by a linear head.

    Args:
        num_tasks: Number of output logits produced for every graph.
        in_channels: Size of each node feature vector. Defaults to 1, the
            value that used to be hard-coded.
        hidden_channels: Width of the MLPs and of the pooled embedding.
            Defaults to 64, the value that used to be hard-coded.
    """

    def __init__(self, num_tasks: int, in_channels: int = 1,
                 hidden_channels: int = 64):
        super().__init__()
        # Sub-modules are created in the same order as before so that, with
        # the default widths, seeded initialisation is unchanged.
        nn1 = nn.Sequential(
            nn.Linear(in_channels, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, hidden_channels),
        )
        self.conv1 = GINConv(nn1)
        nn2 = nn.Sequential(
            nn.Linear(hidden_channels, hidden_channels),
            nn.ReLU(),
            nn.Linear(hidden_channels, hidden_channels),
        )
        self.conv2 = GINConv(nn2)
        self.lin = nn.Linear(hidden_channels, num_tasks)

    def forward(self, data):
        """Return the logits for a batch of graphs.

        Args:
            data: Batch object exposing ``x``, ``edge_index`` and ``batch``.

        Returns:
            The output of the linear head applied to the mean-pooled node
            embeddings (no sigmoid is applied here).
        """
        x, edge_index, batch = data.x, data.edge_index, data.batch
        x = F.relu(self.conv1(x, edge_index))
        x = F.relu(self.conv2(x, edge_index))
        # Collapse node embeddings into one vector per graph.
        x = global_mean_pool(x, batch)
        return self.lin(x)

def train(model, loader, optimizer, criterion):
    """Run one training epoch and return the mean loss per optimised batch.

    Labels equal to ``-1`` or ``NaN`` are treated as missing and excluded
    from the loss. A batch with no valid label at all is skipped: an empty
    selection would otherwise yield a NaN mean loss and the following
    optimiser step would write NaN into the weights.

    Args:
        model: Module called as ``model(data)``; its output shape defines the
            shape the labels are viewed as.
        loader: Iterable of batches exposing ``.to(device)`` and ``.y``.
        optimizer: Optimiser stepping ``model``'s parameters.
        criterion: Loss called as ``criterion(selected_outputs, selected_labels)``.

    Returns:
        Sum of the batch losses divided by the number of batches that were
        actually optimised, or ``0.0`` when no batch contributed.
    """
    model.train()

    # Follow the model's own device; the module-level `device` is only needed
    # as a fallback for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    total_loss = 0.0
    batches_used = 0
    for data in loader:
        data = data.to(target_device)

        labels = data.y.to(torch.float32)
        # `NaN != -1` is True, so NaN has to be excluded explicitly.
        valid = (labels != -1) & ~torch.isnan(labels)
        if not valid.any():
            continue

        optimizer.zero_grad()
        out = model(data)

        # Align labels and mask with the model output layout.
        labels = labels.view(out.shape)
        valid = valid.view(out.shape)

        loss = criterion(out[valid], labels[valid])
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        batches_used += 1

    return total_loss / batches_used if batches_used else 0.0

def evaluate(model, loader):
    """Return the mean per-task ROC-AUC of ``model`` over ``loader``.

    Outputs and labels are collected with their 2-D (sample, task) layout
    intact and missing labels (``-1`` or ``NaN``) are removed column by
    column. Boolean-indexing the whole matrix, as was done before, flattens
    it to 1-D, which collapses every task into a single pooled score.

    Args:
        model: Module called as ``model(data)``; its raw outputs are used as
            ranking scores.
        loader: Iterable of batches exposing ``.to(device)`` and ``.y``.

    Returns:
        Average of ``roc_auc_score`` over the tasks for which it could be
        computed, or ``0.0`` when there is no such task (including an empty
        loader).
    """
    model.eval()

    # Follow the model's own device; the module-level `device` is only needed
    # as a fallback for a model without parameters.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else device

    label_chunks, score_chunks = [], []
    with torch.no_grad():
        for data in loader:
            data = data.to(target_device)
            out = model(data)
            labels = data.y.to(torch.float32).view(out.shape)
            label_chunks.append(labels.cpu())
            score_chunks.append(out.cpu())

    if not label_chunks:
        return 0.0

    y_true = torch.cat(label_chunks, dim=0).numpy()
    y_pred = torch.cat(score_chunks, dim=0).numpy()

    # A model with a 1-D output is treated as a single task.
    if y_true.ndim == 1:
        y_true = y_true[:, np.newaxis]
        y_pred = y_pred[:, np.newaxis]

    aucs = []
    for task in range(y_true.shape[1]):
        task_labels = y_true[:, task]
        valid = (task_labels != -1) & ~np.isnan(task_labels)
        task_labels = task_labels[valid]

        # ROC-AUC needs both classes. Skip explicitly rather than depending
        # on how roc_auc_score reports the single-class case.
        # NOTE(review): presumably raises ValueError or returns NaN depending
        # on the scikit-learn version - confirm for the pinned version.
        if np.unique(task_labels).size < 2:
            continue

        try:
            aucs.append(roc_auc_score(task_labels, y_pred[valid, task]))
        except ValueError:
            # Best effort, as before: a task that cannot be scored is skipped.
            continue

    return sum(aucs) / len(aucs) if aucs else 0.0

def run_experiment(ModelClass, name, num_tasks=12, epochs=20, lr=0.001):
    """Train ``ModelClass`` from scratch and report its test ROC-AUC.

    The model is optimised with Adam and ``BCEWithLogitsLoss``. After every
    epoch the training loss and validation score are printed; the test score
    is computed once, after the last epoch.

    The data comes from the module-level ``train_loader``, ``val_loader`` and
    ``test_loader``, which must exist before this function is called.

    Args:
        ModelClass: Callable invoked as ``ModelClass(num_tasks=...)`` that
            returns the model to train.
        name: Label used as the prefix of every printed line.
        num_tasks: Number of model outputs. Defaults to 12, the value that
            used to be hard-coded.
        epochs: Number of training epochs. Defaults to 20, as before.
        lr: Adam learning rate. Defaults to 0.001, as before.

    Returns:
        Tuple ``(model, test_auc)`` with the trained model and the value
        ``evaluate`` returned on the test loader.
    """
    model = ModelClass(num_tasks=num_tasks).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.BCEWithLogitsLoss()

    for epoch in range(1, epochs + 1):
        loss = train(model, train_loader, optimizer, criterion)
        val_auc = evaluate(model, val_loader)
        print(f"[{name}] Epoch {epoch:02d} | Loss: {loss:.4f} | Val AUC: {val_auc:.4f}")

    # The test split is scored only once, with the final-epoch weights.
    test_auc = evaluate(model, test_loader)
    print(f"[{name}] Test ROC-AUC: {test_auc:.4f}\n")
    return model, test_auc

# Train both architectures through the same run_experiment call, keeping each
# returned model together with its test score.
model_gcn, auc_gcn = run_experiment(ToxGCN, "GCN")
model_gin, auc_gin = run_experiment(ToxGIN, "GIN")

# Print the two test scores next to each other once both runs have finished.
print(f"GCN Test ROC-AUC: {auc_gcn:.4f}")
print(f"GIN Test ROC-AUC: {auc_gin:.4f}")

In [11]:
import os
os.chdir('/content') # This takes you to the root of the Colab environment

In [12]:
!mkdir -p Toxicity_prediction/notebooks
!mkdir -p Toxicity_prediction/data  # Optional
!mkdir -p Toxicity_prediction/models # Optional
!mkdir -p Toxicity_prediction/utils  # Optional

In [13]:
!mv ai_lab_testing/01_baseline_deepchem.ipynb Toxicity_prediction/notebooks/
!mv ai_lab_testing/02_gnn_baseline.ipynb Toxicity_prediction/notebooks/
!mv ai_lab_testing/03_gnn_advanced_features.ipynb Toxicity_prediction/notebooks/

mv: cannot stat 'ai_lab_testing/01_baseline_deepchem.ipynb': No such file or directory
mv: cannot stat 'ai_lab_testing/02_gnn_baseline.ipynb': No such file or directory
mv: cannot stat 'ai_lab_testing/03_gnn_advanced_features.ipynb': No such file or directory


In [14]:
import os
print(os.getcwd())

/content


In [15]:
!ls

sample_data  Toxicity_prediction


In [16]:
os.chdir('/content/Toxicity_prediction')
!ls
os.chdir('/content') # Go back to /content

data  models  notebooks  utils


In [17]:
import os
os.chdir('/content')

In [18]:
!mv ai_lab_testing/01_baseline_deepchem.ipynb Toxicity_prediction/notebooks/
!mv ai_lab_testing/02_gnn_baseline.ipynb Toxicity_prediction/notebooks/
!mv ai_lab_testing/03_gnn_advanced_features.ipynb Toxicity_prediction/notebooks/

mv: cannot stat 'ai_lab_testing/01_baseline_deepchem.ipynb': No such file or directory
mv: cannot stat 'ai_lab_testing/02_gnn_baseline.ipynb': No such file or directory
mv: cannot stat 'ai_lab_testing/03_gnn_advanced_features.ipynb': No such file or directory


In [19]:
os.chdir('/content/Toxicity_prediction')

In [20]:
!git init

[33mhint: Using 'master' as the name for the initial branch. This default branch name[m
[33mhint: is subject to change. To configure the initial branch name to use in all[m
[33mhint: [m
[33mhint: 	git config --global init.defaultBranch <name>[m
[33mhint: [m
[33mhint: Names commonly chosen instead of 'master' are 'main', 'trunk' and[m
[33mhint: 'development'. The just-created branch can be renamed via this command:[m
[33mhint: [m
[33mhint: 	git branch -m <name>[m
Initialized empty Git repository in /content/Toxicity_prediction/.git/


In [21]:
!git add .

In [22]:
!git config --global user.name "MreegendraNarayan"
!git config --global user.email "mreegendra2003211@gmail.com"

In [23]:
!git commit -m "Baseline model is created"

On branch master

Initial commit

nothing to commit (create/copy files and use "git add" to track)


In [24]:
!git remote add origin https://github.com/MreegendraNarayan/Toxicity_prediction.git

In [25]:
!git push origin main

error: src refspec main does not match any
[31merror: failed to push some refs to 'https://github.com/MreegendraNarayan/Toxicity_prediction.git'
[m

In [26]:
!git branch -v
!git log --oneline

fatal: your current branch 'master' does not have any commits yet
