In [1]:
pip install torch==2.5.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu124 

Looking in indexes: https://download.pytorch.org/whl/cu124
Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install dgl==1.1.2

Note: you may need to restart the kernel to use updated packages.


In [3]:
pip install torchdata==0.9.0

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install pandas

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install dgllife

Note: you may need to restart the kernel to use updated packages.


In [6]:
pip install rdkit

Note: you may need to restart the kernel to use updated packages.


In [7]:
pip install utils

Note: you may need to restart the kernel to use updated packages.


In [8]:
import torch
print("PyTorch version:", torch.__version__)
print("CUDA version:", torch.version.cuda)

cuda_available = torch.cuda.is_available()
print("CUDA available:", cuda_available)

if cuda_available:
    device = torch.cuda.get_device_properties(0)
    print("GPU name:", device.name)
    print("Total memory (GB):", round(device.total_memory / 1e9, 2))

PyTorch version: 2.5.1+cu124
CUDA version: 12.4
CUDA available: True
GPU name: NVIDIA L40
Total memory (GB): 47.58


In [9]:
import dgl
import numpy as np 
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import pandas as pd
from dgllife.model.gnn import GCN
from torch import Tensor
from torch.nn.utils.weight_norm import weight_norm
from typing import Optional
from typing import Tuple

In [10]:
# arguments for general hyperparameters, proteins, and smiles, going to args.py 
class Args:
    n_epochs   = 100
    batch_size = 64
    lr         = 0.0001
    seed       = 2048
    n_cpu      = 2
    shuffle    = True
    reg        = 5e-5
    drop       = 0.1
    S          = 'human'
    T          = 'biosnap'

    # Decoder args
    mlp_in_dim = 256
    mlp_hidden_dim = 512
    mlp_out_dim = 128
    binary = 1
args = Args()

# there are many more arguments to be added for both smiles and proteins later
class Prot_Args:
    max = 1000
    encode_dim = 512
    layers = 3
    num_heads = 8
    embedding_dim = 128
    num_filters = [128, 128, 128]
    filter_size = [3, 6, 9]
    padding = True

    # Transformer-related settings
    feed_forward_expansion_factor = 4
    feed_forward_dropout_p = 0.1
    attention_dropout_p = 0.1
    conv_dropout_p = 0.1
    conv_kernel_size = 3
    
prot_args = Prot_Args()

class Smiles_Args:
    max_nodes = 290
    drug_in = 75
    embedding_dim =  128
    hidden_layers = 128
    padding = True
    
smiles_args = Smiles_Args()

In [11]:
import os 
from pathlib import Path
cwd = Path.cwd()
print(cwd)

/home/jovyan/DTI-SL


In [12]:
# home/jovyan is from JupyterHub, going to loader.py
S_dir = Path("/home/jovyan/DTI-SL/datasets") / args.S # domain here

train = pd.read_csv(S_dir / "train.csv")[:500]
val   = pd.read_csv(S_dir / "val.csv")[:100]
test  = pd.read_csv(S_dir / "test.csv")[:100]

In [13]:
train.shape, val.shape, test.shape, train.columns, train.head() # 3 for drug,protein,label 

((500, 3),
 (100, 3),
 (100, 3),
 Index(['SMILES', 'Protein', 'Y'], dtype='object'),
                                               SMILES  \
 0  CC(C)[C@H](NS(=O)(=O)C1=CC=C(C=C1)C2=CC=C(C=C2...   
 1  [O-2].[O-2].[O-2].[O-2].[O-2].[O-2].[O-2].[O-2...   
 2              CC12CCC3C(C1CCC2=O)CCC4=CC(=O)CCC34CO   
 3  C1CN(CCC1(C2=CC=C(C=C2)Cl)O)CCCC(=O)C3=CC=C(C=...   
 4  C1C[C@@H]2CN[C@H](C[C@@H]2C[C@@H]1CCC3=NNN=N3)...   
 
                                              Protein  Y  
 0  MILLTFSTGRRLDFVHHSGVFFLQTLLWILCATVCGTEQYFNVEVW...  1  
 1  MLPSASRERPGYRAGVAAPDLLDPKSAAQNSKPRLSFSTKPTVLAS...  0  
 2  MWSWKCLLFWAVLVTATLCTARPSPTLPEQAQPWGAPVEVESFLVH...  0  
 3  MPVRRGHVAPQNTFLDTIIRKFEGQSRKFIIANARVENCAVIYCND...  1  
 4  MAEDGEEAEFHFAALYISGQWPRLRADTDLQRLGSSAMAPSRKFFV...  0  )

In [14]:
import utils
from functools import partial
from dgllife.utils import smiles_to_bigraph, CanonicalAtomFeaturizer, CanonicalBondFeaturizer
from torch.utils.data import Dataset


In [15]:
import torch.nn.functional as F
from torch import Tensor
from typing import Optional

In [16]:
# setting up the protein matrix, going to loader.py
# ---------- amino-acid dictionary ----------
amino_dict = {"B": 1, "A": 2, "C": 3, "E": 4, "D": 5, "G": 6,
              "F": 7, "I": 8, "H": 9, "K": 10, "M": 11, "L": 12,
              "O": 13, "N": 14, "Q": 15, "P": 16, "S": 17, "R": 18,
              "U": 19, "T": 20, "W": 21, "V": 22, "X": 23, "Z": 24, "Y": 25}

# ---------- helper: encode one protein ----------
def encode_protein(seq: str, max_len: int):
    encoded = [amino_dict.get(res, 0) for res in seq[:max_len]]
    if len(encoded) < max_len:
        encoded.extend([0] * (max_len - len(encoded)))
    return np.asarray(encoded, dtype=np.int16)

In [17]:
# dataset class, going to loader.py
class DrugProteinDataset(Dataset):
    """
    A lightweight Dataset that:
      • stores pre-encoded proteins (np.ndarray of shape [N, max_len])
      • builds/pads drug graphs on demand
      • returns tensors ready for model input
    """
    def __init__(self, df: pd.DataFrame,
                 prot_args: Prot_Args,
                 smiles_args: Smiles_Args):
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.p_max = prot_args.max # max is 1000 amino acids
        self.d_max = smiles_args.max_nodes # max is 290 nodes in the graph

        # — pre-encode every protein once —
        self.protein_int = np.stack(
            df["Protein"].apply(lambda s: encode_protein(s, self.p_max)).values
        )

        # — drug graph helpers —
        self.atom_featurizer = CanonicalAtomFeaturizer()
        self.bond_featurizer = CanonicalBondFeaturizer(self_loop=True)
        self.bigraph_fn = partial(smiles_to_bigraph, add_self_loop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]

        # -------- DRUG / SMILES --------
        g = self.bigraph_fn(
            smiles=row["SMILES"],
            node_featurizer=self.atom_featurizer,
            edge_featurizer=self.bond_featurizer,
        )

        # real-node indicator before padding
        n_real = g.num_nodes()
        real_node_bit = torch.zeros(n_real, 1)
        g.ndata["h"] = torch.cat([g.ndata.pop("h"), real_node_bit], dim=1) # h gets node features, e for bond features

        # pad to self.d_max virtual nodes
        n_fake = self.d_max - n_real
        if n_fake < 0:
            raise ValueError(f"SMILES string at index {idx} has {n_real} atoms "
                             f"which exceeds max_nodes={self.d_max}. "
                             "Either increase Smiles_Args.max_nodes or drop this entry.")
        if n_fake:
            virtual_feats = torch.cat([torch.zeros(n_fake, 74),
                                       torch.ones(n_fake, 1)], dim=1)
            g.add_nodes(n_fake, {"h": virtual_feats})

        # -------- PROTEIN --------
        prot_int = torch.tensor(self.protein_int[idx], dtype=torch.long)     # [max_len]
        mask = (prot_int != 0).float()                                       # 1 for real, 0 for pad

        # -------- LABEL --------
        y = torch.tensor(row["Y"], dtype=torch.float32)

        return g, prot_int, mask, y

In [18]:
# test block
prot_args   = Prot_Args()
smiles_args = Smiles_Args()

train_data = DrugProteinDataset(train, prot_args, smiles_args)
val_data   = DrugProteinDataset(val,   prot_args, smiles_args)
test_data  = DrugProteinDataset(test,  prot_args, smiles_args)

# sanity check, this will show nodes, edges, protein size for a single instance 
g, prot_int, mask, y = train_data[50]
print(g, prot_int.size(), mask.sum(), y)
g.ndata["h"].shape

Graph(num_nodes=290, num_edges=75,
      ndata_schemes={'h': Scheme(shape=(75,), dtype=torch.float32)}
      edata_schemes={'e': Scheme(shape=(13,), dtype=torch.float32)}) torch.Size([1000]) tensor(432.) tensor(0.)


torch.Size([290, 75])

In [19]:
# GCN, drug encoder block
class DrugGCN(nn.Module):
    def __init__(self, in_feats,
                 dim_embedding=128, 
                 padding=True,
                 hidden_feats=None, # apparently the DGL library can fill defaults for layers to be the same number as dim, 128x128x128
                 activation=None): # activation default is relu for GCNs, imported
        super(DrugGCN, self).__init__()

        self.init_transform = nn.Linear(in_feats, dim_embedding, bias=False)

        if padding:
            with torch.no_grad():
                self.init_transform.weight[-1].fill_(0)  # zero out mask bit effect

        # Use DGL-LifeSci's GCN here directly
        self.gnn = GCN(in_feats=dim_embedding,
                       hidden_feats=hidden_feats,
                       activation=activation)

        self.output_feats = hidden_feats[-1]

    def forward(self, batch_graph):
        node_feats = batch_graph.ndata.pop('h')              # [∑N_i, in_feats]
        node_feats = self.init_transform(node_feats)         # [∑N_i, dim_embedding]
        node_feats = self.gnn(batch_graph, node_feats)       # [∑N_i, output_feats]
        batch_size = batch_graph.batch_size
        node_feats = node_feats.view(batch_size, -1, self.output_feats)
        return node_feats

In [20]:
# CNNTrans, protein encoder block

class Swish(nn.Module):
    def forward(self, x):
        return x * torch.sigmoid(x)


class GLU(nn.Module):
    def __init__(self, dim):
        super().__init__()
        self.dim = dim

    def forward(self, x):
        a, b = x.chunk(2, dim=self.dim)
        return a * torch.sigmoid(b)


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=10000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return self.pe[:, :x.size(1)]


class RelativeMultiHeadAttention(nn.Module):
    def __init__(self, d_model=512, num_heads=8):
        super().__init__()
        assert d_model % num_heads == 0
        self.num_heads = num_heads
        self.d_head = d_model // num_heads
        self.q_proj = nn.Linear(d_model, d_model)
        self.k_proj = nn.Linear(d_model, d_model)
        self.v_proj = nn.Linear(d_model, d_model)
        self.pos_proj = nn.Linear(d_model, d_model, bias=False)
        self.u_bias = nn.Parameter(torch.randn(num_heads, self.d_head))
        self.v_bias = nn.Parameter(torch.randn(num_heads, self.d_head))
        self.out_proj = nn.Linear(d_model, d_model)
        self.scale = math.sqrt(d_model)

    def forward(self, x, pos_enc, mask=None):
        B, L, _ = x.size()
        q = self.q_proj(x).view(B, L, self.num_heads, self.d_head).transpose(1, 2)
        k = self.k_proj(x).view(B, L, self.num_heads, self.d_head).transpose(1, 2)
        v = self.v_proj(x).view(B, L, self.num_heads, self.d_head).transpose(1, 2)
        p = self.pos_proj(pos_enc).view(B, L, self.num_heads, self.d_head).transpose(1, 2)

        content_score = torch.matmul(q + self.u_bias.unsqueeze(1), k.transpose(-2, -1))
        pos_score = torch.matmul(q + self.v_bias.unsqueeze(1), p.transpose(-2, -1))
        score = (content_score + self._relative_shift(pos_score)) / self.scale

        if mask is not None:
            score = score.masked_fill(mask.unsqueeze(1).unsqueeze(2) == 0, -1e9)

        attn = F.softmax(score, dim=-1)
        context = torch.matmul(attn, v).transpose(1, 2).contiguous().view(B, L, -1)
        return self.out_proj(context)

    def _relative_shift(self, x):
        B, H, L1, L2 = x.size()
        zero_pad = torch.zeros((B, H, L1, 1), device=x.device, dtype=x.dtype)
        x_padded = torch.cat([zero_pad, x], dim=-1)
        x_padded = x_padded.view(B, H, L2 + 1, L1)
        return x_padded[:, :, 1:].view(B, H, L1, L2)


class FeedForwardModule(nn.Module):
    def __init__(self, d_model, expansion=4, dropout=0.1):
        super().__init__()
        self.seq = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Linear(d_model, d_model * expansion),
            Swish(),
            nn.Dropout(dropout),
            nn.Linear(d_model * expansion, d_model),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        return self.seq(x)


class ConvModule(nn.Module):
    def __init__(self, d_model, kernel_size=5, dropout=0.1):
        super().__init__()
        self.seq = nn.Sequential(
            nn.LayerNorm(d_model),
            nn.Conv1d(d_model, d_model * 2, kernel_size=1),
            Swish(),
            nn.Conv1d(d_model * 2, d_model * 2, kernel_size, padding=(kernel_size - 1) // 2),
            GLU(dim=1),
            nn.BatchNorm1d(d_model),
            nn.Conv1d(d_model, d_model, kernel_size=1),
            nn.Dropout(dropout)
        )

    def forward(self, x):
        x = x.transpose(1, 2)
        x = self.seq(x)
        return x.transpose(1, 2)


class CNNTransBlock(nn.Module):
    def __init__(self, d_model=512, num_heads=8, kernel_size=5, dropout=0.1, max_len=1000):
        super().__init__()
        self.attn = RelativeMultiHeadAttention(d_model, num_heads)
        self.pos_enc = PositionalEncoding(d_model, max_len)
        self.norm = nn.LayerNorm(d_model)
        self.ff = FeedForwardModule(d_model, dropout=dropout)
        self.conv = ConvModule(d_model, kernel_size, dropout)

    def forward(self, x, mask):
        pos = self.pos_enc(x)
        x = self.attn(self.norm(x), pos, mask) + x
        x = self.conv(x) + x
        return 0.5 * self.ff(x) + 0.5 * x


class ProteinCNNTrans(nn.Module):
    def __init__(self, max_len=1000, encoder_dim=128, num_layers=3, **kwargs):
        super().__init__()
        self.blocks = nn.ModuleList([
            CNNTransBlock(d_model=encoder_dim, max_len=max_len, **kwargs)
            for _ in range(num_layers)
        ])
        self.final_ff = FeedForwardModule(encoder_dim)

    def forward(self, x, mask):
        for block in self.blocks:
            x = block(x, mask)
        return 0.5 * self.final_ff(x) + 0.5 * x

In [27]:
# decoder block
class MLPClassifier(nn.Module):
    def __init__(self, in_dim, hidden_dim, out_dim, binary=1):
        super(MLPClassifier, self).__init__()
        self.fc1 = nn.Linear(in_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.bn2 = nn.BatchNorm1d(hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, out_dim)
        self.bn3 = nn.BatchNorm1d(out_dim)
        self.fc4 = nn.Linear(out_dim, binary)

    def forward(self, x):
        x = self.bn1(F.relu(self.fc1(x)))
        x = self.bn2(F.relu(self.fc2(x)))
        x = self.bn3(F.relu(self.fc3(x)))
        x = self.fc4(x)
        return x

In [28]:
prot_args = Prot_Args()
smiles_args = Smiles_Args()
args = Args()

class DTIModel(nn.Module):
    def __init__(self, smiles_args, prot_args, **config):
        super(DTIModel, self).__init__()

        self.drug_in = smiles_args.drug_in
        self.embedding_dim = smiles_args.embedding_dim
        self.max_nodes = smiles_args.max_nodes
        self.padding = smiles_args.padding
        self.hidden_layers = smiles_args.hidden_layers 

         # Drug feature extractor using Graph Convolutional Network (GCN)
        self.drug_encoder = DrugGCN(
            in_feats=self.drug_in,
            dim_embedding=self.embedding_dim,
            hidden_feats=[self.hidden_layers] * 3, # three layers GCN 128x128x128
            activation=[F.relu] * 3,
            padding=self.padding
        )

        # Protein feature encoder combining CNN and Transformer
        self.protein_encoder = ProteinCNNTrans(
            max_len=prot_args.max,
            encoder_dim=prot_args.embedding_dim,
            num_layers=prot_args.layers,
            num_heads=prot_args.num_heads,
            kernel_size=prot_args.conv_kernel_size,
            dropout=prot_args.conv_dropout_p
        )
        
         # MLP decoder for the final classification output
        self.mlp_classifier = MLPClassifier(
            args.mlp_in_dim,  # Input dimension for MLP
            args.mlp_hidden_dim,  # Hidden layer dimension for MLP
            args.mlp_out_dim,  # Output dimension for MLP
            args.binary  # Binary classification flag for MLP output
        )

        self.protein_embed = nn.Embedding(26, 128, padding_idx=0)  # 26 possible amino acids, embedded into 128-dimensional space

        self.mix_attention_layer = nn.MultiheadAttention(128, 4)  # 128-dimensional input, 4 attention heads

        # Max pooling layers for drug and protein feature extraction
        self.Drug_max_pool = nn.MaxPool1d(290)  # Pooling layer for drug features (max pool over 290 values)
        self.Protein_max_pool = nn.MaxPool1d(1000)  # Pooling layer for protein features (max pool over 1000 values)

        # Dropout layer for regularization
        self.dropout1 = nn.Dropout(0.1)  # Dropout with a probability of 10%

    def forward(self, bg_d, v_p, protein_mask, mode="train"):
        # Process drug graph through molecular GCN feature extractor
        v_d = self.drug_encoder(bg_d)
        
        # Embed protein sequences using the protein embedding layer
        v_p = self.protein_embed(v_p.long().to(device))  # Convert protein indices to embeddings
        protein_mask = protein_mask.long().to(device)  # Protein mask for attention
        
        # Process protein embeddings through the protein feature encoder
        v_p = self.protein_encoder(v_p, protein_mask)

        # Prepare for attention by permuting the dimensions of drug and protein features
        drugConv = v_d.permute(0, 2, 1)  # Permute for attention processing
        proteinConv = v_p.permute(0, 2, 1)  # Permute for attention processing
        
        # Prepare drug and protein for attention mechanism (Q, K, V are query, key, value)
        drug_QKV = drugConv.permute(2, 0, 1)  # Query for drug
        protein_QKV = proteinConv.permute(2, 0, 1)  # Query for protein
        
        # Apply multi-head attention between drug and protein
        drug_att, _ = self.mix_attention_layer(drug_QKV, protein_QKV, protein_QKV)
        protein_att, _ = self.mix_attention_layer(protein_QKV, drug_QKV, drug_QKV)

        # Permute back after attention to get the correct shape
        drug_att = drug_att.permute(1, 2, 0)
        protein_att = protein_att.permute(1, 2, 0)

        # Combine original and attended features
        drugConv = drugConv * 0.5 + drug_att * 0.5
        proteinConv = proteinConv * 0.5 + protein_att * 0.5

        # Apply max pooling to both drug and protein features
        drugConv = self.Drug_max_pool(drugConv).squeeze(2)
        proteinConv = self.Protein_max_pool(proteinConv).squeeze(2)

        # Concatenate drug and protein attended features and apply dropout
        result = torch.cat((drug_att, protein_att), dim=-1)
        pair = torch.cat([drugConv, proteinConv], dim=1)
        pair = self.dropout1(pair)

        # Pass concatenated features through MLP classifier
        score = self.mlp_classifier(pair)

        # Return results based on mode (train or eval)
        if mode == "train":
            return v_d, v_p, pair, score  # Return all outputs for training
        elif mode == "eval":
            return v_d, v_p, score, result  # Return simplified outputs for evaluation
        
            

In [29]:
import random
from torch.utils.data import DataLoader
import math
args = Args()

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)

set_seed(args.seed)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [1]:

def dgl_has_cuda():
    if not torch.cuda.is_available():
        return False
    try:
        g = dgl.graph(([0], [1]))
        g = g.to('cuda:0')
        return True
    except Exception as e:
        print("DGL CUDA check failed:", e)
        return False

if dgl_has_cuda():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print("Using device:", device)

NameError: name 'torch' is not defined

In [54]:
device = torch.device('cpu')  # FORCE CPU
print(device)

cpu


In [55]:
def collate_fn(batch):
    graphs, prot_seqs, masks, labels = map(list, zip(*batch))
    bg = dgl.batch(graphs)  # stays on CPU

    prot_seqs = torch.stack(prot_seqs)
    masks = torch.stack(masks)
    labels = torch.tensor(labels, dtype=torch.float32)

    return bg, prot_seqs, masks, labels


In [56]:
S_dir = Path("/home/jovyan/DTI-SL/datasets") / args.S

train_df = pd.read_csv(S_dir / "train.csv")
val_df   = pd.read_csv(S_dir / "val.csv")
test_df  = pd.read_csv(S_dir / "test.csv")

train_data = DrugProteinDataset(train_df, prot_args, smiles_args)
val_data   = DrugProteinDataset(val_df, prot_args, smiles_args)
test_data  = DrugProteinDataset(test_df, prot_args, smiles_args)

train_loader = DataLoader(train_data, batch_size=args.batch_size, shuffle=args.shuffle, collate_fn=collate_fn)
val_loader   = DataLoader(val_data, batch_size=args.batch_size, collate_fn=collate_fn)
test_loader  = DataLoader(test_data, batch_size=args.batch_size, collate_fn=collate_fn)

In [57]:
model = DTIModel(smiles_args, prot_args).to(device)

optimizer = torch.optim.AdamW(model.parameters(), lr=args.lr, weight_decay=args.reg)
criterion = torch.nn.BCEWithLogitsLoss()


In [58]:
def train_and_eval(model, train_loader, val_loader, optimizer, criterion, epochs):
    model.to(device)  # moves model to CPU

    for epoch in range(epochs):
        model.train()
        train_loss = 0
        train_correct = 0
        total_train = 0

        for g_batch, prot_int, mask, y in train_loader:
            g_batch = g_batch.cpu()  # DGL graph on CPU
            prot_int = prot_int.to(device)
            mask = mask.to(device)
            y = y.to(device).float()

            optimizer.zero_grad()

            _, _, _, output = model(g_batch, prot_int, mask, mode="train")
            loss = criterion(output.squeeze(), y)
            loss.backward()
            optimizer.step()

            train_loss += loss.item() * y.size(0)
            preds = (output.squeeze() > 0.5).float()
            train_correct += (preds == y).sum().item()
            total_train += y.size(0)

        train_loss /= total_train
        train_acc = train_correct / total_train

        # Validation
        model.eval()
        val_loss = 0
        val_correct = 0
        total_val = 0

        with torch.no_grad():
            for g_batch, prot_int, mask, y in val_loader:
                g_batch = g_batch.cpu()
                prot_int = prot_int.to(device)
                mask = mask.to(device)
                y = y.to(device).float()

                _, _, _, output = model(g_batch, prot_int, mask, mode="eval")
                loss = criterion(output.squeeze(), y)

                val_loss += loss.item() * y.size(0)
                preds = (output.squeeze() > 0.5).float()
                val_correct += (preds == y).sum().item()
                total_val += y.size(0)

        val_loss /= total_val
        val_acc = val_correct / total_val

        print(f"Epoch {epoch+1}/{epochs} — Train Loss: {train_loss:.4f} Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} Val Acc: {val_acc:.4f}")

In [59]:
train_and_eval(model, train_loader, val_loader, optimizer, criterion, epochs=args.n_epochs)


DGLError: There are 0-in-degree nodes in the graph, output for those nodes will be invalid. This is harmful for some applications, causing silent performance regression. Adding self-loop on the input graph by calling `g = dgl.add_self_loop(g)` will resolve the issue. Setting ``allow_zero_in_degree`` to be `True` when constructing this module will suppress the check and let the code run.