In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch_geometric.datasets import MoleculeNet
from torch_geometric.nn import GCNConv, global_mean_pool, global_add_pool, SAGEConv
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from rdkit import Chem
from rdkit.Chem import Draw
import pubchempy as pcp
import pandas as pd

class LipophilicityGNN(torch.nn.Module):
    """Graph-level regressor: GCN stack with linear skip paths, pooling, and an MLP head.

    Args:
        num_features: Width of the per-node input feature vectors.
        hidden_channels: Output width of every graph convolution.
        num_layers: Total number of graph convolutions, i.e. the input
            convolution plus ``num_layers - 1`` hidden convolutions.
        dropout: Dropout probability used after every convolution stage and
            inside the head. Dropout follows ``self.training``, so it is
            disabled in eval mode.
    """

    def __init__(self, num_features, hidden_channels=64, num_layers=3, dropout=0.2):
        super().__init__()
        self.num_layers = num_layers
        self.dropout = dropout
        hidden_count = num_layers - 1

        # Input convolution: raw node features -> hidden width.
        self.conv_first = GCNConv(num_features, hidden_channels)

        # Hidden convolutions; convs[i] is paired with skips[i] in forward().
        hidden_convs = [
            GCNConv(hidden_channels, hidden_channels) for _ in range(hidden_count)
        ]
        self.convs = nn.ModuleList(hidden_convs)

        # Learned linear projections used as the skip branch of each hidden layer.
        skip_projections = [
            nn.Linear(hidden_channels, hidden_channels) for _ in range(hidden_count)
        ]
        self.skips = nn.ModuleList(skip_projections)

        # Regression head: hidden -> hidden // 2 -> 1.
        self.linear1 = nn.Linear(hidden_channels, hidden_channels // 2)
        self.linear2 = nn.Linear(hidden_channels // 2, 1)

    def _drop(self, tensor):
        """Apply dropout with the configured probability, honouring train/eval mode."""
        return F.dropout(tensor, p=self.dropout, training=self.training)

    def forward(self, x, edge_index, batch):
        """Compute one prediction per pooled graph representation.

        Args:
            x: Node feature tensor passed to the first convolution.
            edge_index: Graph connectivity passed to every convolution.
            batch: Node-to-graph assignment passed to ``global_mean_pool``.

        Returns:
            The output of the final linear layer (trailing dimension 1).
        """
        h = self._drop(F.relu(self.conv_first(x, edge_index)))

        for layer_idx in range(self.num_layers - 1):
            conv, skip = self.convs[layer_idx], self.skips[layer_idx]
            # Activated convolution branch plus the (un-activated) linear skip branch.
            h = self._drop(F.relu(conv(h, edge_index)) + skip(h))

        pooled = global_mean_pool(h, batch)

        hidden = self._drop(F.relu(self.linear1(pooled)))
        return self.linear2(hidden)
        
def load_and_preprocess_data(name='ESOL', root='data/lipophilicity',
                             test_size=0.2, batch_size=64, random_state=42):
    """Load a MoleculeNet dataset and build shuffled-train / ordered-test loaders.

    Args:
        name: Dataset name forwarded to ``MoleculeNet``.
        root: Directory forwarded to ``MoleculeNet`` as its ``root``.
        test_size: Fraction (or count) of graphs held out, forwarded to
            ``train_test_split``.
        batch_size: Number of graphs per mini-batch for both loaders.
        random_state: Seed forwarded to ``train_test_split`` so the split is
            repeatable.

    Returns:
        ``(train_loader, test_loader, dataset)`` where the loaders iterate over
        the train/test subsets and ``dataset`` is the full dataset object.
    """
    # NOTE(review): the default loads 'ESOL' into a directory named
    # 'lipophilicity', and the model class is named after lipophilicity too.
    # As far as I know ESOL is a solubility benchmark and MoleculeNet exposes
    # lipophilicity as 'Lipo' -- confirm which dataset is intended.
    dataset = MoleculeNet(root=root, name=name)

    # Split on indices so the dataset object itself is left untouched.
    indices = list(range(len(dataset)))
    train_idx, test_idx = train_test_split(
        indices, test_size=test_size, random_state=random_state
    )

    train_graphs = [dataset[i] for i in train_idx]
    test_graphs = [dataset[i] for i in test_idx]

    # Only the training loader shuffles; evaluation order stays fixed.
    train_loader = DataLoader(train_graphs, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_graphs, batch_size=batch_size)

    return train_loader, test_loader, dataset

def train_model(model, train_loader, optimizer, device):
    """Run one optimisation pass over ``train_loader`` and return the mean loss.

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        train_loader: Iterable of batches exposing ``x``, ``edge_index``,
            ``batch``, ``y``, ``num_graphs`` and ``to(device)``; must also
            expose ``dataset`` with a length.
        optimizer: Optimizer whose gradients are reset and stepped per batch.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Sum of per-batch MSE losses, each weighted by the batch's
        ``num_graphs``, divided by ``len(train_loader.dataset)``.
    """
    model.train()
    weighted_loss_sum = 0

    for graph_batch in train_loader:
        graph_batch = graph_batch.to(device)
        optimizer.zero_grad()

        predictions = model(graph_batch.x, graph_batch.edge_index, graph_batch.batch)
        # Targets are reshaped to a column so they line up with the model output.
        targets = graph_batch.y.view(-1, 1)
        batch_loss = F.mse_loss(predictions, targets)

        batch_loss.backward()
        optimizer.step()

        # Weight by graph count so a smaller final batch does not skew the average.
        weighted_loss_sum += batch_loss.item() * graph_batch.num_graphs

    sample_count = len(train_loader.dataset)
    return weighted_loss_sum / sample_count

def evaluate_model(model, loader, device):
    """Score ``model`` on every batch of ``loader`` and return regression metrics.

    Predictions and targets are both flattened to 1-D per batch before being
    concatenated, so the metric functions always receive arrays of matching
    shape regardless of how ``data.y`` is laid out.

    Args:
        model: Module called as ``model(x, edge_index, batch)``.
        loader: Iterable of batches exposing ``x``, ``edge_index``, ``batch``,
            ``y`` and ``to(device)``.
        device: Device each batch is moved to before the forward pass.

    Returns:
        ``(r2, rmse, mae)`` computed over all samples in ``loader``.

    Raises:
        ValueError: If ``loader`` yields no batches, since no metric can be
            computed from zero samples.
    """
    model.eval()
    batch_predictions = []
    batch_actuals = []

    # Gradients are not needed for scoring.
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            out = model(data.x, data.edge_index, data.batch)
            # Flatten both sides here so they stay aligned sample-for-sample.
            batch_predictions.append(out.reshape(-1).cpu().numpy())
            batch_actuals.append(data.y.reshape(-1).cpu().numpy())

    if not batch_predictions:
        raise ValueError("evaluate_model received an empty loader; no samples to score")

    predictions = np.concatenate(batch_predictions)
    actuals = np.concatenate(batch_actuals)

    r2 = r2_score(actuals, predictions)
    rmse = np.sqrt(mean_squared_error(actuals, predictions))
    mae = mean_absolute_error(actuals, predictions)

    return r2, rmse, mae

def main(epochs=100, lr=0.001, seed=42):
    """Train the GNN, log test metrics every 10 epochs, and plot the learning curves.

    Args:
        epochs: Number of passes over the training loader.
        lr: Learning rate for the Adam optimizer.
        seed: Value passed to ``torch.manual_seed`` before any data loader or
            model is created; pass ``None` to leave torch's RNG untouched.

    Returns:
        ``(model, train_losses, test_metrics)`` where ``train_losses`` holds one
        loss per epoch and ``test_metrics`` holds one ``(r2, rmse, mae)`` tuple
        per epoch.
    """
    # The train/test split is already seeded; seed torch as well so weight
    # initialisation and dropout (and presumably loader shuffling) repeat
    # from run to run.
    if seed is not None:
        torch.manual_seed(seed)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_loader, test_loader, dataset = load_and_preprocess_data()

    model = LipophilicityGNN(
        # Input width is read from the first graph's node-feature matrix.
        num_features=dataset[0].x.shape[1],
        hidden_channels=64,
        num_layers=3,
        dropout=0.2
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    train_losses = []
    test_metrics = []

    for epoch in range(epochs):
        train_loss = train_model(model, train_loader, optimizer, device)

        # NOTE(review): these metrics come from the test loader every epoch, so
        # they are for monitoring only -- do not use them to pick a checkpoint.
        r2, rmse, mae = evaluate_model(model, test_loader, device)

        train_losses.append(train_loss)
        test_metrics.append((r2, rmse, mae))

        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1:03d}, Loss: {train_loss:.4f}, R²: {r2:.4f}, '
                  f'RMSE: {rmse:.4f}, MAE: {mae:.4f}')

    # One panel per curve: training loss, test R², test RMSE.
    fig, (loss_ax, r2_ax, rmse_ax) = plt.subplots(1, 3, figsize=(12, 4))

    loss_ax.plot(train_losses)
    loss_ax.set_title('Training Loss')
    loss_ax.set_xlabel('Epoch')
    loss_ax.set_ylabel('MSE Loss')

    r2_ax.plot([m[0] for m in test_metrics])
    r2_ax.set_title('R² Score')
    r2_ax.set_xlabel('Epoch')
    r2_ax.set_ylabel('R²')

    rmse_ax.plot([m[1] for m in test_metrics])
    rmse_ax.set_title('RMSE')
    rmse_ax.set_xlabel('Epoch')
    rmse_ax.set_ylabel('RMSE')

    fig.tight_layout()
    plt.show()

    return model, train_losses, test_metrics

# Entry point: run the full training pipeline when this code is executed directly
# rather than imported as a module.
if __name__ == "__main__":
    main()

ModuleNotFoundError: No module named 'torch'

In [2]:
# Use the %pip magic (not !pip) so packages are installed into the environment
# of the running kernel rather than whichever pip is first on the shell PATH.
# NOTE(review): a "from versions: none" failure usually means the index has no
# wheel for this interpreter/platform -- confirm the kernel's Python version is
# one PyTorch publishes builds for.
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118


Looking in indexes: https://download.pytorch.org/whl/cu118


ERROR: Could not find a version that satisfies the requirement torch (from versions: none)
ERROR: No matching distribution found for torch


In [3]:
# Use the %pip magic (not !pip) so packages are installed into the environment
# of the running kernel rather than whichever pip is first on the shell PATH.
%pip install torch torchvision torchaudio

ERROR: Could not find a version that satisfies the requirement torch (from versions: none)
ERROR: No matching distribution found for torch


In [2]:
import torch

# Environment sanity check: print the installed PyTorch version, then whether
# a CUDA GPU is accessible, one value per line.
print(torch.__version__, torch.cuda.is_available(), sep="\n")


ModuleNotFoundError: No module named 'torch'