In [18]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.loader import DataLoader


from data_loaders import preproccess_data, generate_scaffold_split, df_to_graph_list, get_scaffolds
from gcn import GCN



# Run on a CUDA GPU when PyTorch reports one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [19]:
# Read the solubility CSV through the project's preprocessing helper.
file_path = 'data/curated-solubility-dataset.csv'
df = preproccess_data(file_path)

# Add a 'scaffold' column by applying get_scaffolds to every entry of 'mol'.
df['scaffold'] = df['mol'].apply(get_scaffolds)

# Scaffold-based split: one index collection each for train, val and test.
train_idx, val_idx, test_idx = generate_scaffold_split(df)

# Select the rows of each subset by position (iloc), in train/val/test order.
train_df, val_df, test_df = (
    df.iloc[idx] for idx in (train_idx, val_idx, test_idx)
)

# Convert every subset with df_to_graph_list, again in train/val/test order.
train_graph_list, val_graph_list, test_graph_list = (
    df_to_graph_list(subset) for subset in (train_df, val_df, test_df)
)

[11:49:47] Explicit valence for atom # 5 N, 4, is greater than permitted
[11:49:47] Explicit valence for atom # 5 N, 4, is greater than permitted


In [20]:
# One loader per split, all with the same batch size. Only the training
# loader shuffles; validation and test keep their original order.
BATCH_SIZE = 32
train_loader = DataLoader(train_graph_list, batch_size=BATCH_SIZE, shuffle=True)
val_loader, test_loader = (
    DataLoader(graphs, batch_size=BATCH_SIZE, shuffle=False)
    for graphs in (val_graph_list, test_graph_list)
)

In [24]:
# Seed PyTorch's RNG so repeated runs start from the same state.
torch.manual_seed(42)

# Feature widths are read from the first training graph (second dimension of
# its x, edge_attr and u tensors).
first_graph = train_graph_list[0]
num_node = first_graph.x.shape[1]
edge_attr = first_graph.edge_attr.shape[1]
u_d = first_graph.u.shape[1]

# NOTE(review): the saved output of this cell is
# "TypeError: __init__() got an unexpected keyword argument 'edge_attr_dim'",
# so the GCN class that was imported apparently does not accept that keyword.
# Confirm the constructor signature in gcn.py (or reload the module) before
# re-running.
model = GCN(
    num_node_features=num_node,
    edge_attr_dim=edge_attr,
    u_dim=u_d,
)
model = model.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Dataset sizes do not change between epochs, so look them up once.
train_size = len(train_loader.dataset)
val_size = len(val_loader.dataset)

# Training loop
num_epochs = 100
for epoch in range(1, num_epochs + 1):
    # ---- optimisation pass over the training split ----
    model.train()
    train_loss = 0
    for batch in train_loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        pred = model(batch)

        # Reshape the labels to one row per graph in the batch.
        truth = batch.y.view(batch.num_graphs, -1).to(device)
        batch_loss = criterion(pred, truth)
        batch_loss.backward()
        optimizer.step()

        # Weight each batch loss by its graph count so the epoch figure is a
        # per-graph average rather than a per-batch one.
        train_loss += batch_loss.item() * batch.num_graphs
    train_loss /= train_size

    # ---- gradient-free pass over the validation split ----
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(device)
            pred = model(batch)
            truth = batch.y.view(batch.num_graphs, -1).to(device)
            batch_loss = criterion(pred, truth)
            val_loss += batch_loss.item() * batch.num_graphs
    val_loss /= val_size
    val_rmse = val_loss ** 0.5  # square root of the averaged validation loss

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.4f}, Val RMSE: {val_rmse:.4f}")

TypeError: __init__() got an unexpected keyword argument 'edge_attr_dim'

In [None]:
# Evaluate the model on the test split and report the RMSE.
model.eval()
test_size = len(test_loader.dataset)
test_loss = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for batch in test_loader:
        batch = batch.to(device)
        pred = model(batch)
        # Reshape the labels to one row per graph in the batch.
        truth = batch.y.view(batch.num_graphs, -1).to(device)
        # Weight each batch loss by its graph count before accumulating.
        test_loss += criterion(pred, truth).item() * batch.num_graphs
test_loss /= test_size
test_rmse = test_loss ** 0.5
print(f"Test RMSE: {test_rmse:.4f}")