In [None]:
!pip install torch_geometric
!pip install rdkit
!pip install umap-learn
!pip install hdbscan
!pip install pandas
!pip install matplotlib
!pip install seaborn
!pip install scikit-learn
!pip install numpy

Collecting torch_geometric
  Downloading torch_geometric-2.6.1-py3-none-any.whl.metadata (63 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/63.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.1-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.1
Collecting rdkit
  Downloading rdkit-2024.9.6-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.0 kB)
Downloading rdkit-2024.9.6-cp311-cp311-manylinux_2_28_x86_64.whl (34.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m34.3/34.3 MB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit
Successfully installed rdkit-2024.9.6


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.loader import DataLoader


from data_loaders import preproccess_data, generate_scaffold_split, df_to_graph_list, get_scaffolds
from gcn_change4 import GCN

from sklearn.metrics import r2_score
import numpy as np
import scipy.stats as stats

from torch.optim.lr_scheduler import CosineAnnealingLR

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
file_path = '/content/drive/MyDrive/BioSolveAI/data/curated-solubility-dataset.csv'
df = preproccess_data(file_path)


df['scaffold'] = df['mol'].apply(get_scaffolds)

# scaffolds to get train, val, text
train_idx, val_idx, test_idx = generate_scaffold_split(df)

# Split the dataframe into train, val, and test
train_df = df.iloc[train_idx]
val_df = df.iloc[val_idx]
test_df = df.iloc[test_idx]

# df to graph list
train_graph_list = df_to_graph_list(train_df)
val_graph_list = df_to_graph_list(val_df)
test_graph_list = df_to_graph_list(test_df)

[19:39:21] Explicit valence for atom # 5 N, 4, is greater than permitted
[19:39:21] Explicit valence for atom # 5 N, 4, is greater than permitted


In [None]:
train_loader = DataLoader(train_graph_list, batch_size=32, shuffle=True)
val_loader = DataLoader(val_graph_list, batch_size=32, shuffle=False)
test_loader = DataLoader(test_graph_list, batch_size=32, shuffle=False)

The below run introduces AdamW opt and CosineAnnealing schduler. Also reduced epochs to 50

In [None]:
# Set seed for reproducibility
torch.manual_seed(42)

num_node = train_graph_list[0].x.shape[1]
edge_attr = train_graph_list[0].edge_attr.shape[1]
u_d = train_graph_list[0].u.shape[1]

model = GCN(num_node_features=num_node,
            edge_attr_dim=edge_attr,
            u_dim=u_d,
            hidden_dim=64,
            output_dim=1).to(device)

criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay= 0.01) # introduced AdamW
scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6) # introduced Cosine Annealign Scheduler

# Training loop
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    model.train()
    train_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.num_graphs
    train_loss /= len(train_loader.dataset)

    # Validation step
    model.eval()
    all_preds, all_targets = [], []
    val_loss = 0
    with torch.no_grad():
        for data in val_loader:
            data = data.to(device)
            output = model(data)
            target = data.y.view(data.num_graphs, -1).to(device)
            loss = criterion(output, target) #get loss based on criterion
            val_loss += loss.item() * data.num_graphs
            all_preds.extend(output.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
    val_loss /= len(val_loader.dataset) #compute validation loss
    val_rmse = val_loss ** 0.5

    scheduler.step()

    # Compute R^2
    all_preds = np.array(all_preds).flatten()
    all_targets = np.array(all_targets).flatten()
    r2 = r2_score(all_targets, all_preds)

    # Compute 95% Confidence Interval for RMSE
    confidence = 0.95
    squared_errors = (all_preds - all_targets) ** 2
    mean_se = np.mean(squared_errors)
    se = stats.sem(squared_errors)
    interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
    ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.4f}, Val RMSE: {val_rmse:.4f}, R²: {r2:.4f}, CI (95%):[{ci_lower:.4f}, {ci_upper:.4f}]")

Epoch: 1, Train Loss: 330.7809, Val RMSE: 4.1775, R²: -19.3666, CI (95%):[3.8518, 4.4797]
Epoch: 2, Train Loss: 35.9041, Val RMSE: 3.1633, R²: -10.6775, CI (95%):[2.8376, 3.4584]
Epoch: 3, Train Loss: 20.5847, Val RMSE: 2.8952, R²: -8.7819, CI (95%):[2.6639, 3.1093]
Epoch: 4, Train Loss: 12.3498, Val RMSE: 2.1423, R²: -4.3562, CI (95%):[1.9796, 2.2935]
Epoch: 5, Train Loss: 7.0016, Val RMSE: 1.9648, R²: -3.5054, CI (95%):[1.7974, 2.1191]
Epoch: 6, Train Loss: 4.4244, Val RMSE: 1.5054, R²: -1.6446, CI (95%):[1.4134, 1.5920]
Epoch: 7, Train Loss: 2.4298, Val RMSE: 0.9901, R²: -0.1439, CI (95%):[0.9136, 1.0611]
Epoch: 8, Train Loss: 1.4540, Val RMSE: 0.8128, R²: 0.2290, CI (95%):[0.7520, 0.8693]
Epoch: 9, Train Loss: 1.0414, Val RMSE: 0.7173, R²: 0.3995, CI (95%):[0.6696, 0.7621]
Epoch: 10, Train Loss: 0.8091, Val RMSE: 0.6784, R²: 0.4629, CI (95%):[0.6352, 0.7190]
Epoch: 11, Train Loss: 0.7077, Val RMSE: 0.6367, R²: 0.5269, CI (95%):[0.6040, 0.6678]
Epoch: 12, Train Loss: 0.6353, Val RMS

In [None]:
# Testing
model.eval()
test_loss = 0
all_preds, all_targets = [], []

with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        test_loss += loss.item() * data.num_graphs
        all_preds.extend(output.cpu().numpy())
        all_targets.extend(target.cpu().numpy())
test_loss /= len(test_loader.dataset)
test_rmse = test_loss ** 0.5

# Compute R^2
all_preds = np.array(all_preds).flatten()
all_targets = np.array(all_targets).flatten()
r2 = r2_score(all_targets, all_preds)

# Compute 95% Confidence Interval for RMSE
confidence = 0.95
squared_errors = (all_preds - all_targets) ** 2
mean_se = np.mean(squared_errors)
se = stats.sem(squared_errors)
interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

print(f"Test RMSE: {test_rmse:.4f}, R²: {r2:.4f}, CI (95%): [{ci_lower:.4f}, {ci_upper:.4f}]")


Test RMSE: 0.4954, R²: 0.6317, CI (95%): [0.4708, 0.5188]


Model perfoms a decent bit better!

Implementing dropout (0.35) to help with generalization. Also introducing LayerNorm based on proff's advice. Changes are in gcn_change1

In [None]:
from gcn_change1 import GCN
# Set seed for reproducibility
torch.manual_seed(42)

num_node = train_graph_list[0].x.shape[1]
edge_attr = train_graph_list[0].edge_attr.shape[1]
u_d = train_graph_list[0].u.shape[1]

model = GCN(num_node_features=num_node,
            edge_attr_dim=edge_attr,
            u_dim=u_d,
            hidden_dim=64,
            output_dim=1).to(device)

criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay= 0.01) # introduced AdamW
scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6) # introduced Cosine Annealign Scheduler

# Training loop
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    model.train()
    train_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.num_graphs
    train_loss /= len(train_loader.dataset)

    # Validation step
    model.eval()
    all_preds, all_targets = [], []
    val_loss = 0
    with torch.no_grad():
        for data in val_loader:
            data = data.to(device)
            output = model(data)
            target = data.y.view(data.num_graphs, -1).to(device)
            loss = criterion(output, target) #get loss based on criterion
            val_loss += loss.item() * data.num_graphs
            all_preds.extend(output.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
    val_loss /= len(val_loader.dataset) #compute validation loss
    val_rmse = val_loss ** 0.5

    scheduler.step()

    # Compute R^2
    all_preds = np.array(all_preds).flatten()
    all_targets = np.array(all_targets).flatten()
    r2 = r2_score(all_targets, all_preds)

    # Compute 95% Confidence Interval for RMSE
    confidence = 0.95
    squared_errors = (all_preds - all_targets) ** 2
    mean_se = np.mean(squared_errors)
    se = stats.sem(squared_errors)
    interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
    ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.4f}, Val RMSE: {val_rmse:.4f}, R²: {r2:.4f}, CI (95%):[{ci_lower:.4f}, {ci_upper:.4f}]")

Epoch: 1, Train Loss: 341.2259, Val RMSE: 4.1277, R²: -18.8833, CI (95%):[3.8185, 4.4153]
Epoch: 2, Train Loss: 37.2352, Val RMSE: 3.3622, R²: -12.1921, CI (95%):[3.1327, 3.5769]
Epoch: 3, Train Loss: 19.5527, Val RMSE: 2.6306, R²: -7.0758, CI (95%):[2.4363, 2.8115]
Epoch: 4, Train Loss: 11.6486, Val RMSE: 2.4061, R²: -5.7564, CI (95%):[2.2653, 2.5391]
Epoch: 5, Train Loss: 6.5833, Val RMSE: 1.5856, R²: -1.9342, CI (95%):[1.4722, 1.6915]
Epoch: 6, Train Loss: 3.6747, Val RMSE: 1.1461, R²: -0.5330, CI (95%):[1.0583, 1.2277]
Epoch: 7, Train Loss: 2.0262, Val RMSE: 0.9361, R²: -0.0227, CI (95%):[0.8791, 0.9899]
Epoch: 8, Train Loss: 1.2075, Val RMSE: 0.7886, R²: 0.2743, CI (95%):[0.7293, 0.8437]
Epoch: 9, Train Loss: 0.9191, Val RMSE: 0.6908, R²: 0.4431, CI (95%):[0.6473, 0.7318]
Epoch: 10, Train Loss: 0.7575, Val RMSE: 0.6646, R²: 0.4845, CI (95%):[0.6271, 0.7000]
Epoch: 11, Train Loss: 0.6960, Val RMSE: 0.6628, R²: 0.4873, CI (95%):[0.6307, 0.6934]
Epoch: 12, Train Loss: 0.6452, Val RMS

In [None]:
# Testing
model.eval()
test_loss = 0
all_preds, all_targets = [], []

with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        test_loss += loss.item() * data.num_graphs
        all_preds.extend(output.cpu().numpy())
        all_targets.extend(target.cpu().numpy())
test_loss /= len(test_loader.dataset)
test_rmse = test_loss ** 0.5

# Compute R^2
all_preds = np.array(all_preds).flatten()
all_targets = np.array(all_targets).flatten()
r2 = r2_score(all_targets, all_preds)

# Compute 95% Confidence Interval for RMSE
confidence = 0.95
squared_errors = (all_preds - all_targets) ** 2
mean_se = np.mean(squared_errors)
se = stats.sem(squared_errors)
interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

print(f"Test RMSE: {test_rmse:.4f}, R²: {r2:.4f}, CI (95%): [{ci_lower:.4f}, {ci_upper:.4f}]")


Test RMSE: 0.4938, R²: 0.6340, CI (95%): [0.4690, 0.5174]


Performance is roughly the same. No harm in keeping them then!

Now, going to try different pooling methods. Mean pooling is quite basic.
First, trying add pool (since many molecular features are additive generally). Changes in gcn_change2

In [None]:
from gcn_change2 import GCN
# Set seed for reproducibility
torch.manual_seed(42)

num_node = train_graph_list[0].x.shape[1]
edge_attr = train_graph_list[0].edge_attr.shape[1]
u_d = train_graph_list[0].u.shape[1]

model = GCN(num_node_features=num_node,
            edge_attr_dim=edge_attr,
            u_dim=u_d,
            hidden_dim=64,
            output_dim=1).to(device)

criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay= 0.01) # introduced AdamW
scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6) # introduced Cosine Annealign Scheduler

# Training loop
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    model.train()
    train_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.num_graphs
    train_loss /= len(train_loader.dataset)

    # Validation step
    model.eval()
    all_preds, all_targets = [], []
    val_loss = 0
    with torch.no_grad():
        for data in val_loader:
            data = data.to(device)
            output = model(data)
            target = data.y.view(data.num_graphs, -1).to(device)
            loss = criterion(output, target) #get loss based on criterion
            val_loss += loss.item() * data.num_graphs
            all_preds.extend(output.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
    val_loss /= len(val_loader.dataset) #compute validation loss
    val_rmse = val_loss ** 0.5

    scheduler.step()

    # Compute R^2
    all_preds = np.array(all_preds).flatten()
    all_targets = np.array(all_targets).flatten()
    r2 = r2_score(all_targets, all_preds)

    # Compute 95% Confidence Interval for RMSE
    confidence = 0.95
    squared_errors = (all_preds - all_targets) ** 2
    mean_se = np.mean(squared_errors)
    se = stats.sem(squared_errors)
    interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
    ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.4f}, Val RMSE: {val_rmse:.4f}, R²: {r2:.4f}, CI (95%):[{ci_lower:.4f}, {ci_upper:.4f}]")

Epoch: 1, Train Loss: 226.9515, Val RMSE: 4.7634, R²: -25.4796, CI (95%):[4.5957, 4.9254]
Epoch: 2, Train Loss: 50.0830, Val RMSE: 3.2526, R²: -11.3462, CI (95%):[3.0800, 3.4165]
Epoch: 3, Train Loss: 23.5278, Val RMSE: 2.2275, R²: -4.7904, CI (95%):[2.1062, 2.3425]
Epoch: 4, Train Loss: 10.4504, Val RMSE: 1.8344, R²: -2.9272, CI (95%):[1.7423, 1.9222]
Epoch: 5, Train Loss: 5.3104, Val RMSE: 1.1661, R²: -0.5870, CI (95%):[1.0943, 1.2338]
Epoch: 6, Train Loss: 3.0357, Val RMSE: 1.0770, R²: -0.3536, CI (95%):[1.0244, 1.1270]
Epoch: 7, Train Loss: 2.2403, Val RMSE: 0.9267, R²: -0.0021, CI (95%):[0.8794, 0.9716]
Epoch: 8, Train Loss: 1.6545, Val RMSE: 0.8422, R²: 0.1722, CI (95%):[0.8005, 0.8819]
Epoch: 9, Train Loss: 1.4914, Val RMSE: 0.8049, R²: 0.2440, CI (95%):[0.7608, 0.8467]
Epoch: 10, Train Loss: 1.2161, Val RMSE: 0.8531, R²: 0.1506, CI (95%):[0.8146, 0.8900]
Epoch: 11, Train Loss: 1.1673, Val RMSE: 0.8686, R²: 0.1195, CI (95%):[0.8308, 0.9049]
Epoch: 12, Train Loss: 1.0342, Val RMS

No, this one performed much worse. Let's try a more complex method, set2set from torch_geometric. Changes in gcn_change3

In [None]:
from gcn_change3 import GCN
# Set seed for reproducibility
torch.manual_seed(42)

num_node = train_graph_list[0].x.shape[1]
edge_attr = train_graph_list[0].edge_attr.shape[1]
u_d = train_graph_list[0].u.shape[1]

model = GCN(num_node_features=num_node,
            edge_attr_dim=edge_attr,
            u_dim=u_d,
            hidden_dim=64,
            output_dim=1).to(device)

criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay= 0.01) # introduced AdamW
scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6) # introduced Cosine Annealign Scheduler

# Training loop
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    model.train()
    train_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.num_graphs
    train_loss /= len(train_loader.dataset)

    # Validation step
    model.eval()
    all_preds, all_targets = [], []
    val_loss = 0
    with torch.no_grad():
        for data in val_loader:
            data = data.to(device)
            output = model(data)
            target = data.y.view(data.num_graphs, -1).to(device)
            loss = criterion(output, target) #get loss based on criterion
            val_loss += loss.item() * data.num_graphs
            all_preds.extend(output.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
    val_loss /= len(val_loader.dataset) #compute validation loss
    val_rmse = val_loss ** 0.5

    scheduler.step()

    # Compute R^2
    all_preds = np.array(all_preds).flatten()
    all_targets = np.array(all_targets).flatten()
    r2 = r2_score(all_targets, all_preds)

    # Compute 95% Confidence Interval for RMSE
    confidence = 0.95
    squared_errors = (all_preds - all_targets) ** 2
    mean_se = np.mean(squared_errors)
    se = stats.sem(squared_errors)
    interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
    ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.4f}, Val RMSE: {val_rmse:.4f}, R²: {r2:.4f}, CI (95%):[{ci_lower:.4f}, {ci_upper:.4f}]")

Epoch: 1, Train Loss: 516.4796, Val RMSE: 4.2576, R²: -20.1545, CI (95%):[3.8954, 4.5913]
Epoch: 2, Train Loss: 19.3141, Val RMSE: 3.1152, R²: -10.3251, CI (95%):[2.8519, 3.3579]
Epoch: 3, Train Loss: 10.5381, Val RMSE: 2.2250, R²: -4.7774, CI (95%):[2.0504, 2.3869]
Epoch: 4, Train Loss: 6.3784, Val RMSE: 1.7072, R²: -2.4012, CI (95%):[1.5895, 1.8172]
Epoch: 5, Train Loss: 4.0824, Val RMSE: 1.2648, R²: -0.8668, CI (95%):[1.1765, 1.3473]
Epoch: 6, Train Loss: 2.9577, Val RMSE: 1.0395, R²: -0.2611, CI (95%):[0.9807, 1.0953]
Epoch: 7, Train Loss: 2.3172, Val RMSE: 0.9541, R²: -0.0624, CI (95%):[0.9000, 1.0053]
Epoch: 8, Train Loss: 1.9219, Val RMSE: 0.8058, R²: 0.2422, CI (95%):[0.7564, 0.8523]
Epoch: 9, Train Loss: 1.6122, Val RMSE: 0.8201, R²: 0.2150, CI (95%):[0.7754, 0.8625]
Epoch: 10, Train Loss: 1.4029, Val RMSE: 0.7626, R²: 0.3212, CI (95%):[0.7211, 0.8020]
Epoch: 11, Train Loss: 1.1904, Val RMSE: 0.8269, R²: 0.2020, CI (95%):[0.7875, 0.8645]
Epoch: 12, Train Loss: 1.0097, Val RMSE

In [None]:
# Testing
model.eval()
test_loss = 0
all_preds, all_targets = [], []

with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        test_loss += loss.item() * data.num_graphs
        all_preds.extend(output.cpu().numpy())
        all_targets.extend(target.cpu().numpy())
test_loss /= len(test_loader.dataset)
test_rmse = test_loss ** 0.5

# Compute R^2
all_preds = np.array(all_preds).flatten()
all_targets = np.array(all_targets).flatten()
r2 = r2_score(all_targets, all_preds)

# Compute 95% Confidence Interval for RMSE
confidence = 0.95
squared_errors = (all_preds - all_targets) ** 2
mean_se = np.mean(squared_errors)
se = stats.sem(squared_errors)
interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

print(f"Test RMSE: {test_rmse:.4f}, R²: {r2:.4f}, CI (95%): [{ci_lower:.4f}, {ci_upper:.4f}]")


Test RMSE: 0.5397, R²: 0.5629, CI (95%): [0.5107, 0.5671]


This one was also slightly worse, might come back to it later in time. For now, lets just roll with change 1 of including the layer norm and dropout.

One last change I am going to try is just adding another fully connected layer to allow for some more learning wiht the global features.

In [None]:
from gcn_change4 import GCN
# Set seed for reproducibility
torch.manual_seed(42)

num_node = train_graph_list[0].x.shape[1]
edge_attr = train_graph_list[0].edge_attr.shape[1]
u_d = train_graph_list[0].u.shape[1]

model = GCN(num_node_features=num_node,
            edge_attr_dim=edge_attr,
            u_dim=u_d,
            hidden_dim=64,
            output_dim=1).to(device)

criterion = nn.MSELoss()
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay= 0.01) # introduced AdamW
scheduler = CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6) # introduced Cosine Annealign Scheduler

# Training loop
num_epochs = 50
for epoch in range(1, num_epochs + 1):
    model.train()
    train_loss = 0
    for data in train_loader:
        data = data.to(device)
        optimizer.zero_grad()
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * data.num_graphs
    train_loss /= len(train_loader.dataset)

    # Validation step
    model.eval()
    all_preds, all_targets = [], []
    val_loss = 0
    with torch.no_grad():
        for data in val_loader:
            data = data.to(device)
            output = model(data)
            target = data.y.view(data.num_graphs, -1).to(device)
            loss = criterion(output, target) #get loss based on criterion
            val_loss += loss.item() * data.num_graphs
            all_preds.extend(output.cpu().numpy())
            all_targets.extend(target.cpu().numpy())
    val_loss /= len(val_loader.dataset) #compute validation loss
    val_rmse = val_loss ** 0.5

    scheduler.step()

    # Compute R^2
    all_preds = np.array(all_preds).flatten()
    all_targets = np.array(all_targets).flatten()
    r2 = r2_score(all_targets, all_preds)

    # Compute 95% Confidence Interval for RMSE
    confidence = 0.95
    squared_errors = (all_preds - all_targets) ** 2
    mean_se = np.mean(squared_errors)
    se = stats.sem(squared_errors)
    interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
    ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

    print(f"Epoch: {epoch}, Train Loss: {train_loss:.4f}, Val RMSE: {val_rmse:.4f}, R²: {r2:.4f}, CI (95%):[{ci_lower:.4f}, {ci_upper:.4f}]")

Epoch: 1, Train Loss: 90.8583, Val RMSE: 1.0416, R²: -0.2660, CI (95%):[0.9052, 1.1620]
Epoch: 2, Train Loss: 7.4894, Val RMSE: 0.7955, R²: 0.2615, CI (95%):[0.7568, 0.8324]
Epoch: 3, Train Loss: 1.8274, Val RMSE: 0.7011, R²: 0.4263, CI (95%):[0.6670, 0.7336]
Epoch: 4, Train Loss: 1.1954, Val RMSE: 0.7213, R²: 0.3929, CI (95%):[0.6876, 0.7535]
Epoch: 5, Train Loss: 0.9093, Val RMSE: 0.6845, R²: 0.4532, CI (95%):[0.6502, 0.7171]
Epoch: 6, Train Loss: 0.7990, Val RMSE: 0.7235, R²: 0.3891, CI (95%):[0.6910, 0.7546]
Epoch: 7, Train Loss: 0.7290, Val RMSE: 0.6741, R²: 0.4696, CI (95%):[0.6425, 0.7044]
Epoch: 8, Train Loss: 0.6761, Val RMSE: 0.6970, R²: 0.4331, CI (95%):[0.6655, 0.7271]
Epoch: 9, Train Loss: 0.6662, Val RMSE: 0.6847, R²: 0.4528, CI (95%):[0.6531, 0.7150]
Epoch: 10, Train Loss: 0.6250, Val RMSE: 0.6652, R²: 0.4836, CI (95%):[0.6346, 0.6944]
Epoch: 11, Train Loss: 0.6079, Val RMSE: 0.6398, R²: 0.5223, CI (95%):[0.6090, 0.6692]
Epoch: 12, Train Loss: 0.5917, Val RMSE: 0.6511, R

In [None]:
# Testing
model.eval()
test_loss = 0
all_preds, all_targets = [], []

with torch.no_grad():
    for data in test_loader:
        data = data.to(device)
        output = model(data)
        target = data.y.view(data.num_graphs, -1).to(device)
        loss = criterion(output, target)
        test_loss += loss.item() * data.num_graphs
        all_preds.extend(output.cpu().numpy())
        all_targets.extend(target.cpu().numpy())
test_loss /= len(test_loader.dataset)
test_rmse = test_loss ** 0.5

# Compute R^2
all_preds = np.array(all_preds).flatten()
all_targets = np.array(all_targets).flatten()
r2 = r2_score(all_targets, all_preds)

# Compute 95% Confidence Interval for RMSE
confidence = 0.95
squared_errors = (all_preds - all_targets) ** 2
mean_se = np.mean(squared_errors)
se = stats.sem(squared_errors)
interval = stats.t.interval(confidence, len(squared_errors)-1, loc=mean_se, scale=se)
ci_lower, ci_upper = np.sqrt(interval[0]), np.sqrt(interval[1])

print(f"Test RMSE: {test_rmse:.4f}, R²: {r2:.4f}, CI (95%): [{ci_lower:.4f}, {ci_upper:.4f}]")


Test RMSE: 0.4534, R²: 0.6914, CI (95%): [0.4285, 0.4771]


Ok yeah! This one performs the best and has the highest R^2. Nice.

Now I will implement a Bayesian sweep for hyperparam search using WandB

In [None]:
import wandb
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mraiyann-j[0m ([33mraiyann-j-university-of-toronto[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [None]:
from gcn_change4 import GCN

sweep_config = {
    'method': 'bayes',
    'metric': {
        'name': 'val_rmse',
        'goal': 'minimize'
    },
    'parameters': {
        'epochs': {
            'values': [40, 50, 60]
        },
        'lr': {
            'min': 0.0008,
            'max': 0.0012,
            'distribution': 'uniform'
        },
        'weight_decay': {
            'min': 0.005,
            'max': 0.015,
            'distribution': 'uniform'
        }
    }
}
sweep_id = wandb.sweep(sweep_config, project="GCN_Sweep")


Create sweep with ID: y5dau394
Sweep URL: https://wandb.ai/raiyann-j-university-of-toronto/GCN_Sweep/sweeps/y5dau394


In [None]:
def train():
    wandb.init()
    config = wandb.config

    # Set random seed for reproducibility
    torch.manual_seed(42)

    num_node = train_graph_list[0].x.shape[1]
    edge_attr = train_graph_list[0].edge_attr.shape[1]
    u_d = train_graph_list[0].u.shape[1]

    model = GCN(num_node_features=num_node,
                edge_attr_dim=edge_attr,
                u_dim=u_d,
                hidden_dim=64,
                output_dim=1).to(device)

    criterion = nn.MSELoss()
    optimizer = optim.AdamW(model.parameters(), lr=config.lr, weight_decay=config.weight_decay)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=50, eta_min=1e-6)

    num_epochs = config.epochs
    for epoch in range(1, num_epochs + 1):
        model.train()
        train_loss = 0
        for data in train_loader:
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            target = data.y.view(data.num_graphs, -1).to(device)
            loss = criterion(output, target)
            loss.backward()
            optimizer.step()
            train_loss += loss.item() * data.num_graphs
        train_loss /= len(train_loader.dataset)

        # Validation
        model.eval()
        all_preds, all_targets = [], []
        val_loss = 0
        with torch.no_grad():
            for data in val_loader:
                data = data.to(device)
                output = model(data)
                target = data.y.view(data.num_graphs, -1).to(device)
                loss = criterion(output, target)
                val_loss += loss.item() * data.num_graphs
                all_preds.extend(output.cpu().numpy())
                all_targets.extend(target.cpu().numpy())
        val_loss /= len(val_loader.dataset)
        val_rmse = val_loss ** 0.5

        # Compute R² score
        all_preds = np.array(all_preds).flatten()
        all_targets = np.array(all_targets).flatten()
        r2 = r2_score(all_targets, all_preds)

        wandb.log({
            "epoch": epoch,
            "train_loss": train_loss,
            "val_rmse": val_rmse,
            "r2": r2
        }, step=epoch)

        scheduler.step()
        print(f"Epoch: {epoch}, Train Loss: {train_loss:.4f}, Val RMSE: {val_rmse:.4f}, R²: {r2:.4f}")

    wandb.finish()

In [17]:
wandb.agent(sweep_id, train, count=10)

[34m[1mwandb[0m: Agent Starting Run: htl877ui with config:
[34m[1mwandb[0m: 	epochs: 60
[34m[1mwandb[0m: 	lr: 0.000886187460885644
[34m[1mwandb[0m: 	weight_decay: 0.012323978864357


Epoch: 1, Train Loss: 100.7067, Val RMSE: 1.0571, R²: -0.3042
Epoch: 2, Train Loss: 9.7262, Val RMSE: 0.8940, R²: 0.0672
Epoch: 3, Train Loss: 3.1292, Val RMSE: 0.8337, R²: 0.1888
Epoch: 4, Train Loss: 1.4636, Val RMSE: 0.6743, R²: 0.4694
Epoch: 5, Train Loss: 1.0500, Val RMSE: 0.6816, R²: 0.4578
Epoch: 6, Train Loss: 0.8153, Val RMSE: 0.6853, R²: 0.4519
Epoch: 7, Train Loss: 0.7479, Val RMSE: 0.6579, R²: 0.4948
Epoch: 8, Train Loss: 0.6793, Val RMSE: 0.6374, R²: 0.5259
Epoch: 9, Train Loss: 0.6625, Val RMSE: 0.6767, R²: 0.4656
Epoch: 10, Train Loss: 0.6221, Val RMSE: 0.6362, R²: 0.5276
Epoch: 11, Train Loss: 0.6112, Val RMSE: 0.6194, R²: 0.5523
Epoch: 12, Train Loss: 0.6048, Val RMSE: 0.6241, R²: 0.5454
Epoch: 13, Train Loss: 0.5979, Val RMSE: 0.6498, R²: 0.5073
Epoch: 14, Train Loss: 0.6037, Val RMSE: 0.5954, R²: 0.5862
Epoch: 15, Train Loss: 0.5841, Val RMSE: 0.5782, R²: 0.6099
Epoch: 16, Train Loss: 0.5975, Val RMSE: 0.6333, R²: 0.5319
Epoch: 17, Train Loss: 0.5633, Val RMSE: 0.563

0,1
epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
r2,▁▄▄▆▆▆▇▆▇▇▇▇▇▇▇█████████████████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▆▅▃▃▃▃▃▃▂▃▂▂▂▂▂▂▁▂▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
r2,0.69703
train_loss,0.40586
val_rmse,0.50952


[34m[1mwandb[0m: Agent Starting Run: khs9lsrm with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	lr: 0.0011041599374830807
[34m[1mwandb[0m: 	weight_decay: 0.011056832182907636


Epoch: 1, Train Loss: 83.4974, Val RMSE: 1.0425, R²: -0.2683
Epoch: 2, Train Loss: 6.3893, Val RMSE: 0.9072, R²: 0.0394
Epoch: 3, Train Loss: 1.4730, Val RMSE: 0.7037, R²: 0.4221
Epoch: 4, Train Loss: 1.0636, Val RMSE: 0.7027, R²: 0.4238
Epoch: 5, Train Loss: 0.7913, Val RMSE: 0.6960, R²: 0.4347
Epoch: 6, Train Loss: 0.7368, Val RMSE: 0.7141, R²: 0.4048
Epoch: 7, Train Loss: 0.6983, Val RMSE: 0.6664, R²: 0.4818
Epoch: 8, Train Loss: 0.7014, Val RMSE: 0.6492, R²: 0.5081
Epoch: 9, Train Loss: 0.6396, Val RMSE: 0.6850, R²: 0.4524
Epoch: 10, Train Loss: 0.6245, Val RMSE: 0.7150, R²: 0.4034
Epoch: 11, Train Loss: 0.6481, Val RMSE: 0.6677, R²: 0.4798
Epoch: 12, Train Loss: 0.6046, Val RMSE: 0.7502, R²: 0.3432
Epoch: 13, Train Loss: 0.6023, Val RMSE: 0.6522, R²: 0.5035
Epoch: 14, Train Loss: 0.5706, Val RMSE: 0.6339, R²: 0.5311
Epoch: 15, Train Loss: 0.5784, Val RMSE: 0.6032, R²: 0.5754
Epoch: 16, Train Loss: 0.5624, Val RMSE: 0.6027, R²: 0.5760
Epoch: 17, Train Loss: 0.5422, Val RMSE: 0.6020

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
r2,▁▃▆▆▆▇▆▆▇▆▇▇▇▇▇▇▇▇▇▇▇█▇█████████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▆▃▃▃▃▃▄▃▄▂▂▂▂▃▂▂▂▂▂▂▂▂▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
r2,0.67318
train_loss,0.42406
val_rmse,0.5292


[34m[1mwandb[0m: Agent Starting Run: 3f9pd5r6 with config:
[34m[1mwandb[0m: 	epochs: 60
[34m[1mwandb[0m: 	lr: 0.0008868488378563112
[34m[1mwandb[0m: 	weight_decay: 0.012100751850523676


Epoch: 1, Train Loss: 100.7375, Val RMSE: 1.0564, R²: -0.3025
Epoch: 2, Train Loss: 9.6488, Val RMSE: 0.9110, R²: 0.0315
Epoch: 3, Train Loss: 2.9945, Val RMSE: 0.8296, R²: 0.1969
Epoch: 4, Train Loss: 1.4274, Val RMSE: 0.6907, R²: 0.4433
Epoch: 5, Train Loss: 1.0221, Val RMSE: 0.6827, R²: 0.4561
Epoch: 6, Train Loss: 0.8210, Val RMSE: 0.7086, R²: 0.4140
Epoch: 7, Train Loss: 0.7968, Val RMSE: 0.6634, R²: 0.4864
Epoch: 8, Train Loss: 0.6960, Val RMSE: 0.6463, R²: 0.5126
Epoch: 9, Train Loss: 0.6616, Val RMSE: 0.6336, R²: 0.5315
Epoch: 10, Train Loss: 0.6324, Val RMSE: 0.6539, R²: 0.5010
Epoch: 11, Train Loss: 0.6312, Val RMSE: 0.6265, R²: 0.5419
Epoch: 12, Train Loss: 0.5885, Val RMSE: 0.6042, R²: 0.5739
Epoch: 13, Train Loss: 0.5757, Val RMSE: 0.6079, R²: 0.5687
Epoch: 14, Train Loss: 0.5815, Val RMSE: 0.6194, R²: 0.5522
Epoch: 15, Train Loss: 0.5575, Val RMSE: 0.5819, R²: 0.6049
Epoch: 16, Train Loss: 0.5425, Val RMSE: 0.5709, R²: 0.6196
Epoch: 17, Train Loss: 0.5325, Val RMSE: 0.571

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇███
r2,▁▃▅▅▆▆▆▇▆▇▆▇▇▇▇▇██▇▇████████████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▆▅▄▃▃▃▃▂▂▂▂▂▃▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
r2,0.70486
train_loss,0.41213
val_rmse,0.50289


[34m[1mwandb[0m: Agent Starting Run: a5foxdq7 with config:
[34m[1mwandb[0m: 	epochs: 40
[34m[1mwandb[0m: 	lr: 0.001165414220608254
[34m[1mwandb[0m: 	weight_decay: 0.014628062036721111


Epoch: 1, Train Loss: 79.6112, Val RMSE: 1.0137, R²: -0.1992
Epoch: 2, Train Loss: 5.7578, Val RMSE: 0.8035, R²: 0.2465
Epoch: 3, Train Loss: 1.2716, Val RMSE: 0.6811, R²: 0.4587
Epoch: 4, Train Loss: 1.0293, Val RMSE: 0.6743, R²: 0.4694
Epoch: 5, Train Loss: 0.7446, Val RMSE: 0.6636, R²: 0.4862
Epoch: 6, Train Loss: 0.7092, Val RMSE: 0.6978, R²: 0.4317
Epoch: 7, Train Loss: 0.7020, Val RMSE: 0.6848, R²: 0.4527
Epoch: 8, Train Loss: 0.6985, Val RMSE: 0.6309, R²: 0.5355
Epoch: 9, Train Loss: 0.6344, Val RMSE: 0.7025, R²: 0.4241
Epoch: 10, Train Loss: 0.5948, Val RMSE: 0.6499, R²: 0.5071
Epoch: 11, Train Loss: 0.5897, Val RMSE: 0.6345, R²: 0.5301
Epoch: 12, Train Loss: 0.5724, Val RMSE: 0.6269, R²: 0.5414
Epoch: 13, Train Loss: 0.5645, Val RMSE: 0.6086, R²: 0.5677
Epoch: 14, Train Loss: 0.5533, Val RMSE: 0.6276, R²: 0.5403
Epoch: 15, Train Loss: 0.5664, Val RMSE: 0.5732, R²: 0.6166
Epoch: 16, Train Loss: 0.5529, Val RMSE: 0.5687, R²: 0.6226
Epoch: 17, Train Loss: 0.5310, Val RMSE: 0.5712

0,1
epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
r2,▁▅▆▆▆▆▆▇▆▇▇▇▇▇▇█▇▇▇▇▇▇▇▇█▇▇▇██▇█████████
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▃▃▃▄▃▃▄▃▃▃▂▃▂▂▂▃▂▂▂▂▂▂▂▂▂▂▁▁▂▂▂▂▁▁▁▁▁▁

0,1
epoch,40.0
r2,0.68451
train_loss,0.44511
val_rmse,0.51994


[34m[1mwandb[0m: Agent Starting Run: h85hyjnj with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	lr: 0.0010106416869554236
[34m[1mwandb[0m: 	weight_decay: 0.00675403800099531


Epoch: 1, Train Loss: 90.0357, Val RMSE: 1.0265, R²: -0.2297
Epoch: 2, Train Loss: 7.0563, Val RMSE: 0.8913, R²: 0.0728
Epoch: 3, Train Loss: 2.0408, Val RMSE: 0.8367, R²: 0.1829
Epoch: 4, Train Loss: 1.2390, Val RMSE: 0.7518, R²: 0.3403
Epoch: 5, Train Loss: 1.0122, Val RMSE: 0.7247, R²: 0.3871
Epoch: 6, Train Loss: 0.8066, Val RMSE: 0.7432, R²: 0.3553
Epoch: 7, Train Loss: 0.7386, Val RMSE: 0.6858, R²: 0.4511
Epoch: 8, Train Loss: 0.7419, Val RMSE: 0.7157, R²: 0.4022
Epoch: 9, Train Loss: 0.6894, Val RMSE: 0.7025, R²: 0.4241
Epoch: 10, Train Loss: 0.6539, Val RMSE: 0.7233, R²: 0.3895
Epoch: 11, Train Loss: 0.6319, Val RMSE: 0.6719, R²: 0.4731
Epoch: 12, Train Loss: 0.6207, Val RMSE: 0.6590, R²: 0.4931
Epoch: 13, Train Loss: 0.6161, Val RMSE: 0.6330, R²: 0.5324
Epoch: 14, Train Loss: 0.6103, Val RMSE: 0.6815, R²: 0.4580
Epoch: 15, Train Loss: 0.6014, Val RMSE: 0.6370, R²: 0.5264
Epoch: 16, Train Loss: 0.5860, Val RMSE: 0.5979, R²: 0.5828
Epoch: 17, Train Loss: 0.5836, Val RMSE: 0.6265

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
r2,▁▃▄▅▅▆▆▆▆▇▆▇▇▇▆▇▇▇▇▇█▇██▇███████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▄▄▄▄▄▄▃▃▃▃▂▃▃▃▂▂▂▂▂▂▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
r2,0.66741
train_loss,0.42529
val_rmse,0.53385


[34m[1mwandb[0m: Agent Starting Run: 191byxkq with config:
[34m[1mwandb[0m: 	epochs: 60
[34m[1mwandb[0m: 	lr: 0.0009105444714952368
[34m[1mwandb[0m: 	weight_decay: 0.012601351515500445


Epoch: 1, Train Loss: 98.4241, Val RMSE: 1.0475, R²: -0.2805
Epoch: 2, Train Loss: 9.1362, Val RMSE: 0.9007, R²: 0.0533
Epoch: 3, Train Loss: 2.5265, Val RMSE: 0.6996, R²: 0.4288
Epoch: 4, Train Loss: 1.3577, Val RMSE: 0.7105, R²: 0.4109
Epoch: 5, Train Loss: 0.9751, Val RMSE: 0.6745, R²: 0.4691
Epoch: 6, Train Loss: 0.8198, Val RMSE: 0.7158, R²: 0.4021
Epoch: 7, Train Loss: 0.6983, Val RMSE: 0.6532, R²: 0.5021
Epoch: 8, Train Loss: 0.6728, Val RMSE: 0.6684, R²: 0.4786
Epoch: 9, Train Loss: 0.6488, Val RMSE: 0.6676, R²: 0.4799
Epoch: 10, Train Loss: 0.6056, Val RMSE: 0.6325, R²: 0.5331
Epoch: 11, Train Loss: 0.5995, Val RMSE: 0.6261, R²: 0.5425
Epoch: 12, Train Loss: 0.5774, Val RMSE: 0.6299, R²: 0.5370
Epoch: 13, Train Loss: 0.5794, Val RMSE: 0.6180, R²: 0.5543
Epoch: 14, Train Loss: 0.5749, Val RMSE: 0.5929, R²: 0.5898
Epoch: 15, Train Loss: 0.5565, Val RMSE: 0.5818, R²: 0.6050
Epoch: 16, Train Loss: 0.5496, Val RMSE: 0.5747, R²: 0.6145
Epoch: 17, Train Loss: 0.5268, Val RMSE: 0.5671

0,1
epoch,▁▁▁▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
r2,▁▁▂▃▃▄▄▄▅▆▄▅▆▆▇▇▇▆▇▇█▇▇▇████████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▄▄▄▃▃▃▃▂▂▂▂▃▂▂▂▂▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
r2,0.70405
train_loss,0.39464
val_rmse,0.50358


[34m[1mwandb[0m: Agent Starting Run: 2y3yytt5 with config:
[34m[1mwandb[0m: 	epochs: 60
[34m[1mwandb[0m: 	lr: 0.0008935532131641973
[34m[1mwandb[0m: 	weight_decay: 0.006955968863499973


Epoch: 1, Train Loss: 100.1350, Val RMSE: 1.0431, R²: -0.2697
Epoch: 2, Train Loss: 9.1208, Val RMSE: 0.8993, R²: 0.0563
Epoch: 3, Train Loss: 2.8384, Val RMSE: 0.6910, R²: 0.4428
Epoch: 4, Train Loss: 1.4413, Val RMSE: 0.7158, R²: 0.4021
Epoch: 5, Train Loss: 1.0858, Val RMSE: 0.6777, R²: 0.4640
Epoch: 6, Train Loss: 0.8368, Val RMSE: 0.7268, R²: 0.3835
Epoch: 7, Train Loss: 0.7433, Val RMSE: 0.6463, R²: 0.5126
Epoch: 8, Train Loss: 0.6762, Val RMSE: 0.6665, R²: 0.4816
Epoch: 9, Train Loss: 0.6606, Val RMSE: 0.6773, R²: 0.4647
Epoch: 10, Train Loss: 0.6161, Val RMSE: 0.6828, R²: 0.4559
Epoch: 11, Train Loss: 0.6082, Val RMSE: 0.6355, R²: 0.5287
Epoch: 12, Train Loss: 0.5769, Val RMSE: 0.6395, R²: 0.5228
Epoch: 13, Train Loss: 0.5691, Val RMSE: 0.6337, R²: 0.5314
Epoch: 14, Train Loss: 0.5761, Val RMSE: 0.6206, R²: 0.5506
Epoch: 15, Train Loss: 0.5578, Val RMSE: 0.5972, R²: 0.5838
Epoch: 16, Train Loss: 0.5612, Val RMSE: 0.6063, R²: 0.5710
Epoch: 17, Train Loss: 0.5241, Val RMSE: 0.562

0,1
epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇████
r2,▁▆▆▇▆▆▇▇▇▇▇▇▇▇█▇▇▇██████████████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▄▅▄▅▄▄▃▄▃▃▂▃▃▂▂▃▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,60.0
r2,0.70799
train_loss,0.39077
val_rmse,0.50022


[34m[1mwandb[0m: Agent Starting Run: nlnclbeb with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	lr: 0.001057138036105804
[34m[1mwandb[0m: 	weight_decay: 0.01478506977883988


Epoch: 1, Train Loss: 86.5968, Val RMSE: 1.0302, R²: -0.2385
Epoch: 2, Train Loss: 6.8707, Val RMSE: 0.9013, R²: 0.0520
Epoch: 3, Train Loss: 1.6507, Val RMSE: 0.7339, R²: 0.3714
Epoch: 4, Train Loss: 1.0812, Val RMSE: 0.7082, R²: 0.4147
Epoch: 5, Train Loss: 0.8512, Val RMSE: 0.6842, R²: 0.4536
Epoch: 6, Train Loss: 0.7687, Val RMSE: 0.7121, R²: 0.4082
Epoch: 7, Train Loss: 0.7012, Val RMSE: 0.6571, R²: 0.4961
Epoch: 8, Train Loss: 0.6706, Val RMSE: 0.6738, R²: 0.4702
Epoch: 9, Train Loss: 0.6384, Val RMSE: 0.6640, R²: 0.4855
Epoch: 10, Train Loss: 0.6093, Val RMSE: 0.6566, R²: 0.4969
Epoch: 11, Train Loss: 0.5944, Val RMSE: 0.6172, R²: 0.5555
Epoch: 12, Train Loss: 0.5781, Val RMSE: 0.6290, R²: 0.5383
Epoch: 13, Train Loss: 0.5712, Val RMSE: 0.6266, R²: 0.5418
Epoch: 14, Train Loss: 0.5575, Val RMSE: 0.5817, R²: 0.6051
Epoch: 15, Train Loss: 0.5681, Val RMSE: 0.5878, R²: 0.5968
Epoch: 16, Train Loss: 0.5588, Val RMSE: 0.6113, R²: 0.5639
Epoch: 17, Train Loss: 0.5418, Val RMSE: 0.5559

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
r2,▁▃▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇██▇███████████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▄▄▄▄▃▃▃▃▃▂▃▂▃▂▂▂▂▂▁▁▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
r2,0.70365
train_loss,0.40281
val_rmse,0.50392


[34m[1mwandb[0m: Agent Starting Run: mkkc11g4 with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	lr: 0.0010890883200160078
[34m[1mwandb[0m: 	weight_decay: 0.00797417825416806


Epoch: 1, Train Loss: 84.4610, Val RMSE: 1.0149, R²: -0.2020
Epoch: 2, Train Loss: 6.2977, Val RMSE: 0.9085, R²: 0.0369
Epoch: 3, Train Loss: 1.5649, Val RMSE: 0.7255, R²: 0.3857
Epoch: 4, Train Loss: 1.0519, Val RMSE: 0.6868, R²: 0.4496
Epoch: 5, Train Loss: 0.7885, Val RMSE: 0.6789, R²: 0.4621
Epoch: 6, Train Loss: 0.6966, Val RMSE: 0.6869, R²: 0.4494
Epoch: 7, Train Loss: 0.6788, Val RMSE: 0.6459, R²: 0.5132
Epoch: 8, Train Loss: 0.6608, Val RMSE: 0.6756, R²: 0.4673
Epoch: 9, Train Loss: 0.6251, Val RMSE: 0.6966, R²: 0.4337
Epoch: 10, Train Loss: 0.6162, Val RMSE: 0.7057, R²: 0.4189
Epoch: 11, Train Loss: 0.6333, Val RMSE: 0.6497, R²: 0.5074
Epoch: 12, Train Loss: 0.5888, Val RMSE: 0.6481, R²: 0.5098
Epoch: 13, Train Loss: 0.5796, Val RMSE: 0.6333, R²: 0.5320
Epoch: 14, Train Loss: 0.5605, Val RMSE: 0.6187, R²: 0.5533
Epoch: 15, Train Loss: 0.5683, Val RMSE: 0.5937, R²: 0.5886
Epoch: 16, Train Loss: 0.5687, Val RMSE: 0.6035, R²: 0.5750
Epoch: 17, Train Loss: 0.5398, Val RMSE: 0.5679

0,1
epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
r2,▁▃▆▆▆▇▆▆▇▇▇▇▇▇▇▇▇█▇██▇██▇███████████████
train_loss,█▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▇▄▃▃▃▃▄▃▃▂▂▂▂▃▂▂▂▂▂▂▁▁▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
r2,0.68749
train_loss,0.42413
val_rmse,0.51748


[34m[1mwandb[0m: Agent Starting Run: zdfxdg5v with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	lr: 0.00080466220371861
[34m[1mwandb[0m: 	weight_decay: 0.01085875929346324


Epoch: 1, Train Loss: 109.5848, Val RMSE: 1.1160, R²: -0.4533
Epoch: 2, Train Loss: 12.1983, Val RMSE: 0.8232, R²: 0.2092
Epoch: 3, Train Loss: 3.8030, Val RMSE: 0.7294, R²: 0.3791
Epoch: 4, Train Loss: 1.6322, Val RMSE: 0.7002, R²: 0.4279
Epoch: 5, Train Loss: 1.0300, Val RMSE: 0.6996, R²: 0.4288
Epoch: 6, Train Loss: 0.8698, Val RMSE: 0.7303, R²: 0.3777
Epoch: 7, Train Loss: 0.7946, Val RMSE: 0.6731, R²: 0.4712
Epoch: 8, Train Loss: 0.7541, Val RMSE: 0.6934, R²: 0.4390
Epoch: 9, Train Loss: 0.8094, Val RMSE: 0.6802, R²: 0.4601
Epoch: 10, Train Loss: 0.6576, Val RMSE: 0.6406, R²: 0.5211
Epoch: 11, Train Loss: 0.6221, Val RMSE: 0.6317, R²: 0.5343
Epoch: 12, Train Loss: 0.5980, Val RMSE: 0.6204, R²: 0.5508
Epoch: 13, Train Loss: 0.6017, Val RMSE: 0.6316, R²: 0.5344
Epoch: 14, Train Loss: 0.5983, Val RMSE: 0.6021, R²: 0.5770
Epoch: 15, Train Loss: 0.5811, Val RMSE: 0.5698, R²: 0.6211
Epoch: 16, Train Loss: 0.5922, Val RMSE: 0.5693, R²: 0.6218
Epoch: 17, Train Loss: 0.5977, Val RMSE: 0.61

0,1
epoch,▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
r2,▁▅▆▆▆▇▆▆▇▇▇▇▇▇▇▇▇██▇████████████████████
train_loss,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
val_rmse,█▅▄▃▃▃▃▃▃▃▃▂▂▂▃▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,50.0
r2,0.70058
train_loss,0.42586
val_rmse,0.50653
