In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch_geometric.data import DataLoader
from sklearn.metrics import (
    confusion_matrix, f1_score, accuracy_score, 
    precision_score, recall_score, roc_auc_score
)
import numpy as np
from tqdm import tqdm
from dataset import MoleculeDataset
from model import MoleculeNet

import mlflow.pytorch
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

In [16]:
# Prefer the first CUDA GPU when one is visible, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("Device selected:", device)

# Point MLflow at the tracking server used by the logging calls in this notebook.
mlflow.set_tracking_uri("http://localhost:5000")

Device selected: cpu


In [17]:
def count_parameters(model):
    """Return the number of trainable scalar weights in ``model``.

    Only parameters whose ``requires_grad`` flag is set are counted; frozen
    parameters are skipped.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

def train_one_epoch(epoch, model, train_loader, optimizer, loss_fn):
    """Run one optimisation pass over ``train_loader`` and report train metrics.

    Args:
        epoch: Epoch index, forwarded to ``calculate_metrics`` as the step.
        model: Network called as ``model(x, edge_attr, edge_index, batch)``.
            Its output is passed through a sigmoid here, i.e. it is treated
            as raw logits.
        train_loader: Iterable of batches exposing ``x``, ``edge_attr``,
            ``edge_index``, ``batch`` and ``y``.
        optimizer: Optimizer updating the parameters of ``model``.
        loss_fn: Callable ``(logits, targets) -> scalar loss``.

    Returns:
        The mean of the per-batch loss values.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    all_preds = []
    all_labels = []
    running_loss = 0.0
    step = 0

    for batch in tqdm(train_loader):
        # Keep the returned object so the code does not rely on .to() being in-place.
        batch = batch.to(device)
        optimizer.zero_grad()
        pred = model(batch.x.float(),
                     batch.edge_attr.float(),
                     batch.edge_index,
                     batch.batch)
        # Flatten rather than squeeze(): squeeze() turns a batch holding a single
        # graph into a 0-d tensor, which no longer matches the shape of the
        # targets and makes the loss call fail on a trailing batch of size one.
        logits = pred.reshape(-1)
        targets = batch.y.float().reshape(-1)
        loss = loss_fn(logits, targets)
        loss.backward()
        optimizer.step()
        # Update tracking
        running_loss += loss.item()
        step += 1
        # Threshold the probabilities at 0.5 to obtain hard class predictions.
        all_preds.append(np.rint(torch.sigmoid(logits).detach().cpu().numpy()))
        all_labels.append(batch.y.detach().cpu().numpy())

    if step == 0:
        # Fail with a readable message instead of an opaque concatenate error.
        raise ValueError("train_loader produced no batches")

    all_preds = np.concatenate(all_preds).ravel()
    all_labels = np.concatenate(all_labels).ravel()

    calculate_metrics(all_preds, all_labels, epoch, "train")
    return running_loss / step

def test(epoch, model, test_loader, loss_fn):
    """Evaluate ``model`` on ``test_loader`` and log metrics plus a confusion matrix.

    The function does not switch the model to evaluation mode itself; the
    caller is expected to have called ``model.eval()`` beforehand.

    Args:
        epoch: Epoch index, forwarded to ``calculate_metrics`` and
            ``log_conf_matrix``.
        model: Network called as ``model(x, edge_attr, edge_index, batch)``.
            Its output is passed through a sigmoid here, i.e. it is treated
            as raw logits.
        test_loader: Iterable of batches exposing ``x``, ``edge_attr``,
            ``edge_index``, ``batch`` and ``y``.
        loss_fn: Callable ``(logits, targets) -> scalar loss``.

    Returns:
        The mean of the per-batch loss values.

    Raises:
        ValueError: If ``test_loader`` yields no batches.
    """
    all_preds = []
    all_preds_raw = []
    all_labels = []
    running_loss = 0.0
    step = 0

    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every forward pass.
    with torch.no_grad():
        for batch in test_loader:
            batch = batch.to(device)
            pred = model(batch.x.float(),
                         batch.edge_attr.float(),
                         batch.edge_index,
                         batch.batch)
            # Flatten rather than squeeze(): squeeze() turns a batch holding a
            # single graph into a 0-d tensor that no longer matches the targets.
            loss = loss_fn(pred.reshape(-1), batch.y.float().reshape(-1))

            # Update tracking
            running_loss += loss.item()
            step += 1
            # Compute the probabilities once and reuse them for both lists.
            probs = torch.sigmoid(pred).detach().cpu().numpy()
            all_preds.append(np.rint(probs))
            all_preds_raw.append(probs)
            all_labels.append(batch.y.detach().cpu().numpy())

    if step == 0:
        # Fail with a readable message instead of an opaque concatenate error.
        raise ValueError("test_loader produced no batches")

    all_preds = np.concatenate(all_preds).ravel()
    all_labels = np.concatenate(all_labels).ravel()
    # Quick sanity peek: raw probabilities of the first batch, then the first
    # ten hard predictions and labels.
    print(all_preds_raw[0][:10])
    print(all_preds[:10])
    print(all_labels[:10])
    calculate_metrics(all_preds, all_labels, epoch, "test")
    log_conf_matrix(all_preds, all_labels, epoch)
    return running_loss / step

def log_conf_matrix(y_pred, y_true, epoch):
    """Render the confusion matrix as a heatmap image and log it to MLflow.

    The image is written to ``data/images/cm_<epoch>.png`` (relative to the
    current working directory) and then uploaded as an MLflow artifact.

    Args:
        y_pred: Predicted binary labels (0/1).
        y_true: Ground-truth binary labels (0/1).
        epoch: Epoch index, used only in the image file name.
    """
    import os  # local import: only needed to create the output directory

    classes = ["0", "1"]
    # scikit-learn expects (y_true, y_pred): rows are true classes, columns are
    # predicted classes. Passing them the other way round transposes the matrix.
    # labels=[0, 1] keeps the matrix 2x2 even when one class is absent, so the
    # DataFrame construction below cannot fail on a shape mismatch.
    cm = confusion_matrix(y_true, y_pred, labels=[0, 1])
    df_cfm = pd.DataFrame(cm, index=classes, columns=classes)

    image_path = f"data/images/cm_{epoch}.png"
    os.makedirs(os.path.dirname(image_path), exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 7))
    try:
        sns.heatmap(df_cfm, annot=True, cmap='Blues', fmt='g', ax=ax)
        ax.set_xlabel("Predicted label")
        ax.set_ylabel("True label")
        fig.savefig(image_path)
    finally:
        # This function runs once per validation round; without closing, every
        # call would leave another open figure in memory.
        plt.close(fig)

    mlflow.log_artifact(image_path)

def calculate_metrics(y_pred, y_true, epoch, type):
    """Print classification metrics and log precision, recall and ROC AUC to MLflow.

    Args:
        y_pred: Predicted binary labels.
        y_true: Ground-truth binary labels.
        epoch: Value used as the MLflow ``step`` of every logged metric.
        type: Suffix appended to the metric names (callers in this file pass
            ``"train"`` or ``"test"``). The name shadows the builtin but is
            kept so existing keyword callers keep working.
    """
    print("\nConfusion matrix:\n", confusion_matrix(y_true, y_pred))
    print("\nF1 score:", f1_score(y_true, y_pred))
    print("\nAccuracy score:", accuracy_score(y_true, y_pred))

    precision = precision_score(y_true, y_pred)
    recall = recall_score(y_true, y_pred)
    print("Precision score:", precision)
    print("Recall score:", recall)

    mlflow.log_metric(key=f"Precision-{type}", value=float(precision), step=epoch)
    mlflow.log_metric(key=f"Recall-{type}", value=float(recall), step=epoch)

    # NOTE(review): callers in this file pass hard 0/1 predictions, so this ROC
    # AUC is computed from thresholded labels rather than probabilities.
    try:
        roc = roc_auc_score(y_true, y_pred)
    except ValueError:
        # Raised when ROC AUC is undefined, e.g. only one class in y_true.
        # Catching ValueError only (not a bare except) lets KeyboardInterrupt
        # and unrelated failures propagate.
        roc = None

    # Presumably some scikit-learn releases return NaN with a warning instead
    # of raising; treat that as "not defined" as well.
    if roc is None or np.isnan(roc):
        mlflow.log_metric(key=f"ROC-AUC-{type}", value=0.0, step=epoch)
        print("[Exception] ROC AUC: Not Defined")
    else:
        print("ROC AUC:", roc)
        mlflow.log_metric(key=f"ROC-AUC-{type}", value=float(roc), step=epoch)

In [18]:
from mango import Tuner, scheduler
from config import HYPERPARAMETERS, BEST_PARAMETERS, SIGNATURE

In [19]:
def run_one_training(params):
    """Tuner objective: train one model and return its best validation loss.

    A fresh MLflow run is opened, the hyperparameters are logged, the train
    and test datasets are loaded, and the model is trained with SGD and an
    exponential learning-rate schedule. Every few epochs the model is
    evaluated; the model is logged to MLflow whenever the validation loss
    improves, and training stops early once it has stopped improving.

    Args:
        params: Sequence whose first element is a dict of hyperparameters;
            only index 0 is used. Keys read here: ``batch_size``,
            ``pos_weight``, ``learning_rate``, ``sgd_momentum``,
            ``weight_decay``, ``scheduler_gamma`` and every key starting with
            ``model_`` (forwarded to ``MoleculeNet``).

    Returns:
        A one-element list holding the lowest validation loss observed.
    """
    max_epochs = 300
    validate_every = 5  # run validation on every 5th epoch
    # Counted in validation rounds, not epochs: training stops once more than
    # `patience` consecutive validations brought no improvement.
    patience = 10

    # Work on a copy: "model_edge_dim" is added below and must not leak back
    # into the dict owned by the tuner.
    params = dict(params[0])
    with mlflow.start_run():
        for key, value in params.items():
            mlflow.log_param(key, value)

        print("Loading dataset...")
        train_dataset = MoleculeDataset(root="../data/", filename="HIV_train.csv")
        test_dataset = MoleculeDataset(root="../data/", filename="HIV_test.csv", test=True)
        params["model_edge_dim"] = train_dataset[0].edge_attr.shape[1]

        train_loader = DataLoader(train_dataset, batch_size=params["batch_size"], shuffle=True)
        # Evaluation does not benefit from shuffling; a fixed order keeps the
        # batch composition, and therefore the averaged loss, deterministic.
        test_loader = DataLoader(test_dataset, batch_size=params["batch_size"], shuffle=False)

        print("Loading model...")
        model_params = {k: v for k, v in params.items() if k.startswith("model_")}
        model = MoleculeNet(feature_size=train_dataset[0].x.shape[1], model_params=model_params)
        model = model.to(device)
        num_params = count_parameters(model)
        print("Number of parameters:", num_params)
        mlflow.log_param("num_params", num_params)

        # < 1 increases precision, > 1 recall
        weight = torch.tensor([params["pos_weight"]], dtype=torch.float32).to(device)
        loss_fn = nn.BCEWithLogitsLoss(pos_weight=weight)
        optimizer = optim.SGD(model.parameters(),
                              lr=params["learning_rate"],
                              momentum=params["sgd_momentum"],
                              weight_decay=params["weight_decay"])
        # Named lr_scheduler so it does not shadow the `scheduler` name that
        # the notebook imports from mango.
        lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=params["scheduler_gamma"])

        # Start training. The sentinel is deliberately finite: this value is
        # handed back to the tuner if no validation ever improves on it.
        best_loss = 1000
        early_stopping_counter = 0
        for epoch in range(max_epochs):
            if early_stopping_counter > patience:
                print("Early stopping due to no improvement.")
                return [best_loss]

            # Training
            model.train()
            loss = train_one_epoch(epoch, model, train_loader, optimizer, loss_fn)
            print(f"Epoch {epoch+1} | Train loss: {loss}")
            mlflow.log_metric(key="Train loss", value=float(loss), step=epoch+1)

            # Validation
            if epoch % validate_every == 0:
                model.eval()
                loss = test(epoch, model, test_loader, loss_fn)
                print(f"Epoch {epoch+1} | Validation loss: {loss}")
                mlflow.log_metric(key="Validation loss", value=float(loss), step=epoch+1)

                if float(loss) < best_loss:
                    best_loss = loss
                    mlflow.pytorch.log_model(model, "model", signature=SIGNATURE)
                    early_stopping_counter = 0
                else:
                    early_stopping_counter += 1

            lr_scheduler.step()

    print("Finishing training with best validation loss:", best_loss)
    return [best_loss]

In [21]:
print("Running Hyperparameter search: ")
# Tuner settings: Bayesian optimisation, 100 iterations.
config = {"optimizer": "Bayesian", "num_iteration": 100}

# Minimise the value returned by run_one_training over the HYPERPARAMETERS space.
tuner = Tuner(HYPERPARAMETERS, objective=run_one_training, conf_dict=config)
results = tuner.minimize()

2024/09/14 23:29:42 INFO mlflow.tracking._tracking_service.client: 🏃 View run grandiose-snake-169 at: http://localhost:5000/#/experiments/0/runs/cb56466dbdfa488b971468df6ec895c4.
2024/09/14 23:29:42 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5000/#/experiments/0.


Running Hyperparameter search: 
Loading dataset...


NotImplementedError: 