In [1]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset
from torch_geometric.data import DataLoader as GeometricDataLoader
from sklearn.model_selection import KFold
from math import sqrt

import os
import sys
import nbimporter

project_root = os.path.join(os.getcwd(), '..')
sys.path.append(project_root)

from datapreparation.Process_1D_data import *
from datapreparation.Process_graph_2d_data import *
from datapreparation.Process_graph_3d_data import *
from datapreparation.Process_mlp_data import *
from cv_strategies.train_cv_strategy_123D import get_data_label

In [2]:
def offline_test_model(model, criterion, model_path, test_loader, device, data_type):
    """Evaluate one saved checkpoint on a test loader and return binary metrics.

    The state dict stored at ``model_path`` is loaded into ``model``, the model
    is run over ``test_loader`` with gradients disabled, and a confusion matrix
    is accumulated with class ``1`` as the positive class and ``0`` as the
    negative class.

    Args:
        model: ``torch.nn.Module`` whose parameters match the checkpoint.
        criterion: Loss object; only its type is inspected. For
            ``torch.nn.BCEWithLogitsLoss`` the outputs are treated as logits and
            thresholded at ``sigmoid(output) > 0.5``; for any other criterion
            the prediction is the argmax over dimension 1.
        model_path: Path of the checkpoint file passed to ``torch.load``.
        test_loader: Iterable of batches understood by ``get_data_label``.
        device: Device the checkpoint is mapped to and the model is moved to.
        data_type: Forwarded to ``get_data_label`` to unpack each batch.

    Returns:
        Tuple ``(ACC, SEN, SPE, MCC)``. Any metric whose denominator is zero
        is reported as ``0``.

    Raises:
        ValueError: If ``get_data_label`` returns ``None`` for inputs or labels,
            or if predictions and labels do not have the same number of
            elements.
    """
    # map_location lets a checkpoint saved on GPU be restored on a CPU-only host.
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)
    model.to(device)
    model.eval()

    uses_logit_threshold = isinstance(criterion, torch.nn.BCEWithLogitsLoss)
    TP = FP = TN = FN = 0

    with torch.no_grad():
        for data in test_loader:
            inputs, labels = get_data_label(data, data_type, device)
            if inputs is None or labels is None:
                raise ValueError(f"Unsupported data type: {data_type}")

            outputs = model(inputs)

            if uses_logit_threshold:
                predicted = torch.sigmoid(outputs) > 0.5
            else:
                predicted = torch.max(outputs, 1)[1]

            # Flatten both sides so that e.g. (N,) vs (N, 1) is compared
            # element-wise instead of being broadcast to an (N, N) grid, which
            # would silently inflate every confusion-matrix count.
            predicted = predicted.reshape(-1)
            labels = labels.reshape(-1)
            if predicted.numel() != labels.numel():
                raise ValueError(
                    f"Prediction/label size mismatch: {predicted.numel()} vs {labels.numel()}"
                )

            predicted_pos = predicted == 1
            predicted_neg = predicted == 0
            label_pos = labels == 1
            label_neg = labels == 0

            TP += (predicted_pos & label_pos).sum().item()
            TN += (predicted_neg & label_neg).sum().item()
            FP += (predicted_pos & label_neg).sum().item()
            FN += (predicted_neg & label_pos).sum().item()

    total = TP + TN + FP + FN
    mcc_denominator = (TP + FP) * (TP + FN) * (TN + FP) * (TN + FN)

    ACC = (TP + TN) / total if total > 0 else 0
    SEN = TP / (TP + FN) if (TP + FN) > 0 else 0
    SPE = TN / (TN + FP) if (TN + FP) > 0 else 0
    MCC = ((TP * TN) - (FP * FN)) / sqrt(mcc_denominator) if mcc_denominator > 0 else 0

    return ACC, SEN, SPE, MCC


# Offline test for 1D, 2D, and 3D data

In [3]:
def integrated_test_model(offlinetest_file, data_type, model, criterion, device, batch_size, model_save_dir, tokenizer=None, atom_numbers=None, mode='test', Preprocess=None, scale_path=None):
    """Build an offline test loader from a CSV and evaluate every saved checkpoint.

    The CSV is read with pandas and must contain ``SMILES`` and ``Label``
    columns. A loader matching ``data_type`` is built, then each regular file
    in ``model_save_dir`` is evaluated with ``offline_test_model`` and its
    metrics are printed.

    Args:
        offlinetest_file: Path to the CSV with ``SMILES`` and ``Label`` columns.
        data_type: One of ``'1d'``, ``'2d'``, ``'3d'`` or ``'3d_voxels'``.
        model: Model instance the checkpoints are loaded into.
        criterion: Loss object forwarded to ``offline_test_model``.
        device: Device forwarded to ``offline_test_model``.
        batch_size: Batch size of the test loader.
        model_save_dir: Directory containing the checkpoint files.
        tokenizer: Passed to ``SMILESDataset`` (``'1d'`` only).
        atom_numbers: Passed to the 2D/3D graph preprocessing helpers.
        mode, Preprocess, scale_path: Passed to ``get_torch_graph_data_list``
            (``'2d'`` and ``'3d'`` only).

    Raises:
        ValueError: If ``data_type`` is not one of the supported values.
    """
    supported_types = ('1d', '2d', '3d', '3d_voxels')
    # Fail fast: previously an unknown type left the loader undefined and only
    # surfaced as an UnboundLocalError once the first checkpoint was evaluated.
    if data_type not in supported_types:
        raise ValueError(f"Unsupported data type: {data_type}")

    offlinetest_smiles_df = pd.read_csv(offlinetest_file)

    if data_type == '1d':
        offlinetest_smiles_list = offlinetest_smiles_df['SMILES'].tolist()
        # 'Positive' maps to 1; every other label value maps to 0.
        offlinetest_labels = offlinetest_smiles_df['Label'].apply(lambda x: 1 if x == 'Positive' else 0).tolist()
        offlinetest_dataset = SMILESDataset(offlinetest_smiles_list, offlinetest_labels, tokenizer)
        offlinetest_loader = DataLoader(offlinetest_dataset, batch_size=batch_size, shuffle=False, collate_fn=collate_fn)
    else:
        offlinetest_smiles_list = offlinetest_smiles_df['SMILES'].values
        # NOTE(review): the raw Label column is passed through unchanged here,
        # unlike the '1d' branch; confirm the preprocessing helpers expect that.
        offlinetest_labels = offlinetest_smiles_df['Label'].values

        if data_type == '2d':
            graph_data_2d = preprocess_2d_graph_data(offlinetest_smiles_list, offlinetest_labels, atom_numbers)
            torch_graph_data_list = get_torch_graph_data_list(graph_data_2d, mode, Preprocess, scale_path)
            offlinetest_loader = GeometricDataLoader(torch_graph_data_list, batch_size=batch_size, shuffle=False)
        elif data_type == '3d':
            graph_data_3d = preprocess_3d_graph(offlinetest_smiles_list, offlinetest_labels, atom_numbers)
            torch_graph_data_list = get_torch_graph_data_list(graph_data_3d, mode, Preprocess, scale_path)
            offlinetest_loader = GeometricDataLoader(torch_graph_data_list, batch_size=batch_size, shuffle=False)
        else:  # '3d_voxels'
            graph_data_3d = preprocess_3d_graph(offlinetest_smiles_list, offlinetest_labels, atom_numbers)
            voxels, labels = get_voxels_labels(graph_data_3d)
            offlinetest_dataset = TensorDataset(voxels, labels)
            offlinetest_loader = DataLoader(offlinetest_dataset, batch_size=batch_size, shuffle=False)

    # Keep regular files only (skips sub-directories such as Jupyter's
    # .ipynb_checkpoints) and sort so the report order is reproducible.
    model_files = sorted(
        entry for entry in os.listdir(model_save_dir)
        if os.path.isfile(os.path.join(model_save_dir, entry))
    )

    for model_file in model_files:
        model_path = os.path.join(model_save_dir, model_file)
        ACC, SEN, SPE, MCC = offline_test_model(model, criterion, model_path, offlinetest_loader, device, data_type)
        print(f"Model: {model_file}, ACC: {ACC:.4f}, SEN: {SEN:.4f}, SPE: {SPE:.4f}, MCC: {MCC:.4f}")


# Offline test for machine learning methods

In [4]:
import joblib
import numpy as np
from sklearn.metrics import accuracy_score, recall_score, matthews_corrcoef, confusion_matrix

def ML_testing(X_test, y_test, model_save_dir, scale_path=None, pca_path=None):
    """Evaluate every saved classical-ML model in a directory on one test set.

    Optionally applies a saved scaler and then a saved PCA transform to
    ``X_test``, loads each regular file in ``model_save_dir`` with ``joblib``,
    and prints accuracy, sensitivity, specificity and MCC per model followed by
    the averages over all models. Label ``1`` is the positive class.

    Args:
        X_test: Feature matrix accepted by the saved transformers/models.
        y_test: True labels, with ``1`` marking the positive class.
        model_save_dir: Directory containing the joblib-serialized models.
        scale_path: Optional path to a joblib-serialized scaler.
        pca_path: Optional path to a joblib-serialized PCA transformer.

    Note:
        ``joblib.load`` unpickles its input and can execute arbitrary code;
        only point this function at files from a trusted source.
    """
    if scale_path is not None:
        scaler = joblib.load(scale_path)
        X_test = scaler.transform(X_test)
    if pca_path is not None:
        pca = joblib.load(pca_path)
        X_test = pca.transform(X_test)

    # Keep regular files only (skips sub-directories such as Jupyter's
    # .ipynb_checkpoints) and sort so the report order is reproducible.
    model_files = sorted(
        entry for entry in os.listdir(model_save_dir)
        if os.path.isfile(os.path.join(model_save_dir, entry))
    )
    if not model_files:
        # Avoid averaging empty lists, which would only print NaN with a warning.
        print(f"No model files found in {model_save_dir}")
        return

    y_true = np.asarray(y_test).ravel()
    is_negative = y_true != 1

    total_accuracy = []
    total_sensitivity = []
    total_specificity = []
    total_mcc = []

    for model_file in model_files:
        model_path = os.path.join(model_save_dir, model_file)
        model = joblib.load(model_path)
        y_pred = model.predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        sensitivity = recall_score(y_test, y_pred, pos_label=1)

        # Count negatives directly: confusion_matrix(...).ravel() cannot be
        # unpacked into four values when only one class is present.
        predicted_positive = np.asarray(y_pred).ravel() == 1
        tn = int(np.sum(is_negative & ~predicted_positive))
        fp = int(np.sum(is_negative & predicted_positive))
        # Report 0 when the test set has no negative samples.
        specificity = tn / (tn + fp) if (tn + fp) > 0 else 0

        mcc = matthews_corrcoef(y_test, y_pred)
        print(f"Model {model_file}: ACC: {accuracy:.4f}, SEN: {sensitivity:.4f}, SPE: {specificity:.4f}, MCC: {mcc:.4f}")

        total_accuracy.append(accuracy)
        total_sensitivity.append(sensitivity)
        total_specificity.append(specificity)
        total_mcc.append(mcc)

    print(f"Average ACC: {np.mean(total_accuracy):.4f}, Average SEN: {np.mean(total_sensitivity):.4f}, "
          f"Average SPE: {np.mean(total_specificity):.4f}, Average MCC: {np.mean(total_mcc):.4f}")
