# **Load Libraries**

In [None]:
import os
import numpy as np
import pandas as pd
import oddt
from oddt.fingerprints import PLEC
from scipy import stats
from sklearn import preprocessing
import pickle
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.metrics import matthews_corrcoef, precision_recall_curve, accuracy_score, auc
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.utils import parallel_backend
from xgboost.sklearn import XGBClassifier
from rdkit import Chem
from rdkit.Chem import AllChem
import deepchem as dc
from deepchem.utils import download_url, load_from_disk
from deepchem.utils.vina_utils import prepare_inputs
from deepchem.models import AtomicConvModel
from deepchem.feat import RdkitGridFeaturizer
from joblib import Parallel, delayed
from tqdm import tqdm
import glob
import tempfile

# **Load Data**

In [3]:
# training set true actives
plec_train_true_actives = pd.read_csv('Path_to_csv')
grid_train_true_actives = pd.read_csv('Path_to_csv')


# test sets true actives
plec_test_true_actives = pd.read_csv('Path_to_csv')
grid_test_true_actives = pd.read_csv('Path_to_csv')

# **Load Decoys**

In [None]:
# training set random_decoys
plec_train_random_decoys = pd.read_csv('Path_to_csv')
grid_train_random_decoys = pd.read_csv('Path_to_csv')


# test sets random_decoys
plec_test_random_decoys = pd.read_csv('Path_to_csv')
grid_test_random_decoys = pd.read_csv('Path_to_csv')



# training set deepcoy decoys
plec_train_deepcoy_decoys = pd.read_csv('Path_to_csv')
grid_train_deepcoy_decoys = pd.read_csv('Path_to_csv')


# test sets deepcoy decoys
plec_test_deepcoy_decoys = pd.read_csv('Path_to_csv')
grid_test_deepcoy_decoys = pd.read_csv('Path_to_csv')

# **Cross validation with DeepCoys in the training data**

In [104]:
plec_train = pd.concat([plec_train_true_actives,plec_train_random_decoys])
grid_train = pd.concat([grid_train_true_actives,grid_train_random_decoys])

In [105]:
# train
X_plec_train, y_plec_train = plec_train.drop(['class', 'potency','index'], axis= 1), plec_train['potency']
X_grid_train, y_grid_train = grid_train.drop(['class', 'potency'], axis= 1), grid_train['potency']

In [106]:
plec_train_reset = plec_train.reset_index(drop=True)
grid_train_reset = grid_train.reset_index(drop=True)

In [107]:
X_plec_train, y_plec_train = plec_train_reset.drop(['class', 'potency','index'], axis= 1), plec_train_reset['potency']
X_grid_train, y_grid_train = grid_train_reset.drop(['class', 'potency'], axis= 1), grid_train_reset['potency']

In [47]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, average_precision_score, precision_score, recall_score, matthews_corrcoef, mean_absolute_error, precision_recall_curve, f1_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor

def find_optimal_threshold(y_true, y_pred):
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred)
    f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
    optimal_threshold = thresholds[np.argmax(f1_scores)]
    return optimal_threshold

def convert_to_binary_test(y, threshold=2):
    return (y > threshold).astype(int)
    
def convert_to_binary(y, threshold):
    return (y > threshold).astype(int)

# Initialize the KFold object
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Define the models in a dictionary
models = {
    'Random Forest': RandomForestRegressor(max_depth=3, max_features='log2', min_samples_leaf=1, min_samples_split=8, n_estimators=270, n_jobs=40),
    'XGBoost': XGBRegressor(learning_rate=0.01, max_depth=7, colsample_bytree=0.73, gamma=1.96, min_child_weight=8.0, subsample=0.71, n_estimators=150),
    'ANN': MLPRegressor(hidden_layer_sizes=50, activation='tanh', alpha=0.0070, learning_rate='invscaling', solver='sgd')
}

# Perform 5-fold cross-validation for each model
results = {}

for model_name, model in models.items():
    print(f"Evaluating {model_name}...")
    mae_scores, roc_auc_scores, pr_auc_scores, precision_scores, recall_scores, mcc_scores, avg_precision_scores, f1_scores, thresholds = [], [], [], [], [], [], [], [], []
    
    fold = 1
    for train_index, test_index in kf.split(X_plec_train):
        # Split the data into training and validation sets
        X_train, X_test = X_plec_train.iloc[train_index], X_plec_train.iloc[test_index]
        y_train, y_test = y_plec_train.iloc[train_index], y_plec_train.iloc[test_index]
        
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict on the validation set
        y_pred = model.predict(X_test)
        
        # Calculate regression metric (MAE)
        mae = mean_absolute_error(y_test, y_pred)

        # Convert continuous true values to binary
        #y_pred_binary = convert_to_binary(y_pred)
        y_test_binary = convert_to_binary_test(y_test)        
        
        # Find optimal threshold
        optimal_threshold = find_optimal_threshold(y_test_binary, y_pred)
        thresholds.append(optimal_threshold)
        
        # Convert continuous predictions and true values to binary using optimal threshold
        y_pred_binary = convert_to_binary(y_pred, optimal_threshold)
        #y_test_binary = convert_to_binary(y_test, optimal_threshold)
        
        # Calculate classification metrics
        roc_auc = roc_auc_score(y_test_binary, y_pred)
        avg_precision = average_precision_score(y_test_binary, y_pred)
        precision = precision_score(y_test_binary, y_pred_binary)
        recall = recall_score(y_test_binary, y_pred_binary)
        mcc = matthews_corrcoef(y_test_binary, y_pred_binary)
        f1 = f1_score(y_test_binary, y_pred_binary)

        # Calculate PR-AUC
        precision_curve, recall_curve, _ = precision_recall_curve(y_test_binary, y_pred)
        pr_auc = auc(recall_curve, precision_curve)
        
        # Append scores
        mae_scores.append(mae)
        roc_auc_scores.append(roc_auc)
        pr_auc_scores.append(pr_auc)
        precision_scores.append(precision)
        recall_scores.append(recall)
        mcc_scores.append(mcc)
        avg_precision_scores.append(avg_precision)
        f1_scores.append(f1)
        
        print(f"Fold {fold} - Threshold: {optimal_threshold:.4f}, F1: {f1:.4f}, AVG-Pre: {avg_precision:.4f}, ROC-AUC: {roc_auc:.4f}, PR-AUC: {pr_auc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, MAE: {mae:.4f}, MCC: {mcc:.4f}")
        fold += 1
    
    # Calculate mean and standard deviation of scores
    mean_threshold, std_threshold = np.mean(thresholds), np.std(thresholds)
    mean_f1, std_f1 = np.mean(f1_scores), np.std(f1_scores)
    mean_avg_pre, std_avg_pre = np.mean(avg_precision_scores), np.std(avg_precision_scores)
    mean_roc_auc, std_roc_auc = np.mean(roc_auc_scores), np.std(roc_auc_scores)
    mean_pr_auc, std_pr_auc = np.mean(pr_auc_scores), np.std(pr_auc_scores)
    mean_precision, std_precision = np.mean(precision_scores), np.std(precision_scores)
    mean_recall, std_recall = np.mean(recall_scores), np.std(recall_scores)
    mean_mcc, std_mcc = np.mean(mcc_scores), np.std(mcc_scores)
    mean_mae, std_mae = np.mean(mae_scores), np.std(mae_scores)
    
    results[model_name] = (mean_threshold, std_threshold,
                           mean_f1, std_f1,
                           mean_avg_pre, std_avg_pre, 
                           mean_roc_auc, std_roc_auc,
                           mean_pr_auc, std_pr_auc,
                           mean_precision, std_precision, 
                           mean_recall, std_recall,
                           mean_mcc, std_mcc,
                           mean_mae, std_mae)
    
    print(f"\n{model_name} Results:")
    print(f"Mean Threshold: {mean_threshold:.4f} (±{std_threshold:.4f})")
    print(f"Mean F1: {mean_f1:.4f} (±{std_f1:.4f})")
    print(f"Mean AVG_Pre: {mean_avg_pre:.4f} (±{std_avg_pre:.4f})")
    print(f"Mean ROC-AUC: {mean_roc_auc:.4f} (±{std_roc_auc:.4f})")
    print(f"Mean PR-AUC: {mean_pr_auc:.4f} (±{std_pr_auc:.4f})")
    print(f"Mean Precision: {mean_precision:.4f} (±{std_precision:.4f})")
    print(f"Mean Recall: {mean_recall:.4f} (±{std_recall:.4f})")
    print(f"Mean MCC: {mean_mcc:.4f} (±{std_mcc:.4f})")
    print(f"Mean MAE: {mean_mae:.4f} (±{std_mae:.4f})\n")

# Print overall results
print("Overall Results:")
for model_name, scores in results.items():
    print(f"{model_name}:")
    print(f"  Mean Threshold: {scores[0]:.4f} (±{scores[1]:.4f})")
    print(f"  Mean F1: {scores[2]:.4f} (±{scores[3]:.4f})")
    print(f"  Mean AVG_Pre: {scores[4]:.4f} (±{scores[5]:.4f})")
    print(f"  Mean ROC-AUC: {scores[6]:.4f} (±{scores[7]:.4f})")
    print(f"  Mean PR-AUC: {scores[8]:.4f} (±{scores[9]:.4f})")
    print(f"  Mean Precision: {scores[10]:.4f} (±{scores[11]:.4f})")
    print(f"  Mean Recall: {scores[12]:.4f} (±{scores[13]:.4f})")
    print(f"  Mean MCC: {scores[14]:.4f} (±{scores[15]:.4f})")
    print(f"  Mean MAE: {scores[16]:.4f} (±{scores[17]:.4f})")
    print()

# Collect mean scores for each model
mean_scores = {metric: [] for metric in ['Threshold', 'F1', 'AVG-Prec', 'ROC-AUC', 'PR-AUC', 'Precision', 'Recall', 'MCC', 'MAE']}

for model_name, scores in results.items():
    mean_scores['Threshold'].append(round(scores[0], 3))
    mean_scores['F1'].append(round(scores[2], 3))
    mean_scores['AVG-Prec'].append(round(scores[4], 3))
    mean_scores['ROC-AUC'].append(round(scores[6], 3))
    mean_scores['PR-AUC'].append(round(scores[8], 3))
    mean_scores['Precision'].append(round(scores[10], 3))
    mean_scores['Recall'].append(round(scores[12], 3))
    mean_scores['MCC'].append(round(scores[14], 3))
    mean_scores['MAE'].append(round(scores[16], 3))

print("Mean Scores for Each Model (rounded to three decimal places):")
for metric, scores in mean_scores.items():
    print(f"{metric}: {scores}")

Evaluating Random Forest...
Fold 1 - Threshold: 2.1171, F1: 0.8057, AVG-Pre: 0.8430, ROC-AUC: 0.9772, PR-AUC: 0.8426, Precision: 0.8586, Recall: 0.7589, MAE: 0.1061, MCC: 0.8027
Fold 2 - Threshold: 2.1004, F1: 0.7845, AVG-Pre: 0.8725, ROC-AUC: 0.9910, PR-AUC: 0.8722, Precision: 0.7712, Recall: 0.7982, MAE: 0.1055, MCC: 0.7790
Fold 3 - Threshold: 2.1575, F1: 0.8087, AVG-Pre: 0.8854, ROC-AUC: 0.9923, PR-AUC: 0.8851, Precision: 0.9610, Recall: 0.6981, MAE: 0.1083, MCC: 0.8156
Fold 4 - Threshold: 2.1219, F1: 0.8148, AVG-Pre: 0.8613, ROC-AUC: 0.9794, PR-AUC: 0.8609, Precision: 0.8750, Recall: 0.7624, MAE: 0.1074, MCC: 0.8129
Fold 5 - Threshold: 2.1314, F1: 0.8660, AVG-Pre: 0.9043, ROC-AUC: 0.9797, PR-AUC: 0.9042, Precision: 0.9545, Recall: 0.7925, MAE: 0.1042, MCC: 0.8670

Random Forest Results:
Mean Threshold: 2.1257 (±0.0188)
Mean F1: 0.8159 (±0.0270)
Mean AVG_Pre: 0.8733 (±0.0208)
Mean ROC-AUC: 0.9839 (±0.0064)
Mean PR-AUC: 0.8730 (±0.0209)
Mean Precision: 0.8841 (±0.0698)
Mean Recall: 0

In [48]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, average_precision_score, precision_score, recall_score, matthews_corrcoef, mean_absolute_error, precision_recall_curve, f1_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor

def find_optimal_threshold(y_true, y_pred):
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred)
    f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
    optimal_threshold = thresholds[np.argmax(f1_scores)]
    return optimal_threshold

def convert_to_binary_test(y, threshold=2):
    return (y > threshold).astype(int)
    
def convert_to_binary(y, threshold):
    return (y > threshold).astype(int)

# Initialize the KFold object
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Define the models in a dictionary
models = {
    'Random Forest': RandomForestRegressor(max_depth=3, max_features='log2', min_samples_leaf=1, min_samples_split=8, n_estimators=270, n_jobs=40),
    'XGBoost': XGBRegressor(learning_rate=0.01, max_depth=7, colsample_bytree=0.73, gamma=1.96, min_child_weight=8.0, subsample=0.71, n_estimators=150),
    'ANN': MLPRegressor(hidden_layer_sizes=50, activation='tanh', alpha=0.0070, learning_rate='invscaling', solver='sgd')
}

# Perform 5-fold cross-validation for each model
results = {}

for model_name, model in models.items():
    print(f"Evaluating {model_name}...")
    mae_scores, roc_auc_scores, pr_auc_scores, precision_scores, recall_scores, mcc_scores, avg_precision_scores, f1_scores, thresholds = [], [], [], [], [], [], [], [], []
    
    fold = 1
    for train_index, test_index in kf.split(X_grid_train):
        # Split the data into training and validation sets
        X_train, X_test = X_grid_train.iloc[train_index], X_grid_train.iloc[test_index]
        y_train, y_test = y_grid_train.iloc[train_index], y_grid_train.iloc[test_index]
        
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict on the validation set
        y_pred = model.predict(X_test)
        
        # Calculate regression metric (MAE)
        mae = mean_absolute_error(y_test, y_pred)

        # Convert continuous true values to binary
        #y_pred_binary = convert_to_binary(y_pred)
        y_test_binary = convert_to_binary_test(y_test)        
        
        # Find optimal threshold
        optimal_threshold = find_optimal_threshold(y_test_binary, y_pred)
        thresholds.append(optimal_threshold)
        
        # Convert continuous predictions and true values to binary using optimal threshold
        y_pred_binary = convert_to_binary(y_pred, optimal_threshold)
        #y_test_binary = convert_to_binary(y_test, optimal_threshold)
        
        # Calculate classification metrics
        roc_auc = roc_auc_score(y_test_binary, y_pred)
        avg_precision = average_precision_score(y_test_binary, y_pred)
        precision = precision_score(y_test_binary, y_pred_binary)
        recall = recall_score(y_test_binary, y_pred_binary)
        mcc = matthews_corrcoef(y_test_binary, y_pred_binary)
        f1 = f1_score(y_test_binary, y_pred_binary)

        # Calculate PR-AUC
        precision_curve, recall_curve, _ = precision_recall_curve(y_test_binary, y_pred)
        pr_auc = auc(recall_curve, precision_curve)
        
        # Append scores
        mae_scores.append(mae)
        roc_auc_scores.append(roc_auc)
        pr_auc_scores.append(pr_auc)
        precision_scores.append(precision)
        recall_scores.append(recall)
        mcc_scores.append(mcc)
        avg_precision_scores.append(avg_precision)
        f1_scores.append(f1)
        
        print(f"Fold {fold} - Threshold: {optimal_threshold:.4f}, F1: {f1:.4f}, AVG-Pre: {avg_precision:.4f}, ROC-AUC: {roc_auc:.4f}, PR-AUC: {pr_auc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, MAE: {mae:.4f}, MCC: {mcc:.4f}")
        fold += 1
    
    # Calculate mean and standard deviation of scores
    mean_threshold, std_threshold = np.mean(thresholds), np.std(thresholds)
    mean_f1, std_f1 = np.mean(f1_scores), np.std(f1_scores)
    mean_avg_pre, std_avg_pre = np.mean(avg_precision_scores), np.std(avg_precision_scores)
    mean_roc_auc, std_roc_auc = np.mean(roc_auc_scores), np.std(roc_auc_scores)
    mean_pr_auc, std_pr_auc = np.mean(pr_auc_scores), np.std(pr_auc_scores)
    mean_precision, std_precision = np.mean(precision_scores), np.std(precision_scores)
    mean_recall, std_recall = np.mean(recall_scores), np.std(recall_scores)
    mean_mcc, std_mcc = np.mean(mcc_scores), np.std(mcc_scores)
    mean_mae, std_mae = np.mean(mae_scores), np.std(mae_scores)
    
    results[model_name] = (mean_threshold, std_threshold,
                           mean_f1, std_f1,
                           mean_avg_pre, std_avg_pre, 
                           mean_roc_auc, std_roc_auc,
                           mean_pr_auc, std_pr_auc,
                           mean_precision, std_precision, 
                           mean_recall, std_recall,
                           mean_mcc, std_mcc,
                           mean_mae, std_mae)
    
    print(f"\n{model_name} Results:")
    print(f"Mean Threshold: {mean_threshold:.4f} (±{std_threshold:.4f})")
    print(f"Mean F1: {mean_f1:.4f} (±{std_f1:.4f})")
    print(f"Mean AVG_Pre: {mean_avg_pre:.4f} (±{std_avg_pre:.4f})")
    print(f"Mean ROC-AUC: {mean_roc_auc:.4f} (±{std_roc_auc:.4f})")
    print(f"Mean PR-AUC: {mean_pr_auc:.4f} (±{std_pr_auc:.4f})")
    print(f"Mean Precision: {mean_precision:.4f} (±{std_precision:.4f})")
    print(f"Mean Recall: {mean_recall:.4f} (±{std_recall:.4f})")
    print(f"Mean MCC: {mean_mcc:.4f} (±{std_mcc:.4f})")
    print(f"Mean MAE: {mean_mae:.4f} (±{std_mae:.4f})\n")

# Print overall results
print("Overall Results:")
for model_name, scores in results.items():
    print(f"{model_name}:")
    print(f"  Mean Threshold: {scores[0]:.4f} (±{scores[1]:.4f})")
    print(f"  Mean F1: {scores[2]:.4f} (±{scores[3]:.4f})")
    print(f"  Mean AVG_Pre: {scores[4]:.4f} (±{scores[5]:.4f})")
    print(f"  Mean ROC-AUC: {scores[6]:.4f} (±{scores[7]:.4f})")
    print(f"  Mean PR-AUC: {scores[8]:.4f} (±{scores[9]:.4f})")
    print(f"  Mean Precision: {scores[10]:.4f} (±{scores[11]:.4f})")
    print(f"  Mean Recall: {scores[12]:.4f} (±{scores[13]:.4f})")
    print(f"  Mean MCC: {scores[14]:.4f} (±{scores[15]:.4f})")
    print(f"  Mean MAE: {scores[16]:.4f} (±{scores[17]:.4f})")
    print()

# Collect mean scores for each model
mean_scores = {metric: [] for metric in ['Threshold', 'F1', 'AVG-Prec', 'ROC-AUC', 'PR-AUC', 'Precision', 'Recall', 'MCC', 'MAE']}

for model_name, scores in results.items():
    mean_scores['Threshold'].append(round(scores[0], 3))
    mean_scores['F1'].append(round(scores[2], 3))
    mean_scores['AVG-Prec'].append(round(scores[4], 3))
    mean_scores['ROC-AUC'].append(round(scores[6], 3))
    mean_scores['PR-AUC'].append(round(scores[8], 3))
    mean_scores['Precision'].append(round(scores[10], 3))
    mean_scores['Recall'].append(round(scores[12], 3))
    mean_scores['MCC'].append(round(scores[14], 3))
    mean_scores['MAE'].append(round(scores[16], 3))

print("Mean Scores for Each Model (rounded to three decimal places):")
for metric, scores in mean_scores.items():
    print(f"{metric}: {scores}")

Evaluating Random Forest...
Fold 1 - Threshold: 2.1643, F1: 0.7293, AVG-Pre: 0.7321, ROC-AUC: 0.9288, PR-AUC: 0.7317, Precision: 0.9565, Recall: 0.5893, MAE: 0.1194, MCC: 0.7463
Fold 2 - Threshold: 2.1454, F1: 0.7292, AVG-Pre: 0.7666, ROC-AUC: 0.9615, PR-AUC: 0.7662, Precision: 0.8974, Recall: 0.6140, MAE: 0.1223, MCC: 0.7372
Fold 3 - Threshold: 2.1568, F1: 0.7735, AVG-Pre: 0.7833, ROC-AUC: 0.9658, PR-AUC: 0.7830, Precision: 0.9333, Recall: 0.6604, MAE: 0.1225, MCC: 0.7810
Fold 4 - Threshold: 2.1528, F1: 0.6864, AVG-Pre: 0.7403, ROC-AUC: 0.9516, PR-AUC: 0.7398, Precision: 0.8529, Recall: 0.5743, MAE: 0.1173, MCC: 0.6945
Fold 5 - Threshold: 2.1381, F1: 0.7919, AVG-Pre: 0.8308, ROC-AUC: 0.9717, PR-AUC: 0.8305, Precision: 0.8571, Recall: 0.7358, MAE: 0.1180, MCC: 0.7897

Random Forest Results:
Mean Threshold: 2.1515 (±0.0091)
Mean F1: 0.7420 (±0.0371)
Mean AVG_Pre: 0.7706 (±0.0352)
Mean ROC-AUC: 0.9559 (±0.0150)
Mean PR-AUC: 0.7703 (±0.0353)
Mean Precision: 0.8995 (±0.0409)
Mean Recall: 0

# **Cross validation with Random decoys in the training data**

In [None]:
plec_train = pd.concat([plec_train_true_actives,plec_train_deepcoys_decoys])
grid_train = pd.concat([grid_train_true_actives,grid_train_deepcoys_decoys])

In [95]:
# train
X_plec_train, y_plec_train = plec_train.drop(['class', 'potency','index'], axis= 1), plec_train['potency']
X_grid_train, y_grid_train = grid_train.drop(['class', 'potency','index'], axis= 1), grid_train['potency']

In [96]:
plec_train_reset = plec_train.reset_index(drop=True)
grid_train_reset = grid_train.reset_index(drop=True)

In [106]:
X_plec_train, y_plec_train = plec_train_reset.drop(['class', 'potency','index'], axis= 1), plec_train_reset['potency']
X_grid_train, y_grid_train = grid_train_reset.drop(['class', 'potency','index'], axis= 1), grid_train_reset['potency']

In [59]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, average_precision_score, precision_score, recall_score, matthews_corrcoef, mean_absolute_error, precision_recall_curve, f1_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor

def find_optimal_threshold(y_true, y_pred):
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred)
    f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
    optimal_threshold = thresholds[np.argmax(f1_scores)]
    return optimal_threshold

def convert_to_binary_test(y, threshold=2):
    return (y > threshold).astype(int)
    
def convert_to_binary(y, threshold):
    return (y > threshold).astype(int)

# Initialize the KFold object
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Define the models in a dictionary
models = {
    'Random Forest': RandomForestRegressor(max_depth=3, max_features='log2', min_samples_leaf=1, min_samples_split=8, n_estimators=270, n_jobs=40),
    'XGBoost': XGBRegressor(learning_rate=0.01, max_depth=7, colsample_bytree=0.73, gamma=1.96, min_child_weight=8.0, subsample=0.71, n_estimators=150),
    'ANN': MLPRegressor(hidden_layer_sizes=50, activation='tanh', alpha=0.0070, learning_rate='invscaling', solver='sgd')
}

# Perform 5-fold cross-validation for each model
results = {}

for model_name, model in models.items():
    print(f"Evaluating {model_name}...")
    mae_scores, roc_auc_scores, pr_auc_scores, precision_scores, recall_scores, mcc_scores, avg_precision_scores, f1_scores, thresholds = [], [], [], [], [], [], [], [], []
    
    fold = 1
    for train_index, test_index in kf.split(X_plec_train):
        # Split the data into training and validation sets
        X_train, X_test = X_plec_train.iloc[train_index], X_plec_train.iloc[test_index]
        y_train, y_test = y_plec_train.iloc[train_index], y_plec_train.iloc[test_index]
        
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict on the validation set
        y_pred = model.predict(X_test)
        
        # Calculate regression metric (MAE)
        mae = mean_absolute_error(y_test, y_pred)

        # Convert continuous true values to binary
        #y_pred_binary = convert_to_binary(y_pred)
        y_test_binary = convert_to_binary_test(y_test)        
        
        # Find optimal threshold
        optimal_threshold = find_optimal_threshold(y_test_binary, y_pred)
        thresholds.append(optimal_threshold)
        
        # Convert continuous predictions and true values to binary using optimal threshold
        y_pred_binary = convert_to_binary(y_pred, optimal_threshold)
        #y_test_binary = convert_to_binary(y_test, optimal_threshold)
        
        # Calculate classification metrics
        roc_auc = roc_auc_score(y_test_binary, y_pred)
        avg_precision = average_precision_score(y_test_binary, y_pred)
        precision = precision_score(y_test_binary, y_pred_binary)
        recall = recall_score(y_test_binary, y_pred_binary)
        mcc = matthews_corrcoef(y_test_binary, y_pred_binary)
        f1 = f1_score(y_test_binary, y_pred_binary)

        # Calculate PR-AUC
        precision_curve, recall_curve, _ = precision_recall_curve(y_test_binary, y_pred)
        pr_auc = auc(recall_curve, precision_curve)
        
        # Append scores
        mae_scores.append(mae)
        roc_auc_scores.append(roc_auc)
        pr_auc_scores.append(pr_auc)
        precision_scores.append(precision)
        recall_scores.append(recall)
        mcc_scores.append(mcc)
        avg_precision_scores.append(avg_precision)
        f1_scores.append(f1)
        
        print(f"Fold {fold} - Threshold: {optimal_threshold:.4f}, F1: {f1:.4f}, AVG-Pre: {avg_precision:.4f}, ROC-AUC: {roc_auc:.4f}, PR-AUC: {pr_auc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, MAE: {mae:.4f}, MCC: {mcc:.4f}")
        fold += 1
    
    # Calculate mean and standard deviation of scores
    mean_threshold, std_threshold = np.mean(thresholds), np.std(thresholds)
    mean_f1, std_f1 = np.mean(f1_scores), np.std(f1_scores)
    mean_avg_pre, std_avg_pre = np.mean(avg_precision_scores), np.std(avg_precision_scores)
    mean_roc_auc, std_roc_auc = np.mean(roc_auc_scores), np.std(roc_auc_scores)
    mean_pr_auc, std_pr_auc = np.mean(pr_auc_scores), np.std(pr_auc_scores)
    mean_precision, std_precision = np.mean(precision_scores), np.std(precision_scores)
    mean_recall, std_recall = np.mean(recall_scores), np.std(recall_scores)
    mean_mcc, std_mcc = np.mean(mcc_scores), np.std(mcc_scores)
    mean_mae, std_mae = np.mean(mae_scores), np.std(mae_scores)
    
    results[model_name] = (mean_threshold, std_threshold,
                           mean_f1, std_f1,
                           mean_avg_pre, std_avg_pre, 
                           mean_roc_auc, std_roc_auc,
                           mean_pr_auc, std_pr_auc,
                           mean_precision, std_precision, 
                           mean_recall, std_recall,
                           mean_mcc, std_mcc,
                           mean_mae, std_mae)
    
    print(f"\n{model_name} Results:")
    print(f"Mean Threshold: {mean_threshold:.4f} (±{std_threshold:.4f})")
    print(f"Mean F1: {mean_f1:.4f} (±{std_f1:.4f})")
    print(f"Mean AVG_Pre: {mean_avg_pre:.4f} (±{std_avg_pre:.4f})")
    print(f"Mean ROC-AUC: {mean_roc_auc:.4f} (±{std_roc_auc:.4f})")
    print(f"Mean PR-AUC: {mean_pr_auc:.4f} (±{std_pr_auc:.4f})")
    print(f"Mean Precision: {mean_precision:.4f} (±{std_precision:.4f})")
    print(f"Mean Recall: {mean_recall:.4f} (±{std_recall:.4f})")
    print(f"Mean MCC: {mean_mcc:.4f} (±{std_mcc:.4f})")
    print(f"Mean MAE: {mean_mae:.4f} (±{std_mae:.4f})\n")

# Print overall results
print("Overall Results:")
for model_name, scores in results.items():
    print(f"{model_name}:")
    print(f"  Mean Threshold: {scores[0]:.4f} (±{scores[1]:.4f})")
    print(f"  Mean F1: {scores[2]:.4f} (±{scores[3]:.4f})")
    print(f"  Mean AVG_Pre: {scores[4]:.4f} (±{scores[5]:.4f})")
    print(f"  Mean ROC-AUC: {scores[6]:.4f} (±{scores[7]:.4f})")
    print(f"  Mean PR-AUC: {scores[8]:.4f} (±{scores[9]:.4f})")
    print(f"  Mean Precision: {scores[10]:.4f} (±{scores[11]:.4f})")
    print(f"  Mean Recall: {scores[12]:.4f} (±{scores[13]:.4f})")
    print(f"  Mean MCC: {scores[14]:.4f} (±{scores[15]:.4f})")
    print(f"  Mean MAE: {scores[16]:.4f} (±{scores[17]:.4f})")
    print()

# Collect mean scores for each model
mean_scores = {metric: [] for metric in ['Threshold', 'F1', 'AVG-Prec', 'ROC-AUC', 'PR-AUC', 'Precision', 'Recall', 'MCC', 'MAE']}

for model_name, scores in results.items():
    mean_scores['Threshold'].append(round(scores[0], 3))
    mean_scores['F1'].append(round(scores[2], 3))
    mean_scores['AVG-Prec'].append(round(scores[4], 3))
    mean_scores['ROC-AUC'].append(round(scores[6], 3))
    mean_scores['PR-AUC'].append(round(scores[8], 3))
    mean_scores['Precision'].append(round(scores[10], 3))
    mean_scores['Recall'].append(round(scores[12], 3))
    mean_scores['MCC'].append(round(scores[14], 3))
    mean_scores['MAE'].append(round(scores[16], 3))

print("Mean Scores for Each Model (rounded to three decimal places):")
for metric, scores in mean_scores.items():
    print(f"{metric}: {scores}")

Evaluating Random Forest...
Fold 1 - Threshold: 2.1701, F1: 0.8700, AVG-Pre: 0.9276, ROC-AUC: 0.9969, PR-AUC: 0.9273, Precision: 0.8739, Recall: 0.8661, MAE: 0.0999, MCC: 0.8667
Fold 2 - Threshold: 2.1399, F1: 0.8306, AVG-Pre: 0.8963, ROC-AUC: 0.9969, PR-AUC: 0.8959, Precision: 0.7687, Recall: 0.9035, MAE: 0.1003, MCC: 0.8288
Fold 3 - Threshold: 2.2105, F1: 0.9005, AVG-Pre: 0.9578, ROC-AUC: 0.9987, PR-AUC: 0.9576, Precision: 0.9048, Recall: 0.8962, MAE: 0.1010, MCC: 0.8981
Fold 4 - Threshold: 2.2156, F1: 0.8235, AVG-Pre: 0.9043, ROC-AUC: 0.9957, PR-AUC: 0.9039, Precision: 0.8953, Recall: 0.7624, MAE: 0.0995, MCC: 0.8226
Fold 5 - Threshold: 2.1341, F1: 0.8571, AVG-Pre: 0.9373, ROC-AUC: 0.9976, PR-AUC: 0.9371, Precision: 0.7920, Recall: 0.9340, MAE: 0.0961, MCC: 0.8565

Random Forest Results:
Mean Threshold: 2.1740 (±0.0341)
Mean F1: 0.8563 (±0.0278)
Mean AVG_Pre: 0.9247 (±0.0223)
Mean ROC-AUC: 0.9972 (±0.0010)
Mean PR-AUC: 0.9243 (±0.0224)
Mean Precision: 0.8469 (±0.0558)
Mean Recall: 0

In [60]:
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import roc_auc_score, average_precision_score, precision_score, recall_score, matthews_corrcoef, mean_absolute_error, precision_recall_curve, f1_score
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.neural_network import MLPRegressor

def find_optimal_threshold(y_true, y_pred):
    precisions, recalls, thresholds = precision_recall_curve(y_true, y_pred)
    f1_scores = 2 * (precisions * recalls) / (precisions + recalls)
    optimal_threshold = thresholds[np.argmax(f1_scores)]
    return optimal_threshold

def convert_to_binary_test(y, threshold=2):
    return (y > threshold).astype(int)
    
def convert_to_binary(y, threshold):
    return (y > threshold).astype(int)

# Initialize the KFold object
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Define the models in a dictionary
models = {
    'Random Forest': RandomForestRegressor(max_depth=3, max_features='log2', min_samples_leaf=1, min_samples_split=8, n_estimators=270, n_jobs=40),
    'XGBoost': XGBRegressor(learning_rate=0.01, max_depth=7, colsample_bytree=0.73, gamma=1.96, min_child_weight=8.0, subsample=0.71, n_estimators=150),
    'ANN': MLPRegressor(hidden_layer_sizes=50, activation='tanh', alpha=0.0070, learning_rate='invscaling', solver='sgd')
}

# Perform 5-fold cross-validation for each model
results = {}

for model_name, model in models.items():
    print(f"Evaluating {model_name}...")
    mae_scores, roc_auc_scores, pr_auc_scores, precision_scores, recall_scores, mcc_scores, avg_precision_scores, f1_scores, thresholds = [], [], [], [], [], [], [], [], []
    
    fold = 1
    for train_index, test_index in kf.split(X_grid_train):
        # Split the data into training and validation sets
        X_train, X_test = X_grid_train.iloc[train_index], X_grid_train.iloc[test_index]
        y_train, y_test = y_grid_train.iloc[train_index], y_grid_train.iloc[test_index]
        
        # Train the model
        model.fit(X_train, y_train)
        
        # Predict on the validation set
        y_pred = model.predict(X_test)
        
        # Calculate regression metric (MAE)
        mae = mean_absolute_error(y_test, y_pred)

        # Convert continuous true values to binary
        #y_pred_binary = convert_to_binary(y_pred)
        y_test_binary = convert_to_binary_test(y_test)        
        
        # Find optimal threshold
        optimal_threshold = find_optimal_threshold(y_test_binary, y_pred)
        thresholds.append(optimal_threshold)
        
        # Convert continuous predictions and true values to binary using optimal threshold
        y_pred_binary = convert_to_binary(y_pred, optimal_threshold)
        #y_test_binary = convert_to_binary(y_test, optimal_threshold)
        
        # Calculate classification metrics
        roc_auc = roc_auc_score(y_test_binary, y_pred)
        avg_precision = average_precision_score(y_test_binary, y_pred)
        precision = precision_score(y_test_binary, y_pred_binary)
        recall = recall_score(y_test_binary, y_pred_binary)
        mcc = matthews_corrcoef(y_test_binary, y_pred_binary)
        f1 = f1_score(y_test_binary, y_pred_binary)

        # Calculate PR-AUC
        precision_curve, recall_curve, _ = precision_recall_curve(y_test_binary, y_pred)
        pr_auc = auc(recall_curve, precision_curve)
        
        # Append scores
        mae_scores.append(mae)
        roc_auc_scores.append(roc_auc)
        pr_auc_scores.append(pr_auc)
        precision_scores.append(precision)
        recall_scores.append(recall)
        mcc_scores.append(mcc)
        avg_precision_scores.append(avg_precision)
        f1_scores.append(f1)
        
        print(f"Fold {fold} - Threshold: {optimal_threshold:.4f}, F1: {f1:.4f}, AVG-Pre: {avg_precision:.4f}, ROC-AUC: {roc_auc:.4f}, PR-AUC: {pr_auc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, MAE: {mae:.4f}, MCC: {mcc:.4f}")
        fold += 1
    
    # Calculate mean and standard deviation of scores
    mean_threshold, std_threshold = np.mean(thresholds), np.std(thresholds)
    mean_f1, std_f1 = np.mean(f1_scores), np.std(f1_scores)
    mean_avg_pre, std_avg_pre = np.mean(avg_precision_scores), np.std(avg_precision_scores)
    mean_roc_auc, std_roc_auc = np.mean(roc_auc_scores), np.std(roc_auc_scores)
    mean_pr_auc, std_pr_auc = np.mean(pr_auc_scores), np.std(pr_auc_scores)
    mean_precision, std_precision = np.mean(precision_scores), np.std(precision_scores)
    mean_recall, std_recall = np.mean(recall_scores), np.std(recall_scores)
    mean_mcc, std_mcc = np.mean(mcc_scores), np.std(mcc_scores)
    mean_mae, std_mae = np.mean(mae_scores), np.std(mae_scores)
    
    results[model_name] = (mean_threshold, std_threshold,
                           mean_f1, std_f1,
                           mean_avg_pre, std_avg_pre, 
                           mean_roc_auc, std_roc_auc,
                           mean_pr_auc, std_pr_auc,
                           mean_precision, std_precision, 
                           mean_recall, std_recall,
                           mean_mcc, std_mcc,
                           mean_mae, std_mae)
    
    print(f"\n{model_name} Results:")
    print(f"Mean Threshold: {mean_threshold:.4f} (±{std_threshold:.4f})")
    print(f"Mean F1: {mean_f1:.4f} (±{std_f1:.4f})")
    print(f"Mean AVG_Pre: {mean_avg_pre:.4f} (±{std_avg_pre:.4f})")
    print(f"Mean ROC-AUC: {mean_roc_auc:.4f} (±{std_roc_auc:.4f})")
    print(f"Mean PR-AUC: {mean_pr_auc:.4f} (±{std_pr_auc:.4f})")
    print(f"Mean Precision: {mean_precision:.4f} (±{std_precision:.4f})")
    print(f"Mean Recall: {mean_recall:.4f} (±{std_recall:.4f})")
    print(f"Mean MCC: {mean_mcc:.4f} (±{std_mcc:.4f})")
    print(f"Mean MAE: {mean_mae:.4f} (±{std_mae:.4f})\n")

# Print overall results
print("Overall Results:")
for model_name, scores in results.items():
    print(f"{model_name}:")
    print(f"  Mean Threshold: {scores[0]:.4f} (±{scores[1]:.4f})")
    print(f"  Mean F1: {scores[2]:.4f} (±{scores[3]:.4f})")
    print(f"  Mean AVG_Pre: {scores[4]:.4f} (±{scores[5]:.4f})")
    print(f"  Mean ROC-AUC: {scores[6]:.4f} (±{scores[7]:.4f})")
    print(f"  Mean PR-AUC: {scores[8]:.4f} (±{scores[9]:.4f})")
    print(f"  Mean Precision: {scores[10]:.4f} (±{scores[11]:.4f})")
    print(f"  Mean Recall: {scores[12]:.4f} (±{scores[13]:.4f})")
    print(f"  Mean MCC: {scores[14]:.4f} (±{scores[15]:.4f})")
    print(f"  Mean MAE: {scores[16]:.4f} (±{scores[17]:.4f})")
    print()

# Collect mean scores for each model
mean_scores = {metric: [] for metric in ['Threshold', 'F1', 'AVG-Prec', 'ROC-AUC', 'PR-AUC', 'Precision', 'Recall', 'MCC', 'MAE']}

for model_name, scores in results.items():
    mean_scores['Threshold'].append(round(scores[0], 3))
    mean_scores['F1'].append(round(scores[2], 3))
    mean_scores['AVG-Prec'].append(round(scores[4], 3))
    mean_scores['ROC-AUC'].append(round(scores[6], 3))
    mean_scores['PR-AUC'].append(round(scores[8], 3))
    mean_scores['Precision'].append(round(scores[10], 3))
    mean_scores['Recall'].append(round(scores[12], 3))
    mean_scores['MCC'].append(round(scores[14], 3))
    mean_scores['MAE'].append(round(scores[16], 3))

print("Mean Scores for Each Model (rounded to three decimal places):")
for metric, scores in mean_scores.items():
    print(f"{metric}: {scores}")

Evaluating Random Forest...
Fold 1 - Threshold: 2.2077, F1: 0.8889, AVG-Pre: 0.9539, ROC-AUC: 0.9983, PR-AUC: 0.9537, Precision: 0.9231, Recall: 0.8571, MAE: 0.1111, MCC: 0.8868
Fold 2 - Threshold: 2.1855, F1: 0.8776, AVG-Pre: 0.9337, ROC-AUC: 0.9965, PR-AUC: 0.9334, Precision: 0.8455, Recall: 0.9123, MAE: 0.1110, MCC: 0.8750
Fold 3 - Threshold: 2.1906, F1: 0.9124, AVG-Pre: 0.9683, ROC-AUC: 0.9990, PR-AUC: 0.9682, Precision: 0.8919, Recall: 0.9340, MAE: 0.1102, MCC: 0.9106
Fold 4 - Threshold: 2.1812, F1: 0.9128, AVG-Pre: 0.9608, ROC-AUC: 0.9983, PR-AUC: 0.9606, Precision: 0.9468, Recall: 0.8812, MAE: 0.1104, MCC: 0.9115
Fold 5 - Threshold: 2.2036, F1: 0.9038, AVG-Pre: 0.9507, ROC-AUC: 0.9924, PR-AUC: 0.9506, Precision: 0.9216, Recall: 0.8868, MAE: 0.1098, MCC: 0.9018

Random Forest Results:
Mean Threshold: 2.1937 (±0.0103)
Mean F1: 0.8991 (±0.0138)
Mean AVG_Pre: 0.9535 (±0.0116)
Mean ROC-AUC: 0.9969 (±0.0024)
Mean PR-AUC: 0.9533 (±0.0116)
Mean Precision: 0.9058 (±0.0348)
Mean Recall: 0