In [1]:
import pandas as pd
import numpy as np
import random
from rdkit import Chem
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split
import matplotlib.pyplot as plt
from utils import load_dataset_df, smile_to_fp, data_splitter
from sklearn.metrics import confusion_matrix, roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from sklearn.utils.class_weight import compute_class_weight
from sklearn import model_selection, svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier

No normalization for SPS. Feature removed!
No normalization for AvgIpc. Feature removed!
Skipped loading some Tensorflow models, missing a dependency. No module named 'tensorflow'
Skipped loading modules with pytorch-geometric dependency, missing a dependency. No module named 'torch_geometric'
Skipped loading modules with transformers dependency. No module named 'transformers'
cannot import name 'HuggingFaceModel' from 'deepchem.models.torch_models' (c:\Users\knsve\Desktop\MEI\Tese\torch\pt_venv2\lib\site-packages\deepchem\models\torch_models\__init__.py)
Skipped loading modules with pytorch-geometric dependency, missing a dependency. cannot import name 'DMPNN' from 'deepchem.models.torch_models' (c:\Users\knsve\Desktop\MEI\Tese\torch\pt_venv2\lib\site-packages\deepchem\models\torch_models\__init__.py)
Skipped loading modules with pytorch-lightning dependency, missing a dependency. No module named 'lightning'
Skipped loading some Jax models, missing a dependency. No module named 'jax'


#### Load DataFrame

In [2]:
files = ['tox21.csv','sider.csv', 'BBBP.csv']
dt_file = files[2]
dirname = dt_file.strip('.csv')

df, targets = load_dataset_df(filename=dt_file)
print(targets)

target_name = targets[0]
df = df[[target_name, 'smiles']].dropna()


['p_np']


In [3]:
print(target_name)
print(df[target_name].sum())
print(df[target_name].size)

p_np
1567
2050


#### SMILE to Fingerprint

In [4]:
fp_types = [['morgan', 1024], ['maccs', 167], ['RDKit', 1024], ['pubchem', 881]]
fp_type, num_bits = fp_types[1]
#num_bits = 2048
fp_config = {"fp_type": fp_type,
             "num_bits": num_bits}
#num_bits = 237
print(fp_type, '-', num_bits)

maccs - 167


In [6]:
dtype = torch.float32
split = "scaffold"
dataset = None
if dirname != 'BBBP':
    split = "random"
    fp_array, target_array = smile_to_fp(df, fp_config=fp_config, target_name=target_name)
    # Create Torch Dataset
    fp_tensor = torch.tensor(fp_array, dtype=dtype)
    target_tensor = torch.tensor(target_array, dtype=dtype).long()

    dataset = TensorDataset(fp_tensor, target_tensor)

In [7]:
#Metrics ---- roc  acc f1  prs sns sps
svm_metrics = [[], [], [], [], [], []]
rf_metrics  = [[], [], [], [], [], []]
knn_metrics = [[], [], [], [], [], []]
xgb_metrics = [[], [], [], [], [], []]
mlp_metrics = [[], [], [], [], [], []]
metrics = [svm_metrics, rf_metrics, xgb_metrics, knn_metrics, mlp_metrics]

grid_parameters = {
    "SVM": {
        "C": list(range(1, 100)),
        "kernel": ["linear", "rbf", "poly"],
        "gamma": ["scale", "auto"],
        "degree": [2, 3, 4],
    },
    "RF": {
        "max_depth": [5] + list(range(10, 100, 10)),
        "n_estimators": list(range(50, 400, 50)),
        "min_samples_split": [2, 5, 10],
        "min_samples_leaf": [1, 2, 4],
    },
    "XGB": {
        "learning_rate": [0.005, 0.01, 0.1, 0.2],
        "max_depth": range(2, 20, 2),
        "n_estimators": range(50, 400, 50),
    },
    "KNN": {
        "n_neighbors": list(range(1, 20)),
        "weights": ["uniform", "distance"],
        "metric": ["euclidean", "manhattan", "minkowski"],
    }
}

knn_best_params = []
svm_best_params = []
rf_best_params = []
xgb_best_params = []
 

In [8]:
def calculate_metrics(metrics_list, y_pred, y_true):
    accuracy = accuracy_score(y_true, y_pred)
    auc_roc = roc_auc_score(y_true, y_pred)
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    sensitivity = tp/(tp + fn)
    specificity = tn/(tn + fp)
    f1 = f1_score(y_true, y_pred)
    precision = precision_score(y_true, y_pred)

    metrics_list[0].append(accuracy)
    metrics_list[1].append(auc_roc)
    metrics_list[2].append(sensitivity)
    metrics_list[3].append(specificity)
    metrics_list[4].append(f1)
    metrics_list[5].append(precision)

In [9]:
def train_test_model(model, train_X, train_Y, test_X, test_Y, metrics_list):
    model.fit(train_X,train_Y)

    predictions = model.predict(test_X)
    
    calculate_metrics(metrics_list=metrics_list, y_true=test_Y, y_pred=predictions)

In [10]:
def random_param_search(model, grid_param, train_X, train_Y):
    search = RandomizedSearchCV(model, grid_param, n_iter=40, scoring='roc_auc', random_state=42)
    search.fit(train_X, train_Y)
    return search.best_params_

In [11]:
#pos_weight = (sum(train_Y == 1) / sum(train_Y == 0))

In [12]:
iterations = 30
print("Iterations:")
for iter in range(iterations):
    print(str(iter) + "/30")
    seed = iter+1
    random.seed(seed)
    train, val, test = data_splitter(df, target_name, split=split, dataset=dataset, fp_config=fp_config, seed=iter+1, dtype=dtype)
    train_X, train_Y = train[:]
    val_X, val_Y = val[:]
    test_X, test_Y = test[:]
    
    class_weights = compute_class_weight(class_weight='balanced', classes=np.array([0, 1]), y=np.array(train_Y))
    #class_weights_dict = {0: class_weights[0], 1: class_weights[1]}
    class_weights_dict = "balanced"

    pos_weight = (sum(train_Y == 1).numpy() / sum(train_Y == 0).numpy())


    if iter == 0:
        print("KNN parameter search...")
        KNN = KNeighborsClassifier()
        knn_best_params = random_param_search(KNN, grid_parameters['KNN'], train_X, train_Y)
        print("SVM parameter search...")
        SVM = svm.SVC(class_weight=class_weights_dict, random_state=seed)
        svm_best_params = random_param_search(SVM, grid_parameters['SVM'], train_X, train_Y)
        print("Random Forest parameter search...")
        RF = RandomForestClassifier(class_weight=class_weights_dict, random_state=seed)
        rf_best_params = random_param_search(RF, grid_parameters['RF'], train_X, train_Y)
        print("XGBoost parameter search...")
        XGB = XGBClassifier(objective="binary:logistic", scale_pos_weight=pos_weight)
        xgb_best_params = random_param_search(XGB, grid_parameters['XGB'], train_X, train_Y)

        print(knn_best_params, svm_best_params, rf_best_params, xgb_best_params)

    SVM = svm.SVC(**svm_best_params, class_weight=class_weights_dict, random_state=seed)
    RF = RandomForestClassifier(**rf_best_params, class_weight=class_weights_dict, random_state=seed)
    XGB = XGBClassifier(**xgb_best_params, objective="binary:logistic", scale_pos_weight=pos_weight, random_state=seed)
    KNN = KNeighborsClassifier(**knn_best_params)
    MLP =  MLPClassifier(hidden_layer_sizes=(num_bits), activation='relu', solver='adam', max_iter=1000)

    models = [SVM, RF, XGB, KNN, MLP]
    for i, model in enumerate(models):
        train_test_model(model, train_X, train_Y, test_X, test_Y, metrics[i])
        

Iterations:
0/30


[01:44:28] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:44:28] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:44:29] Explicit valence for atom # 5 N, 4, is greater than permitted


KNN parameter search...
SVM parameter search...
Random Forest parameter search...
XGBoost parameter search...
{'weights': 'uniform', 'n_neighbors': 18, 'metric': 'manhattan'} {'kernel': 'poly', 'gamma': 'auto', 'degree': 4, 'C': 33} {'n_estimators': 200, 'min_samples_split': 10, 'min_samples_leaf': 2, 'max_depth': 90} {'n_estimators': 100, 'max_depth': 18, 'learning_rate': 0.1}
1/30


[01:51:24] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:51:24] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:25] Explicit valence for atom # 5 N, 4, is greater than permitted


2/30


[01:51:39] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:51:39] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:51:39] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:51:40] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:51:40] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:51:40] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:40] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:40] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:40] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:40] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:40] Explicit valence for atom # 5 N, 4, is greater than permitted


3/30


[01:51:52] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:51:52] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:51:53] Explicit valence for atom # 5 N, 4, is greater than permitted


4/30


[01:52:06] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:06] Explicit valence for atom # 5 N, 4, is greater than permitted


5/30


[01:52:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:20] Explicit valence for atom # 5 N, 4, is greater than permitted


6/30


[01:52:32] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:32] Explicit valence for atom # 5 N, 4, is greater than permitted


7/30


[01:52:45] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:52:45] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:46] Explicit valence for atom # 5 N, 4, is greater than permitted


8/30


[01:52:57] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:52:57] Explicit valence for atom # 5 N, 4, is greater than permitted


9/30


[01:53:08] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:08] Explicit valence for atom # 5 N, 4, is greater than permitted


10/30


[01:53:20] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:53:20] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:21] Explicit valence for atom # 5 N, 4, is greater than permitted


11/30


[01:53:32] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:32] Explicit valence for atom # 5 N, 4, is greater than permitted


12/30


[01:53:44] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:44] Explicit valence for atom # 5 N, 4, is greater than permitted


13/30


[01:53:57] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:53:57] Explicit valence for atom # 5 N, 4, is greater than permitted


14/30


[01:54:09] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:54:09] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:10] Explicit valence for atom # 5 N, 4, is greater than permitted


15/30


[01:54:22] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:22] Explicit valence for atom # 5 N, 4, is greater than permitted


16/30


[01:54:35] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:35] Explicit valence for atom # 5 N, 4, is greater than permitted


17/30


[01:54:49] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:54:49] Explicit valence for atom # 5 N, 4, is greater than permitted


18/30


[01:55:02] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:55:02] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:03] Explicit valence for atom # 5 N, 4, is greater than permitted


19/30


[01:55:16] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:16] Explicit valence for atom # 5 N, 4, is greater than permitted


20/30


[01:55:29] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:29] Explicit valence for atom # 5 N, 4, is greater than permitted


21/30


[01:55:43] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:43] Explicit valence for atom # 5 N, 4, is greater than permitted


22/30


[01:55:58] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:55:58] Explicit valence for atom # 5 N, 4, is greater than permitted


23/30


[01:56:10] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:56:10] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:11] Explicit valence for atom # 5 N, 4, is greater than permitted


24/30


[01:56:23] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:23] Explicit valence for atom # 5 N, 4, is greater than permitted


25/30


[01:56:36] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:56:36] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:56:36] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:56:37] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:56:37] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:56:37] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:37] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:37] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:37] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:37] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:37] Explicit valence for atom # 5 N, 4, is greater than permitted


26/30


[01:56:50] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:56:50] Explicit valence for atom # 5 N, 4, is greater than permitted


27/30


[01:57:02] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:57:02] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:03] Explicit valence for atom # 5 N, 4, is greater than permitted


28/30


[01:57:17] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:17] Explicit valence for atom # 5 N, 4, is greater than permitted


29/30


[01:57:29] Explicit valence for atom # 1 N, 4, is greater than permitted
[01:57:29] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:57:29] Explicit valence for atom # 6 N, 4, is greater than permitted
[01:57:30] Explicit valence for atom # 11 N, 4, is greater than permitted
[01:57:30] Explicit valence for atom # 12 N, 4, is greater than permitted
[01:57:30] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:30] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:30] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:30] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:30] Explicit valence for atom # 5 N, 4, is greater than permitted
[01:57:30] Explicit valence for atom # 5 N, 4, is greater than permitted


-------------------------------------------------------------------------------------------------------------------

iterations = 30
print("Iterations:")
for iter in range(iterations):
    print(str(iter) + "/30")
    seed = iter+1
    random.seed(seed)
    
    #train_X, test_X, train_Y, test_Y = model_selection.train_test_split(fp_array,target_array, test_size=0.3, shuffle=True, random_state=seed)
    
    train, val, test = data_splitter(df, target_name, split=split, dataset=dataset, fp_config=fp_config, seed=iter+1, dtype=dtype)
    train_X, train_Y = train[:]
    val_X, val_Y = val[:]
    test_X, test_Y = test[:]
    
    class_weights = compute_class_weight(class_weight='balanced', classes=np.array([0, 1]), y=np.array(train_Y))
    #class_weights_dict = {0: class_weights[0], 1: class_weights[1]}
    #class_weights_dict = "balanced"


    #################### SVM ####################

    SVM = svm.SVC(C=1.0, kernel='linear', degree=3, gamma='auto', class_weight=class_weights_dict, random_state=seed)

    SVM.fit(train_X,train_Y)

    predictions_SVM = SVM.predict(test_X)
    
    calculate_metrics(metrics_list=svm_metrics, y_true=test_Y, y_pred=predictions_SVM)

    #################### RF ####################

    RF = RandomForestClassifier(max_depth=10, n_estimators=100, class_weight=class_weights_dict, random_state=seed)

    RF.fit(train_X,train_Y)

    predictions_RF = RF.predict(test_X)

    calculate_metrics(metrics_list=rf_metrics, y_true=test_Y, y_pred=predictions_RF)

    #################### XGB ####################
    #pos_weight = sum(train_Y == 0) / sum(train_Y == 1)
    pos_weight=1
    XGB = XGBClassifier(objective="binary:logistic",learning_rate=0.1,max_depth=6,n_estimators=100,scale_pos_weight=pos_weight)

    XGB.fit(train_X,train_Y)

    predictions_XGB = XGB.predict(test_X)

    calculate_metrics(metrics_list=xgb_metrics, y_true=test_Y, y_pred=predictions_XGB)

    #################### KNN ####################

    # Randomized search for knn
    
    if iter == 0:
        KNN = KNeighborsClassifier()

        #Randomized Search
        random_search = RandomizedSearchCV(KNN, knn_param_dist, n_iter=20, cv=5, scoring='roc_auc', random_state=42)
        random_search.fit(train_X, train_Y)

        print("KNN Best Parameters:", random_search.best_params_)
        print("KNN Best Score:", random_search.best_score_)

        knn_best_params = random_search.best_params_

    else:
        KNN = KNeighborsClassifier(
            n_neighbors=knn_best_params['n_neighbors'],
            weights=knn_best_params['weights'],
            metric=knn_best_params['metric'])


    KNN.fit(train_X,train_Y)

    predictions_KNN = KNN.predict(test_X)

    calculate_metrics(metrics_list=knn_metrics, y_true=test_Y, y_pred=predictions_KNN)

    #################### MLP ####################

    #sample_weight = np.array([class_weights[cls] for cls in train_Y])
    #sample_weight = None
    MLP =  MLPClassifier(hidden_layer_sizes=(num_bits), activation='relu', solver='adam', max_iter=200)
    MLP.fit(train_X, train_Y)
    predictions_MLP = MLP.predict(test_X)

    calculate_metrics(metrics_list=mlp_metrics, y_true=test_Y, y_pred=predictions_MLP)


In [13]:
""" #OLD
metrics = [svm_metrics, rf_metrics, xgb_metrics, knn_metrics, mlp_metrics]
metrics_np = np.zeros((len(metrics), 12))

for i, clf in enumerate(metrics):
    metrics_np[i, 0::2] = np.round([np.mean(metric) for metric in clf], 3)
    metrics_np[i, 1::2] = np.round([np.std(metric) for metric in clf], 3)    


metric_names = ['AUC', 'Accuracy', 'F1 Score', 'Precision', 'Sensitivity', 'Specificity']

columns = []
clfs = ["SVM", "RF","XGB", "KNN", "MLP"]
for name in metric_names:
    columns.extend([f'Mean {name}', f'Std {name}'])

df_clfs = pd.DataFrame(clfs, columns=["Classifier"])
df_metrics = pd.DataFrame(metrics_np, columns=columns)
df = pd.concat([df_clfs, df_metrics], axis=1)

filename = f"ml_{dt_file.strip('.csv')}_{fp_type}_{target_name}.csv"
df.to_csv(filename, index=False)
print(filename)
 """

' #OLD\nmetrics = [svm_metrics, rf_metrics, xgb_metrics, knn_metrics, mlp_metrics]\nmetrics_np = np.zeros((len(metrics), 12))\n\nfor i, clf in enumerate(metrics):\n    metrics_np[i, 0::2] = np.round([np.mean(metric) for metric in clf], 3)\n    metrics_np[i, 1::2] = np.round([np.std(metric) for metric in clf], 3)    \n\n\nmetric_names = [\'AUC\', \'Accuracy\', \'F1 Score\', \'Precision\', \'Sensitivity\', \'Specificity\']\n\ncolumns = []\nclfs = ["SVM", "RF","XGB", "KNN", "MLP"]\nfor name in metric_names:\n    columns.extend([f\'Mean {name}\', f\'Std {name}\'])\n\ndf_clfs = pd.DataFrame(clfs, columns=["Classifier"])\ndf_metrics = pd.DataFrame(metrics_np, columns=columns)\ndf = pd.concat([df_clfs, df_metrics], axis=1)\n\nfilename = f"ml_{dt_file.strip(\'.csv\')}_{fp_type}_{target_name}.csv"\ndf.to_csv(filename, index=False)\nprint(filename)\n '

In [14]:
metric_names = ['Acc', 'AUC', 'Sn', 'Sp', 'F1', 'Precision']
metrics = [svm_metrics, rf_metrics, xgb_metrics, knn_metrics, mlp_metrics]
metrics_np = np.zeros((len(metrics), 12))

for i, clf in enumerate(metrics):
    metrics_np[i, 0::2] = np.round([np.mean(metric) for metric in clf], 3)
    metrics_np[i, 1::2] = np.round([np.std(metric) for metric in clf], 3)  

columns = []
for name in metric_names:
    columns.extend([f'Mean {name}', f'Std {name}'])

print(metrics_np)
clfs = ["SVM", "RF","XGB", "KNN", "MLP"]
df_clfs = pd.DataFrame(clfs, columns=["Classifier"])
df_metrics = pd.DataFrame(metrics_np, columns=columns)
df = pd.concat([df_clfs, df_metrics], axis=1)

if fp_type in ['maccs', 'pubchem']:
    filename = f"results\\{dirname}\\ml_{fp_type}_{target_name}.csv"

else:
    filename = f"results\\{dirname}\\ml_{fp_type}_{num_bits}_{target_name}.csv"

df.to_csv(filename, index=False)

print(filename)

[[0.619 0.    0.609 0.    0.832 0.    0.387 0.    0.694 0.    0.596 0.   ]
 [0.614 0.007 0.604 0.007 0.846 0.008 0.362 0.012 0.695 0.005 0.59  0.005]
 [0.588 0.    0.574 0.    0.911 0.    0.237 0.    0.697 0.    0.564 0.   ]
 [0.567 0.    0.553 0.    0.901 0.    0.204 0.    0.684 0.    0.552 0.   ]
 [0.598 0.011 0.586 0.011 0.89  0.015 0.281 0.025 0.698 0.007 0.574 0.007]]
results\BBBP\ml_maccs_p_np.csv


: 