In [2]:
#Service modules
import os
import timeit
from pathlib import Path

import numpy as np
import pandas as pd

#Import tools to work with data
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

#Importing metrics 
from sklearn.metrics import roc_auc_score

# Importing ML models 
from catboost import CatBoostClassifier
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier
from lightgbm import LGBMClassifier

In [3]:
# Directory that holds the training CSVs. It defaults to the original author's
# location; set the HACKATHON_DATA_DIR environment variable to run the notebook
# on another machine without editing this cell.
DATA_DIR = Path(os.environ.get("HACKATHON_DATA_DIR", "C:/Users/User/OneDrive/Документы/Python/Hackatone/database"))

# Descriptor table without Morgan fingerprints (file name: train_rdkit_smote.csv).
# NOTE(review): unlike the read below, this one does not pass index_col=0. If this
# CSV was also written together with its index, that column is loaded as an extra
# feature -- confirm against the file.
df_train_FCh_descr = pd.read_csv(DATA_DIR / "train_rdkit_smote.csv")
# TODO: the Morgan-fingerprint-only table (df_train_MoFP_descr) is not loaded yet.
# Descriptor table with Morgan fingerprints; the first CSV column is used as the index.
df_train_FCh_MoFP_descr = pd.read_csv(DATA_DIR / "train_rdkit_morgan_smote.csv", index_col=0)

In [4]:
# Candidate classifiers, keyed by the display name used in the progress log and
# in the results table. Every estimator receives the same fixed seed so that a
# Restart & Run All reproduces the reported scores.
models = {
    "XGBoost": XGBClassifier(random_state=42),
    "GradientBoosting": GradientBoostingClassifier(random_state=42),
    "RandomForest": RandomForestClassifier(random_state=42),
    # ExtraTrees is randomized as well, so it needs the seed to be repeatable.
    "ExtraTrees": ExtraTreesClassifier(random_state=42),
    "LightGBM": LGBMClassifier(verbosity=-1, random_state=42),
    "CatBoost": CatBoostClassifier(silent=True, random_state=42),
}

In [5]:
# Separate the label column "Y" from the descriptor columns of each table.
# Copies are taken so the feature/target objects are independent of the source frames.
y_FCh_MoFP = df_train_FCh_MoFP_descr["Y"].copy()
x_FCh_MoFP = df_train_FCh_MoFP_descr.drop(columns=["Y"]).copy()

y_FCh = df_train_FCh_descr["Y"].copy()
x_FCh = df_train_FCh_descr.drop(columns=["Y"]).copy()


In [7]:
# Datasets to benchmark, keyed by the label shown in the log and the results table.
# Each entry stores the feature matrix under "X" and the target under "y".
datas = {
    "Phys-chem and Morgan desc": dict(X=x_FCh_MoFP, y=y_FCh_MoFP),
    "Phys-chem desc": dict(X=x_FCh, y=y_FCh),
}

In [10]:
# Benchmark every model on every descriptor set.
# `results` ends up with one row per dataset (in `datas` order); each row is the
# flat sequence [auc, seconds, auc, seconds, ...] in `models` order.
results = list()
count = 0
data_num = len(datas)
model_num = len(models)
for data_name, X_y in datas.items():
    # Split first, then fit the scaler on the training part only, so that no
    # statistics of the held-out rows leak into preprocessing.
    X_train_raw, X_test_raw, y_train, y_test = train_test_split(
        X_y["X"], X_y["y"], test_size=0.2, random_state=42
    )
    scaler = StandardScaler()
    # Rebuild DataFrames with the original column names and row labels. The
    # entries of `datas` are left untouched, so re-running this cell always
    # starts from the same unscaled inputs.
    X_train = pd.DataFrame(
        scaler.fit_transform(X_train_raw), columns=X_train_raw.columns, index=X_train_raw.index
    )
    X_test = pd.DataFrame(
        scaler.transform(X_test_raw), columns=X_test_raw.columns, index=X_test_raw.index
    )

    model_res = list()
    for name, model in models.items():
        # The measured interval covers fitting, prediction and AUC computation.
        start = timeit.default_timer()
        model.fit(X_train, y_train)
        # Scores of the second class column; assumes "Y" is a binary label -- TODO confirm.
        y_pred = model.predict_proba(X_test)[:, 1]
        auc_roc = round(roc_auc_score(y_test, y_pred), 4)
        stop = timeit.default_timer()
        elapsed = round(stop - start, 3)
        model_res.extend([auc_roc, elapsed])
        count += 1
        print(f" {count}/{data_num*model_num}. Finished model {name} with dataset {data_name}, AUC_ROC: {round(auc_roc, 5)}, spended time: {round(stop-start, 2)} sec")
    results.append(model_res)

[0.9313, 7.691]
Finished model XGBoost with dataset Phys-chem and Morgan desc, AUC_ROC: 0.9313, spended time: 7.69 sec
[0.9313, 7.691, 0.9121, 128.405]
Finished model GradientBoosting with dataset Phys-chem and Morgan desc, AUC_ROC: 0.9121, spended time: 128.4 sec
[0.9313, 7.691, 0.9121, 128.405, 0.9321, 21.244]
Finished model RandomForest with dataset Phys-chem and Morgan desc, AUC_ROC: 0.9321, spended time: 21.24 sec
[0.9313, 7.691, 0.9121, 128.405, 0.9321, 21.244, 0.9312, 23.689]
Finished model ExtraTrees with dataset Phys-chem and Morgan desc, AUC_ROC: 0.9312, spended time: 23.69 sec
[0.9313, 7.691, 0.9121, 128.405, 0.9321, 21.244, 0.9312, 23.689, 0.9308, 5.144]
Finished model LightGBM with dataset Phys-chem and Morgan desc, AUC_ROC: 0.9308, spended time: 5.14 sec
[0.9313, 7.691, 0.9121, 128.405, 0.9321, 21.244, 0.9312, 23.689, 0.9308, 5.144, 0.9318, 68.811]
Finished model CatBoost with dataset Phys-chem and Morgan desc, AUC_ROC: 0.9318, spended time: 68.81 sec
[0.9307, 3.689]
Fini

In [12]:
# Two-level column header: for each model (in `models` order) an "AUC-ROC"
# column followed by a "sec" column.
tup = [(name, metric) for name in models for metric in ("AUC-ROC", "sec")]
column_index = pd.MultiIndex.from_tuples(tup)

In [14]:
# One row per descriptor set, with the dataset label stored in a "Descriptors" column.
model_comper = pd.DataFrame(results, columns=column_index).assign(Descriptors=list(datas))
# set_index returns a new frame, which is what the cell displays; `model_comper`
# itself keeps "Descriptors" as a regular column.
model_comper.set_index("Descriptors")

Unnamed: 0_level_0,XGBoost,XGBoost,GradientBoosting,GradientBoosting,RandomForest,RandomForest,ExtraTrees,ExtraTrees,LightGBM,LightGBM,CatBoost,CatBoost
Unnamed: 0_level_1,AUC-ROC,sec,AUC-ROC,sec,AUC-ROC,sec,AUC-ROC,sec,AUC-ROC,sec,AUC-ROC,sec
Descriptors,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
Phys-chem and Morgan desc,0.9313,7.691,0.9121,128.405,0.9321,21.244,0.9312,23.689,0.9308,5.144,0.9318,68.811
Phys-chem desc,0.9307,3.689,0.909,43.315,0.932,7.168,0.9313,2.839,0.9312,1.84,0.9338,29.281
