In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from rdkit import Chem
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from karateclub import Graph2Vec
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore')

# Mol to Graph

In [2]:
BBBP = pd.read_csv ("C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/raw/BBBP.csv")
BBBP.shape

(2050, 4)

In [3]:
BBBP.columns

Index(['num', 'name', 'p_np', 'smiles'], dtype='object')

In [4]:
print(BBBP)

       num                               name  p_np  \
0        1                         Propanolol     1   
1        2               Terbutylchlorambucil     1   
2        3                              40730     1   
3        4                                 24     1   
4        5                        cloxacillin     1   
...    ...                                ...   ...   
2045  2049                         licostinel     1   
2046  2050  ademetionine(adenosyl-methionine)     1   
2047  2051                           mesocarb     1   
2048  2052                         tofisoline     1   
2049  2053                      azidamfenicol     1   

                                                 smiles  
0                      [Cl].CC(C)NCC(O)COc1cccc2ccccc12  
1              C(=O)(OC(C)(C)C)CCCc1ccc(cc1)N(CCCl)CCCl  
2     c12c3c(N4CCN(C)CC4)c(F)cc1c(c(C(O)=O)cn2C(C)CO...  
3                      C1CCN(CC1)Cc1cccc(c1)OCCCNC(=O)C  
4     Cc1onc(c2ccccc2Cl)c1C(=O)N[C@H]3[C@H]4SC(C)

In [5]:
BBBP['mol'] = BBBP['smiles'].apply(lambda x: Chem.MolFromSmiles(x, sanitize=False))

In [6]:
def mol_to_nx(mol):
    G = nx.Graph()

    for atom in mol.GetAtoms():
        G.add_node(atom.GetIdx(),
                   atomic_num=atom.GetAtomicNum(),
                   is_aromatic=atom.GetIsAromatic(),
                   atom_symbol=atom.GetSymbol())
        
    for bond in mol.GetBonds():
        G.add_edge(bond.GetBeginAtomIdx(),
                   bond.GetEndAtomIdx(),
                   bond_type=bond.GetBondType())
        
    return G

In [7]:
BBBP['graph'] = BBBP['mol'].apply(lambda x: mol_to_nx(x))

In [8]:
model = Graph2Vec()
model.fit(BBBP['graph'])
BBBP_graph2vec = model.get_embedding()

In [9]:
BBBP_graph2vec = pd.DataFrame(BBBP_graph2vec)
BBBP_graph2vec.to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/BBBP_embd.csv', index = False)

In [10]:
data = pd.read_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/BBBP_embd.csv')

# Performance Calculation

## Imbalance Data

In [11]:
encoded_data = data.values
x = encoded_data[:, :-1]
y = BBBP['p_np']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

### KNN

In [13]:
knn = KNeighborsClassifier()

In [14]:
knn.fit(X_train, y_train)

In [15]:
yhat = knn.predict(X_test)

In [16]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7504873294346979
Precision: 0.7834394904458599
Recall: 0.9341772151898734
F1 Score: 0.8521939953810623
ROC-AUC: 0.5348852177644282


### SVM

In [17]:
svm = svm.SVC(kernel='rbf')

In [18]:
svm.fit(X_train, y_train)

In [19]:
yhat = svm.predict(X_test)

In [20]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7699805068226121
Precision: 0.7699805068226121
Recall: 1.0
F1 Score: 0.8700440528634361
ROC-AUC: 0.5


### ADABoost

In [21]:
adb =  AdaBoostClassifier()

In [22]:
adb.fit(X_train, y_train)

In [23]:
yhat = adb.predict(X_test)

In [24]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7446393762183235
Precision: 0.7882096069868996
Recall: 0.9139240506329114
F1 Score: 0.8464243845252051
ROC-AUC: 0.5459450761639133


### Decision Tree

In [25]:
dtc = DecisionTreeClassifier()

In [26]:
dtc.fit(X_train, y_train)

In [27]:
yhat = dtc.predict(X_test)

In [28]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6881091617933723
Precision: 0.8116710875331565
Recall: 0.7746835443037975
F1 Score: 0.7927461139896373
ROC-AUC: 0.5864943145247801


### Naive Bayes

In [29]:
nb = GaussianNB()

In [30]:
nb.fit(X_train, y_train)

In [31]:
yhat = nb.predict(X_test)

In [32]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5536062378167641
Precision: 0.8516949152542372
Recall: 0.5088607594936709
F1 Score: 0.6370839936608558
ROC-AUC: 0.6061252950010727


### MLP

In [33]:
mlp = MLPClassifier()

In [34]:
mlp.fit(X_train, y_train)

In [35]:
yhat = mlp.predict(X_test)

In [36]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7738791423001949
Precision: 0.7729941291585127
Recall: 1.0
F1 Score: 0.8719646799116998
ROC-AUC: 0.5084745762711864


### XGBoost

In [37]:
xgb_cl = xgb.XGBClassifier()

In [38]:
xgb_cl.fit(X_train, y_train)

In [39]:
yhat = xgb_cl.predict(X_test)

In [40]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7738791423001949
Precision: 0.7987152034261242
Recall: 0.9443037974683545
F1 Score: 0.8654292343387472
ROC-AUC: 0.5738468139884145


## SMOTE

In [41]:
from imblearn.over_sampling import SMOTE
smote = SMOTE(sampling_strategy='minority')

In [42]:
x_resample_1, y_resample_1 = smote.fit_resample(x, y)
pd.DataFrame(x_resample_1).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_1.csv', index = False)
pd.DataFrame(y_resample_1).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_1.csv', index = False)

In [43]:
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(x_resample_1, y_resample_1, test_size=0.25, random_state=1)

### KNN

In [44]:
knn.fit(X_train_1, y_train_1)

In [45]:
yhat_1 = knn.predict(X_test_1)

In [46]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5191326530612245
Precision: 1.0
Recall: 0.0893719806763285
F1 Score: 0.16407982261640797
ROC-AUC: 0.5446859903381642


### SVM

In [47]:
svm.fit(X_train_1, y_train_1)

In [48]:
yhat_1 = svm.predict(X_test_1)

In [49]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6339285714285714
Precision: 0.777292576419214
Recall: 0.42995169082125606
F1 Score: 0.5536547433903578
ROC-AUC: 0.6460569264917091


### ADABoost

In [50]:
adb.fit(X_train_1, y_train_1)

In [51]:
yhat_1 = adb.predict(X_test_1)

In [52]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7155612244897959
Precision: 0.7546666666666667
Recall: 0.6835748792270532
F1 Score: 0.7173637515842839
ROC-AUC: 0.7174631152892021


### Decision Tree

In [53]:
dtc.fit(X_train_1, y_train_1)

In [54]:
yhat_1 = dtc.predict(X_test_1)

In [55]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7346938775510204
Precision: 0.7768817204301075
Recall: 0.6980676328502415
F1 Score: 0.7353689567430025
ROC-AUC: 0.7368716542629586


### Naive Bayes

In [56]:
nb.fit(X_train_1, y_train_1)

In [57]:
yhat_1 = nb.predict(X_test_1)

In [58]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6288265306122449
Precision: 0.6915887850467289
Recall: 0.5362318840579711
F1 Score: 0.6040816326530613
ROC-AUC: 0.6343321582452018


### MLP

In [59]:
mlp.fit(X_train_1, y_train_1)

In [60]:
yhat_1 = mlp.predict(X_test_1)

In [61]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6849489795918368
Precision: 0.8081180811808119
Recall: 0.5289855072463768
F1 Score: 0.6394160583941606
ROC-AUC: 0.6942224833529183


### XGBoost

In [62]:
xgb_cl.fit(X_train_1, y_train_1)

In [63]:
yhat_1 = xgb_cl.predict(X_test_1)

In [64]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8443877551020408
Precision: 0.8842105263157894
Recall: 0.8115942028985508
F1 Score: 0.8463476070528966
ROC-AUC: 0.8463376419898159


## ADASYN

In [65]:
from imblearn.over_sampling import ADASYN
adasyn = ADASYN(sampling_strategy='not minority')

In [66]:
x_resample_2, y_resample_2 = adasyn.fit_resample(x, y)
pd.DataFrame(x_resample_2).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_2.csv', index = False)
pd.DataFrame(y_resample_2).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_2.csv', index = False)

In [67]:
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(x_resample_2, y_resample_2, test_size=0.25, random_state=1)

### KNN

In [68]:
knn.fit(X_train_2, y_train_2)

In [69]:
yhat_2 = knn.predict(X_test_2)

In [70]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7504873294346979
Precision: 0.7834394904458599
Recall: 0.9341772151898734
F1 Score: 0.8521939953810623
ROC-AUC: 0.5348852177644282


### SVM

In [71]:
svm.fit(X_train_2, y_train_2)

In [72]:
yhat_2 = svm.predict(X_test_2)

In [73]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7699805068226121
Precision: 0.7699805068226121
Recall: 1.0
F1 Score: 0.8700440528634361
ROC-AUC: 0.5


### ADABoost

In [74]:
adb.fit(X_train_2, y_train_2)

In [75]:
yhat_2 = adb.predict(X_test_2)

In [76]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7446393762183235
Precision: 0.7882096069868996
Recall: 0.9139240506329114
F1 Score: 0.8464243845252051
ROC-AUC: 0.5459450761639133


### Decision Tree

In [77]:
dtc.fit(X_train_2, y_train_2)

In [78]:
yhat_2 = dtc.predict(X_test_2)

In [79]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7115009746588694
Precision: 0.8174807197943444
Recall: 0.8050632911392405
F1 Score: 0.8112244897959184
ROC-AUC: 0.6016841879425017


### Naive Bayes

In [80]:
nb.fit(X_train_2, y_train_2)

In [81]:
yhat_2 = nb.predict(X_test_2)

In [82]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5536062378167641
Precision: 0.8516949152542372
Recall: 0.5088607594936709
F1 Score: 0.6370839936608558
ROC-AUC: 0.6061252950010727


### MLP

In [83]:
mlp.fit(X_train_2, y_train_2)

In [84]:
yhat_2 = mlp.predict(X_test_2)

In [85]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7738791423001949
Precision: 0.7729941291585127
Recall: 1.0
F1 Score: 0.8719646799116998
ROC-AUC: 0.5084745762711864


### XGBoost

In [86]:
xgb_cl.fit(X_train_2, y_train_2)

In [87]:
yhat_2 = xgb_cl.predict(X_test_2)

In [88]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7738791423001949
Precision: 0.7987152034261242
Recall: 0.9443037974683545
F1 Score: 0.8654292343387472
ROC-AUC: 0.5738468139884145


## Borderline SMOTE

In [89]:
from imblearn.over_sampling import BorderlineSMOTE
boderline_smote = BorderlineSMOTE(sampling_strategy='minority')

In [90]:
x_resample_3, y_resample_3 = boderline_smote.fit_resample(x, y)
pd.DataFrame(x_resample_3).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_3.csv', index = False)
pd.DataFrame(y_resample_3).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_3.csv', index = False)

In [91]:
X_train_3, X_test_3, y_train_3, y_test_3 = train_test_split(x_resample_3, y_resample_3, test_size=0.25, random_state=1)

### KNN

In [92]:
knn.fit(X_train_3, y_train_3)

In [93]:
yhat_3 = knn.predict(X_test_3)

In [94]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5165816326530612
Precision: 1.0
Recall: 0.08454106280193237
F1 Score: 0.155902004454343
ROC-AUC: 0.5422705314009661


### SVM

In [95]:
svm.fit(X_train_3, y_train_3)

In [96]:
yhat_3 = svm.predict(X_test_3)

In [97]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6339285714285714
Precision: 0.7873303167420814
Recall: 0.42028985507246375
F1 Score: 0.5480314960629922
ROC-AUC: 0.6466314140227184


### ADABoost

In [98]:
adb.fit(X_train_3, y_train_3)

In [99]:
yhat_3 = adb.predict(X_test_3)

In [100]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7410714285714286
Precision: 0.772609819121447
Recall: 0.7222222222222222
F1 Score: 0.7465667915106118
ROC-AUC: 0.7421921921921922


### Decision Tree

In [101]:
dtc.fit(X_train_3, y_train_3)

In [102]:
yhat_3 = dtc.predict(X_test_3)

In [103]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7436224489795918
Precision: 0.7855227882037533
Recall: 0.7077294685990339
F1 Score: 0.7445997458703939
ROC-AUC: 0.7457566261914088


### Naive Bayes

In [104]:
nb.fit(X_train_3, y_train_3)

In [105]:
yhat_3 = nb.predict(X_test_3)

In [106]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6313775510204082
Precision: 0.6959247648902821
Recall: 0.5362318840579711
F1 Score: 0.6057298772169167
ROC-AUC: 0.6370348609479045


### MLP

In [107]:
mlp.fit(X_train_3, y_train_3)

In [108]:
yhat_3 = mlp.predict(X_test_3)

In [109]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7066326530612245
Precision: 0.8285714285714286
Recall: 0.5603864734299517
F1 Score: 0.6685878962536025
ROC-AUC: 0.715328371850111


### XGBoost

In [110]:
xgb_cl.fit(X_train_3, y_train_3)

In [111]:
yhat_3 = xgb_cl.predict(X_test_3)

In [112]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8520408163265306
Precision: 0.8983957219251337
Recall: 0.8115942028985508
F1 Score: 0.8527918781725888
ROC-AUC: 0.854445750097924


## SVM-SMOTE

In [113]:
from imblearn.over_sampling import SVMSMOTE 
svm_smote = SVMSMOTE(sampling_strategy='minority')

In [114]:
x_resample_4, y_resample_4 = svm_smote.fit_resample(x, y)
pd.DataFrame(x_resample_4).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_4.csv', index = False)
pd.DataFrame(y_resample_4).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_4.csv', index = False)

In [115]:
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(x_resample_4, y_resample_4, test_size=0.25, random_state=1)

### KNN

In [116]:
knn.fit(X_train_4, y_train_4)

In [117]:
yhat_4 = knn.predict(X_test_4)

In [118]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5561224489795918
Precision: 0.9714285714285714
Recall: 0.1642512077294686
F1 Score: 0.2809917355371901
ROC-AUC: 0.5794229011620318


### SVM

In [119]:
svm.fit(X_train_4, y_train_4)

In [120]:
yhat_4 = svm.predict(X_test_4)

In [121]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6811224489795918
Precision: 0.8203125
Recall: 0.5072463768115942
F1 Score: 0.6268656716417911
ROC-AUC: 0.6914610262436349


### ADABoost

In [122]:
adb.fit(X_train_4, y_train_4)

In [123]:
yhat_4 = adb.predict(X_test_4)

In [124]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7295918367346939
Precision: 0.7589743589743589
Recall: 0.714975845410628
F1 Score: 0.736318407960199
ROC-AUC: 0.730460895678287


### Decision Tree

In [125]:
dtc.fit(X_train_4, y_train_4)

In [126]:
yhat_4 = dtc.predict(X_test_4)

In [127]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.75
Precision: 0.7853403141361257
Recall: 0.7246376811594203
F1 Score: 0.7537688442211056
ROC-AUC: 0.7515080297688994


### Naive Bayes

In [128]:
nb.fit(X_train_4, y_train_4)

In [129]:
yhat_4 = nb.predict(X_test_4)

In [130]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.673469387755102
Precision: 0.7548387096774194
Recall: 0.5652173913043478
F1 Score: 0.6464088397790055
ROC-AUC: 0.6799059929494712


### MLP

In [131]:
mlp.fit(X_train_4, y_train_4)

In [132]:
yhat_4 = mlp.predict(X_test_4)

In [133]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7461734693877551
Precision: 0.7913279132791328
Recall: 0.7053140096618358
F1 Score: 0.7458492975734355
ROC-AUC: 0.7486029507768638


### XGBoost

In [134]:
xgb_cl.fit(X_train_4, y_train_4)

In [135]:
yhat_4 = xgb_cl.predict(X_test_4)

In [136]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.860969387755102
Precision: 0.8784119106699751
Recall: 0.855072463768116
F1 Score: 0.8665850673194615
ROC-AUC: 0.8613200156678419


## Cluster Centroid

In [137]:
from imblearn.under_sampling import ClusterCentroids
cc = ClusterCentroids(sampling_strategy = 'majority')

In [138]:
x_resample_5, y_resample_5 = cc.fit_resample(x, y)
pd.DataFrame(x_resample_5).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_5.csv', index = False)
pd.DataFrame(y_resample_5).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_5.csv', index = False)

In [139]:
X_train_5, X_test_5, y_train_5, y_test_5 = train_test_split(x_resample_5, y_resample_5, test_size=0.25, random_state=1)

### KNN

In [140]:
knn.fit(X_train_5, y_train_5)

In [141]:
yhat_5 = knn.predict(X_test_5)

In [142]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5289256198347108
Precision: 0.5289256198347108
Recall: 1.0
F1 Score: 0.6918918918918919
ROC-AUC: 0.5


### SVM

In [143]:
svm.fit(X_train_5, y_train_5)

In [144]:
yhat_5 = svm.predict(X_test_5)

In [145]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5495867768595041
Precision: 0.7209302325581395
Recall: 0.2421875
F1 Score: 0.36257309941520466
ROC-AUC: 0.5684621710526316


### ADABoost

In [146]:
adb.fit(X_train_5, y_train_5)

In [147]:
yhat_5 = adb.predict(X_test_5)

In [148]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7644628099173554
Precision: 0.7709923664122137
Recall: 0.7890625
F1 Score: 0.7799227799227799
ROC-AUC: 0.762952302631579


### Decision Tree

In [149]:
dtc.fit(X_train_5, y_train_5)

In [150]:
yhat_5 = dtc.predict(X_test_5)

In [151]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6694214876033058
Precision: 0.6875
Recall: 0.6875
F1 Score: 0.6875
ROC-AUC: 0.6683114035087719


### Naive Bayes

In [152]:
nb.fit(X_train_5, y_train_5)

In [153]:
yhat_5 = nb.predict(X_test_5)

In [154]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5950413223140496
Precision: 0.6595744680851063
Recall: 0.484375
F1 Score: 0.5585585585585585
ROC-AUC: 0.6018366228070176


### MLP

In [155]:
mlp.fit(X_train_5, y_train_5)

In [156]:
yhat_5 = mlp.predict(X_test_5)

In [157]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6322314049586777
Precision: 0.6611570247933884
Recall: 0.625
F1 Score: 0.642570281124498
ROC-AUC: 0.6326754385964912


### XGBoost

In [158]:
xgb_cl.fit(X_train_5, y_train_5)

In [159]:
yhat_5 = xgb_cl.predict(X_test_5)

In [160]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7727272727272727
Precision: 0.7829457364341085
Recall: 0.7890625
F1 Score: 0.7859922178988327
ROC-AUC: 0.7717242324561403


## EditedNearestNeighbours

In [161]:
from imblearn.under_sampling import EditedNearestNeighbours
enn = EditedNearestNeighbours(sampling_strategy = 'majority') 

In [162]:
x_resample_6, y_resample_6 = enn.fit_resample(x, y)
pd.DataFrame(x_resample_6).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_6.csv', index = False)
pd.DataFrame(y_resample_6).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_6.csv', index = False)

In [163]:
X_train_6, X_test_6, y_train_6, y_test_6 = train_test_split(x_resample_6, y_resample_6, test_size=0.25, random_state=1)

### KNN

In [164]:
knn.fit(X_train_6, y_train_6)

In [165]:
yhat_6 = knn.predict(X_test_6)

In [166]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7577464788732394
Precision: 0.7551020408163265
Recall: 0.940677966101695
F1 Score: 0.8377358490566039
ROC-AUC: 0.6678179746474862


### SVM

In [167]:
svm.fit(X_train_6, y_train_6)

In [168]:
yhat_6 = svm.predict(X_test_6)

In [169]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6647887323943662
Precision: 0.6647887323943662
Recall: 1.0
F1 Score: 0.7986463620981388
ROC-AUC: 0.5


### ADABoost

In [170]:
adb.fit(X_train_6, y_train_6)

In [171]:
yhat_6 = adb.predict(X_test_6)

In [172]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6929577464788732
Precision: 0.7490196078431373
Recall: 0.809322033898305
F1 Score: 0.7780040733197556
ROC-AUC: 0.6357534539239424


### Decision Tree

In [173]:
dtc.fit(X_train_6, y_train_6)

In [174]:
yhat_6 = dtc.predict(X_test_6)

In [175]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.647887323943662
Precision: 0.7193675889328063
Recall: 0.7711864406779662
F1 Score: 0.7443762781186094
ROC-AUC: 0.5872738926078906


### Naive Bayes

In [176]:
nb.fit(X_train_6, y_train_6)

In [177]:
yhat_6 = nb.predict(X_test_6)

In [178]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6140845070422535
Precision: 0.7928994082840237
Recall: 0.5677966101694916
F1 Score: 0.6617283950617284
ROC-AUC: 0.636839481555334


### MLP

In [179]:
mlp.fit(X_train_6, y_train_6)

In [180]:
yhat_6 = mlp.predict(X_test_6)

In [181]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7014084507042253
Precision: 0.696969696969697
Recall: 0.9745762711864406
F1 Score: 0.8127208480565371
ROC-AUC: 0.5671200683663296


### XGBoost

In [182]:
xgb_cl.fit(X_train_6, y_train_6)

In [183]:
yhat_6 = xgb_cl.predict(X_test_6)

In [184]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7352112676056338
Precision: 0.75177304964539
Recall: 0.8983050847457628
F1 Score: 0.8185328185328186
ROC-AUC: 0.6550348953140579


## Near Miss

In [185]:
from imblearn.under_sampling import NearMiss
nm = NearMiss(sampling_strategy='majority')

In [186]:
x_resample_7, y_resample_7 = nm.fit_resample(x, y)
pd.DataFrame(x_resample_7).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_7.csv', index = False)
pd.DataFrame(y_resample_7).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_7.csv', index = False)

In [187]:
X_train_7, X_test_7, y_train_7, y_test_7 = train_test_split(x_resample_7, y_resample_7, test_size=0.25, random_state=1)

### KNN

In [188]:
knn.fit(X_train_7, y_train_7)

In [189]:
yhat_7 = knn.predict(X_test_7)

In [190]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5537190082644629
Precision: 0.5609756097560976
Recall: 0.71875
F1 Score: 0.6301369863013699
ROC-AUC: 0.5435855263157895


### SVM

In [191]:
svm.fit(X_train_7, y_train_7)

In [192]:
yhat_7 = svm.predict(X_test_7)

In [193]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6652892561983471
Precision: 0.6766917293233082
Recall: 0.703125
F1 Score: 0.689655172413793
ROC-AUC: 0.6629660087719298


### ADABoost

In [194]:
adb.fit(X_train_7, y_train_7)

In [195]:
yhat_7 = adb.predict(X_test_7)

In [196]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5743801652892562
Precision: 0.6050420168067226
Recall: 0.5625
F1 Score: 0.5829959514170039
ROC-AUC: 0.575109649122807


### Decision Tree

In [197]:
dtc.fit(X_train_7, y_train_7)

In [198]:
yhat_7 = dtc.predict(X_test_7)

In [199]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5413223140495868
Precision: 0.580952380952381
Recall: 0.4765625
F1 Score: 0.5236051502145923
ROC-AUC: 0.5452987938596491


### Naive Bayes

In [200]:
nb.fit(X_train_7, y_train_7)

In [201]:
yhat_7 = nb.predict(X_test_7)

In [202]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6570247933884298
Precision: 0.6618705035971223
Recall: 0.71875
F1 Score: 0.6891385767790262
ROC-AUC: 0.653234649122807


### MLP

In [203]:
mlp.fit(X_train_7, y_train_7)

In [204]:
yhat_7 = mlp.predict(X_test_7)

In [205]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6528925619834711
Precision: 0.6833333333333333
Recall: 0.640625
F1 Score: 0.6612903225806451
ROC-AUC: 0.6536458333333335


### XGBoost

In [206]:
xgb_cl.fit(X_train_7, y_train_7)

In [207]:
yhat_7 = xgb_cl.predict(X_test_7)

In [208]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6033057851239669
Precision: 0.6290322580645161
Recall: 0.609375
F1 Score: 0.6190476190476191
ROC-AUC: 0.6029331140350878


## NeighbourhoodCleaningRule

In [209]:
from imblearn.under_sampling import NeighbourhoodCleaningRule
ncr = NeighbourhoodCleaningRule(sampling_strategy='majority')

In [210]:
x_resample_8, y_resample_8 = ncr.fit_resample(x, y)
pd.DataFrame(x_resample_8).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_8.csv', index = False)
pd.DataFrame(y_resample_8).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_8.csv', index = False)

In [211]:
X_train_8, X_test_8, y_train_8, y_test_8 = train_test_split(x_resample_8, y_resample_8, test_size=0.25, random_state=1)

### KNN

In [212]:
knn.fit(X_train_8, y_train_8)

In [213]:
yhat_8 = knn.predict(X_test_8)

In [214]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7457212713936431
Precision: 0.805111821086262
Recall: 0.8542372881355932
F1 Score: 0.8289473684210525
ROC-AUC: 0.6595747844186738


### SVM

In [215]:
svm.fit(X_train_8, y_train_8)

In [216]:
yhat_8 = svm.predict(X_test_8)

In [217]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7212713936430318
Precision: 0.7212713936430318
Recall: 1.0
F1 Score: 0.8380681818181819
ROC-AUC: 0.5


### ADABoost

In [218]:
adb.fit(X_train_8, y_train_8)

In [219]:
yhat_8 = adb.predict(X_test_8)

In [220]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.687041564792176
Precision: 0.7719869706840391
Recall: 0.8033898305084746
F1 Score: 0.7873754152823922
ROC-AUC: 0.5946773713945881


### Decision Tree

In [221]:
dtc.fit(X_train_8, y_train_8)

In [222]:
yhat_8 = dtc.predict(X_test_8)

In [223]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6674816625916871
Precision: 0.7933579335793358
Recall: 0.7288135593220338
F1 Score: 0.7597173144876326
ROC-AUC: 0.6187927445732977


### Naive Bayes

In [224]:
nb.fit(X_train_8, y_train_8)

In [225]:
yhat_8 = nb.predict(X_test_8)

In [226]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5843520782396088
Precision: 0.8205128205128205
Recall: 0.5423728813559322
F1 Score: 0.6530612244897959
ROC-AUC: 0.6176776687481415


### MLP

In [227]:
mlp.fit(X_train_8, y_train_8)

In [228]:
yhat_8 = mlp.predict(X_test_8)

In [229]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7212713936430318
Precision: 0.7212713936430318
Recall: 1.0
F1 Score: 0.8380681818181819
ROC-AUC: 0.5


### XGBoost

In [230]:
xgb_cl.fit(X_train_8, y_train_8)

In [231]:
yhat_8 = xgb_cl.predict(X_test_8)

In [232]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7506112469437652
Precision: 0.7846607669616519
Recall: 0.9016949152542373
F1 Score: 0.8391167192429022
ROC-AUC: 0.6306720190306274


## SMOTE ENN

In [233]:
from imblearn.combine import SMOTEENN
smote_enn = SMOTEENN(sampling_strategy='auto')

In [234]:
x_resample_9, y_resample_9 = smote_enn.fit_resample(x, y)
pd.DataFrame(x_resample_9).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_9.csv', index = False)
pd.DataFrame(y_resample_9).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_9.csv', index = False)

In [235]:
X_train_9, X_test_9, y_train_9, y_test_9 = train_test_split(x_resample_9, y_resample_9, test_size=0.25, random_state=1)

### KNN

In [236]:
knn.fit(X_train_9, y_train_9)

In [237]:
yhat_9 = knn.predict(X_test_9)

In [238]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9901234567901235
Precision: 1.0
Recall: 0.6666666666666666
F1 Score: 0.8
ROC-AUC: 0.8333333333333333


### SVM

In [239]:
svm.fit(X_train_9, y_train_9)

In [240]:
yhat_9 = svm.predict(X_test_9)

In [241]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9728395061728395
Precision: 1.0
Recall: 0.08333333333333333
F1 Score: 0.15384615384615385
ROC-AUC: 0.5416666666666666


### ADABoost

In [242]:
adb.fit(X_train_9, y_train_9)

In [243]:
yhat_9 = adb.predict(X_test_9)

In [244]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9975308641975309
Precision: 1.0
Recall: 0.9166666666666666
F1 Score: 0.9565217391304348
ROC-AUC: 0.9583333333333333


### Decision Tree

In [245]:
dtc.fit(X_train_9, y_train_9)

In [246]:
yhat_9 = dtc.predict(X_test_9)

In [247]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9925925925925926
Precision: 0.8461538461538461
Recall: 0.9166666666666666
F1 Score: 0.8799999999999999
ROC-AUC: 0.9557888040712467


### Naive Bayes

In [248]:
nb.fit(X_train_9, y_train_9)

In [249]:
yhat_9 = nb.predict(X_test_9)

In [250]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9160493827160494
Precision: 0.2608695652173913
Recall: 1.0
F1 Score: 0.41379310344827586
ROC-AUC: 0.9567430025445293


### MLP

In [251]:
mlp.fit(X_train_9, y_train_9)

In [252]:
yhat_9 = mlp.predict(X_test_9)

In [253]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9703703703703703
Precision: 0.0
Recall: 0.0
F1 Score: 0.0
ROC-AUC: 0.5


### XGBoost

In [254]:
xgb_cl.fit(X_train_9, y_train_9)

In [255]:
yhat_9 = xgb_cl.predict(X_test_9)

In [256]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
ROC-AUC: 1.0


## SMOTE Tomek

In [257]:
from imblearn.combine import SMOTETomek
smote_tomek = SMOTETomek(sampling_strategy='auto')

In [258]:
x_resample_10, y_resample_10= smote_tomek.fit_resample(x, y)
pd.DataFrame(x_resample_10).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_10.csv', index = False)
pd.DataFrame(y_resample_10).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_10.csv', index = False)

In [259]:
X_train_10, X_test_10, y_train_10, y_test_10 = train_test_split(x_resample_10, y_resample_10, test_size=0.25, random_state=1)

### KNN

In [260]:
knn.fit(X_train_10, y_train_10)

In [261]:
yhat_10 = knn.predict(X_test_10)

In [262]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5153061224489796
Precision: 1.0
Recall: 0.0821256038647343
F1 Score: 0.1517857142857143
ROC-AUC: 0.5410628019323671


### SVM

In [263]:
svm.fit(X_train_10, y_train_10)

In [264]:
yhat_10 = svm.predict(X_test_10)

In [265]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6415816326530612
Precision: 0.8064516129032258
Recall: 0.4227053140096618
F1 Score: 0.554675118858954
ROC-AUC: 0.6545959002480742


### ADABoost

In [266]:
adb.fit(X_train_10, y_train_10)

In [267]:
yhat_10 = adb.predict(X_test_10)

In [268]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7181122448979592
Precision: 0.7559681697612732
Recall: 0.6884057971014492
F1 Score: 0.7206068268015171
ROC-AUC: 0.7198785742264003


### Decision Tree

In [269]:
dtc.fit(X_train_10, y_train_10)

In [270]:
yhat_10 = dtc.predict(X_test_10)

In [271]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7295918367346939
Precision: 0.791907514450867
Recall: 0.6618357487922706
F1 Score: 0.7210526315789474
ROC-AUC: 0.733620577098838


### Naive Bayes

In [272]:
nb.fit(X_train_10, y_train_10)

In [273]:
yhat_10 = nb.predict(X_test_10)

In [274]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6454081632653061
Precision: 0.7236842105263158
Recall: 0.5314009661835749
F1 Score: 0.6128133704735377
ROC-AUC: 0.652186969578274


### MLP

In [275]:
mlp.fit(X_train_10, y_train_10)

In [276]:
yhat_10 = mlp.predict(X_test_10)

In [277]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6964285714285714
Precision: 0.782051282051282
Recall: 0.5893719806763285
F1 Score: 0.6721763085399449
ROC-AUC: 0.7027940984462723


### XGBoost

In [278]:
xgb_cl.fit(X_train_10, y_train_10)

In [279]:
yhat_10 = xgb_cl.predict(X_test_10)

In [280]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8660714285714286
Precision: 0.9033942558746736
Recall: 0.8357487922705314
F1 Score: 0.8682559598494354
ROC-AUC: 0.8678743961352658


## Experiment (SMOTE-NCR)

In [281]:
x_resample_11, y_resample_11= smote.fit_resample(x, y)

In [282]:
ncr_1 = NeighbourhoodCleaningRule(sampling_strategy='majority')
x_resample_11, y_resample_11= ncr.fit_resample(x_resample_11, y_resample_11)

In [283]:
pd.DataFrame(x_resample_11).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/x_resample_11.csv', index = False)
pd.DataFrame(y_resample_11).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/y_resample_11.csv', index = False)

In [284]:
X_train_11, X_test_11, y_train_11, y_test_11 = train_test_split(x_resample_11, y_resample_11, test_size=0.25, random_state=1)

### KNN

In [285]:
knn.fit(X_train_11, y_train_11)

In [286]:
yhat_11 = knn.predict(X_test_11)

In [287]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5216836734693877
Precision: 0.9545454545454546
Recall: 0.10120481927710843
F1 Score: 0.18300653594771243
ROC-AUC: 0.5478923825382832


### SVM

In [288]:
svm.fit(X_train_11, y_train_11)

In [289]:
yhat_11 = svm.predict(X_test_11)

In [290]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6033163265306123
Precision: 0.7184873949579832
Recall: 0.41204819277108434
F1 Score: 0.5237366003062788
ROC-AUC: 0.6152381885264636


### ADABoost

In [291]:
adb.fit(X_train_11, y_train_11)

In [292]:
yhat_11 = adb.predict(X_test_11)

In [293]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7308673469387755
Precision: 0.7756756756756756
Recall: 0.691566265060241
F1 Score: 0.7312101910828024
ROC-AUC: 0.7333170078688738


### Decision Tree

In [294]:
dtc.fit(X_train_11, y_train_11)

In [295]:
yhat_11 = dtc.predict(X_test_11)

In [296]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7525510204081632
Precision: 0.7885117493472585
Recall: 0.727710843373494
F1 Score: 0.7568922305764412
ROC-AUC: 0.7540993241257714


### Naive Bayes

In [297]:
nb.fit(X_train_11, y_train_11)

In [298]:
yhat_11 = nb.predict(X_test_11)

In [299]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6160714285714286
Precision: 0.680379746835443
Recall: 0.5180722891566265
F1 Score: 0.5882352941176471
ROC-AUC: 0.6221797760146276


### MLP

In [300]:
mlp.fit(X_train_11, y_train_11)

In [301]:
yhat_11 = mlp.predict(X_test_11)

In [302]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6709183673469388
Precision: 0.6673773987206824
Recall: 0.7542168674698795
F1 Score: 0.7081447963800904
ROC-AUC: 0.6657263199138015


### XGBoost

In [303]:
xgb_cl.fit(X_train_11, y_train_11)

In [304]:
yhat_11 = xgb_cl.predict(X_test_11)

In [305]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8392857142857143
Precision: 0.889487870619946
Recall: 0.7951807228915663
F1 Score: 0.8396946564885496
ROC-AUC: 0.8420348058902276
