In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from rdkit import Chem
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from karateclub import Graph2Vec
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore')

# Mol to Graph

In [2]:
bace = pd.read_csv ("C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/raw/bace.csv")
bace.shape

(1513, 595)

In [3]:
bace.columns

Index(['mol', 'CID', 'Class', 'Model', 'pIC50', 'MW', 'AlogP', 'HBA', 'HBD',
       'RB',
       ...
       'PEOE6 (PEOE6)', 'PEOE7 (PEOE7)', 'PEOE8 (PEOE8)', 'PEOE9 (PEOE9)',
       'PEOE10 (PEOE10)', 'PEOE11 (PEOE11)', 'PEOE12 (PEOE12)',
       'PEOE13 (PEOE13)', 'PEOE14 (PEOE14)', 'canvasUID'],
      dtype='object', length=595)

In [4]:
print(bace)

                                                    mol        CID  Class  \
0     O1CC[C@@H](NC(=O)[C@@H](Cc2cc3cc(ccc3nc2N)-c2c...     BACE_1      1   
1     Fc1cc(cc(F)c1)C[C@H](NC(=O)[C@@H](N1CC[C@](NC(...     BACE_2      1   
2     S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...     BACE_3      1   
3     S1(=O)(=O)C[C@@H](Cc2cc(O[C@H](COCC)C(F)(F)F)c...     BACE_4      1   
4     S1(=O)(=O)N(c2cc(cc3c2n(cc3CC)CC1)C(=O)N[C@H](...     BACE_5      1   
...                                                 ...        ...    ...   
1508          Clc1cc2nc(n(c2cc1)C(CC(=O)NCC1CCOCC1)CC)N  BACE_1543      0   
1509          Clc1cc2nc(n(c2cc1)C(CC(=O)NCc1ncccc1)CC)N  BACE_1544      0   
1510             Brc1cc(ccc1)C1CC1C=1N=C(N)N(C)C(=O)C=1  BACE_1545      0   
1511       O=C1N(C)C(=NC(=C1)C1CC1c1cc(ccc1)-c1ccccc1)N  BACE_1546      0   
1512                Clc1cc2nc(n(c2cc1)CCCC(=O)NCC1CC1)N  BACE_1547      0   

      Model     pIC50         MW   AlogP  HBA  HBD  RB  ...  PEOE6 (PEOE6) 

In [5]:
bace['mol'] = bace['mol'].apply(lambda x: Chem.MolFromSmiles(x))

In [6]:
def mol_to_nx(mol):
    G = nx.Graph()

    for atom in mol.GetAtoms():
        G.add_node(atom.GetIdx(),
                   atomic_num=atom.GetAtomicNum(),
                   is_aromatic=atom.GetIsAromatic(),
                   atom_symbol=atom.GetSymbol())
        
    for bond in mol.GetBonds():
        G.add_edge(bond.GetBeginAtomIdx(),
                   bond.GetEndAtomIdx(),
                   bond_type=bond.GetBondType())
        
    return G

In [7]:
bace['graph'] = bace['mol'].apply(lambda x: mol_to_nx(x))

In [8]:
model = Graph2Vec()
model.fit(bace['graph'])
bace_graph2vec = model.get_embedding()

In [9]:
bace_graph2vec = pd.DataFrame(bace_graph2vec)
bace_graph2vec.to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/bace_embd.csv', index = False)

In [10]:
data = pd.read_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/bace_embd.csv')

# Performance Calculation

## Imbalance Data

In [11]:
encoded_data = data.values
x = encoded_data[:, :-1]
y = bace['Class']

In [12]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

### KNN

In [13]:
knn = KNeighborsClassifier()

In [14]:
knn.fit(X_train, y_train)

In [15]:
yhat = knn.predict(X_test)

In [16]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.554089709762533
Precision: 0.5116279069767442
Recall: 0.38372093023255816
F1 Score: 0.4385382059800665
ROC-AUC: 0.5396865520728008


### SVM

In [17]:
svm = svm.SVC(kernel='rbf')

In [18]:
svm.fit(X_train, y_train)

In [19]:
yhat = svm.predict(X_test)

In [20]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5461741424802111
Precision: 0.0
Recall: 0.0
F1 Score: 0.0
ROC-AUC: 0.5


### ADABoost

In [21]:
adb =  AdaBoostClassifier()

In [22]:
adb.fit(X_train, y_train)

In [23]:
yhat = adb.predict(X_test)

In [24]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.554089709762533
Precision: 0.5098039215686274
Recall: 0.45348837209302323
F1 Score: 0.48000000000000004
ROC-AUC: 0.5455847657566565


### Decision Tree

In [25]:
dtc = DecisionTreeClassifier()

In [26]:
dtc.fit(X_train, y_train)

In [27]:
yhat = dtc.predict(X_test)

In [28]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5329815303430079
Precision: 0.4840764331210191
Recall: 0.4418604651162791
F1 Score: 0.46200607902735563
ROC-AUC: 0.525278058645096


### Naive Bayes

In [29]:
nb = GaussianNB()

In [30]:
nb.fit(X_train, y_train)

In [31]:
yhat = nb.predict(X_test)

In [32]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5092348284960422
Precision: 0.47307692307692306
Recall: 0.7151162790697675
F1 Score: 0.5694444444444444
ROC-AUC: 0.5266402651387484


### MLP

In [33]:
mlp = MLPClassifier()

In [34]:
mlp.fit(X_train, y_train)

In [35]:
yhat = mlp.predict(X_test)

In [36]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5672823218997362
Precision: 0.5444444444444444
Recall: 0.28488372093023256
F1 Score: 0.37404580152671757
ROC-AUC: 0.5434080440399954


### XGBoost

In [37]:
xgb_cl = xgb.XGBClassifier()

In [38]:
xgb_cl.fit(X_train, y_train)

In [39]:
yhat = xgb_cl.predict(X_test)

In [40]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.554089709762533
Precision: 0.5093167701863354
Recall: 0.47674418604651164
F1 Score: 0.4924924924924925
ROC-AUC: 0.5475508369846085


## SMOTE

In [41]:
from imblearn.over_sampling import SMOTE
smote = SMOTE(sampling_strategy='minority')

In [42]:
x_resample_1, y_resample_1 = smote.fit_resample(x, y)
pd.DataFrame(x_resample_1).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_1.csv', index = False)
pd.DataFrame(y_resample_1).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_1.csv', index = False)

In [43]:
X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(x_resample_1, y_resample_1, test_size=0.25, random_state=1)

In [44]:
y_resample_1.value_counts()

1    822
0    822
Name: Class, dtype: int64

### KNN

In [45]:
knn.fit(X_train_1, y_train_1)

In [46]:
yhat_1 = knn.predict(X_test_1)

In [47]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5182481751824818
Precision: 0.5013262599469496
Recall: 0.949748743718593
F1 Score: 0.65625
ROC-AUC: 0.5314781454442021


### SVM

In [48]:
svm.fit(X_train_1, y_train_1)

In [49]:
yhat_1 = svm.predict(X_test_1)

In [50]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5401459854014599
Precision: 0.5149700598802395
Recall: 0.864321608040201
F1 Score: 0.6454033771106941
ROC-AUC: 0.5500853323219873


### ADABoost

In [51]:
adb.fit(X_train_1, y_train_1)

In [52]:
yhat_1 = adb.predict(X_test_1)

In [53]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5401459854014599
Precision: 0.5245098039215687
Recall: 0.5376884422110553
F1 Score: 0.5310173697270472
ROC-AUC: 0.5400706361998673


### Decision Tree

In [54]:
dtc.fit(X_train_1, y_train_1)

In [55]:
yhat_1 = dtc.predict(X_test_1)

In [56]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.51338199513382
Precision: 0.497737556561086
Recall: 0.5527638190954773
F1 Score: 0.5238095238095238
ROC-AUC: 0.51458945671755


### Naive Bayes

In [57]:
nb.fit(X_train_1, y_train_1)

In [58]:
yhat_1 = nb.predict(X_test_1)

In [59]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.49635036496350365
Precision: 0.4854014598540146
Recall: 0.6683417085427136
F1 Score: 0.5623678646934461
ROC-AUC: 0.501623684460036


### MLP

In [60]:
mlp.fit(X_train_1, y_train_1)

In [61]:
yhat_1 = mlp.predict(X_test_1)

In [62]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.48418491484184917
Precision: 0.48418491484184917
Recall: 1.0
F1 Score: 0.6524590163934426
ROC-AUC: 0.5


### XGBoost

In [63]:
xgb_cl.fit(X_train_1, y_train_1)

In [64]:
yhat_1 = xgb_cl.predict(X_test_1)

In [65]:
acc = accuracy_score(y_test_1, yhat_1)
print("Accuracy:", acc)
prec = precision_score(y_test_1, yhat_1)
print("Precision:", prec)
rec = recall_score(y_test_1, yhat_1)
print("Recall:", rec)
f1 = f1_score(y_test_1, yhat_1)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_1, yhat_1)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5644768856447688
Precision: 0.5471698113207547
Recall: 0.5829145728643216
F1 Score: 0.5644768856447688
ROC-AUC: 0.5650421920925381


## ADASYN

In [66]:
from imblearn.over_sampling import ADASYN
adasyn = ADASYN(sampling_strategy='not minority')

In [67]:
x_resample_2, y_resample_2 = adasyn.fit_resample(x, y)
pd.DataFrame(x_resample_2).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_2.csv', index = False)
pd.DataFrame(y_resample_2).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_2.csv', index = False)

In [68]:
X_train_2, X_test_2, y_train_2, y_test_2 = train_test_split(x_resample_2, y_resample_2, test_size=0.25, random_state=1)

### KNN

In [69]:
knn.fit(X_train_2, y_train_2)

In [70]:
yhat_2 = knn.predict(X_test_2)

In [71]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.554089709762533
Precision: 0.5116279069767442
Recall: 0.38372093023255816
F1 Score: 0.4385382059800665
ROC-AUC: 0.5396865520728008


### SVM

In [72]:
svm.fit(X_train_2, y_train_2)

In [73]:
yhat_2 = svm.predict(X_test_2)

In [74]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5461741424802111
Precision: 0.0
Recall: 0.0
F1 Score: 0.0
ROC-AUC: 0.5


### ADABoost

In [75]:
adb.fit(X_train_2, y_train_2)

In [76]:
yhat_2 = adb.predict(X_test_2)

In [77]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.554089709762533
Precision: 0.5098039215686274
Recall: 0.45348837209302323
F1 Score: 0.48000000000000004
ROC-AUC: 0.5455847657566565


### Decision Tree

In [78]:
dtc.fit(X_train_2, y_train_2)

In [79]:
yhat_2 = dtc.predict(X_test_2)

In [80]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5435356200527705
Precision: 0.4968944099378882
Recall: 0.46511627906976744
F1 Score: 0.4804804804804805
ROC-AUC: 0.5369059656218402


### Naive Bayes

In [81]:
nb.fit(X_train_2, y_train_2)

In [82]:
yhat_2 = nb.predict(X_test_2)

In [83]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5092348284960422
Precision: 0.47307692307692306
Recall: 0.7151162790697675
F1 Score: 0.5694444444444444
ROC-AUC: 0.5266402651387484


### MLP

In [84]:
mlp.fit(X_train_2, y_train_2)

In [85]:
yhat_2 = mlp.predict(X_test_2)

In [86]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5593667546174143
Precision: 0.5301204819277109
Recall: 0.2558139534883721
F1 Score: 0.34509803921568627
ROC-AUC: 0.5337040781934614


### XGBoost

In [87]:
xgb_cl.fit(X_train_2, y_train_2)

In [88]:
yhat_2 = xgb_cl.predict(X_test_2)

In [89]:
acc = accuracy_score(y_test_2, yhat_2)
print("Accuracy:", acc)
prec = precision_score(y_test_2, yhat_2)
print("Precision:", prec)
rec = recall_score(y_test_2, yhat_2)
print("Recall:", rec)
f1 = f1_score(y_test_2, yhat_2)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_2, yhat_2)
print("ROC-AUC:", roc_auc)

Accuracy: 0.554089709762533
Precision: 0.5093167701863354
Recall: 0.47674418604651164
F1 Score: 0.4924924924924925
ROC-AUC: 0.5475508369846085


## Borderline SMOTE

In [90]:
from imblearn.over_sampling import BorderlineSMOTE
boderline_smote = BorderlineSMOTE(sampling_strategy='minority')

In [91]:
x_resample_3, y_resample_3 = boderline_smote.fit_resample(x, y)
pd.DataFrame(x_resample_3).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_3.csv', index = False)
pd.DataFrame(y_resample_3).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_3.csv', index = False)

In [92]:
X_train_3, X_test_3, y_train_3, y_test_3 = train_test_split(x_resample_3, y_resample_3, test_size=0.25, random_state=1)

### KNN

In [93]:
knn.fit(X_train_3, y_train_3)

In [94]:
yhat_3 = knn.predict(X_test_3)

In [95]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5206812652068127
Precision: 0.5027624309392266
Recall: 0.914572864321608
F1 Score: 0.6488413547237077
ROC-AUC: 0.5327581302740115


### SVM

In [96]:
svm.fit(X_train_3, y_train_3)

In [97]:
yhat_3 = svm.predict(X_test_3)

In [98]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5425790754257908
Precision: 0.5164179104477612
Recall: 0.8693467336683417
F1 Score: 0.6479400749063671
ROC-AUC: 0.5525978951360576


### ADABoost

In [99]:
adb.fit(X_train_3, y_train_3)

In [100]:
yhat_3 = adb.predict(X_test_3)

In [101]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5255474452554745
Precision: 0.5093457943925234
Recall: 0.5477386934673367
F1 Score: 0.5278450363196125
ROC-AUC: 0.5262278372997061


### Decision Tree

In [102]:
dtc.fit(X_train_3, y_train_3)

In [103]:
yhat_3 = dtc.predict(X_test_3)

In [104]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.583941605839416
Precision: 0.5679611650485437
Recall: 0.5879396984924623
F1 Score: 0.5777777777777777
ROC-AUC: 0.5840641888688727


### Naive Bayes

In [105]:
nb.fit(X_train_3, y_train_3)

In [106]:
yhat_3 = nb.predict(X_test_3)

In [107]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5060827250608273
Precision: 0.4928571428571429
Recall: 0.6934673366834171
F1 Score: 0.5762004175365344
ROC-AUC: 0.51182800796435


### MLP

In [108]:
mlp.fit(X_train_3, y_train_3)

In [109]:
yhat_3 = mlp.predict(X_test_3)

In [110]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5547445255474452
Precision: 0.5412371134020618
Recall: 0.5276381909547738
F1 Score: 0.5343511450381679
ROC-AUC: 0.5539134351000284


### XGBoost

In [111]:
xgb_cl.fit(X_train_3, y_train_3)

In [112]:
yhat_3 = xgb_cl.predict(X_test_3)

In [113]:
acc = accuracy_score(y_test_3, yhat_3)
print("Accuracy:", acc)
prec = precision_score(y_test_3, yhat_3)
print("Precision:", prec)
rec = recall_score(y_test_3, yhat_3)
print("Recall:", rec)
f1 = f1_score(y_test_3, yhat_3)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_3, yhat_3)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5790754257907542
Precision: 0.5637254901960784
Recall: 0.5778894472361809
F1 Score: 0.5707196029776674
ROC-AUC: 0.579039063240732


## SVM-SMOTE

In [114]:
from imblearn.over_sampling import SVMSMOTE 
svm_smote = SVMSMOTE(sampling_strategy='minority')

In [115]:
x_resample_4, y_resample_4 = svm_smote.fit_resample(x, y)
pd.DataFrame(x_resample_4).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_4.csv', index = False)
pd.DataFrame(y_resample_4).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_4.csv', index = False)

In [116]:
X_train_4, X_test_4, y_train_4, y_test_4 = train_test_split(x_resample_4, y_resample_4, test_size=0.25, random_state=1)

### KNN

In [117]:
knn.fit(X_train_4, y_train_4)

In [118]:
yhat_4 = knn.predict(X_test_4)

In [119]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5036496350364964
Precision: 0.49279538904899134
Recall: 0.8592964824120602
F1 Score: 0.6263736263736264
ROC-AUC: 0.5145539015833887


### SVM

In [120]:
svm.fit(X_train_4, y_train_4)

In [121]:
yhat_4 = svm.predict(X_test_4)

In [122]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5328467153284672
Precision: 0.5100286532951289
Recall: 0.8944723618090452
F1 Score: 0.6496350364963503
ROC-AUC: 0.5439342941120698


### ADABoost

In [123]:
adb.fit(X_train_4, y_train_4)

In [124]:
yhat_4 = adb.predict(X_test_4)

In [125]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5450121654501217
Precision: 0.529126213592233
Recall: 0.5477386934673367
F1 Score: 0.5382716049382716
ROC-AUC: 0.545095761828008


### Decision Tree

In [126]:
dtc.fit(X_train_4, y_train_4)

In [127]:
yhat_4 = dtc.predict(X_test_4)

In [128]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.559610705596107
Precision: 0.5424528301886793
Recall: 0.5778894472361809
F1 Score: 0.559610705596107
ROC-AUC: 0.5601711387124301


### Naive Bayes

In [129]:
nb.fit(X_train_4, y_train_4)

In [130]:
yhat_4 = nb.predict(X_test_4)

In [131]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5182481751824818
Precision: 0.5017543859649123
Recall: 0.7185929648241206
F1 Score: 0.5909090909090909
ROC-AUC: 0.5243908220347018


### MLP

In [132]:
mlp.fit(X_train_4, y_train_4)

In [133]:
yhat_4 = mlp.predict(X_test_4)

In [134]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5717761557177615
Precision: 0.5572139303482587
Recall: 0.5628140703517588
F1 Score: 0.56
ROC-AUC: 0.5715013747985209


### XGBoost

In [135]:
xgb_cl.fit(X_train_4, y_train_4)

In [136]:
yhat_4 = xgb_cl.predict(X_test_4)

In [137]:
acc = accuracy_score(y_test_4, yhat_4)
print("Accuracy:", acc)
prec = precision_score(y_test_4, yhat_4)
print("Precision:", prec)
rec = recall_score(y_test_4, yhat_4)
print("Recall:", rec)
f1 = f1_score(y_test_4, yhat_4)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_4, yhat_4)
print("ROC-AUC:", roc_auc)

Accuracy: 0.559610705596107
Precision: 0.5412844036697247
Recall: 0.592964824120603
F1 Score: 0.5659472422062349
ROC-AUC: 0.5606333554565279


## Cluster Centroid

In [138]:
from imblearn.under_sampling import ClusterCentroids
cc = ClusterCentroids(sampling_strategy='majority', voting='hard')

In [139]:
x_resample_5, y_resample_5 = cc.fit_resample(x, y)
pd.DataFrame(x_resample_5).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_5.csv', index = False)
pd.DataFrame(y_resample_5).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_5.csv', index = False)

In [140]:
X_train_5, X_test_5, y_train_5, y_test_5 = train_test_split(x_resample_5, y_resample_5, test_size=0.25, random_state=1)

### KNN

In [141]:
knn.fit(X_train_5, y_train_5)

In [142]:
yhat_5 = knn.predict(X_test_5)

In [143]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.49421965317919075
Precision: 0.5029239766081871
Recall: 0.48863636363636365
F1 Score: 0.4956772334293948
ROC-AUC: 0.4943181818181819


### SVM

In [144]:
svm.fit(X_train_5, y_train_5)

In [145]:
yhat_5 = svm.predict(X_test_5)

In [146]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5289017341040463
Precision: 0.5258964143426295
Recall: 0.75
F1 Score: 0.6182669789227166
ROC-AUC: 0.525


### ADABoost

In [147]:
adb.fit(X_train_5, y_train_5)

In [148]:
yhat_5 = adb.predict(X_test_5)

In [149]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.523121387283237
Precision: 0.5341614906832298
Recall: 0.48863636363636365
F1 Score: 0.5103857566765578
ROC-AUC: 0.5237299465240642


### Decision Tree

In [150]:
dtc.fit(X_train_5, y_train_5)

In [151]:
yhat_5 = dtc.predict(X_test_5)

In [152]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5260115606936416
Precision: 0.5333333333333333
Recall: 0.5454545454545454
F1 Score: 0.5393258426966293
ROC-AUC: 0.5256684491978609


### Naive Bayes

In [153]:
nb.fit(X_train_5, y_train_5)

In [154]:
yhat_5 = nb.predict(X_test_5)

In [155]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5549132947976878
Precision: 0.5454545454545454
Recall: 0.75
F1 Score: 0.631578947368421
ROC-AUC: 0.5514705882352942


### MLP

In [156]:
mlp.fit(X_train_5, y_train_5)

In [157]:
yhat_5 = mlp.predict(X_test_5)

In [158]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5289017341040463
Precision: 0.5278969957081545
Recall: 0.6988636363636364
F1 Score: 0.6014669926650367
ROC-AUC: 0.5259024064171123


### XGBoost

In [159]:
xgb_cl.fit(X_train_5, y_train_5)

In [160]:
yhat_5 = xgb_cl.predict(X_test_5)

In [161]:
acc = accuracy_score(y_test_5, yhat_5)
print("Accuracy:", acc)
prec = precision_score(y_test_5, yhat_5)
print("Precision:", prec)
rec = recall_score(y_test_5, yhat_5)
print("Recall:", rec)
f1 = f1_score(y_test_5, yhat_5)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_5, yhat_5)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5202312138728323
Precision: 0.5328947368421053
Recall: 0.4602272727272727
F1 Score: 0.49390243902439024
ROC-AUC: 0.5212901069518716


## EditedNearestNeighbours

In [162]:
from imblearn.under_sampling import EditedNearestNeighbours
enn = EditedNearestNeighbours(sampling_strategy = 'majority') 

In [163]:
x_resample_6, y_resample_6 = enn.fit_resample(x, y)
pd.DataFrame(x_resample_6).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_6.csv', index = False)
pd.DataFrame(y_resample_6).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_6.csv', index = False)

In [164]:
X_train_6, X_test_6, y_train_6, y_test_6 = train_test_split(x_resample_6, y_resample_6, test_size=0.25, random_state=1)

### KNN

In [165]:
knn.fit(X_train_6, y_train_6)

In [166]:
yhat_6 = knn.predict(X_test_6)

In [167]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.827906976744186
Precision: 0.8556701030927835
Recall: 0.9485714285714286
F1 Score: 0.899728997289973
ROC-AUC: 0.6242857142857143


### SVM

In [168]:
svm.fit(X_train_6, y_train_6)

In [169]:
yhat_6 = svm.predict(X_test_6)

In [170]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.813953488372093
Precision: 0.813953488372093
Recall: 1.0
F1 Score: 0.8974358974358974
ROC-AUC: 0.5


### ADABoost

In [171]:
adb.fit(X_train_6, y_train_6)

In [172]:
yhat_6 = adb.predict(X_test_6)

In [173]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7627906976744186
Precision: 0.8229166666666666
Recall: 0.9028571428571428
F1 Score: 0.8610354223433241
ROC-AUC: 0.5264285714285715


### Decision Tree

In [174]:
dtc.fit(X_train_6, y_train_6)

In [175]:
yhat_6 = dtc.predict(X_test_6)

In [176]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7813953488372093
Precision: 0.872093023255814
Recall: 0.8571428571428571
F1 Score: 0.8645533141210374
ROC-AUC: 0.6535714285714285


### Naive Bayes

In [177]:
nb.fit(X_train_6, y_train_6)

In [178]:
yhat_6 = nb.predict(X_test_6)

In [179]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7674418604651163
Precision: 0.8571428571428571
Recall: 0.8571428571428571
F1 Score: 0.8571428571428571
ROC-AUC: 0.6160714285714286


### MLP

In [180]:
mlp.fit(X_train_6, y_train_6)

In [181]:
yhat_6 = mlp.predict(X_test_6)

In [182]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.813953488372093
Precision: 0.813953488372093
Recall: 1.0
F1 Score: 0.8974358974358974
ROC-AUC: 0.5


### XGBoost

In [183]:
xgb_cl.fit(X_train_6, y_train_6)

In [184]:
yhat_6 = xgb_cl.predict(X_test_6)

In [185]:
acc = accuracy_score(y_test_6, yhat_6)
print("Accuracy:", acc)
prec = precision_score(y_test_6, yhat_6)
print("Precision:", prec)
rec = recall_score(y_test_6, yhat_6)
print("Recall:", rec)
f1 = f1_score(y_test_6, yhat_6)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_6, yhat_6)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8418604651162791
Precision: 0.8373205741626795
Recall: 1.0
F1 Score: 0.9114583333333334
ROC-AUC: 0.575


## Near Miss

In [186]:
from imblearn.under_sampling import NearMiss
nm = NearMiss(sampling_strategy='majority')

In [187]:
x_resample_7, y_resample_7 = nm.fit_resample(x, y)
pd.DataFrame(x_resample_7).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_7.csv', index = False)
pd.DataFrame(y_resample_7).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_7.csv', index = False)

In [188]:
X_train_7, X_test_7, y_train_7, y_test_7 = train_test_split(x_resample_7, y_resample_7, test_size=0.25, random_state=1)

### KNN

In [189]:
knn.fit(X_train_7, y_train_7)

In [190]:
yhat_7 = knn.predict(X_test_7)

In [191]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5144508670520231
Precision: 0.5273972602739726
Recall: 0.4375
F1 Score: 0.47826086956521735
ROC-AUC: 0.5158088235294117


### SVM

In [192]:
svm.fit(X_train_7, y_train_7)

In [193]:
yhat_7 = svm.predict(X_test_7)

In [194]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5115606936416185
Precision: 0.64
Recall: 0.09090909090909091
F1 Score: 0.15920398009950248
ROC-AUC: 0.5189839572192513


### ADABoost

In [195]:
adb.fit(X_train_7, y_train_7)

In [196]:
yhat_7 = adb.predict(X_test_7)

In [197]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5202312138728323
Precision: 0.5316455696202531
Recall: 0.4772727272727273
F1 Score: 0.5029940119760479
ROC-AUC: 0.5209893048128342


### Decision Tree

In [198]:
dtc.fit(X_train_7, y_train_7)

In [199]:
yhat_7 = dtc.predict(X_test_7)

In [200]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5433526011560693
Precision: 0.55
Recall: 0.5625
F1 Score: 0.5561797752808989
ROC-AUC: 0.543014705882353


### Naive Bayes

In [201]:
nb.fit(X_train_7, y_train_7)

In [202]:
yhat_7 = nb.predict(X_test_7)

In [203]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5317919075144508
Precision: 0.57
Recall: 0.32386363636363635
F1 Score: 0.4130434782608695
ROC-AUC: 0.535461229946524


### MLP

In [204]:
mlp.fit(X_train_7, y_train_7)

In [205]:
yhat_7 = mlp.predict(X_test_7)

In [206]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5375722543352601
Precision: 0.6666666666666666
Recall: 0.18181818181818182
F1 Score: 0.28571428571428575
ROC-AUC: 0.5438502673796791


### XGBoost

In [207]:
xgb_cl.fit(X_train_7, y_train_7)

In [208]:
yhat_7 = xgb_cl.predict(X_test_7)

In [209]:
acc = accuracy_score(y_test_7, yhat_7)
print("Accuracy:", acc)
prec = precision_score(y_test_7, yhat_7)
print("Precision:", prec)
rec = recall_score(y_test_7, yhat_7)
print("Recall:", rec)
f1 = f1_score(y_test_7, yhat_7)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_7, yhat_7)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5
Precision: 0.5088757396449705
Recall: 0.48863636363636365
F1 Score: 0.4985507246376812
ROC-AUC: 0.5002005347593583


## NeighbourhoodCleaningRule

In [210]:
from imblearn.under_sampling import NeighbourhoodCleaningRule
ncr = NeighbourhoodCleaningRule(sampling_strategy='majority')

In [211]:
x_resample_8, y_resample_8 = ncr.fit_resample(x, y)
pd.DataFrame(x_resample_8).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_8.csv', index = False)
pd.DataFrame(y_resample_8).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_8.csv', index = False)

In [212]:
X_train_8, X_test_8, y_train_8, y_test_8 = train_test_split(x_resample_8, y_resample_8, test_size=0.25, random_state=1)

### KNN

In [213]:
knn.fit(X_train_8, y_train_8)

In [214]:
yhat_8 = knn.predict(X_test_8)

In [215]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7725321888412017
Precision: 0.7660550458715596
Recall: 0.9881656804733728
F1 Score: 0.8630490956072352
ROC-AUC: 0.5956453402366864


### SVM

In [216]:
svm.fit(X_train_8, y_train_8)

In [217]:
yhat_8 = svm.predict(X_test_8)

In [218]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7253218884120172
Precision: 0.7253218884120172
Recall: 1.0
F1 Score: 0.8407960199004975
ROC-AUC: 0.5


### ADABoost

In [219]:
adb.fit(X_train_8, y_train_8)

In [220]:
yhat_8 = adb.predict(X_test_8)

In [221]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6995708154506438
Precision: 0.7487437185929648
Recall: 0.8816568047337278
F1 Score: 0.8097826086956522
ROC-AUC: 0.5502034023668638


### Decision Tree

In [222]:
dtc.fit(X_train_8, y_train_8)

In [223]:
yhat_8 = dtc.predict(X_test_8)

In [224]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6394849785407726
Precision: 0.7575757575757576
Recall: 0.7396449704142012
F1 Score: 0.7485029940119761
ROC-AUC: 0.5573224852071006


### Naive Bayes

In [225]:
nb.fit(X_train_8, y_train_8)

In [226]:
yhat_8 = nb.predict(X_test_8)

In [227]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6995708154506438
Precision: 0.7619047619047619
Recall: 0.8520710059171598
F1 Score: 0.8044692737430168
ROC-AUC: 0.5744730029585798


### MLP

In [228]:
mlp.fit(X_train_8, y_train_8)

In [229]:
yhat_8 = mlp.predict(X_test_8)

In [230]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7253218884120172
Precision: 0.7253218884120172
Recall: 1.0
F1 Score: 0.8407960199004975
ROC-AUC: 0.5


### XGBoost

In [231]:
xgb_cl.fit(X_train_8, y_train_8)

In [232]:
yhat_8 = xgb_cl.predict(X_test_8)

In [233]:
acc = accuracy_score(y_test_8, yhat_8)
print("Accuracy:", acc)
prec = precision_score(y_test_8, yhat_8)
print("Precision:", prec)
rec = recall_score(y_test_8, yhat_8)
print("Recall:", rec)
f1 = f1_score(y_test_8, yhat_8)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_8, yhat_8)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7510729613733905
Precision: 0.7581395348837209
Recall: 0.9644970414201184
F1 Score: 0.8489583333333333
ROC-AUC: 0.5759985207100592


## SMOTE ENN

In [234]:
from imblearn.combine import SMOTEENN
smote_enn = SMOTEENN(sampling_strategy='auto')

In [235]:
x_resample_9, y_resample_9 = smote_enn.fit_resample(x, y)
pd.DataFrame(x_resample_9).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_9.csv', index = False)
pd.DataFrame(y_resample_9).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_9.csv', index = False)

In [236]:
X_train_9, X_test_9, y_train_9, y_test_9 = train_test_split(x_resample_9, y_resample_9, test_size=0.25, random_state=1)

### KNN

In [237]:
knn.fit(X_train_9, y_train_9)

In [238]:
yhat_9 = knn.predict(X_test_9)

In [239]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9652777777777778
Precision: 0.9652777777777778
Recall: 1.0
F1 Score: 0.9823321554770319
ROC-AUC: 0.5


### SVM

In [240]:
svm.fit(X_train_9, y_train_9)

In [241]:
yhat_9 = svm.predict(X_test_9)

In [242]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9652777777777778
Precision: 0.9652777777777778
Recall: 1.0
F1 Score: 0.9823321554770319
ROC-AUC: 0.5


### ADABoost

In [243]:
adb.fit(X_train_9, y_train_9)

In [244]:
yhat_9 = adb.predict(X_test_9)

In [245]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9722222222222222
Precision: 0.972027972027972
Recall: 1.0
F1 Score: 0.9858156028368793
ROC-AUC: 0.6


### Decision Tree

In [246]:
dtc.fit(X_train_9, y_train_9)

In [247]:
yhat_9 = dtc.predict(X_test_9)

In [248]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9722222222222222
Precision: 0.9787234042553191
Recall: 0.9928057553956835
F1 Score: 0.9857142857142858
ROC-AUC: 0.6964028776978417


### Naive Bayes

In [249]:
nb.fit(X_train_9, y_train_9)

In [250]:
yhat_9 = nb.predict(X_test_9)

In [251]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9166666666666666
Precision: 1.0
Recall: 0.9136690647482014
F1 Score: 0.9548872180451127
ROC-AUC: 0.9568345323741008


### MLP

In [252]:
mlp.fit(X_train_9, y_train_9)

In [253]:
yhat_9 = mlp.predict(X_test_9)

In [254]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9652777777777778
Precision: 0.9652777777777778
Recall: 1.0
F1 Score: 0.9823321554770319
ROC-AUC: 0.5


### XGBoost

In [255]:
xgb_cl.fit(X_train_9, y_train_9)

In [256]:
yhat_9 = xgb_cl.predict(X_test_9)

In [257]:
acc = accuracy_score(y_test_9, yhat_9)
print("Accuracy:", acc)
prec = precision_score(y_test_9, yhat_9)
print("Precision:", prec)
rec = recall_score(y_test_9, yhat_9)
print("Recall:", rec)
f1 = f1_score(y_test_9, yhat_9)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_9, yhat_9)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9652777777777778
Precision: 0.9652777777777778
Recall: 1.0
F1 Score: 0.9823321554770319
ROC-AUC: 0.5


## SMOTE Tomek

In [258]:
from imblearn.combine import SMOTETomek
smote_tomek = SMOTETomek(sampling_strategy='auto')

In [259]:
x_resample_10, y_resample_10= smote_tomek.fit_resample(x, y)
pd.DataFrame(x_resample_10).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_10.csv', index = False)
pd.DataFrame(y_resample_10).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_10.csv', index = False)

In [260]:
X_train_10, X_test_10, y_train_10, y_test_10 = train_test_split(x_resample_10, y_resample_10, test_size=0.25, random_state=1)

### KNN

In [261]:
knn.fit(X_train_10, y_train_10)

In [262]:
yhat_10 = knn.predict(X_test_10)

In [263]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5261845386533666
Precision: 0.5084269662921348
Recall: 0.923469387755102
F1 Score: 0.6557971014492753
ROC-AUC: 0.5349054255848681


### SVM

In [264]:
svm.fit(X_train_10, y_train_10)

In [265]:
yhat_10 = svm.predict(X_test_10)

In [266]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.513715710723192
Precision: 0.5014749262536873
Recall: 0.8673469387755102
F1 Score: 0.6355140186915887
ROC-AUC: 0.5214783474365356


### ADABoost

In [267]:
adb.fit(X_train_10, y_train_10)

In [268]:
yhat_10 = adb.predict(X_test_10)

In [269]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5261845386533666
Precision: 0.5157894736842106
Recall: 0.5
F1 Score: 0.5077720207253886
ROC-AUC: 0.525609756097561


### Decision Tree

In [270]:
dtc.fit(X_train_10, y_train_10)

In [271]:
yhat_10 = dtc.predict(X_test_10)

In [272]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5012468827930174
Precision: 0.4897959183673469
Recall: 0.4897959183673469
F1 Score: 0.4897959183673469
ROC-AUC: 0.5009955201592833


### Naive Bayes

In [273]:
nb.fit(X_train_10, y_train_10)

In [274]:
yhat_10 = nb.predict(X_test_10)

In [275]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5037406483790524
Precision: 0.4948805460750853
Recall: 0.7397959183673469
F1 Score: 0.5930470347648261
ROC-AUC: 0.508922349427576


### MLP

In [276]:
mlp.fit(X_train_10, y_train_10)

In [277]:
yhat_10 = mlp.predict(X_test_10)

In [278]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5012468827930174
Precision: 0.4838709677419355
Recall: 0.30612244897959184
F1 Score: 0.375
ROC-AUC: 0.49696366351418614


### XGBoost

In [279]:
xgb_cl.fit(X_train_10, y_train_10)

In [280]:
yhat_10 = xgb_cl.predict(X_test_10)

In [281]:
acc = accuracy_score(y_test_10, yhat_10)
print("Accuracy:", acc)
prec = precision_score(y_test_10, yhat_10)
print("Precision:", prec)
rec = recall_score(y_test_10, yhat_10)
print("Recall:", rec)
f1 = f1_score(y_test_10, yhat_10)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_10, yhat_10)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5810473815461347
Precision: 0.5666666666666667
Recall: 0.6071428571428571
F1 Score: 0.5862068965517241
ROC-AUC: 0.5816202090592335


## Experiment (SMOTE-NCR)

In [282]:
x_resample_11, y_resample_11= smote.fit_resample(x, y)

In [283]:
ncr_1 = NeighbourhoodCleaningRule(sampling_strategy='majority')
x_resample_11, y_resample_11= ncr.fit_resample(x_resample_11, y_resample_11)

In [284]:
pd.DataFrame(x_resample_11).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/x_resample_11.csv', index = False)
pd.DataFrame(y_resample_11).to_csv('C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/y_resample_11.csv', index = False)

In [285]:
X_train_11, X_test_11, y_train_11, y_test_11 = train_test_split(x_resample_11, y_resample_11, test_size=0.25, random_state=1)

### KNN

In [286]:
knn.fit(X_train_11, y_train_11)

In [287]:
yhat_11 = knn.predict(X_test_11)

In [288]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.9098712446351931
Precision: 0.9255813953488372
Recall: 0.9754901960784313
F1 Score: 0.9498806682577566
ROC-AUC: 0.7118830290736984


### SVM

In [289]:
svm.fit(X_train_11, y_train_11)

In [290]:
yhat_11 = svm.predict(X_test_11)

In [291]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8755364806866953
Precision: 0.8755364806866953
Recall: 1.0
F1 Score: 0.9336384439359269
ROC-AUC: 0.5


### ADABoost

In [292]:
adb.fit(X_train_11, y_train_11)

In [293]:
yhat_11 = adb.predict(X_test_11)

In [294]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8798283261802575
Precision: 0.9271844660194175
Recall: 0.9362745098039216
F1 Score: 0.9317073170731707
ROC-AUC: 0.7095165652467883


### Decision Tree

In [295]:
dtc.fit(X_train_11, y_train_11)

In [296]:
yhat_11 = dtc.predict(X_test_11)

In [297]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8626609442060086
Precision: 0.9174757281553398
Recall: 0.9264705882352942
F1 Score: 0.921951219512195
ROC-AUC: 0.6701318458417851


### Naive Bayes

In [298]:
nb.fit(X_train_11, y_train_11)

In [299]:
yhat_11 = nb.predict(X_test_11)

In [300]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8025751072961373
Precision: 0.9438202247191011
Recall: 0.8235294117647058
F1 Score: 0.8795811518324608
ROC-AUC: 0.7393509127789046


### MLP

In [301]:
mlp.fit(X_train_11, y_train_11)

In [302]:
yhat_11 = mlp.predict(X_test_11)

In [303]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8755364806866953
Precision: 0.8755364806866953
Recall: 1.0
F1 Score: 0.9336384439359269
ROC-AUC: 0.5


### XGBoost

In [304]:
xgb_cl.fit(X_train_11, y_train_11)

In [305]:
yhat_11 = xgb_cl.predict(X_test_11)

In [306]:
acc = accuracy_score(y_test_11, yhat_11)
print("Accuracy:", acc)
prec = precision_score(y_test_11, yhat_11)
print("Precision:", prec)
rec = recall_score(y_test_11, yhat_11)
print("Recall:", rec)
f1 = f1_score(y_test_11, yhat_11)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test_11, yhat_11)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8927038626609443
Precision: 0.92018779342723
Recall: 0.9607843137254902
F1 Score: 0.9400479616306955
ROC-AUC: 0.6872887085868831
