In [1]:
import os
import warnings

import numpy as np
import pandas as pd
import networkx as nx
from rdkit import Chem
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from karateclub import Graph2Vec
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore')

# Mol to Graph

In [2]:
# Location of the BACE feature table. The default is the path used when the
# notebook was authored; set the BACE_CSV_PATH environment variable to point
# at the file on another machine instead of editing this cell.
BACE_CSV_PATH = os.environ.get(
    "BACE_CSV_PATH",
    "C://Users/Soumyajit/Downloads/datasets/moleculenet/bace/processed/Bace_TFMO_resampled.csv",
)

bace = pd.read_csv(BACE_CSV_PATH)
bace.shape  # (rows, columns) sanity check on the loaded table

(1644, 129)

In [3]:
# Show the column labels of the loaded table.
bace.columns

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '119', '120', '121', '122', '123', '124', '125', '126', '127', 'Class'],
      dtype='object', length=129)

In [4]:
# Plain-text dump of the frame; pandas elides the middle rows and columns.
# NOTE(review): a bare `bace.head()` as the last expression would give a
# shorter, rich-rendered preview instead of a text dump.
print(bace)

             0         1         2         3         4         5         6  \
0     0.134081 -0.036366 -0.160861  0.068974 -0.017671  0.068820  0.000564   
1     0.132166 -0.045650 -0.174000  0.080382 -0.002982  0.067586  0.008451   
2     0.104327 -0.045941 -0.153605  0.064360 -0.016102  0.071676  0.007026   
3     0.136537 -0.050100 -0.167538  0.076022 -0.016369  0.074332  0.007529   
4     0.119371 -0.037776 -0.150386  0.060499 -0.017058  0.065569  0.009040   
...        ...       ...       ...       ...       ...       ...       ...   
1639  0.100278 -0.035331 -0.129158  0.060177 -0.000288  0.064054  0.001886   
1640  0.100551 -0.034180 -0.143588  0.057954 -0.009681  0.058073 -0.001441   
1641  0.105274 -0.031395 -0.140184  0.059192 -0.006173  0.059365 -0.004173   
1642  0.128985 -0.038702 -0.170457  0.081371 -0.003698  0.069411 -0.000612   
1643  0.120858 -0.041615 -0.158512  0.068692 -0.006153  0.061594  0.000436   

             7         8         9  ...       119       120    

# Performance Calculation

In [5]:
x = bace.drop(['Class'], axis=1)
y = bace['Class']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

### KNN

In [7]:
knn = KNeighborsClassifier()

In [8]:
knn.fit(X_train, y_train)

In [9]:
yhat = knn.predict(X_test)

In [10]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5474452554744526
Precision: 0.54375
Recall: 0.8130841121495327
F1 Score: 0.6516853932584269
ROC-AUC: 0.5359836804402486


### SVM

In [11]:
svm = svm.SVC(kernel='rbf')

In [12]:
svm.fit(X_train, y_train)

In [13]:
yhat = svm.predict(X_test)

In [14]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5279805352798054
Precision: 0.5324675324675324
Recall: 0.7663551401869159
F1 Score: 0.6283524904214559
ROC-AUC: 0.5176953365909198


### AdaBoost

In [15]:
adb =  AdaBoostClassifier()

In [16]:
adb.fit(X_train, y_train)

In [17]:
yhat = adb.predict(X_test)

In [18]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5523114355231143
Precision: 0.5797872340425532
Recall: 0.5093457943925234
F1 Score: 0.5422885572139304
ROC-AUC: 0.5541652829830638


### Decision Tree

In [19]:
dtc = DecisionTreeClassifier()

In [20]:
dtc.fit(X_train, y_train)

In [21]:
yhat = dtc.predict(X_test)

In [22]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5547445255474452
Precision: 0.5756097560975609
Recall: 0.5514018691588785
F1 Score: 0.5632458233890214
ROC-AUC: 0.5548887518383225


### Naive Bayes

In [23]:
nb = GaussianNB()

In [24]:
nb.fit(X_train, y_train)

In [25]:
yhat = nb.predict(X_test)

In [26]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5182481751824818
Precision: 0.5275862068965518
Recall: 0.7149532710280374
F1 Score: 0.6071428571428572
ROC-AUC: 0.5097608994734095


### MLP

In [27]:
mlp = MLPClassifier()

In [28]:
mlp.fit(X_train, y_train)

In [29]:
yhat = mlp.predict(X_test)

In [30]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.5206812652068127
Precision: 0.5841584158415841
Recall: 0.2757009345794392
F1 Score: 0.3746031746031746
ROC-AUC: 0.531251482518146


### XGBoost

In [31]:
xgb_cl = xgb.XGBClassifier()

In [32]:
xgb_cl.fit(X_train, y_train)

In [33]:
yhat = xgb_cl.predict(X_test)

In [34]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6082725060827251
Precision: 0.6292682926829268
Recall: 0.602803738317757
F1 Score: 0.6157517899761337
ROC-AUC: 0.60850846814365
