In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from rdkit import Chem
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from karateclub import Graph2Vec
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
import xgboost as xgb
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings('ignore')

# Mol to Graph

In [2]:
BBBP = pd.read_csv ("C://Users/Soumyajit/Downloads/datasets/moleculenet/BBBP/processed/BBBP_TFMO_resampled.csv")
BBBP.shape

(3134, 129)

In [3]:
BBBP.columns

Index(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
       ...
       '119', '120', '121', '122', '123', '124', '125', '126', '127', 'Class'],
      dtype='object', length=129)

In [4]:
print(BBBP)

             0         1         2         3         4         5         6  \
0     0.076421 -0.011295 -0.134177  0.118973 -0.089925  0.012127  0.006361   
1     0.080794 -0.015179 -0.133254  0.113524 -0.100304  0.010671  0.018901   
2     0.086102 -0.006576 -0.132386  0.117592 -0.099823  0.016245  0.016412   
3     0.076138 -0.005036 -0.138811  0.113744 -0.107843  0.010975  0.011835   
4     0.081171 -0.002656 -0.143518  0.121120 -0.108073  0.010528  0.015266   
...        ...       ...       ...       ...       ...       ...       ...   
3129  0.112147 -0.014710 -0.172296  0.162636 -0.133153  0.005629  0.013475   
3130  0.063028 -0.011622 -0.111879  0.100560 -0.083496  0.008192  0.009518   
3131  0.119393 -0.021270 -0.207109  0.175513 -0.162405  0.007976  0.013992   
3132  0.070222 -0.007029 -0.119129  0.110409 -0.091168  0.001711  0.003893   
3133  0.108925 -0.006092 -0.171895  0.150003 -0.128561  0.014424  0.007455   

             7         8         9  ...       119       120    

# Performance Calculation

In [5]:
x = BBBP.drop(['Class'], axis=1)
y = BBBP['Class']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

### KNN

In [7]:
knn = KNeighborsClassifier()

In [8]:
knn.fit(X_train, y_train)

In [9]:
yhat = knn.predict(X_test)

In [10]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6836734693877551
Precision: 0.8839285714285714
Recall: 0.4714285714285714
F1 Score: 0.6149068322981367
ROC-AUC: 0.7000000000000001


### SVM

In [11]:
svm = svm.SVC(kernel='rbf')

In [12]:
svm.fit(X_train, y_train)

In [13]:
yhat = svm.predict(X_test)

In [14]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6760204081632653
Precision: 0.8705357142857143
Recall: 0.4642857142857143
F1 Score: 0.6055900621118012
ROC-AUC: 0.6923076923076924


### ADABoost

In [15]:
adb =  AdaBoostClassifier()

In [16]:
adb.fit(X_train, y_train)

In [17]:
yhat = adb.predict(X_test)

In [18]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.764030612244898
Precision: 0.8347578347578347
Recall: 0.6976190476190476
F1 Score: 0.7600518806744486
ROC-AUC: 0.7691391941391941


### Decision Tree

In [19]:
dtc = DecisionTreeClassifier()

In [20]:
dtc.fit(X_train, y_train)

In [21]:
yhat = dtc.predict(X_test)

In [22]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7742346938775511
Precision: 0.8292682926829268
Recall: 0.7285714285714285
F1 Score: 0.7756653992395437
ROC-AUC: 0.7777472527472528


### Naive Bayes

In [23]:
nb = GaussianNB()

In [24]:
nb.fit(X_train, y_train)

In [25]:
yhat = nb.predict(X_test)

In [26]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.6862244897959183
Precision: 0.7919463087248322
Recall: 0.5619047619047619
F1 Score: 0.6573816155988857
ROC-AUC: 0.6957875457875458


### MLP

In [27]:
mlp = MLPClassifier()

In [28]:
mlp.fit(X_train, y_train)

In [29]:
yhat = mlp.predict(X_test)

In [30]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.7742346938775511
Precision: 0.8626865671641791
Recall: 0.6880952380952381
F1 Score: 0.7655629139072848
ROC-AUC: 0.7808608058608059


### XGBoost

In [31]:
xgb_cl = xgb.XGBClassifier()

In [32]:
xgb_cl.fit(X_train, y_train)

In [33]:
yhat = xgb_cl.predict(X_test)

In [34]:
acc = accuracy_score(y_test, yhat)
print("Accuracy:", acc)
prec = precision_score(y_test, yhat)
print("Precision:", prec)
rec = recall_score(y_test, yhat)
print("Recall:", rec)
f1 = f1_score(y_test, yhat)
print("F1 Score:", f1)
roc_auc = roc_auc_score(y_test, yhat)
print("ROC-AUC:", roc_auc)

Accuracy: 0.8520408163265306
Precision: 0.88
Recall: 0.8380952380952381
F1 Score: 0.8585365853658538
ROC-AUC: 0.8531135531135532
