In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score


In [3]:
dataset = pd.read_csv('drug_embedding.csv')
print(len(dataset))
print(dataset.head())

300
   Monoisopropylphosphorylserine   Heparin  N,N-dimethylglycine  \
0                       0.019025 -0.030460            -0.003457   
1                      -0.098076 -0.021950            -0.064426   
2                       0.167589  0.040664             0.037584   
3                       0.029540 -0.042136             0.059411   
4                       0.108366  0.182456            -0.016930   

   5-(2-METHOXYPHENYL)-2-FUROIC ACID  \
0                           0.007247   
1                          -0.138912   
2                          -0.122577   
3                           0.178150   
4                          -0.087986   

   N~4~-(3-methyl-1H-indazol-6-yl)-N~2~-(3,4,5-trimethoxyphenyl)pyrimidine-2,4-diamine  \
0                                           0.007044                                     
1                                          -0.016831                                     
2                                          -0.136665                              

In [4]:
X = dataset.iloc[:, 10].values.reshape(-1, 1)  # Features
Y = dataset.iloc[:, 8].values  # Labels
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0, test_size=0.2)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


Y_train_categorical = pd.cut(Y_train, bins=5, labels=False)
Y_test_categorical = pd.cut(Y_test, bins=5, labels=False)

# k-NN classifier
classifier = KNeighborsClassifier(n_neighbors=15, p=2, metric='euclidean')
classifier.fit(X_train_scaled, Y_train_categorical)

y_pred_categorical = classifier.predict(X_test_scaled)

In [4]:
conf = confusion_matrix(Y_test_categorical, y_pred_categorical)
print('confusion matrix', conf)

confusion matrix [[ 0  0  1  3  0]
 [ 0  0  6  4  0]
 [ 0  0 15  8  0]
 [ 0  0  8  6  0]
 [ 0  0  4  5  0]]


In [5]:
f1 = f1_score(Y_test_categorical, y_pred_categorical, average='weighted') 
print("F1 Score:", f1)

F1 Score: 0.27175438596491225


In [6]:
accuracy = accuracy_score(Y_test_categorical, y_pred_categorical)
print("Accuracy Score:", accuracy)

Accuracy Score: 0.35
