In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score

In [2]:
dataset = pd.read_csv('drug_embedding.csv')
print(len(dataset))
print(dataset.head())

300
   pyrvinium-pamoate   A205804   EUK-134  daunorubicin  dexniguldipine  \
0          -0.006054  0.000506 -0.011725     -0.005724       -0.010652   
1           0.130309  0.068734 -0.032970      0.040210        0.007962   
2          -0.053154  0.063960  0.027820      0.110401        0.029102   
3           0.294405  0.209754  0.002412     -0.132192       -0.074386   
4           0.216226  0.015498 -0.169008      0.054968       -0.137887   

   beclomethasone      SNAP   AZD3463  amflutizole  pipotiazine  ...  \
0        0.018029 -0.000721  0.004899    -0.005324    -0.004234  ...   
1       -0.000337 -0.045450  0.119616     0.140661     0.008128  ...   
2        0.049825 -0.025751 -0.003509    -0.031793    -0.139935  ...   
3       -0.135147 -0.039456 -0.049063     0.168349    -0.136895  ...   
4       -0.007779  0.167150  0.142278     0.109497    -0.211754  ...   

   SUN-B-8155   AZD9668  LY310762   MK-1775  LY2603618   TP-0903  ibrolipim  \
0   -0.002826 -0.003448  0.010602 -0.02

In [3]:
X = dataset.iloc[:, 10].values.reshape(-1, 1)  # Features
Y = dataset.iloc[:, 8].values  # Labels
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, random_state=0, test_size=0.2)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


Y_train_categorical = pd.cut(Y_train, bins=5, labels=False)
Y_test_categorical = pd.cut(Y_test, bins=5, labels=False)

# k-NN classifier
classifier = KNeighborsClassifier(n_neighbors=15, p=2, metric='euclidean')
classifier.fit(X_train_scaled, Y_train_categorical)

y_pred_categorical = classifier.predict(X_test_scaled)

In [4]:
conf = confusion_matrix(Y_test_categorical, y_pred_categorical)
print('confusion matrix', conf)

confusion matrix [[ 0  1  7  4  0]
 [ 0  2 18  4  0]
 [ 0  0 12  3  0]
 [ 0  1  3  2  0]
 [ 0  0  3  0  0]]


In [5]:
f1 = f1_score(Y_test_categorical, y_pred_categorical, average='weighted') 
print("F1 Score:", f1)

F1 Score: 0.18164376458387346


In [6]:
accuracy = accuracy_score(Y_test_categorical, y_pred_categorical)
print("Accuracy Score:", accuracy)

Accuracy Score: 0.26666666666666666
