### Import neccessary libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report,confusion_matrix,roc_auc_score,accuracy_score,f1_score,recall_score,precision_score
from imblearn.over_sampling import SMOTE
from collections import Counter

### Data preprocessing

In [2]:
df = pd.read_csv('anc dataset.csv')
dummies = pd.get_dummies(df.Presentation, prefix='Presentaion')
dfConcat = pd.concat([df,dummies],axis='columns')
dfFinal = dfConcat.drop(['Presentation'],axis='columns')

### Data spliting

In [3]:
X = dfFinal.drop('Complication',axis='columns')
y = dfFinal.Complication
X_train,X_test, y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=10, stratify=y)

### Model fitting and predicting

In [4]:
model = KNeighborsClassifier(n_neighbors=5)
model.fit(X_train,y_train)
y_predict = model.predict(X_test)

### Model performance measuring

In [5]:
print(classification_report(y_test, y_predict))
print(confusion_matrix(y_test, y_predict))

print("Accuracy: %.2f%%" % (accuracy_score(y_test,y_predict)*100))
print("AUC: %.2f%%" % (roc_auc_score(y_test,y_predict)*100))
print("Precision: %.2f%%" % (precision_score(y_test,y_predict)*100))
print("F1-Score: %.2f%%" % (f1_score(y_test, y_predict)*100))
print("Recall: %.2f%%" % (recall_score(y_test, y_predict)*100))

              precision    recall  f1-score   support

           0       0.90      0.94      0.92       504
           1       0.61      0.45      0.51        96

    accuracy                           0.86       600
   macro avg       0.75      0.70      0.72       600
weighted avg       0.85      0.86      0.86       600

[[476  28]
 [ 53  43]]
Accuracy: 86.50%
AUC: 69.62%
Precision: 60.56%
F1-Score: 51.50%
Recall: 44.79%


### Balancing training dataset via Synthetic Minority Oversampling Technique (SMOT)

In [6]:
smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_sample(X_train,y_train)

print("Before SMOTE :" , Counter(y_train))
print("After SMOTE :" , Counter(y_train_smote))

Before SMOTE : Counter({0: 1176, 1: 224})
After SMOTE : Counter({0: 1176, 1: 1176})


### Model fitting and predicting after applying SMOT

In [7]:
model.fit(X_train_smote,y_train_smote)
y_predict = model.predict(X_test)

### Model performance measuring after applying SMOT

In [8]:
print(classification_report(y_test,y_predict))
print(confusion_matrix(y_test, y_predict))
print(accuracy_score(y_test,y_predict))

print("Accuracy: %.2f%%" % (accuracy_score(y_test,y_predict)*100))
print("AUC: %.2f%%" % (roc_auc_score(y_test,y_predict)*100))
print("Precision: %.2f%%" % (precision_score(y_test,y_predict)*100))
print("F1-Score: %.2f%%" % (f1_score(y_test, y_predict)*100))
print("Recall: %.2f%%" % (recall_score(y_test, y_predict)*100))

              precision    recall  f1-score   support

           0       0.97      0.87      0.92       504
           1       0.55      0.86      0.67        96

    accuracy                           0.86       600
   macro avg       0.76      0.86      0.79       600
weighted avg       0.90      0.86      0.88       600

[[436  68]
 [ 13  83]]
0.865
Accuracy: 86.50%
AUC: 86.48%
Precision: 54.97%
F1-Score: 67.21%
Recall: 86.46%
