### Import neccessary libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report,confusion_matrix,roc_auc_score,accuracy_score,f1_score,recall_score,precision_score
from imblearn.over_sampling import SMOTE
from collections import Counter

### Data preprocessing

In [2]:
df = pd.read_csv('anc dataset.csv')
dummies = pd.get_dummies(df.Presentation, prefix='Presentaion')
dfConcat = pd.concat([df,dummies],axis='columns')
dfFinal = dfConcat.drop(['Presentation'],axis='columns')

### Data spliting

In [3]:
X = dfFinal.drop('Complication',axis='columns')
y = dfFinal.Complication

X_train,X_test, y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=10, stratify=y)

### Model fitting and predicting

In [4]:
model = RandomForestClassifier()
model.fit(X_train,y_train)
y_predict = model.predict(X_test)

In [5]:
# print(model.feature_importances_)
# fdic = dict(zip(X.columns, list(model.feature_importances_)))
# fdicDF = pd.DataFrame(fdic, index=[0])
# fdicDF.T.plot.bar(title="Feature Importance", legend=False)

### Model performance measuring

In [6]:
print(classification_report(y_test, y_predict))
print(confusion_matrix(y_test, y_predict))
print("Accuracy: %.2f%%" % (accuracy_score(y_test,y_predict)*100))
print("ROC AUC: %.2f%%" % (roc_auc_score(y_test,y_predict)*100))
print("Precision: %.2f%%" % (precision_score(y_test,y_predict)*100))
print("F1-Score: %.2f%%" % (f1_score(y_test, y_predict)*100))
print("Recall: %.2f%%" % (recall_score(y_test, y_predict)*100))

              precision    recall  f1-score   support

           0       0.97      0.98      0.97       504
           1       0.88      0.84      0.86        96

    accuracy                           0.96       600
   macro avg       0.93      0.91      0.92       600
weighted avg       0.96      0.96      0.96       600

[[493  11]
 [ 15  81]]
Accuracy: 95.67%
ROC AUC: 91.10%
Precision: 88.04%
F1-Score: 86.17%
Recall: 84.38%


### Balancing training dataset via Synthetic Minority Oversampling Technique (SMOT)

In [7]:
smote = SMOTE()
X_train_smote, y_train_smote = smote.fit_sample(X_train,y_train)

print("Before SMOTE :" , Counter(y_train))
print("After SMOTE :" , Counter(y_train_smote))

Before SMOTE : Counter({0: 1176, 1: 224})
After SMOTE : Counter({0: 1176, 1: 1176})


### Model fitting and predicting after applying SMOT

In [8]:
model.fit(X_train_smote,y_train_smote)
y_predict = model.predict(X_test)

### Model performance measuring after applying SMOT

In [9]:
print(classification_report(y_test, y_predict))
print(confusion_matrix(y_test, y_predict))
print("Accuracy: %.2f%%" % (accuracy_score(y_test,y_predict)*100))
print("AUC: %.2f%%" % (roc_auc_score(y_test,y_predict)*100))
print("Precision: %.2f%%" % (precision_score(y_test,y_predict)*100))
print("F1-Score: %.2f%%" % (f1_score(y_test, y_predict)*100))
print("Recall: %.2f%%" % (recall_score(y_test, y_predict)*100))

              precision    recall  f1-score   support

           0       0.99      0.96      0.98       504
           1       0.83      0.95      0.88        96

    accuracy                           0.96       600
   macro avg       0.91      0.96      0.93       600
weighted avg       0.96      0.96      0.96       600

[[485  19]
 [  5  91]]
Accuracy: 96.00%
AUC: 95.51%
Precision: 82.73%
F1-Score: 88.35%
Recall: 94.79%
