### Import necessary libraries

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report,confusion_matrix,mean_absolute_error,roc_auc_score
from imblearn.over_sampling import SMOTE
from collections import Counter

### Data preprocessing

In [2]:
# Load the raw dataset from the working directory.
df = pd.read_csv('anc dataset.csv')

# One-hot encode the categorical 'Presentation' column.
# NOTE: the prefix spelling ('Presentaion') is kept as-is because it becomes
# part of the generated column names.
dummies = pd.get_dummies(df['Presentation'], prefix='Presentaion')

# Append the indicator columns, then remove the original categorical column
# so that only the encoded representation remains.
dfConcat = pd.concat([df, dummies], axis=1)
dfFinal = dfConcat.drop(columns=['Presentation'])

### Data splitting

In [3]:
# The target is the 'Complication' column; every remaining column is a feature.
y = dfFinal['Complication']
X = dfFinal.drop(columns=['Complication'])

# 70/30 train/test split with a fixed seed, stratified on the target so both
# splits keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=10,
)

### Model fitting and predicting

In [4]:
# Baseline classifier trained on the original training split.
# fit() returns the estimator itself, so construction and fitting are chained.
model = LogisticRegression(max_iter=1000, solver='lbfgs').fit(X_train, y_train)

# Hard class labels predicted for the held-out test split.
y_predict = model.predict(X_test)

### Model performance measuring

In [6]:
# Baseline evaluation on the held-out split.
# The full classification report is left disabled:
#print(classification_report(y_test, y_predict))

# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_true=y_test, y_pred=y_predict))

# With 0/1 labels the mean absolute error equals the misclassification rate.
mae = mean_absolute_error(y_true=y_test, y_pred=y_predict)
print('MAE: %.3f' % mae)

# NOTE(review): the AUC is computed from hard class labels (y_predict), not
# from probability scores; consider model.predict_proba(X_test)[:, 1] if a
# threshold-independent AUC is wanted.
print(roc_auc_score(y_true=y_test, y_score=y_predict))

[[496   8]
 [ 82  14]]
MAE: 0.150
0.5649801587301587


In [7]:
# Disabled sketch: plot the fitted logistic-regression coefficients per feature.
# NOTE(review): for a binary target, model.coef_ is 2-D with a single row, so
# zip(X.columns, list(model.coef_)) would pair only the first column with the
# whole coefficient row. Use model.coef_[0] if this cell is ever re-enabled.
#print(model.coef_)
#fdic = dict(zip(X.columns, list(model.coef_)))
#fdic
#fdicDF = pd.DataFrame(fdic, index=[0])
#fdicDF
#fdicDF.T.plot.bar(title="Feature Importance", legend=False)

### Balancing the training dataset via Synthetic Minority Oversampling Technique (SMOTE)

In [8]:
# Oversample the minority class of the *training* split only; the test split
# is left untouched so the evaluation still reflects the real class balance.
# random_state pins the synthetic samples so a fresh "Run All" reproduces the
# same downstream metrics (the seed matches the one used for the split).
smote = SMOTE(random_state=10)

# fit_resample is the supported API; the older fit_sample alias was deprecated
# and later removed from imbalanced-learn.
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Class counts before and after resampling.
print("Before SMOTE :" , Counter(y_train))
print("After SMOTE :" , Counter(y_train_smote))

Before SMOTE : Counter({0: 1176, 1: 224})
After SMOTE : Counter({0: 1176, 1: 1176})


### Model fitting and predicting after applying SMOTE

In [9]:
# Fit the same `model` object again, this time on the SMOTE-balanced training
# data, and predict hard labels for the unchanged test split. After this cell
# `model` and `y_predict` refer to the SMOTE-trained classifier and its output.
y_predict = model.fit(X_train_smote, y_train_smote).predict(X_test)

### Model performance measuring after applying SMOTE

In [10]:
# Evaluate the SMOTE-trained model on the same held-out test split.
# Rows are true classes, columns are predicted classes.
print(confusion_matrix(y_test, y_predict))

# With 0/1 labels the mean absolute error equals the misclassification rate.
mae = mean_absolute_error(y_test, y_predict)
print('MAE: %.3f' % mae)

# roc_auc_score returns a fraction in [0, 1], not a percentage, so the value
# is printed without a '%' sign.
# NOTE(review): the AUC is computed from hard class labels (y_predict), not
# from probability scores; consider model.predict_proba(X_test)[:, 1] if a
# threshold-independent AUC is wanted.
print("ROC AUC: %.3f" % roc_auc_score(y_test, y_predict))

[[346 158]
 [ 37  59]]
MAE: 0.325
ROC AUC: 0.651%
