In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
import tensorflow as tf

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn import metrics

import warnings
warnings.filterwarnings('ignore')

ModuleNotFoundError: No module named 'tensorflow'

In [10]:
# Load the pre-processed peripartum dataset (path is relative to this notebook).
peripartum_df = pd.read_csv(
    filepath_or_buffer="../Dataset/peripartum_processed.csv",
)

In [11]:
# Separate the label column (RiskLevel) from the predictor columns.
y = peripartum_df['RiskLevel']
x = peripartum_df.drop(columns=['RiskLevel'])

# Hold out 25% of the rows for testing. The split is stratified on the label
# and seeded so the same partition is produced on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    stratify=y,
    random_state=5,
)

### Default model configuration

In [12]:
# Baseline: decision tree with default hyper-parameters, scored on the hold-out split.
# random_state is pinned (same seed as the train/test split above) because the
# tree's split search is randomized; without it the report below changes on
# every Restart & Run All.
model = DecisionTreeClassifier(random_state=5)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.79      0.83       102
           1       0.73      0.82      0.78        84
           2       0.88      0.87      0.87        68

    accuracy                           0.82       254
   macro avg       0.83      0.83      0.83       254
weighted avg       0.83      0.82      0.82       254



In [13]:
# Baseline: XGBoost classifier left at its library defaults, scored on the hold-out split.
model = XGBClassifier().fit(x_train, y_train)
y_pred = model.predict(x_test)
print(
    metrics.classification_report(y_true=y_test, y_pred=y_pred)
)

              precision    recall  f1-score   support

           0       0.88      0.83      0.85       102
           1       0.77      0.81      0.79        84
           2       0.86      0.87      0.86        68

    accuracy                           0.83       254
   macro avg       0.83      0.84      0.84       254
weighted avg       0.84      0.83      0.84       254



In [14]:
# Baseline: random forest with default hyper-parameters, scored on the hold-out split.
# random_state is pinned (same seed as the train/test split above) because the
# forest bootstraps rows and samples features at random; without it the report
# below changes on every Restart & Run All.
model = RandomForestClassifier(random_state=5)
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.91      0.83      0.87       102
           1       0.78      0.86      0.82        84
           2       0.88      0.90      0.89        68

    accuracy                           0.86       254
   macro avg       0.86      0.86      0.86       254
weighted avg       0.86      0.86      0.86       254



### Parameter tuning and cross validation

In [None]:
# Full dataset for cross-validation: label column first, then every other column as a predictor.
target = peripartum_df['RiskLevel']
features = peripartum_df.drop(columns=['RiskLevel'])

In [None]:
# Stratified 5-fold cross-validation of an XGBoost classifier with hand-picked
# hyper-parameters. For every fold this prints the validation accuracy and
# draws the confusion matrix.
skf = StratifiedKFold(n_splits=5)

for fold, (train_index, val_index) in enumerate(
    skf.split(features, target.astype("category")), start=1
):
    # skf.split yields positional indices, so rows are selected with .iloc on
    # both the features and the target (plain target[...] would do a
    # label-based lookup and only works on a default RangeIndex).
    X_train, X_val = features.iloc[train_index], features.iloc[val_index]
    # Named y_train_fold so the hold-out `y_train` produced by
    # train_test_split earlier in the notebook is not overwritten.
    y_train_fold, y_val = target.iloc[train_index], target.iloc[val_index]

    # Fit the scaler on the training fold only, then apply it to the
    # validation fold, so no validation statistics leak into training.
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    model = XGBClassifier(
        n_estimators=100,
        max_depth=7,
        eta=0.1,
        subsample=0.3,
        colsample_bytree=0.4,
        objective="multi:softmax",
        verbosity=0,
        use_label_encoder=False,
    )
    model.fit(X_train_scaled, y_train_fold)

    # Predicted class = column with the highest probability. This assumes the
    # labels are the integers 0..k-1, as in the classification reports above.
    y_prob = model.predict_proba(X_val_scaled)
    y_val_hat = np.argmax(y_prob, axis=1)
    y_val_true = np.asarray(y_val, dtype=int)

    print(f"Fold {fold}")
    print("Accuracy:")
    # accuracy_score compares label vectors directly; one-hot encoding via
    # TensorFlow is unnecessary and TensorFlow is not installed here.
    print(metrics.accuracy_score(y_val_true, y_val_hat))
    print("Confusion matrix:")
    fig, ax = plt.subplots()
    sns.heatmap(
        metrics.confusion_matrix(y_val_true, y_val_hat),
        annot=True,
        fmt="d",  # integer counts instead of the default '.2g' formatting
        ax=ax,
    )
    ax.set(
        title=f"Confusion matrix - fold {fold}",
        xlabel="Predicted label",
        ylabel="True label",
    )
    plt.show()