#### Context
Reduction of child mortality is reflected in several of the United Nations' Sustainable Development Goals and is a key indicator of human progress.
The UN expects that, by 2030, countries will end preventable deaths of newborns and children under 5 years of age, with all countries aiming to reduce under‑5 mortality to at least as low as 25 per 1,000 live births.

Parallel to the notion of child mortality is, of course, maternal mortality, which accounted for 295,000 deaths during and following pregnancy and childbirth (as of 2017). The vast majority of these deaths (94%) occurred in low-resource settings, and most could have been prevented.

In light of the above, cardiotocograms (CTGs) are a simple and affordable option to assess fetal health, allowing healthcare professionals to take action in order to prevent child and maternal mortality. The equipment itself works by sending ultrasound pulses and reading their response, thus shedding light on fetal heart rate (FHR), fetal movements, uterine contractions and more.

#### Data
This dataset contains 2126 records of features extracted from Cardiotocogram exams, which were then classified by three expert obstetricians into 3 classes:

- Normal
- Suspect
- Pathological


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the fetal-health CSV (path is relative to the notebook's working directory)
# and preview the first rows to confirm that it parsed as expected.
df = pd.read_csv('../input/fetal-health-classification/fetal_health.csv')
df.head()

In [None]:
# Size of the loaded frame as (rows, columns).
df.shape

In [None]:
# Summary statistics (central tendency and dispersion) for every column,
# transposed so that each column of the data becomes one row of the table.
df.describe().transpose()

In [None]:
# Print each column's dtype and non-null count to check types and spot missing values.
df.info() #checking the "dtype" and "missing values"

In [None]:
# Total number of missing cells across the whole frame (per-column counts summed again);
# a result of 0 means there are no missing values.
df.isna().sum().sum() #there are no missing values

### Exploratory Data Analysis

In [None]:
# Distribution of the target: bar chart of class counts (left) and pie chart of shares (right).
# NOTE(review): assumes class codes 1/2/3 mean Normal/Suspect/Pathological, following the
# order in the data description — confirm against the dataset documentation.
class_names = {1: 'Normal', 2: 'Suspect', 3: 'Pathological'}
class_colors = ['#845ec2','#ec4646','#00af91']

plt.figure(figsize=(18,5))
plt.subplot(1,2,1)
sns.countplot(x=df['fetal_health'], palette=class_colors)
plt.subplot(1,2,2)
# value_counts() sorts by frequency; sort by class code instead so that wedge order,
# colours and labels are tied to the class itself (and match the bar order on the left)
# rather than to whichever class happens to be most frequent.
class_counts = df['fetal_health'].value_counts().sort_index()
class_counts.plot(kind='pie', autopct='%.2f%%', explode=[0,0.1,0.1],
                  startangle=90, colors=class_colors,
                  labels=[class_names[int(code)] for code in class_counts.index])
plt.suptitle('Distribution of the target variable')
plt.show()

In [None]:
# Histogram of every column except the last one (the titles call these the independent
# variables; the last column is presumably the target), laid out on a 7x3 grid.
fig = plt.figure(figsize=(18,21))
for idx, col in enumerate(df.columns[:-1], start=1):
    ax = fig.add_subplot(7,3,idx)
    sns.histplot(data=df, x=col, color='#75cfb8', ax=ax)
fig.suptitle('Distribution of Independent Variables', size=16, y=1.01)
fig.tight_layout()
plt.show()

In [None]:
# Box plot of every column except the last one, on a 7x3 grid, to eyeball outliers.
fig = plt.figure(figsize=(18,21))
for idx, col in enumerate(df.columns[:-1], start=1):
    ax = fig.add_subplot(7,3,idx)
    sns.boxplot(data=df, x=col, color='#75cfb8', ax=ax)
fig.suptitle('Outliers in Independent Variables', size=16, y=1.01)
fig.tight_layout()
plt.show()

In [None]:
# Density of every column except the last one, split by the fetal_health class,
# to see which features separate the classes.
fig = plt.figure(figsize=(18,21))
for idx, col in enumerate(df.columns[:-1], start=1):
    ax = fig.add_subplot(7,3,idx)
    sns.kdeplot(data=df, x=col, hue='fetal_health', ax=ax)
fig.suptitle('Distribution of Independent Variables w.r.t. Dependent Variable', size=16, y=1.01)
fig.tight_layout()
plt.show()

In [None]:
# Pairwise correlation heatmap. The matrix is symmetric, so the upper triangle
# (including the diagonal) is masked and only the lower half is drawn.
corr_matrix = df.corr()
mask = np.triu(np.ones_like(corr_matrix, dtype=bool))
plt.figure(figsize=(15,10))
sns.heatmap(corr_matrix, mask=mask, cmap='coolwarm')
plt.show()

### Decision Tree

In [None]:
# Separate the target column from the feature matrix.
y = df['fetal_health']
# Use the explicit `columns=` keyword: passing the axis positionally (`df.drop(name, 1)`)
# was deprecated in pandas 1.3 and raises TypeError from pandas 2.0 onwards.
X = df.drop(columns='fetal_health')

In [None]:
# Fit a multinomial logit of the target on all features and print the fitted summary table.
import statsmodels.api as sm
# Append a constant column so the model has an intercept term.
Xc = sm.add_constant(X)
model = sm.MNLogit(y,Xc)
result = model.fit()
print(result.summary())

In [None]:
from sklearn.tree import DecisionTreeClassifier
# Shallow (depth-3) tree fitted on the full data set.
# random_state is fixed because scikit-learn permutes the features at every split, so ties
# between equally good splits could otherwise produce a different tree on each run.
dtree = DecisionTreeClassifier(max_depth=3, random_state=42)
dtree.fit(X,y)

In [None]:
from IPython.display import Image
from sklearn.tree import export_graphviz
import pydotplus

In [None]:
# Export the fitted tree to Graphviz DOT text, labelling split nodes with the feature names,
# then render the DOT source to PNG bytes and display them inline.
dot_data = export_graphviz(dtree, feature_names=X.columns)
graph = pydotplus.graph_from_dot_data(dot_data)
Image(graph.create_png())

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, f1_score
# Hold out 30% of the rows for testing, with a fixed seed for reproducibility.
# stratify=y keeps the class proportions the same in both parts; the classes are heavily
# imbalanced, so an unstratified split can leave the minority classes under-represented
# in one of the two sets and distort the reported metrics.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# Decision tree: fit an unconstrained tree on the training split, then report the
# confusion matrix, accuracy and weighted F1 on both the training and the test split.
# random_state is fixed so the fitted tree (and therefore the metrics) are reproducible;
# scikit-learn permutes features at each split, so ties can otherwise resolve differently.
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train,y_train)
y_train_pred = dtree.predict(X_train)

y_test_pred = dtree.predict(X_test)

print(f'Confusion_matrix - Train: \n{confusion_matrix(y_train,y_train_pred)}')
print(f'Accuracy Score - Train --> {accuracy_score(y_train,y_train_pred)}')
# Weighted F1: per-class F1 averaged with weights proportional to class support.
print(f1_score(y_train, y_train_pred, average='weighted'))
print('----------------------------------')
print(f'Confusion_matrix - Test: \n{confusion_matrix(y_test,y_test_pred)}')
print(f'Accuracy Score - Test --> {accuracy_score(y_test,y_test_pred)}')
print(f1_score(y_test, y_test_pred, average='weighted'))

In [None]:
# Number of samples per target class, most frequent first.
y.value_counts()

In [None]:
# Imbalance ratios: size of the most frequent class divided by the size of each other class.
# Computed from the data rather than from hard-coded counts, so the figures stay correct
# if the data set changes.
class_counts = y.value_counts()  # sorted by frequency, largest first
tuple(float(class_counts.iloc[0] / n) for n in class_counts.iloc[1:])

In [None]:
# Random forest: depth-limited trees with class weights inversely proportional to class
# frequency (class_weight='balanced') to counter the class imbalance. Reports the confusion
# matrix, accuracy and weighted F1 on both the training and the test split.
from sklearn.ensemble import RandomForestClassifier
# random_state is fixed because bootstrap sampling and feature sub-sampling are random;
# without it the reported metrics change on every run.
rf = RandomForestClassifier(class_weight='balanced', max_depth=10, random_state=42)
rf.fit(X_train,y_train)
y_train_pred = rf.predict(X_train)
y_test_pred = rf.predict(X_test)

print(f'Confusion_matrix - Train: \n{confusion_matrix(y_train,y_train_pred)}')
print(f'Accuracy Score - Train --> {accuracy_score(y_train,y_train_pred)}')
print(f1_score(y_train, y_train_pred, average='weighted'))
print('----------------------------------')
print(f'Confusion_matrix - Test: \n{confusion_matrix(y_test,y_test_pred)}')
print(f'Accuracy Score - Test --> {accuracy_score(y_test,y_test_pred)}')
print(f1_score(y_test, y_test_pred, average='weighted'))

In [None]:
# Extra Trees: fit on the training split, then report the confusion matrix, accuracy and
# weighted F1 on both the training and the test split.
from sklearn.ensemble import ExtraTreesClassifier
# random_state is fixed because split thresholds are drawn at random; without it the
# reported metrics change on every run.
et = ExtraTreesClassifier(random_state=42)
et.fit(X_train,y_train)
y_train_pred = et.predict(X_train)
y_test_pred = et.predict(X_test)

print(f'Confusion_matrix - Train: \n{confusion_matrix(y_train,y_train_pred)}')
print(f'Accuracy Score - Train --> {accuracy_score(y_train,y_train_pred)}')
print(f1_score(y_train, y_train_pred, average='weighted'))
print('----------------------------------')
print(f'Confusion_matrix - Test: \n{confusion_matrix(y_test,y_test_pred)}')
print(f'Accuracy Score - Test --> {accuracy_score(y_test,y_test_pred)}')
print(f1_score(y_test, y_test_pred, average='weighted'))

In [None]:
# XGBoost: fit on the training split, then report the confusion matrix, accuracy and
# weighted F1 on both the training and the test split.
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBClassifier

# Recent xgboost releases require class labels to be the integers 0..n_classes-1 and raise
# "Invalid classes inferred from unique values of `y`" otherwise. The target here does not
# start at 0, so encode it for training and map the predictions back to the original
# labels before scoring; the metrics below therefore stay in the original label space.
label_enc = LabelEncoder().fit(y_train)
xb = XGBClassifier()
xb.fit(X_train, label_enc.transform(y_train))
y_train_pred = label_enc.inverse_transform(xb.predict(X_train))
y_test_pred = label_enc.inverse_transform(xb.predict(X_test))

print(f'Confusion_matrix - Train: \n{confusion_matrix(y_train,y_train_pred)}')
print(f'Accuracy Score - Train --> {accuracy_score(y_train,y_train_pred)}')
print(f1_score(y_train, y_train_pred, average='weighted'))
print('----------------------------------')
print(f'Confusion_matrix - Test: \n{confusion_matrix(y_test,y_test_pred)}')
print(f'Accuracy Score - Test --> {accuracy_score(y_test,y_test_pred)}')
print(f1_score(y_test, y_test_pred, average='weighted'))

In [None]:
# Gradient boosting: fit on the training split, then report the confusion matrix, accuracy
# and weighted F1 on both the training and the test split.
from sklearn.ensemble import GradientBoostingClassifier
# The model must be the GradientBoostingClassifier imported above; instantiating
# ExtraTreesClassifier here would merely repeat the Extra Trees experiment under another name.
# random_state is fixed so the run is reproducible.
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train,y_train)
y_train_pred = gb.predict(X_train)
y_test_pred = gb.predict(X_test)

print(f'Confusion_matrix - Train: \n{confusion_matrix(y_train,y_train_pred)}')
print(f'Accuracy Score - Train --> {accuracy_score(y_train,y_train_pred)}')
print(f1_score(y_train, y_train_pred, average='weighted'))
print('----------------------------------')
print(f'Confusion_matrix - Test: \n{confusion_matrix(y_test,y_test_pred)}')
print(f'Accuracy Score - Test --> {accuracy_score(y_test,y_test_pred)}')
print(f1_score(y_test, y_test_pred, average='weighted'))

In [None]:
# Support vector classifier: fit on the training split, then report the confusion matrix,
# accuracy and weighted F1 on both the training and the test split.
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# SVMs are not scale-invariant: with an RBF kernel, features with large numeric ranges
# dominate the distance computation. Standardise inside a pipeline so that the scaler is
# fitted on the training split only and the same transform is applied at predict time.
svc = make_pipeline(StandardScaler(), SVC())
svc.fit(X_train,y_train)
y_train_pred = svc.predict(X_train)
y_test_pred = svc.predict(X_test)

print(f'Confusion_matrix - Train: \n{confusion_matrix(y_train,y_train_pred)}')
print(f'Accuracy Score - Train --> {accuracy_score(y_train,y_train_pred)}')
print(f1_score(y_train, y_train_pred, average='weighted'))
print('----------------------------------')
print(f'Confusion_matrix - Test: \n{confusion_matrix(y_test,y_test_pred)}')
print(f'Accuracy Score - Test --> {accuracy_score(y_test,y_test_pred)}')
print(f1_score(y_test, y_test_pred, average='weighted'))