<h1 style="color:blue">Fetal Health EDA 📊 + Classification 🔬</h1>

<h3 style="color:red"><i>If you like this notebook, don't forget to give it a vote!</i></h3>
<hr>

In [None]:
! pip install -q dabl

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import plotly.express as px
import plotly.graph_objs as go
import plotly.figure_factory as ff

import dabl
from pandas_profiling import ProfileReport

from colorama import Fore, Style

In [None]:
def cout(string: str, color=Fore.RED):
    """
    Print ``string`` in the given colorama colour.

    The colour code is placed before the text and ``Style.RESET_ALL`` after
    it, so anything printed afterwards is unaffected.

    Parameters
    ----------
    string : str
        Text to print.
    color : str, optional
        Colour escape code placed in front of the text (default ``Fore.RED``).
    """
    painted = "".join([color, string, Style.RESET_ALL])
    print(painted)
    
def statistics(dataframe, column):
    """
    Print summary statistics of one column, each line in its own colour.

    Reports the mean, maximum, minimum and the 0.25 / 0.50 / 0.75 quantiles
    of ``dataframe[column]`` through ``cout``.

    Parameters
    ----------
    dataframe : indexable by column name
        Object whose ``dataframe[column]`` provides ``mean``, ``max``, ``min``
        and ``quantile``.
    column : str
        Name of the column to summarise.
    """
    # Look the column up once and reuse it for every statistic.
    values = dataframe[column]

    cout(f"The Average value in {column} is: {values.mean():.4f}", Fore.RED)
    cout(f"The Maximum value in {column} is: {values.max()}", Fore.BLUE)
    cout(f"The Minimum value in {column} is: {values.min()}", Fore.YELLOW)
    cout(f"The 25th Quantile of {column} is: {values.quantile(0.25):.4f}", Fore.GREEN)
    cout(f"The 50th Quantile of {column} is: {values.quantile(0.50):.4f}", Fore.CYAN)
    cout(f"The 75th Quantile of {column} is: {values.quantile(0.75):.4f}", Fore.MAGENTA)

In [None]:
# Read the fetal-health CSV into a DataFrame and preview its first rows.
csv_path = "../input/fetal-health-classification/fetal_health.csv"
data = pd.read_csv(csv_path)
data.head()

<h2 style="color:aqua">Exploratory Data Analysis 💻</h2>

Let's start with an exploratory analysis of the dataset.
<hr>

<h3 style="color:seagreen">1. Dataset Profile Report</h3>

In [None]:
# Build the profiling report object for the whole DataFrame; it is rendered in the next cell.
pr = ProfileReport(data)

In [None]:
# Display the profiling report inline in the notebook output.
pr.to_notebook_iframe()

<h3 style="color:seagreen">1.1. DABL Plot</h3>

In [None]:
# Let dabl produce its automatic overview plots, with 'fetal_health' as the target column.
dabl.plot(data, target_col='fetal_health')

<h3 style="color:seagreen">2. Baseline Fetal Heartrate</h3>

In [None]:
# Summary statistics for the baseline value column.
statistics(dataframe=data, column='baseline value')

In [None]:
# Distribution of the baseline value column, drawn twice: a static seaborn
# figure and an interactive plotly figure.
baseline = data['baseline value']

# NOTE: sns.distplot is deprecated since seaborn 0.11; when the environment is
# upgraded, sns.histplot(..., kde=True, stat="density") is the replacement.
plt.style.use("classic")
ax = sns.distplot(baseline)
ax.set_title(f"Baseline Fetal Heartrate [\u03BC : {baseline.mean():.2f} bpm | \u03C3 : {baseline.std():.2f} bpm]")
ax.set_xlabel("Heart Rate (in bpm)")
# distplot normalises the histogram whenever the KDE is drawn, so the y-axis
# shows a density rather than a raw count.
ax.set_ylabel("Density")
plt.show()

# Also the plotly figure (KDE curve only: histogram and rug are switched off)
fig = ff.create_distplot(
    hist_data=[baseline.tolist()],
    group_labels=['baseline value'],
    colors=['#0B43EA'],
    show_hist=False,
    show_rug=False,
)
fig.update_layout(title_text="Baseline Fetal Heartrate")
fig.show()

<h3 style="color:seagreen">3. Accelerations</h3>

In [None]:
# Summary statistics for the accelerations column.
statistics(dataframe=data, column='accelerations')

In [None]:
# Distribution of the accelerations column, drawn twice: a static seaborn
# figure and an interactive plotly figure.
accelerations = data['accelerations']

# NOTE: sns.distplot is deprecated since seaborn 0.11; when the environment is
# upgraded, sns.histplot(..., kde=True, stat="density") is the replacement.
plt.style.use("classic")
ax = sns.distplot(accelerations, color='magenta')
ax.set_title("Accelerations Per Second")
ax.set_xlabel("Accelerations")
# distplot normalises the histogram whenever the KDE is drawn, so the y-axis
# shows a density rather than a raw count.
ax.set_ylabel("Density")
plt.show()

# Also the plotly figure (KDE curve only: histogram and rug are switched off)
fig = ff.create_distplot(
    hist_data=[accelerations.tolist()],
    group_labels=['accelerations'],
    colors=['#E00DAB'],
    show_hist=False,
    show_rug=False,
)
fig.update_layout(title_text="Accelerations Per Second")
fig.show()

<h3 style="color:seagreen">4. Fetal Movement</h3>

In [None]:
# Summary statistics for the fetal_movement column.
statistics(dataframe=data, column='fetal_movement')

In [None]:
# Distribution of the fetal_movement column, drawn twice: a static seaborn
# figure and an interactive plotly figure.
fetal_movement = data['fetal_movement']

# NOTE: sns.distplot is deprecated since seaborn 0.11; when the environment is
# upgraded, sns.histplot(..., kde=True, stat="density") is the replacement.
plt.style.use("classic")
ax = sns.distplot(fetal_movement, color='green')
ax.set_title("Fetal Movement")
ax.set_xlabel("Movements per second")
# distplot normalises the histogram whenever the KDE is drawn, so the y-axis
# shows a density rather than a raw count.
ax.set_ylabel("Density")
plt.show()

# Also the plotly figure (KDE curve only: histogram and rug are switched off)
fig = ff.create_distplot(
    hist_data=[fetal_movement.tolist()],
    group_labels=['fetal_movement'],
    colors=['#0BE047'],
    show_hist=False,
    show_rug=False,
)
fig.update_layout(title_text="Movements Per Second")
fig.show()

<h3 style="color:seagreen">5. Uterine Contractions</h3>

In [None]:
# Summary statistics for the uterine_contractions column.
statistics(dataframe=data, column='uterine_contractions')

In [None]:
# Distribution of the uterine_contractions column, drawn twice: a static
# seaborn figure and an interactive plotly figure.
contractions = data['uterine_contractions']

# NOTE: sns.distplot is deprecated since seaborn 0.11; when the environment is
# upgraded, sns.histplot(..., kde=True, stat="density") is the replacement.
plt.style.use("classic")
ax = sns.distplot(contractions, color='red')
ax.set_title("Uterine Contractions")
ax.set_xlabel("Contractions per second")
# distplot normalises the histogram whenever the KDE is drawn, so the y-axis
# shows a density rather than a raw count.
ax.set_ylabel("Density")
plt.show()

# Also the plotly figure (KDE curve only: histogram and rug are switched off)
fig = ff.create_distplot(
    hist_data=[contractions.tolist()],
    group_labels=['uterine_contractions'],
    colors=['#FF001D'],
    show_hist=False,
    show_rug=False,
)
fig.update_layout(title_text="Uterine Contractions Per Second")
fig.show()

<h3 style="color:seagreen">6. Abnormal Short-term Variability</h3>

In [None]:
# Summary statistics for the abnormal_short_term_variability column.
statistics(dataframe=data, column='abnormal_short_term_variability')

In [None]:
# Distribution of the abnormal_short_term_variability column, drawn twice: a
# static seaborn figure and an interactive plotly figure.
abnormal_stv = data['abnormal_short_term_variability']

# NOTE: sns.distplot is deprecated since seaborn 0.11; when the environment is
# upgraded, sns.histplot(..., kde=True, stat="density") is the replacement.
plt.style.use("classic")
ax = sns.distplot(abnormal_stv, color='orange')
ax.set_title("Percentage of Time with Abnormal Short Term Variability")
ax.set_xlabel("Percentage of Time")
# distplot normalises the histogram whenever the KDE is drawn, so the y-axis
# shows a density rather than a raw count.
ax.set_ylabel("Density")
plt.show()

# Also the plotly figure (KDE curve only: histogram and rug are switched off)
fig = ff.create_distplot(
    hist_data=[abnormal_stv.tolist()],
    group_labels=['abnormal_short_term_variability'],
    colors=['#FFB600'],
    show_hist=False,
    show_rug=False,
)
fig.update_layout(title_text="Percentage of Time with Abnormal Short Term Variability")
fig.show()

<h3 style="color:seagreen">7. Fetal Health</h3>

In [None]:
# Summary statistics for the fetal_health target column (computed on its numeric class codes).
statistics(dataframe=data, column="fetal_health")

In [None]:
# Bar chart of how many rows fall into each fetal_health class.
# value_counts() is evaluated once; its index holds the class codes and its
# values hold the matching row counts, both in the same order.
class_counts = data['fetal_health'].value_counts()
names = class_counts.index.tolist()
values = class_counts.tolist()

bar_trace = go.Bar(
    x=names,
    y=values,
    marker=dict(
        color='rgba(0, 255, 0, 0.5)',
        line=dict(color='rgb(0,0,50)', width=1.5),
    ),
    text=names,  # each bar is annotated with its class code
)

layout = go.Layout()
fig = go.Figure(data=bar_trace, layout=layout)
fig.update_layout(title_text='Fetal Health (Target Variable)')
fig.show()

In [None]:
# Pie chart of the target classes. The mapping pairs each numeric class code
# with the label shown in the chart; dict order keeps counts and labels aligned.
class_labels = {1.0: 'Normal', 2.0: 'Suspect', 3.0: 'Pathological'}
idx = list(class_labels.values())
# Count the rows of each class by summing a boolean mask.
vals = [int((data['fetal_health'] == code).sum()) for code in class_labels]

fig = px.pie(
    values=vals,
    names=idx,
    title='Fetal Health Pie Chart (Target Variable)',
    color_discrete_sequence=px.colors.sequential.Agsunset,
)
fig.show()

<h2 style="color:aqua">Classification 🎮</h2>

<h3 style="color:green">[ADDING SOON...]</h3>

<hr>