# Sleep Data Analysis

This notebook explores the cleaned sleep dataset: it visualizes feature distributions and examines relationships between features.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'whitegrid' theme to the plots drawn below.
# set_theme() is the preferred interface; sns.set() is only an alias for it.
sns.set_theme(style='whitegrid')

In [2]:
# Location of the cleaned CSV (relative path, resolved against the working directory)
data_path = '../data/processed/sleep_cleaned.csv'
df = pd.read_csv(filepath_or_buffer=data_path)

# Preview the top of the dataframe (5 rows, which is also head()'s default)
df.head(n=5)

In [3]:
# Descriptive statistics of the dataset, as produced by DataFrame.describe()
summary_stats = df.describe()
summary_stats

In [4]:
# Histogram of Sleep Hours (10 bins) with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Sleep_Hours'], bins=10, kde=True, ax=ax)
ax.set_title('Distribution of Sleep Hours')
ax.set_xlabel('Sleep Hours')
ax.set_ylabel('Frequency')
plt.show()

In [5]:
# Box plot of Sleep Quality grouped by Stress Level
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=df, x='Stress_Level', y='Sleep_Quality', ax=ax)
ax.set_title('Sleep Quality vs Stress Level')
ax.set_xlabel('Stress Level')
ax.set_ylabel('Sleep Quality')
plt.show()

In [6]:
# Correlation heatmap
# Restrict to numeric/boolean columns before correlating: since pandas 2.0,
# DataFrame.corr() no longer drops non-numeric columns by default and raises
# a ValueError if the frame contains any (e.g. string/categorical columns).
plt.figure(figsize=(12, 8))
correlation = df.select_dtypes(include=['number', 'bool']).corr()
# annot=True prints each coefficient in its cell, formatted to two decimals
sns.heatmap(correlation, annot=True, cmap='coolwarm', fmt='.2f')
plt.title('Correlation Heatmap')
plt.show()