# Sleep health and lifestyle - Insomnia Symptomatology in Adolescents

The data come from the Kaggle [Sleep Health and Lifestyle Dataset](https://www.kaggle.com/datasets/uom190346a/sleep-health-and-lifestyle-dataset/code?datasetId=3321433&sortBy=voteCount).

In [None]:
# Importing libraries
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns
import plotly.express as px
%matplotlib inline

In [None]:
# Notebook display tweaks: show every column of a DataFrame and allow up to
# 400 characters per cell before pandas truncates the text.
for option_name, option_value in (('display.max_columns', None), ('max_colwidth', 400)):
    pd.set_option(option_name, option_value)

# Exploratory Data Analysis (EDA)

In [None]:
# Read the raw CSV (expected in the working directory) into the DataFrame
# that every later cell works on.
df = pd.read_csv(filepath_or_buffer="Sleep_health_and_lifestyle_dataset.csv")

In [None]:
# Print a structural summary of the DataFrame (column names, non-null counts,
# dtypes and memory usage) so missing values and unexpected dtypes are
# visible before any cleaning is done.
df.info()

In [None]:
# Replace missing 'Sleep Disorder' entries with the explicit label 'None' so
# they are counted as their own category instead of being dropped.
# NOTE(review): presumably the CSV stores the text "None", which read_csv
# parses as NaN by default — confirm against the raw file.
sleep_disorder = df['Sleep Disorder']
df['Sleep Disorder'] = sleep_disorder.fillna(value='None')

In [None]:
# Report how many fully identical rows exist, then keep only the first
# occurrence of each. The count is printed because an assignment on its own
# produces no cell output.
# NOTE(review): if the CSV carries a unique per-row identifier column, no two
# rows can compare equal and this step is a no-op — confirm.
duplicate_count = df.duplicated().sum()
print("Number of duplicate rows:", duplicate_count)
df = df.drop_duplicates()

## Descriptive statistics of the dataset

In [None]:
# Summary statistics of the DataFrame. As the last bare expression of the
# cell, the resulting table is rendered as the cell output.
# NOTE: with default arguments, describe() on a frame of mixed dtypes
# summarises the numeric columns only.
df.describe()

In [None]:
# List the distinct values of selected columns to check for inconsistent
# labels or unexpected entries.
for column in ('Gender', 'Occupation', 'BMI Category', 'Blood Pressure', 'Heart Rate'):
    print(f"Unique values in '{column}':", df[column].unique())

In [None]:
# Frequency tables for the columns plotted below (categorical and numeric).
# Each table has two columns: one named after the source column, holding its
# distinct values, and 'count', holding how often each value occurs.
def _value_counts_frame(column):
    """Return the value counts of ``df[column]`` as a two-column DataFrame.

    The label column is named after ``column`` and the frequency column is
    always named 'count'. Both names are set explicitly instead of relying on
    the defaults of ``value_counts().reset_index()``, which differ between
    pandas versions (NOTE(review): 'count' is the default name only from
    pandas 2.0 on — confirm the minimum supported version).
    """
    return df[column].value_counts().rename_axis(column).reset_index(name='count')


gender_count = _value_counts_frame('Gender')
age_count = _value_counts_frame('Age')
occupation_count = _value_counts_frame('Occupation')
physical_activity_level_count = _value_counts_frame('Physical Activity Level')
quality_of_sleep_count = _value_counts_frame('Quality of Sleep')
sleep_duration_count = _value_counts_frame('Sleep Duration')
stress_level_count = _value_counts_frame('Stress Level')
bmi_category_count = _value_counts_frame('BMI Category')
blood_pressure_count = _value_counts_frame('Blood Pressure')
heart_rate_count = _value_counts_frame('Heart Rate')
# Only the five most frequent 'Daily Steps' values are kept for plotting.
daily_steps_count5 = _value_counts_frame('Daily Steps').sort_values(by='count', ascending=False).head(5)
sleep_disorder_count = _value_counts_frame('Sleep Disorder')

## Data Visualization

In [None]:
# Share of each gender in the dataset, drawn as a pie chart
fig = px.pie(
    data_frame=gender_count,
    names='Gender',
    values='count',
    title='Gender Distribution',
)
fig.show()

In [None]:
# Number of records per age value
fig = px.bar(
    data_frame=age_count,
    x='Age',
    y='count',
    title='Age Distribution',
)
fig.show()

In [None]:
# Number of records per physical activity level
fig = px.bar(
    data_frame=physical_activity_level_count,
    x='Physical Activity Level',
    y='count',
    title='Physical Activity Level Distribution',
)
fig.show()

In [None]:
# Number of records per quality-of-sleep rating
fig = px.bar(
    data_frame=quality_of_sleep_count,
    x='Quality of Sleep',
    y='count',
    title='Quality of Sleep Distribution',
)
fig.show()

In [None]:
# Number of records per sleep duration value
fig = px.bar(
    data_frame=sleep_duration_count,
    x='Sleep Duration',
    y='count',
    title='Sleep Duration Distribution',
)
fig.show()

In [None]:
# Number of records per occupation
fig = px.bar(
    data_frame=occupation_count,
    x='Occupation',
    y='count',
    title='Occupation Distribution',
)
fig.show()

In [None]:
# Share of each stress level in the dataset, drawn as a pie chart
fig = px.pie(
    data_frame=stress_level_count,
    names='Stress Level',
    values='count',
    title='Stress Level Distribution',
)
fig.show()

In [None]:
# Grouped bar chart: number of records per stress level within each occupation.
# The occupation/stress-level frequency table is built here because no earlier
# cell defines it; without it the plot call fails with a NameError.
position_stress = (
    df.groupby(['Occupation', 'Stress Level'])
    .size()
    .reset_index(name='count')
)
# With `hue`, seaborn draws one bar per stress level side by side for each
# occupation (grouped, not stacked).
sns.barplot(data=position_stress, x='Occupation', y='count', hue='Stress Level', alpha=0.8, width=0.8)
plt.title('Stress Level Distribution in Each Occupation')
plt.xlabel('Occupation')
plt.ylabel('Count')
# Occupation names are long, so the tick labels are rotated to avoid overlap.
plt.xticks(rotation=90)
# Anchor the legend outside the axes so it does not cover any bars.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1))
# Render explicitly, matching the fig.show() calls used for the plotly charts.
plt.show()

In [None]:
# Share of each BMI category in the dataset, drawn as a pie chart
fig = px.pie(
    data_frame=bmi_category_count,
    names='BMI Category',
    values='count',
    title='BMI Category Distribution',
)
fig.show()

In [None]:
# Number of records per blood pressure reading
fig = px.bar(
    data_frame=blood_pressure_count,
    x="Blood Pressure",
    y="count",
    title="Blood Pressure Distribution",
)
fig.show()

In [None]:
# Number of records per heart rate value
fig = px.bar(
    data_frame=heart_rate_count,
    x="Heart Rate",
    y="count",
    title="Heart Rate Distribution",
)
fig.show()

In [None]:
# Bar chart of the five most frequent 'Daily Steps' values and how often each occurs
fig = px.bar(
    data_frame=daily_steps_count5,
    x='Daily Steps',
    y='count',
    title="Top 5 Daily Steps",
)
fig.show()

In [None]:
# Share of each sleep disorder label (including 'None'), drawn as a pie chart
fig = px.pie(
    data_frame=sleep_disorder_count,
    names='Sleep Disorder',
    values='count',
    title="Sleep Disorder Distribution",
)
fig.show()

In [None]:
# Save the cleaned dataset to a CSV file.
# index=False keeps the row index out of the file; otherwise it is written as
# an extra unnamed first column that shows up as 'Unnamed: 0' when the file is
# read back.
df.to_csv("Cleaned_Data.csv", index=False)