In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the daily sleep log into a DataFrame.
# NOTE: the path is relative, so it resolves against the kernel's current
# working directory.
sleep_csv_path = "OneDrive/Desktop/Capstone_week9/sleepDay_merged.csv"
sleep_df = pd.read_csv(sleep_csv_path)

In [None]:
# Peek at the first rows (head() shows 5 rows by default).
sleep_df.head()

In [None]:
# Peek at the last rows (tail() shows 5 rows by default).
sleep_df.tail()

In [None]:
# Descriptive statistics for the frame, using pandas' default describe() settings.
sleep_df.describe()

In [None]:
# Print the frame's column names, dtypes and non-null counts.
sleep_df.info()

In [None]:
# Count missing values in each column.
sleep_df.isna().sum(axis=0)

In [None]:
# Convert the SleepDay column to datetime so the .dt accessor and
# time-based grouping work in later cells.
parsed_sleep_day = pd.to_datetime(sleep_df['SleepDay'])
sleep_df['SleepDay'] = parsed_sleep_day

In [None]:
# IdleTime: time spent in bed without being asleep
# (TotalTimeInBed minus TotalMinutesAsleep).
sleep_df['IdleTime'] = sleep_df['TotalTimeInBed'].sub(sleep_df['TotalMinutesAsleep'])

In [None]:
# SleepDuration: TotalMinutesAsleep divided by 60, i.e. minutes expressed as hours.
sleep_df['SleepDuration'] = sleep_df['TotalMinutesAsleep'].div(60)

In [None]:
# Spot-check five randomly chosen rows, including the derived columns.
sleep_df.sample(n=5)

In [None]:
# Side-by-side distribution plots of the three raw sleep metrics.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

sns.boxplot(x='TotalSleepRecords', data=sleep_df, ax=axes[0])
axes[0].set_title('Boxplot of Sleep Records')
axes[0].set_xlabel('Sleep Records')

# This panel is drawn with violinplot, so the title names it as such
# (it was previously mislabelled as a boxplot).
sns.violinplot(x='TotalMinutesAsleep', data=sleep_df, ax=axes[1])
axes[1].set_title('Violin Plot of Total Minutes of Sleep')
axes[1].set_xlabel('Total Minutes')

sns.boxplot(x='TotalTimeInBed', data=sleep_df, ax=axes[2])
axes[2].set_title('Boxplot of Total Time in Bed')
axes[2].set_xlabel('Total Minutes')

plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlations between the raw and derived sleep metrics,
# rendered as an annotated heatmap.
corr_columns = [
    'TotalSleepRecords',
    'TotalMinutesAsleep',
    'TotalTimeInBed',
    'IdleTime',
    'SleepDuration',
]
corr_df = sleep_df[corr_columns]
corr = corr_df.corr()

heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 8))
sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap='coolwarm',
    annot_kws={"size": 10},
    ax=heatmap_ax,
)
heatmap_ax.set_title('Correlation Heatmap')
plt.show()

In [None]:
# Mean SleepDuration for each Id (one value per user).
avg_sleep_duration = sleep_df.groupby('Id')['SleepDuration'].mean()

# Histogram (with KDE overlay) of the per-user averages.
hist_fig, hist_ax = plt.subplots(figsize=(10, 6))
sns.histplot(avg_sleep_duration, bins=20, kde=True, ax=hist_ax)
hist_ax.set_title('Distribution of Average Sleep Duration')
hist_ax.set_xlabel('Average Sleep Duration (hours)')
hist_ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Mean SleepDuration across all users for each calendar day.
# SleepDuration is already TotalMinutesAsleep / 60 (hours), so no further
# unit conversion is applied here; dividing by 60 again would plot
# hours/60 on an axis labelled in hours.
avg_sleep_duration_per_day = sleep_df.groupby('SleepDay')['SleepDuration'].mean()

# Plot the average sleep duration over time
plt.figure(figsize=(12, 6))
plt.plot(avg_sleep_duration_per_day.index, avg_sleep_duration_per_day.values, marker='o', linestyle='-')
plt.title('Average Sleep Duration Over Time')
plt.xlabel('Date')
plt.ylabel('Average Sleep Duration (hours)')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Label every record with the weekday name of its SleepDay.
sleep_df['DayOfWeek'] = sleep_df['SleepDay'].dt.day_name()

# Calculate sleep metrics for each (user, day of the week) pair
sleep_metrics_per_day = sleep_df.groupby(['Id', 'DayOfWeek']).agg({'TotalMinutesAsleep': 'mean', 'TotalTimeInBed': 'mean'})

# The groupby above sorts weekday names alphabetically, and matplotlib lays out
# string categories in order of first appearance. Plot against an explicit
# calendar order so the x-axis reads Monday..Sunday for every user.
weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']

# Plot sleep metrics for each day of the week
plt.figure(figsize=(12, 6))

# Loop through each user
for user_id, user_data in sleep_metrics_per_day.groupby(level=0):
    # Drop the Id level and align to the full week; weekdays without any
    # record for this user become NaN and show up as gaps in the line.
    minutes_by_weekday = user_data['TotalMinutesAsleep'].droplevel(0).reindex(weekday_order)
    # Markers keep isolated days visible when their neighbours are missing.
    plt.plot(weekday_order, minutes_by_weekday.to_numpy(), marker='o', label=f'User {user_id}')

plt.title('Average Sleep Duration by Day of the Week')
plt.xlabel('Day of the Week')
plt.ylabel('Average Sleep Duration (minutes)')
plt.legend()
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

In [None]:
# Total minutes asleep accumulated by each user over the whole log.
total_sleep_per_user = sleep_df.groupby('Id')['TotalMinutesAsleep'].sum()

# Select top N users with the highest total sleep duration
N = 5
top_users = total_sleep_per_user.nlargest(N)

# Filter sleep data for the selected users
selected_users_data = sleep_df[sleep_df['Id'].isin(top_users.index)]

# Two panels per row, sized from the number of users actually selected
# (nlargest returns fewer than N when the data has fewer users).
num_users = len(top_users)
num_rows = max(1, (num_users + 1) // 2)
# squeeze=False keeps `axes` two-dimensional even for a single row, so the
# [row, col] indexing below also works when N <= 2.
fig, axes = plt.subplots(nrows=num_rows, ncols=2, figsize=(15, 6 * num_rows), squeeze=False)

# Plot sleep data for each user (groupby iterates in ascending Id order)
for i, (user_id, user_data) in enumerate(selected_users_data.groupby('Id')):
    row = i // 2
    col = i % 2
    ax = axes[row, col]
    # Sort chronologically so the line never doubles back on itself if the
    # source rows are not already in date order.
    user_data = user_data.sort_values('SleepDay')
    ax.plot(user_data['SleepDay'], user_data['TotalMinutesAsleep'])
    ax.set_xlabel('Date')
    ax.set_ylabel('Total Minutes Asleep')
    ax.set_title(f'Sleep Duration for User {user_id}')
    ax.tick_params(axis='x', rotation=45)
    ax.grid(True)

# Hide every panel that did not receive a user (odd count, or fewer users than N)
for unused_ax in axes.ravel()[num_users:]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()