# Sleep and Step Data Analysis

This notebook consolidates the data processing and visualization steps for analyzing the relationship between sleep duration and daily step counts.

### Dual Y-Axis Time Series Chart

In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# Load the merged dataset
file_path = 'merged_sleep_step_data.csv'  # Replace with your file name
data = pd.read_csv(file_path)

# Convert the date column to datetime and order the rows chronologically.
# Line plots connect points in row order, so unsorted dates would make the
# lines zig-zag back and forth across the x-axis.
data['date'] = pd.to_datetime(data['date'])
data = data.sort_values('date')

# One figure with two y-axes sharing the same date axis
fig, ax1 = plt.subplots(figsize=(12, 6))

# Left y-axis: sleep duration
ax1.set_xlabel('Date')
ax1.set_ylabel('Sleep Duration (hours)', color='blue')
ax1.plot(data['date'], data['duration_hours'], label='Sleep Duration (hours)', color='blue')
ax1.tick_params(axis='y', labelcolor='blue')

# Right y-axis: step count, drawn on a twin of ax1
ax2 = ax1.twinx()
ax2.set_ylabel('Step Count', color='green')
ax2.plot(data['date'], data['steps'], label='Step Count', color='green', alpha=0.7)
ax2.tick_params(axis='y', labelcolor='green')

# Title for the whole chart
ax1.set_title('Time Series of Sleep Duration and Step Count')

# The two lines live on different axes, so gather the handles from both and
# draw a single combined legend (the labels were set but never displayed).
sleep_handles, sleep_labels = ax1.get_legend_handles_labels()
step_handles, step_labels = ax2.get_legend_handles_labels()
ax2.legend(sleep_handles + step_handles, sleep_labels + step_labels, loc='upper left')

# Turn the grid on explicitly: a bare grid() call only toggles visibility,
# so its effect would depend on whichever plotting style is active.
ax2.grid(True)

# Lay out last, once every label, title and legend exists
fig.tight_layout()
plt.show()


![Dual Y-Axis Time Series Chart](assets/Dual_Y-Axis_Time_Series_Chart.png)

### Hexbin Plot

In [None]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the merged sleep/step table
file_path = 'merged_sleep_step_data.csv'  # Replace with your file name
data = pd.read_csv(file_path)

# Density of (sleep duration, steps) pairs; hexagons with no points stay blank (mincnt=1)
fig, ax = plt.subplots(figsize=(12, 6))
hex_cells = ax.hexbin(
    data['duration_hours'],
    data['steps'],
    gridsize=30,
    cmap='Blues',
    mincnt=1,
)
fig.colorbar(hex_cells, ax=ax, label='Count')
ax.set_title('Hexbin Plot of Sleep Duration vs Step Count', fontsize=14)
ax.set_xlabel('Sleep Duration (hours)', fontsize=12)
ax.set_ylabel('Step Count', fontsize=12)
plt.show()


![Hexbin Plot](assets/Hexbin_Plot.png)

### Histogram

In [None]:

import pandas as pd
import matplotlib.pyplot as plt

# Read the merged sleep/step table
file_path = 'merged_sleep_step_data.csv'  # Replace with your file name
data = pd.read_csv(file_path)

# Two side-by-side histograms: sleep duration on the left, step count on the right
fig, (ax_sleep, ax_steps) = plt.subplots(1, 2, figsize=(12, 6))

# (axes, column, bar colour, title, x-axis label) for each panel
panels = [
    (ax_sleep, 'duration_hours', 'skyblue', 'Histogram of Sleep Duration', 'Hours'),
    (ax_steps, 'steps', 'lightgreen', 'Histogram of Step Count', 'Steps'),
]
for ax, column, bar_color, title, x_label in panels:
    ax.hist(data[column], bins=20, color=bar_color, edgecolor='black', alpha=0.7)
    ax.set_title(title)
    ax.set_xlabel(x_label)
    ax.set_ylabel('Frequency')

# Avoid overlapping labels between the two panels, then render
fig.tight_layout()
plt.show()


![Histogram](assets/Histogram_Separately.png)

### Joint Plot

In [None]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Read the merged sleep/step table
file_path = 'merged_sleep_step_data.csv'  # Replace with your file name
data = pd.read_csv(file_path)

# Seaborn look for this figure.
# NOTE: sns.set changes global plotting defaults, so it also affects any
# figure drawn afterwards in the same kernel session.
sns.set(style="whitegrid", palette="muted")

# Scatter of sleep vs steps with a histogram of each variable on the margins
joint_plot = sns.jointplot(
    data=data,
    x='duration_hours',
    y='steps',
    kind='scatter',
    color='blue',
    marginal_kws={'bins': 30, 'fill': True},
)

# Human-readable axis labels
joint_plot.set_axis_labels("Sleep Duration (hours)", "Step Count", fontsize=12)

# Leave headroom above the marginal plot, then place the title in it
joint_plot.fig.subplots_adjust(top=0.9)
joint_plot.fig.suptitle("Joint Plot of Sleep Duration vs Step Count", fontsize=14)

plt.show()


![Joint Plot](assets/Joint_Plot.png)

### Violin Plot

In [None]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the CSV file
file_path = 'merged_sleep_step_data.csv'  # Replace with your file name
data = pd.read_csv(file_path)

# Two side-by-side panels: sleep duration on the left, step count on the right
fig, (ax_sleep, ax_steps) = plt.subplots(1, 2, figsize=(12, 6))

# Violin plot for Sleep Duration
sns.violinplot(y=data['duration_hours'], color='skyblue', ax=ax_sleep)
ax_sleep.set_title('Violin Plot of Sleep Duration')
ax_sleep.set_ylabel('Sleep Duration (hours)')
# Enable the grid explicitly. A bare grid() call toggles visibility, so when
# a grid-enabled style (e.g. seaborn "whitegrid") is already active it would
# switch the grid OFF instead of on.
ax_sleep.grid(True)

# Violin plot for Step Count
sns.violinplot(y=data['steps'], color='lightgreen', ax=ax_steps)
ax_steps.set_title('Violin Plot of Step Count')
ax_steps.set_ylabel('Step Count')
ax_steps.grid(True)

# Display the plots
fig.tight_layout()
plt.show()


![Violin Plot](assets/Violin_Plot_Separately.png)

### Correlation Matrix

In [None]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the CSV file
file_path = 'merged_sleep_step_data.csv'  # Replace with your file name
data = pd.read_csv(file_path)

# Pairwise correlation between sleep duration and step count
# (DataFrame.corr defaults to the Pearson coefficient)
correlation_matrix = data[['duration_hours', 'steps']].corr()

# Plot the correlation matrix as a heatmap.
# Pin the colour scale to the full correlation range [-1, 1]. Without this,
# the diverging 'coolwarm' map is stretched over just the values present, so
# in a 2x2 matrix the off-diagonal cell is always painted at the extreme
# "cold" end no matter how weak or strong the correlation actually is.
plt.figure(figsize=(8, 6))
sns.heatmap(
    correlation_matrix,
    annot=True,
    cmap='coolwarm',
    fmt=".2f",
    vmin=-1,
    vmax=1,
)
plt.title('Correlation Matrix')
plt.show()


![Correlation Matrix](assets/Corelation_Matrix.png)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Read the merged sleep/step table
data = pd.read_csv('merged_sleep_step_data.csv')  # Replace with your file name

# Two side-by-side boxplots: sleep duration on the left, step count on the right
fig, (ax_sleep, ax_steps) = plt.subplots(1, 2, figsize=(10, 6))

# patch_artist=True draws filled boxes so a face colour can be applied
ax_sleep.boxplot(
    data['duration_hours'],
    patch_artist=True,
    boxprops={'facecolor': "skyblue"},
)
ax_sleep.set_title('Boxplot of Sleep Duration')
ax_sleep.set_ylabel('Hours')

ax_steps.boxplot(
    data['steps'],
    patch_artist=True,
    boxprops={'facecolor': "lightgreen"},
)
ax_steps.set_title('Boxplot of Step Count')
ax_steps.set_ylabel('Steps')

fig.tight_layout()
plt.show()


![Boxplot](assets/Boxplot_Separately.png)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the merged dataset
file_path = 'merged_sleep_step_data.csv'  # Replace with your file name
data = pd.read_csv(file_path)

# Convert the date column to datetime format
# (not used by the scatter itself; kept so `data` has the same dtypes as before)
data['date'] = pd.to_datetime(data['date'])

# One point per row: sleep duration on x, step count on y
fig, ax = plt.subplots(figsize=(8, 6))
ax.scatter(data['duration_hours'], data['steps'], alpha=0.6, color='purple')
ax.set_title('Correlation between Sleep Duration and Step Count')
ax.set_xlabel('Sleep Duration (hours)')
ax.set_ylabel('Step Count')
# Enable the grid explicitly. A bare grid() call toggles visibility, so when
# a grid-enabled style (e.g. seaborn "whitegrid") is already active it would
# switch the grid OFF instead of on.
ax.grid(True)
plt.show()


![Scatter Plot](assets/Scatter_Plot.png)