In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates

In [None]:
# Folder that holds every minute-level CSV read by this notebook. The path is
# relative, so it is resolved against the kernel's current working directory;
# edit it here (one place) when the data lives somewhere else.
DATA_DIR = "OneDrive/Desktop/Capstone_week9"


def _read_minute_csv(file_name):
    """Read the CSV called `file_name` inside DATA_DIR and return a DataFrame."""
    return pd.read_csv(f"{DATA_DIR}/{file_name}")


# The df1..df8 names are kept unchanged because later cells use them
# (the wide merge reads df2, df4 and df8).
df1 = _read_minute_csv("minuteCaloriesNarrow_merged.csv")
df2 = _read_minute_csv("minuteCaloriesWide_merged.csv")
df3 = _read_minute_csv("minuteIntensitiesNarrow_merged.csv")
df4 = _read_minute_csv("minuteIntensitiesWide_merged.csv")
df5 = _read_minute_csv("minuteMETsNarrow_merged.csv")
df6 = _read_minute_csv("minuteSleep_merged.csv")
df7 = _read_minute_csv("minuteStepsNarrow_merged.csv")
df8 = _read_minute_csv("minuteStepsWide_merged.csv")

In [None]:
# Combine the three "wide" tables (calories, intensities, steps) into a single
# frame. The join keys are named explicitly instead of relying on pd.merge's
# default of joining on every column name the two frames happen to share, so an
# unexpected shared column cannot silently change the join.
# NOTE(review): assumes 'Id' and 'ActivityHour' are present in df2, df4 and df8
# and are the only columns they share -- confirm against the CSV headers.
WIDE_JOIN_KEYS = ['Id', 'ActivityHour']

mergedWide_df = pd.merge(df2, df4, on=WIDE_JOIN_KEYS)
mergedWide_df = pd.merge(mergedWide_df, df8, on=WIDE_JOIN_KEYS)

In [None]:
# Preview the first five rows of the merged frame (five is head()'s default).
mergedWide_df.head()

In [None]:
# Preview the last five rows of the merged frame (five is tail()'s default).
mergedWide_df.tail()

In [None]:
# Print pandas' structural summary of the merged frame.
mergedWide_df.info()

In [None]:
# Boolean mask: True for every row that repeats an earlier row in full.
is_repeated_row = mergedWide_df.duplicated()

# Keep only the flagged rows so they can be inspected.
duplicate_rows = mergedWide_df[is_repeated_row]

# Header line followed by the (possibly empty) frame of duplicates.
print("Duplicate Rows:", duplicate_rows, sep="\n")

In [None]:
# Number of missing values in each column, listed from fewest to most.
missing_per_column = mergedWide_df.isna().sum()
missing_per_column.sort_values()

In [None]:
# Convert the ActivityHour column to pandas datetimes so that later cells can
# group and resample on it.
parsed_activity_hour = pd.to_datetime(mergedWide_df['ActivityHour'])
mergedWide_df['ActivityHour'] = parsed_activity_hour

In [None]:
# Sub-frame made of every column whose name contains 'Calories'.
calorie_columns = mergedWide_df.filter(like='Calories')

# One mean per calorie column, taken over all rows of the merged frame.
# NOTE(review): presumably each column is one minute-of-the-hour slot -- confirm.
average_calorie_burn_per_minute = calorie_columns.mean(axis=0)

# Header line followed by the resulting Series.
print("Average Calorie Burn per Minute:", average_calorie_burn_per_minute, sep="\n")

In [None]:
# Line chart of the per-column calorie means; x positions are the column names.
fig, ax = plt.subplots(figsize=(16, 6))
ax.plot(
    average_calorie_burn_per_minute.index,
    average_calorie_burn_per_minute.values,
    marker='o',
    linestyle='-',
)
ax.set_title('Average Calorie Burn per Minute')
ax.set_xlabel('Minute')
ax.set_ylabel('Average Calorie Burn')
ax.grid(True)
# Tilt the x tick labels so the column names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Sum every column over all rows that share the same ActivityHour value.
hourly_data = mergedWide_df.groupby('ActivityHour').sum()

# (column-name keyword, line colour); the keyword doubles as the legend label.
trend_specs = (
    ('Calories', 'blue'),
    ('Steps', 'green'),
    ('Intensity', 'orange'),
)

fig, ax = plt.subplots(figsize=(12, 8))

for keyword, line_color in trend_specs:
    # Collapse all columns containing the keyword into one total per timestamp.
    hourly_total = hourly_data.filter(like=keyword).sum(axis=1)
    ax.plot(hourly_data.index, hourly_total, label=keyword, color=line_color)

# All three series share a single y-axis.
ax.set_title('Hourly Trends of Calorie Burn, Steps, and Intensity')
ax.set_xlabel('Time')
ax.set_ylabel('Value')
ax.legend()
ax.grid(True)
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

In [None]:
# Measurement columns only. The identifier and timestamp columns are removed by
# name rather than by position, so the selection stays correct even if the
# column order of the merged frame changes (a missing name raises a KeyError
# instead of silently dropping the wrong columns).
calories_steps_intensity = mergedWide_df.drop(columns=['Id', 'ActivityHour'])

# Pairwise correlation between every pair of remaining columns
# (DataFrame.corr defaults to Pearson).
correlation_matrix = calories_steps_intensity.corr()

# Print correlation matrix
print("Correlation Matrix:")
print(correlation_matrix)

In [None]:
# Per-user summary: total calories, total steps and average intensity, plus the
# user that ranks highest on each metric and one bar chart per metric.

# Numeric measurement columns. 'Id' is numeric too, but it is the grouping key,
# not a measurement, so it is excluded to avoid summing identifiers.
numeric_columns = (
    mergedWide_df.select_dtypes(include=[np.number]).columns.drop('Id', errors='ignore')
)

# Aggregate data for each user: column-wise sums over all of the user's rows.
user_metrics = mergedWide_df.groupby('Id')[numeric_columns].sum()

# Totals across all calorie / step columns.
user_metrics['Total Calories'] = user_metrics.filter(like='Calories').sum(axis=1)
user_metrics['Total Steps'] = user_metrics.filter(like='Steps').sum(axis=1)

# Average intensity is computed from per-user column MEANS, not from the sums
# above: averaging the sums would grow with the number of rows a user logged,
# so users with more recorded rows would rank higher regardless of how intense
# their activity was.
intensity_columns = [name for name in numeric_columns if 'Intensity' in name]
user_metrics['Average Intensity'] = (
    mergedWide_df.groupby('Id')[intensity_columns].mean().mean(axis=1)
)

# Index label (user Id) of the row holding the maximum of each metric.
max_calories_user = user_metrics['Total Calories'].idxmax()
max_steps_user = user_metrics['Total Steps'].idxmax()
max_intensity_user = user_metrics['Average Intensity'].idxmax()

# One bar chart per metric on a 2x2 grid (the fourth slot stays empty).
# Each entry: (metric column, bar colour, panel title, y-axis label).
metric_panels = [
    ('Total Calories', 'blue', 'Total Calories Burned by User', 'Total Calories Burned'),
    ('Total Steps', 'green', 'Total Steps Taken by User', 'Total Steps Taken'),
    ('Average Intensity', 'orange', 'Average Intensity by User', 'Average Intensity'),
]

fig = plt.figure(figsize=(12, 8))
for position, (metric, bar_color, panel_title, y_label) in enumerate(metric_panels, start=1):
    ax = fig.add_subplot(2, 2, position)
    user_metrics[metric].plot(kind='bar', color=bar_color, ax=ax)
    ax.set_title(panel_title)
    ax.set_xlabel('User ID')
    ax.set_ylabel(y_label)

fig.tight_layout()
plt.show()

# Print users with highest metrics
print("User with highest total calories burned:", max_calories_user)
print("User with highest total steps taken:", max_steps_user)
print("User with highest average intensity:", max_intensity_user)

In [None]:
# Display the Id of the user with the largest 'Total Calories' value.
max_calories_user

In [None]:
# Rows of the user with the highest total calories, indexed by timestamp.
# The Id comes from max_calories_user instead of a hard-coded literal, so the
# selection always matches the user named in the calorie plot's title.
# 'Id' is dropped by name (not by position), and a new frame is built rather
# than mutated in place, so re-running the cell is safe.
user_data = (
    mergedWide_df.loc[mergedWide_df['Id'] == max_calories_user]
    .drop(columns='Id')
    .set_index('ActivityHour')
)

In [None]:
# Display the selected user's rows (indexed by ActivityHour).
user_data

In [None]:
# Add up all 'Calories' columns within each row, then roll the rows up into
# calendar-day totals (resample relies on the datetime index of user_data).
calories_per_row = user_data.filter(like='Calories').sum(axis=1)
total_daily_calories = calories_per_row.resample('D').sum()

In [None]:
# Display the per-day calorie totals.
total_daily_calories

In [None]:
# Daily calorie totals as a line chart, one marker per day.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    total_daily_calories.index,
    total_daily_calories.values,
    marker='o',
    linestyle='-',
)
ax.set_title(f'Total Daily Calories for User with Most Calorie Burns: {max_calories_user}')
ax.set_xlabel('Date')
ax.set_ylabel('Total Daily Calories')
# Tilt the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Daily step totals for the user with the most steps. The rows are selected by
# max_steps_user, because the plot of this series is titled with that user;
# reading them from user_data would chart a different user's steps whenever the
# top-steps user is not the user held in user_data.
steps_user_rows = (
    mergedWide_df.loc[mergedWide_df['Id'] == max_steps_user]
    .set_index('ActivityHour')
)

# Sum all 'Steps' columns within each row, then aggregate per calendar day.
total_daily_steps = steps_user_rows.filter(like='Steps').sum(axis=1)
total_daily_steps = total_daily_steps.resample('D').sum()

In [None]:
# Daily step totals as a line chart, one marker per day.
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(
    total_daily_steps.index,
    total_daily_steps.values,
    marker='o',
    linestyle='-',
    color='coral',
)
ax.set_title(f'Total Daily Steps for User with Most Steps Taken: {max_steps_user}')
ax.set_xlabel('Date')
ax.set_ylabel('Total Steps')
# Tilt the date labels so they do not overlap.
plt.setp(ax.get_xticklabels(), rotation=45)
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
# Daily intensity totals for the user with the highest average intensity. The
# rows are selected by max_intensity_user, because the plot of this series is
# titled with that user; reading them from user_data would chart a different
# user whenever the top-intensity user is not the user held in user_data.
intensity_user_rows = (
    mergedWide_df.loc[mergedWide_df['Id'] == max_intensity_user]
    .set_index('ActivityHour')
)

# Sum all 'Intensity' columns within each row, then aggregate per calendar day.
# Despite the variable name, the result is a daily SUM, not a daily maximum.
max_daily_intensity = intensity_user_rows.filter(like='Intensity').sum(axis=1)
max_daily_intensity = max_daily_intensity.resample('D').sum()

In [None]:
# Daily intensity totals as a line chart, one marker per day.
plt.figure(figsize=(10, 6))
plt.plot(max_daily_intensity.index, max_daily_intensity.values, marker='o', linestyle='-', color='forestgreen')
# Title spelling corrected ("Intenisty" -> "Intensity").
plt.title(f'Daily Intensity for User with Most Intensity: {max_intensity_user}')
plt.xlabel('Date')
plt.ylabel('Total Intensity')
# Tilt the date labels so they do not overlap.
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()