In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Load the weight log CSV into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
weight_csv_path = "OneDrive/Desktop/Capstone_week9/weightLogInfo_merged.csv"
weight_df = pd.read_csv(weight_csv_path)

In [None]:
# Preview the first rows of the frame (head() returns five rows by default).
weight_df.head()

In [None]:
# Preview the last rows of the frame (tail() returns five rows by default).
weight_df.tail()

In [None]:
# Summary statistics for the frame, using describe()'s default settings.
weight_df.describe()

In [None]:
# Print the frame's column names, non-null counts and dtypes.
weight_df.info()

In [None]:
# Count the missing values in each column.
weight_df.isna().sum()

In [None]:
# Remove the 'Fat' column from the analysis frame.
# NOTE(review): presumably dropped because it is mostly missing -- confirm
# against the null counts shown above.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps this
# cell idempotent: re-running it after 'Fat' is already gone no longer raises
# KeyError.
weight_df = weight_df.drop(columns='Fat', errors='ignore')

In [None]:
# Re-count the missing values per column now that 'Fat' has been dropped.
weight_df.isna().sum()

In [None]:
# Parse the 'Date' strings into pandas datetimes.
weight_df['Date'] = pd.to_datetime(weight_df['Date'])

# Calendar day of each measurement as a midnight timestamp. dt.normalize()
# keeps the column as datetime64, whereas .dt.date would produce an object
# column of Python date objects that has to be converted back before it can be
# used as a datetime.
# NOTE(review): assumes 'Date' is timezone-naive; for tz-aware data normalize()
# keeps the timezone -- confirm against the CSV.
weight_df['Day'] = weight_df['Date'].dt.normalize()

# Month number (1-12) of each measurement.
weight_df['Month'] = weight_df['Date'].dt.month


In [None]:
# Run 'Day' through pd.to_datetime so the column holds pandas datetimes.
weight_df['Day'] = pd.to_datetime(weight_df['Day'])

In [None]:
# Three side-by-side distribution panels: boxplots of weight in kg and in
# pounds, and a violin plot of BMI.
fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 6))

# (plotting function, column, panel title, x-axis label) for each panel,
# listed left to right.
panel_specs = [
    (sns.boxplot, 'WeightKg', 'Boxplot of Weight in KG', 'Weight (kg)'),
    (sns.boxplot, 'WeightPounds', 'Boxplot of Weight in Pounds', 'Weight (Pounds)'),
    (sns.violinplot, 'BMI', 'Violin Plot of BMI', 'BMI'),
]

for panel_ax, (plot_fn, column, panel_title, x_label) in zip(axes, panel_specs):
    plot_fn(x=column, data=weight_df, ax=panel_ax)
    panel_ax.set_title(panel_title)
    panel_ax.set_xlabel(x_label)

plt.show()

In [None]:
# Pairwise correlations (DataFrame.corr() defaults) between the two weight
# columns and BMI, drawn as an annotated heatmap.
corr_columns = ['WeightKg', 'WeightPounds', 'BMI']
corr_df = weight_df[corr_columns]
corr = corr_df.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(
    corr,
    cmap='coolwarm',
    annot=True,              # write the coefficient inside each cell
    fmt=".2f",
    annot_kws={"size": 10},
)
plt.title('Correlation Heatmap')
plt.show()

In [None]:
# Average WeightKg for every user: select the weight column within each 'Id'
# group, then take the per-group mean. The result is a Series indexed by Id.
weight_by_user = weight_df.groupby('Id')['WeightKg']
mean_weight_per_user = weight_by_user.mean()

# Show the per-user averages.
print(mean_weight_per_user)


In [None]:
# Mean of the per-user means: every user contributes one value, regardless of
# how many measurements they logged.
mean_weight = mean_weight_per_user.agg('mean')
mean_weight

In [None]:
# Bar chart of each user's mean weight, with a dashed horizontal reference line
# at the mean of those per-user means.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    x=mean_weight_per_user.index,
    y=mean_weight_per_user.values,
    color='skyblue',
    label='Mean Weight per User',
    ax=ax,
)

# Reference line: mean weight across all users.
ax.axhline(y=mean_weight, color='red', linestyle='--', label='Mean Weight of All Users')

# Labels and title.
ax.set_xlabel('User ID')
ax.set_ylabel('Mean Weight (kg)')
ax.set_title('Mean Weight of Users with Trend Line')

# The user IDs are long integers; rotate the tick labels so they do not overlap.
ax.tick_params(axis='x', labelrotation=45)

ax.legend()

# Render the figure explicitly so the cell does not echo the Legend object's
# repr as its output (matches the other plotting cells).
plt.show()

In [None]:
# Group the rows by user Id and calendar Day. The bare expression below
# displays the GroupBy object itself, not any aggregated data.
grouped_data = weight_df.groupby(['Id','Day'])
grouped_data

In [None]:
# All rows logged by one specific user.
# A boolean mask on weight_df selects them directly: it avoids calling a Python
# lambda once per group, and it no longer depends on whichever grouping
# `grouped_data` happens to hold when this cell is (re-)run.
target_user_id = 1927972279
filtered_data = weight_df[weight_df['Id'] == target_user_id]

# Leave the frame as the cell's last expression so it renders as a rich table.
filtered_data

In [None]:
# One stacked subplot per user showing WeightKg over time.
# (matplotlib.pyplot is already imported as plt in the notebook's import cell.)

# Group the DataFrame by 'Id' -- one group per user.
grouped_data = weight_df.groupby('Id')

# Users with fewer log entries than this get a placeholder message instead of
# a line plot. Adjust as needed.
min_points = 2

# One subplot row per user, in a single column.
num_subplots = len(grouped_data)
num_rows = num_subplots
num_cols = 1

# squeeze=False always returns a 2-D array of Axes, so a single-user dataset
# needs no special-casing when indexing.
fig, axes = plt.subplots(
    num_rows, num_cols,
    figsize=(10, 6 * num_rows),
    sharex=True,
    squeeze=False,
)

# Plot each user's measurements on their own Axes.
for ax, (user_id, group_data) in zip(axes[:, 0], grouped_data):
    if len(group_data) >= min_points:
        # Sort chronologically so the line is drawn left to right in time even
        # if the rows are not in date order.
        ordered = group_data.sort_values('Date')
        ax.plot(ordered['Date'], ordered['WeightKg'], label=f'User {user_id}')
        ax.set_title(f'Weight Trend for User {user_id}')
        ax.set_xlabel('Date')
        ax.set_ylabel('Weight (Kg)')
        ax.grid(True)
        ax.legend()
    else:
        # Too few points for a line: show a centred notice instead.
        ax.text(0.5, 0.5, f'User {user_id} has insufficient data',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=12, color='red')

# Adjust layout so the stacked titles and axis labels do not collide, then show.
fig.tight_layout()
plt.show()


In [None]:
# Split the log by how each measurement was recorded. 'IsManualReport' is used
# directly as a boolean mask here.
manual_measurements = weight_df[weight_df['IsManualReport']]
automated_measurements = weight_df[~weight_df['IsManualReport']]

# The tick labels below are assigned by position, so pin the category order
# explicitly: position 0 is always False (automated) and position 1 is always
# True (manual), even if one of the two categories is absent from the data.
report_order = [False, True]
report_labels = ['Automated', 'Manual Report']

# Draw the same comparison twice: first as a violin plot, then as a boxplot.
report_plots = [
    (sns.violinplot, 'Violin Plot of Weight Measurements by Reporting Type'),
    (sns.boxplot, 'Boxplot of Weight Measurements by Reporting Type'),
]

for plot_fn, plot_title in report_plots:
    fig, ax = plt.subplots(figsize=(10, 6))
    plot_fn(x='IsManualReport', y='WeightKg', data=weight_df,
            order=report_order, ax=ax)
    ax.set_title(plot_title)
    ax.set_xlabel('Reporting Type')
    ax.set_ylabel('Weight (kg)')
    ax.set_xticks([0, 1])
    ax.set_xticklabels(report_labels)
    plt.show()

In [None]:
# Number of weight measurements per reporting type.
fig, ax = plt.subplots(figsize=(10, 6))
colors = ['#1f77b4', '#ff7f0e']

# Pin the category order so the positional tick labels below always match:
# position 0 is False (automated), position 1 is True (manual), even if one
# category has no rows.
# NOTE(review): newer seaborn releases may warn when `palette` is passed without
# `hue` -- confirm against the installed version.
sns.countplot(x='IsManualReport', data=weight_df, order=[False, True],
              palette=colors, ax=ax)

ax.set_title('Count of Weight Measurements by Reporting Type')
ax.set_xlabel('Reporting Type')
ax.set_ylabel('Count')
ax.set_xticks([0, 1])
ax.set_xticklabels(['Automated', 'Manual Report'])
plt.show()