HEALTH AND FITNESS APP - USER ACTIVITY DATA

In [None]:
import pandas as pd
import numpy as np
# Load the user activity export; the file is parsed as tab-separated even though it is named .csv
df = pd.read_csv('fitness_user_data.csv', sep='\t')

In [None]:
#Part 1: Data Preprocessing Tasks

#Missing value handling:
# Every fill is assigned back to its column. Calling fillna(..., inplace=True)
# on df['col'] is chained assignment: it acts on an intermediate Series, emits a
# FutureWarning on pandas 2.x and leaves df unchanged under Copy-on-Write
# (the default from pandas 3.0).

# Fill WorkoutType with mode
# mode() ignores NaN and returns the modes sorted; [0] picks the first one on ties.
most_common_workout = df['WorkoutType'].mode()[0]
df['WorkoutType'] = df['WorkoutType'].fillna(most_common_workout)

# Fill CaloriesBurned with median per WorkoutType
# A workout group whose CaloriesBurned values are all missing has a NaN median
# and therefore stays unfilled.
df['CaloriesBurned'] = df.groupby('WorkoutType')['CaloriesBurned'].transform(
    lambda calories: calories.fillna(calories.median())
)

# Drop EngagementScore missing rows
# Called on df itself (not on a column selection), so inplace is safe here.
df.dropna(subset=['EngagementScore'], inplace=True)

# Fill missing City with "Unknown"
df['City'] = df['City'].fillna('Unknown')


In [None]:
#Duplicate Removal
# Drop rows that repeat an earlier row across all columns, keeping the first occurrence.
df.drop_duplicates(subset=None, keep='first', inplace=True)

In [None]:
#Encoding

from sklearn.preprocessing import LabelEncoder

# Encode into a separate frame and leave `df` untouched. The exploration and
# filtering cells further down still read df['WorkoutType'] and df['City'] and
# compare df['SubscriptionType'] to the string 'Premium'; overwriting `df` with
# integer codes and dummy columns makes those lookups raise KeyError and the
# comparison never match.
df_encoded = df.copy()

# Label-encode Gender and SubscriptionType.
# One fitted encoder is kept per column so each mapping can be reversed with
# inverse_transform. After the loop `le` is the encoder fitted on
# SubscriptionType.
label_encoders = {}
for column in ['Gender', 'SubscriptionType']:
    le = LabelEncoder()
    df_encoded[column] = le.fit_transform(df_encoded[column])
    label_encoders[column] = le

# One-hot encode WorkoutType and City
# drop_first=True omits the first category of each column to avoid a redundant dummy.
df_encoded = pd.get_dummies(df_encoded, columns=['WorkoutType', 'City'], drop_first=True)

In [None]:
#Part 2: Data Exploration Tasks

#A. Basic Summary Stats

# describe() reports count, mean, std, min, quartiles (the 50% row is the median) and max
summary_columns = ['Age', 'CaloriesBurned', 'EngagementScore']
summary_stats = df[summary_columns].describe()
print(summary_stats)

# Largest and smallest CaloriesBurned values
calories = df['CaloriesBurned']
print("Max CaloriesBurned:", calories.max())
print("Min CaloriesBurned:", calories.min())

# Number of distinct values in each column, via nunique()
unique_counts = df.nunique()
print(unique_counts)

In [None]:
#B. Basic Analysis
# NOTE: this cell reads the categorical 'WorkoutType' column directly, so it
# needs a frame where that column has not been replaced by one-hot dummies.

# Frequency tables come from value_counts(), which lists the most frequent value first.

# Most common WorkoutType
workout_counts = df['WorkoutType'].value_counts()
print(workout_counts)

# Most subscribed SubscriptionType
subscription_counts = df['SubscriptionType'].value_counts()
print(subscription_counts)

# Avg CaloriesBurned by WorkoutType
calories_by_workout = df.groupby('WorkoutType')['CaloriesBurned']
print(calories_by_workout.mean())

# Gender-based average EngagementScore
engagement_by_gender = df.groupby('Gender')['EngagementScore']
print(engagement_by_gender.mean())

# Gender counts
gender_counts = df['Gender'].value_counts()
print(gender_counts)


In [None]:
#Part 3: Filtering and Insights
# NOTE: the Delhi/Cardio/Premium filter compares raw string labels, so it needs
# the un-encoded 'City', 'WorkoutType' and 'SubscriptionType' columns.

# Users with high Calories and Engagement
high_calories = df['CaloriesBurned'].gt(500)
high_engagement = df['EngagementScore'].gt(8)
filtered1 = df.loc[high_calories & high_engagement]

# Delhi, Cardio, Premium
in_delhi = df['City'].eq('Delhi')
does_cardio = df['WorkoutType'].eq('Cardio')
has_premium = df['SubscriptionType'].eq('Premium')
filtered2 = df.loc[in_delhi & does_cardio & has_premium]

# Users < 25 and EngagementScore <= 6
under_25 = df['Age'].lt(25)
low_engagement = df['EngagementScore'].le(6)
filtered3 = df.loc[under_25 & low_engagement]

# Sort by CaloriesBurned, highest first
sorted_by_calories = df.sort_values('CaloriesBurned', ascending=False)

# Top 5 by EngagementScore
engagement_ranked = df.sort_values('EngagementScore', ascending=False)
top5_engaged = engagement_ranked.head(5)
