In [4]:
# Import Libraries
import pandas as pd

# Helper function to clean column names
def clean_columns(df):
    df.columns = df.columns.str.strip().str.lower().str.replace(" ", "_")
    return df


In [5]:
# Load All CSV Files
activitylogs = clean_columns(pd.read_csv('activitylogs.csv'))
dietlogs = clean_columns(pd.read_csv('dietlogs.csv'))
moodlogs = clean_columns(pd.read_csv('moodlogs_cleaned.csv'))
regions = clean_columns(pd.read_csv('regions.csv'))
users = clean_columns(pd.read_csv('users.csv'))
wearable = clean_columns(pd.read_csv('wearabledata.csv'))


In [6]:
activitylogs.head() 

Unnamed: 0,activity_id,user_id,activity_type,duration_mins,calories_burned,activity_date
0,1,1,Lunges,10,119,18-05-2025
1,2,2,Boxing,76,828,19-05-2025
2,3,3,Boxing,104,808,29-05-2025
3,4,4,Weight Lifting,36,204,20-05-2025
4,5,5,Martial Arts,98,285,13-06-2025


In [7]:
dietlogs.head()

Unnamed: 0,diet_id,user_id,meal_type,food_items,calories_intake,meal_date
0,1,1,Snack,"Mutton Rogan Josh, Cereal",930,29-05-2025
1,2,2,Snack,"Coffee, Spaghetti Bolognese",1111,23-05-2025
2,3,3,Breakfast,"Porridge, Fried Rice, Pad Thai, Green Tea",1183,22-05-2025
3,4,4,Dinner,"Idli, Fried Rice, Spring Rolls, Upma, Muesli",427,24-05-2025
4,5,5,Breakfast,"Pho, Burrito, Pad Thai",866,08-06-2025


In [8]:
moodlogs.head()


Unnamed: 0,mood_id,user_id,mood_rating,mood_notes,mood_date
0,1,1,2,Deep in thought.,2025-06-06
1,2,2,7,Hurt by someone’s words.,2025-06-06
2,3,3,2,Laughing helped reduce stress.,2025-06-10
3,4,4,8,Messed up something and feeling guilty.,2025-06-07
4,5,5,2,Sick and a bit cranky.,2025-06-15


In [9]:
regions.head()

Unnamed: 0,region_id,region_name
0,1,"North Judithbury, Northern Mariana Islands"
1,2,"Lake Curtis, Czech Republic"
2,3,"New Roberttown, Saint Vincent and the Grenadines"
3,4,"East William, Bermuda"
4,5,"Lake Debra, Anguilla"


In [10]:
users.head()

Unnamed: 0,user_id,name,age,gender,email,region_id
0,1,Brandon Russell,66,F,robersonnancy@example.com,202
1,2,Evelyn Christian,52,M,derekhoffman@example.net,705
2,3,Aaron Graham,18,M,johnsnicholas@example.org,261
3,4,Jerome Whitehead,39,Other,estradadavid@example.org,516
4,5,Jeffrey Woods,70,Other,xmonroe@example.com,361


In [11]:
wearable.head()

Unnamed: 0,wearable_id,user_id,timestamp,heart_rate,steps_count,sleep_hours,device_name
0,1,1,09-06-2025 19:19,71,3494,4.9,Apple Watch SE
1,2,2,10-06-2025 01:10,113,14849,4.7,Realme Watch 3 Pro
2,3,3,12-06-2025 06:56,87,16361,5.5,Withings ScanWatch
3,4,4,10-06-2025 04:37,114,17509,7.0,Apple Watch Series 9
4,5,5,15-06-2025 17:55,83,2935,4.6,Titan Smart 2


In [12]:
# Count duplicates in each DataFrame
print("Duplicate rows in activitylogs:", activitylogs.duplicated().sum())
print("Duplicate rows in dietlogs:", dietlogs.duplicated().sum())
print("Duplicate rows in moodlogs:", moodlogs.duplicated().sum())
print("Duplicate rows in regions:", regions.duplicated().sum())
print("Duplicate rows in users:", users.duplicated().sum())
print("Duplicate rows in wearable:", wearable.duplicated().sum())

Duplicate rows in activitylogs: 0
Duplicate rows in dietlogs: 0
Duplicate rows in moodlogs: 0
Duplicate rows in regions: 0
Duplicate rows in users: 0
Duplicate rows in wearable: 0


In [13]:
# Clean and Transform Wearable Data
wearable['timestamp'] = pd.to_datetime(wearable['timestamp'], format="%d-%m-%Y %H:%M", errors='coerce')
wearable['sleep_hours'] = wearable['sleep_hours'].fillna(wearable['sleep_hours'].median())

def classify_activity_level(steps):
    if steps >= 15000:
        return 'Very Active'
    elif steps >= 10000:
        return 'Active'
    elif steps >= 5000:
        return 'Moderate'
    else:
        return 'Low'

wearable['activity_level'] = wearable['steps_count'].apply(classify_activity_level)
wearable = wearable.merge(users, on='user_id', how='left').merge(regions, on='region_id', how='left')


In [14]:
# Clean and Transform Activity Logs

activitylogs['activity_date'] = pd.to_datetime(activitylogs['activity_date'], format="%d-%m-%Y", errors='coerce')

activitylogs['duration_mins'] = activitylogs['duration_mins'].fillna(0)

activitylogs = activitylogs.merge(users, on='user_id', how='left').merge(regions, on='region_id', how='left')


In [15]:
# Clean and Transform Diet Logs

# Convert meal_date to datetime
dietlogs['meal_date'] = pd.to_datetime(dietlogs['meal_date'], errors='coerce')

# Fill missing values in calories_intake
dietlogs['calories_intake'] = dietlogs['calories_intake'].fillna(dietlogs['calories_intake'].mean())

# Merge with users and regions
dietlogs = dietlogs.merge(users, on='user_id', how='left').merge(regions, on='region_id', how='left')

# Show cleaned data
dietlogs.head()


  dietlogs['meal_date'] = pd.to_datetime(dietlogs['meal_date'], errors='coerce')


Unnamed: 0,diet_id,user_id,meal_type,food_items,calories_intake,meal_date,name,age,gender,email,region_id,region_name
0,1,1,Snack,"Mutton Rogan Josh, Cereal",930,2025-05-29,Brandon Russell,66,F,robersonnancy@example.com,202,"South Danny, Niue"
1,2,2,Snack,"Coffee, Spaghetti Bolognese",1111,2025-05-23,Evelyn Christian,52,M,derekhoffman@example.net,705,"Jeffreyport, Antigua and Barbuda"
2,3,3,Breakfast,"Porridge, Fried Rice, Pad Thai, Green Tea",1183,2025-05-22,Aaron Graham,18,M,johnsnicholas@example.org,261,"Baileyport, Montserrat"
3,4,4,Dinner,"Idli, Fried Rice, Spring Rolls, Upma, Muesli",427,2025-05-24,Jerome Whitehead,39,Other,estradadavid@example.org,516,"Cardenasberg, Ukraine"
4,5,5,Breakfast,"Pho, Burrito, Pad Thai",866,2025-06-08,Jeffrey Woods,70,Other,xmonroe@example.com,361,"Williamport, Azerbaijan"


In [16]:
# Clean and Transform Mood Logs

# Convert mood_date to datetime
moodlogs['mood_date'] = pd.to_datetime(moodlogs['mood_date'], errors='coerce')

# Fill missing mood ratings with median
moodlogs['mood_rating'] = moodlogs['mood_rating'].fillna(moodlogs['mood_rating'].median())

# Merge with users and regions
moodlogs = moodlogs.merge(users, on='user_id', how='left').merge(regions, on='region_id', how='left')

# Display cleaned mood logs
moodlogs.head()


Unnamed: 0,mood_id,user_id,mood_rating,mood_notes,mood_date,name,age,gender,email,region_id,region_name
0,1,1,2,Deep in thought.,2025-06-06,Brandon Russell,66,F,robersonnancy@example.com,202,"South Danny, Niue"
1,2,2,7,Hurt by someone’s words.,2025-06-06,Evelyn Christian,52,M,derekhoffman@example.net,705,"Jeffreyport, Antigua and Barbuda"
2,3,3,2,Laughing helped reduce stress.,2025-06-10,Aaron Graham,18,M,johnsnicholas@example.org,261,"Baileyport, Montserrat"
3,4,4,8,Messed up something and feeling guilty.,2025-06-07,Jerome Whitehead,39,Other,estradadavid@example.org,516,"Cardenasberg, Ukraine"
4,5,5,2,Sick and a bit cranky.,2025-06-15,Jeffrey Woods,70,Other,xmonroe@example.com,361,"Williamport, Azerbaijan"


In [17]:
# Save Cleaned Data to CSV
wearable.to_csv('cleaned_wearable_data.csv', index=False)
activitylogs.to_csv('cleaned_activitylogs.csv', index=False)
dietlogs.to_csv('cleaned_dietlogs.csv', index=False)
moodlogs.to_csv('cleaned_moodlogs.csv', index=False)
