# Predicting Stress Levels from Smart Watch Data

## Data Wrangling and Cleaning

In [32]:
import pandas as pd
import numpy as np

watch_data = pd.read_csv("unclean_smartwatch_health_data.csv").dropna()

watch_data_filtered = watch_data[watch_data['Sleep Duration (hours)'] != "ERROR"]

watch_data_filtered = watch_data_filtered.drop('User ID', axis=1)

watch_data_filtered

#Activity Level reports have 3 main categories, but spelling varies across entries

# Strip leading/trailing whitespace and map based on first letter
watch_data_filtered["Activity Level"] = watch_data_filtered["Activity Level"].str.strip().str.upper().map(
    lambda x: "High" if x.startswith("H") else
              "Average" if x.startswith("A") else
              "Sedentary" if x.startswith("S") else x
)


def clean_activity_level(level):
    level = level.strip().lower()
    if level.startswith('h'):
        return 'Highly_Active'
    elif level.startswith('a'):
        return 'Active'
    elif level.startswith('s'):
        return 'Sedentary'
    return level

watch_data_filtered['Activity Level'] = watch_data_filtered['Activity Level'].apply(clean_activity_level)

watch_data_filtered


Unnamed: 0,Heart Rate (BPM),Blood Oxygen Level (%),Step Count,Sleep Duration (hours),Activity Level,Stress Level
0,58.939776,98.809650,5450.390578,7.167235622316564,Highly_Active,1
3,40.000000,96.894213,13797.338044,7.367789630207228,Active,3
5,96.285938,94.202910,10205.992256,8.378342673824589,Highly_Active,10
6,47.272257,95.389760,3208.781177,7.871146008904113,Sedentary,2
7,81.733497,95.981343,6051.249857,5.224139066195455,Sedentary,1
...,...,...,...,...,...,...
9994,77.912299,98.640583,10061.145291,5.428634630125767,Sedentary,10
9995,78.819386,98.931927,2948.491953,7.402748595032027,Active,7
9996,48.632659,95.773035,4725.623070,6.3821659358529015,Sedentary,2
9997,73.834442,97.945874,2571.492060,6.91654920303435,Sedentary,4


## Processing Data for Machine Learning

### One Hot Encoding for Categorical Data

In [33]:
watch_data_encoded = pd.get_dummies(
    watch_data_filtered,
    columns=['Activity Level'],
    prefix='activity',
    prefix_sep='_',
)

watch_data_encoded


Unnamed: 0,Heart Rate (BPM),Blood Oxygen Level (%),Step Count,Sleep Duration (hours),Stress Level,activity_Active,activity_Highly_Active,activity_Sedentary
0,58.939776,98.809650,5450.390578,7.167235622316564,1,0,1,0
3,40.000000,96.894213,13797.338044,7.367789630207228,3,1,0,0
5,96.285938,94.202910,10205.992256,8.378342673824589,10,0,1,0
6,47.272257,95.389760,3208.781177,7.871146008904113,2,0,0,1
7,81.733497,95.981343,6051.249857,5.224139066195455,1,0,0,1
...,...,...,...,...,...,...,...,...
9994,77.912299,98.640583,10061.145291,5.428634630125767,10,0,0,1
9995,78.819386,98.931927,2948.491953,7.402748595032027,7,1,0,0
9996,48.632659,95.773035,4725.623070,6.3821659358529015,2,0,0,1
9997,73.834442,97.945874,2571.492060,6.91654920303435,4,0,0,1
