# Cleaning Data
I made use of **fitbit_helper.py**, a module I wrote to clean the data without putting too much code in this notebook. It also helps to make this notebook more easily readable. 

**Important:** I didn't add all the individual data files since that would clutter up my assignment. Thus, this code only shows my process and **cannot** be executed without those files. 

In [18]:
import time
import pandas as pd
import fitbit_helper as fh
import numpy as np

# Time the whole cleaning run; the elapsed seconds are printed at the end.
start = time.time()

# --- Intraday data: one frame per metric, each loaded from its own path ---
heart_rate = fh.read_files('../Files/HeartRate/')
steps = fh.read_files('../Files/Steps/')
calories = fh.read_files('../Files/calories/')
distance = fh.read_files('../Files/Distance/')
floors = fh.read_files('../Files/Floors/')

# --- Sleep data ---
sleep = fh.read_sleep()

# --- Per-day summaries, one entry per field name passed to the helper ---
summary_fields = (
    'activityCalories',
    'calories',
    'caloriesBMR',
    'distance',
    'elevation',
    'floors',
    'minutesFairlyActive',
    'minutesLightlyActive',
    'minutesSedentary',
    'minutesVeryActive',
    'steps',
)
summaries = fh.read_and_combine_summaries(*summary_fields)

# Resolve duplicate indices (the sleep frame is passed along with the
# intraday frames).
fh.resolve_duplicate_index(heart_rate, steps, calories, distance, floors, sleep)

# Use 'time' as the index the intraday frames will be merged on.
fh.set_index_df(heart_rate, steps, calories, distance, floors, field='time')

# Every intraday frame stores its measurement in a generic 'value' column;
# give each one a metric-specific name so the columns stay distinguishable
# after concatenation.
for frame, metric in (
    (floors, 'floors'),
    (distance, 'distance'),
    (calories, 'calories'),
    (steps, 'steps'),
    (heart_rate, 'heartrate'),
):
    frame.rename(columns={'value': metric}, inplace=True)

# The calories frame carries two extra columns that are not kept.
calories.drop(['level', 'mets'], axis=1, inplace=True)

# Put every frame on the same 60-second grid, back-filling the gaps, so the
# concatenation below does not introduce NaNs from mismatched timestamps.
heart_rate, steps, calories, distance, floors = [
    frame.resample('60s').bfill()
    for frame in (heart_rate, steps, calories, distance, floors)
]

# Combine all intraday metrics side by side into a single dataframe.
intraday_frames = [floors, distance, calories, steps, heart_rate]
intraday = pd.concat(intraday_frames, axis=1)

# Persist the results; only the intraday frame keeps its (time) index.
sleep.to_csv("sleep.csv", index=False)
intraday.to_csv("intraday.csv", index=True)
summaries.to_csv("summaries.csv", index=False)

end = time.time()
print(end - start)

51.729416370391846
