# AutoSleep Data 


In [139]:
import pandas as pd
import numpy as np 


# Load the AutoSleep export into a dataframe; row 0 of the CSV supplies the column names.
df = pd.read_csv(filepath_or_buffer="AutoSleep.csv", header=0)


## Explore the Data & Handle Missing Values
* With this dataset, it appears that the nights I wasn't wearing my watch were not recorded so there are no missing values
* There are, however, `np.NaN` values in the columns tracking blood oxygen levels because my Apple Watch doesn't have that capability. 
    * Since this data is missing throughout the entire time interval of the dataset, I will be deleting those columns from the dataframe.
* Having no placeholder rows for the unrecorded nights is nice, except that when I want to compare this data to another source, the two will be a little more difficult to combine 
without filling in the missing dates in the AutoSleep data or removing those dates from the other dataset. 


In [140]:
# What are the column names?
print(df.columns)
# TODO: a table translating column names such as "ISO8601" and "qualityAvg7" might be useful

# Count the missing values in each column. isnull().sum() adds up the True
# flags, so it gives the number of NaNs; isnull().count() would only give the
# number of rows, whatever the column contains.
null_counts = df.isnull().sum()
cols_with_nulls = null_counts[null_counts > 0]

# Report each flagged column with its missing-value count out of the total rows,
# so fully empty columns can be told apart from partially filled ones.
for col, n_missing in cols_with_nulls.items():
    print(col, True, f"{n_missing} of {len(df)} values missing")

# Drop every column that has at least one missing value (same rule as before),
# in a single call instead of mutating the frame while looping over it.
df = df.drop(columns=cols_with_nulls.index)

print("after dropping empty columns:", df.head())

Index(['ISO8601', 'fromDate', 'toDate', 'bedtime', 'waketime', 'inBed',
       'awake', 'fellAsleepIn', 'sessions', 'asleep', 'asleepAvg7',
       'efficiency', 'efficiencyAvg7', 'quality', 'qualityAvg7', 'deep',
       'deepAvg7', 'sleepBPM', 'sleepBPMAvg7', 'dayBPM', 'dayBPMAvg7',
       'wakingBPM', 'wakingBPMAvg7', 'hrv', 'hrvAvg7', 'SpO2Avg', 'SpO2Min',
       'SpO2Max', 'respAvg', 'respMin', 'respMax', 'tags', 'notes'],
      dtype='object')
deep True 48
deepAvg7 True 48
dayBPM True 48
dayBPMAvg7 True 48
SpO2Avg True 48
SpO2Min True 48
SpO2Max True 48
respAvg True 48
respMin True 48
respMax True 48
tags True 48
notes True 48
after dropping empty columns:                      ISO8601                fromDate                  toDate  \
0  2022-01-28T20:59:59-08:00  Thursday, Jan 27, 2022    Friday, Jan 28, 2022   
1  2022-01-29T20:59:59-08:00    Friday, Jan 28, 2022  Saturday, Jan 29, 2022   
2  2022-01-30T20:59:59-08:00  Saturday, Jan 29, 2022    Sunday, Jan 30, 2022   
3  2022-02-

The following columns contain missing values (the check above flags a column if *any* of its values is missing) and were dropped from the dataframe:
* deep 
* deepAvg7 
* dayBPM 
* dayBPMAvg7 
* SpO2Avg 
* SpO2Min 
* SpO2Max 
* respAvg 
* respMin 
* respMax 
* tags 
* notes 

## Now for some Summary Statistics: 
* Summarize the following into a pandas Series:
    * `nights_total` = total number of nights logged
    * `avg_sleep` = average sleep 
    * `avg_[day]` = average sleep for each day of the week
    * `std_sleep` = standard deviation of sleep 
    * `std_[day]` = standard deviation for each day of the week
    * `mode_hours` = most common sleep length (rounded to the nearest hour)
    * `mode_day` = most common day of the week when mode sleep length occurred


In [141]:
# Summary statistics for the sleep log, gathered in order into a plain list.
import utils 
import importlib
importlib.reload(utils)  # reload so the current contents of the utils module are used

# 1. number of nights logged (one dataframe row per night)
nights_total = len(df)
summary_stats = [nights_total]

# 2. mean time asleep
# The "asleep" timestamps have to be turned into numeric values before a mean
# can be taken; utils.clean_sleep is used for that step.
clean_df = utils.clean_sleep(df, "asleep")
sleep_ser = clean_df["asleep"]
avg_sleep_secs = sleep_ser.mean()
hours, mins, secs, avg_sleep = utils.sec_to_hours(avg_sleep_secs)
print("average time slept:", avg_sleep)
summary_stats.append(avg_sleep)

# 3. mean sleep for each day of the week
# NOTE(review): the recorded output of this cell ends in
# "TypeError: cannot concatenate object of type str" -- presumably raised inside
# utils.separate_days; confirm against utils.py before relying on the values below.
monday, tuesday, wednesday, thursday, friday, saturday, sunday = utils.separate_days(df)
print(monday)
# TODO: once the per-day values are available, append their means:
# avg_monday = monday.mean()
# avg_tuesday = tuesday.mean()
# avg_wednesday = wednesday.mean()
# avg_thursday = thursday.mean()
# avg_friday = friday.mean()
# avg_saturday = saturday.mean()
# avg_sunday = sunday.mean()
# summary_stats.append([avg_monday, avg_tuesday, avg_wednesday, avg_thursday, avg_friday, avg_saturday, avg_sunday])
# print("test:", summary_stats)

average time slept: ['7.0 hours 23.0 mins 18.75 seconds']


TypeError: cannot concatenate object of type '<class 'str'>'; only Series and DataFrame objs are valid