In [114]:
import numpy as np
import scipy.stats as stats 
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import math
from scipy.stats import norm
import glob
import fitbit
import requests
import datetime
from datetime import timedelta


# CAS Data Science Projekt Sleep Analysis - Data Wrangling

## Initiale Datenquellen

- 02.04.2022: Mond-Phasen: https://www.timeanddate.de/mond/phasen/ Download von Hand 2021 und 2022 für Ortschaft "Basel"
- XX.XX.2022: Fitbit Schlafdaten von meinem Account, CSV Export

## Mond Phasen

Ich habe diese zwei kleinen Tabellen (2021, 2022) von Hand in ein Excel bzw. CSV kopiert, da der Aufwand einen Crawler zu schreiben dafür zu gross gewesen wäre. Nun muss ich die Daten in ein geeignetes Format bringen. Grundsätzlich möchte ich pro Datum wissen, welche Mondphase das ist.

In [121]:
# read data
moon2021_raw = pd.read_csv('data/moon-phases-2021.csv')
moon2022_raw = pd.read_csv('data/moon-phases-2022.csv')

# have a look
print(moon2021_raw.head())
print(moon2022_raw.head())

# drop unnecessary column
moon2022_raw = moon2022_raw.drop(columns={'Unnamed: 4'})

# have a look again
moon2022_raw.head()

     New Moon First Quarter   Full Moon Third Quarter
0  2022-01-13    2022-01-20  2022-01-28    2022-01-06
1  2022-02-11    2022-02-19  2022-02-27    2022-02-04
2  2022-03-13    2022-03-21  2022-03-28    2022-03-06
3  2022-04-12    2022-04-20  2022-04-27    2022-04-04
4  2022-05-11    2022-05-19  2022-05-26    2022-05-03
     New Moon First Quarter   Full Moon Third Quarter  Unnamed: 4
0  2022-01-02    2022-01-09  2022-01-18    2022-01-25         NaN
1  2022-02-01    2022-02-08  2022-02-16    2022-02-23         NaN
2  2022-03-02    2022-03-10  2022-03-18    2022-03-25         NaN
3  2022-04-01    2022-04-09  2022-04-16    2022-04-23         NaN
4  2022-04-30    2022-05-09  2022-05-16    2022-05-22         NaN


Unnamed: 0,New Moon,First Quarter,Full Moon,Third Quarter
0,2022-01-02,2022-01-09,2022-01-18,2022-01-25
1,2022-02-01,2022-02-08,2022-02-16,2022-02-23
2,2022-03-02,2022-03-10,2022-03-18,2022-03-25
3,2022-04-01,2022-04-09,2022-04-16,2022-04-23
4,2022-04-30,2022-05-09,2022-05-16,2022-05-22


In [122]:

def convert_all_dates(df, format):
    """converts all the strings in the data frame into proper datetime objects"""

    new_moon = 'New Moon'
    df[new_moon] = pd.to_datetime(df[new_moon], format=format)
    first_quarter = 'First Quarter'
    df[first_quarter] = pd.to_datetime(df[first_quarter], format=format)
    full_moon = 'Full Moon'
    df[full_moon] = pd.to_datetime(df[full_moon], format=format)
    third_quarter = 'Third Quarter'
    df[third_quarter] = pd.to_datetime(df[third_quarter], format=format)

    return df

In [123]:
format = '%Y-%m-%d'
moon2021_raw = convert_all_dates(moon2021_raw, format)
moon2022_raw = convert_all_dates(moon2022_raw, format)

print(moon2021_raw.dtypes)
print(moon2022_raw.dtypes)

New Moon         datetime64[ns]
First Quarter    datetime64[ns]
Full Moon        datetime64[ns]
Third Quarter    datetime64[ns]
dtype: object
New Moon         datetime64[ns]
First Quarter    datetime64[ns]
Full Moon        datetime64[ns]
Third Quarter    datetime64[ns]
dtype: object


In [158]:
def flatten_moon_phases(df):
    """Flattens the moon phases to dates e.g. 2020-01-01 New Moon"""
    flattened = pd.DataFrame(columns={'Date', 'Moon Phase'})
    df = df.reset_index()  # make sure indexes pair with number of rows
    for index, row in df.iterrows():

        new_moon = {'Date': row['New Moon'], 'Moon Phase': 'New Moon'}
        flattened = flattened.append(new_moon, ignore_index = True)

        first_quarter = {'Date': row['First Quarter'], 'Moon Phase': 'First Quarter'}
        flattened = flattened.append(first_quarter, ignore_index = True)

        full_moon = {'Date': row['Full Moon'], 'Moon Phase': 'Full Moon'}
        flattened = flattened.append(full_moon, ignore_index = True)

        third_quarter = {'Date': row['Third Quarter'], 'Moon Phase': 'Third Quarter'}
        flattened = flattened.append(third_quarter, ignore_index = True)

        return flattened

In [162]:
moon_2021 = flatten_moon_phases(moon2021_raw)
moon_2022 = flatten_moon_phases(moon2022_raw)

moon_2022.head()

  flattened = flattened.append(new_moon, ignore_index = True)
  flattened = flattened.append(first_quarter, ignore_index = True)
  flattened = flattened.append(full_moon, ignore_index = True)
  flattened = flattened.append(third_quarter, ignore_index = True)
  flattened = flattened.append(new_moon, ignore_index = True)
  flattened = flattened.append(first_quarter, ignore_index = True)
  flattened = flattened.append(full_moon, ignore_index = True)
  flattened = flattened.append(third_quarter, ignore_index = True)


Unnamed: 0,Date,Moon Phase
0,2022-01-02 00:00:00,New Moon
1,2022-01-09 00:00:00,First Quarter
2,2022-01-18 00:00:00,Full Moon
3,2022-01-25 00:00:00,Third Quarter


### Temperature Variation

In [None]:
def importFiles(path):
    all_files = glob.glob(path + "/*.csv")
    li = []
    for filename in all_files:
        df = pd.read_csv(filename, index_col=None, header=0)
        li.append(df)

    return pd.concat(li, axis=0, ignore_index=True)

In [None]:
tempVarPath = r'/Users/tamara/GitRepos/chira/cas-datascience-notebooks/Projektideen/MyFitbitData/Tamara/Sleep/Temperature/' # use your path

tempVarDf = importFiles(tempVarPath)
print(tempVarDf)
print(tempVarDf["temperature"].hist())

### Computed Temperature

In [None]:
tempVarPath = r'/Users/tamara/GitRepos/chira/cas-datascience-notebooks/Projektideen/MyFitbitData/Tamara/Sleep/Computed Temp/'

tempVarDf = importFiles(tempVarPath)
print("Nightly Temperate ("+str(tempVarDf["nightly_temperature"].count())+") samples")
print(tempVarDf["nightly_temperature"].hist(bins = 50))


from dateutil import parser
import datetime

start = tempVarDf["sleep_start"]
end = tempVarDf["sleep_end"]
time = []

print(start)

for i in range(0, len(start)):    
    # e = datetime.datetime.strptime(end[i], '%y-%m-%dT%H:%M')
    e = parser.parse(end[i])
    # s = datetime.datetime.strptime(start[i],'%y-%m-%ddT%HG:%MM:%S')
    time.append(end[i]-start[i])



# Hilfsfunktionen

## Data Wrangling