## My Snoring 

Use the data of the sound snoring sensor and visualise it here. 

- Use snoring sensor on RPi and store it on the InfluxDB
- Use Grafana (hass.io) to get the data from the device.
- Download CSV from Grafana and import it here.

## Contents
0. Get the data and load into Pandas df
1. Data inspection, cleaning and visualisation
2. Statistical analysis
3. Store the results as json

## 0. Get the data and load into Pandas df

In [6]:
# instantiate a client and assign the RPI and database to it and test the connection
import pandas as pd
from influxdb import DataFrameClient
import notebooks_config
def influxclient():
    try:
        host = notebooks_config.red_rpi_ip
        mydf_client = DataFrameClient(host=host, port=8086, database='SNORING')# this is my raspberry ip on a local network
        pang = mydf_client.ping()
        return mydf_client
    except:
        return print('RPI not available')
influxclient()

<influxdb._dataframe_client.DataFrameClient at 0x1fc370da070>

In [3]:
#TO DO Input start and enddate
my_startdate = input('Wat is je startdatum? (YYYY-MM-DD) ' )

Wat is je startdatum? (YYYY-MM-DD) 2022-01-12


In [4]:
my_enddate = input('Wat is je einddatum? (YYYY-MM-DD)')


Wat is je einddatum? (YYYY-MM-DD)2022-01-13


In [5]:
#getting start and end time based on oura ring
from oura import OuraClient
# get your credentials from oura, see: https://cloud.ouraring.com/docs/.
# You will need a client_id, client_secret and a personal access token (PAT). 

import notebooks_config
#I store all my credentials in a notebooks_config.py file. 
#This file is then included in the .gitignore list so I can share the notebooks on GitHub.  

oura_pat = notebooks_config.oura_pat #
oura_client_id = notebooks_config.oura_client_id
oura_client_secret = notebooks_config.oura_client_secret
client = OuraClient(personal_access_token=oura_pat)
oura = OuraClient(oura_client_id, oura_client_secret, oura_pat)
result = oura.sleep_summary(start=my_startdate, end=my_enddate)
data = result['sleep'][0] #select the first item from the list
start_time = data['bedtime_start']
end_time = data['bedtime_end']
# convert strings to datetime
from dateutil import parser
start_time_dt = parser.parse(start_time)
end_time_dt = parser.parse(end_time)
from datetime import datetime
start_time_dt_ntz = start_time_dt.replace(tzinfo=None)
end_time_dt_ntz = end_time_dt.replace(tzinfo=None)
print ('start time is '+ str(start_time_dt_ntz) + ' and end time is ' + str(end_time_dt_ntz))

start time is 2022-01-13 00:49:25 and end time is 2022-01-13 07:58:25


In [71]:
#create query string for InfluxDB
def query_string(start_time_dt_ntz,end_time_dt_ntz):
    base_string = "SELECT * FROM my_snoring WHERE time >= '"
    middle_string = "' AND time <='"
    end_string = "'"
    query_string = base_string + str(start_time_dt_ntz) + middle_string + str(end_time_dt_ntz)+end_string
    return query_string
query_string(str(start_time_dt_ntz),str(end_time_dt_ntz))

"SELECT * FROM my_snoring WHERE time >= '2022-01-13 00:49:25' AND time <='2022-01-13 07:58:25'"

In [72]:
mydf_client.switch_database('SNORING')
msmts = mydf_client.get_list_measurements()
print(msmts)

[{'name': 'my_snoring'}]


In [73]:
helper_df=pd.DataFrame()
helper_df

In [74]:
#create a df and get the data from InfluxDB based on query above.
helper_df = mydf_client.query(query_string(str(start_time_dt_ntz),str(end_time_dt_ntz)))
helper_df

defaultdict(list,
            {'my_snoring':                                     sheets   silence   snoring  softtalking
             2022-01-13 00:49:25.085352+00:00  0.001894  0.932387  0.065706     0.000012
             2022-01-13 00:49:25.611770+00:00  0.001892  0.931350  0.066747     0.000012
             2022-01-13 00:49:26.142804+00:00  0.001979  0.931551  0.066457     0.000013
             2022-01-13 00:49:26.692581+00:00  0.001967  0.932555  0.065465     0.000013
             2022-01-13 00:49:27.241872+00:00  0.001881  0.931388  0.066719     0.000012
             ...                                    ...       ...       ...          ...
             2022-01-13 07:15:22.559873+00:00  0.001867  0.933768  0.064353     0.000012
             2022-01-13 07:15:23.099598+00:00  0.001981  0.932255  0.065752     0.000013
             2022-01-13 07:15:23.638667+00:00  0.002002  0.931863  0.066122     0.000013
             2022-01-13 07:15:24.179996+00:00  0.002000  0.931799  0.066188   

In [75]:
snoring_df =helper_df['my_snoring']
snoring_df

Unnamed: 0,sheets,silence,snoring,softtalking
2022-01-13 00:49:25.085352+00:00,0.001894,0.932387,0.065706,0.000012
2022-01-13 00:49:25.611770+00:00,0.001892,0.931350,0.066747,0.000012
2022-01-13 00:49:26.142804+00:00,0.001979,0.931551,0.066457,0.000013
2022-01-13 00:49:26.692581+00:00,0.001967,0.932555,0.065465,0.000013
2022-01-13 00:49:27.241872+00:00,0.001881,0.931388,0.066719,0.000012
...,...,...,...,...
2022-01-13 07:15:22.559873+00:00,0.001867,0.933768,0.064353,0.000012
2022-01-13 07:15:23.099598+00:00,0.001981,0.932255,0.065752,0.000013
2022-01-13 07:15:23.638667+00:00,0.002002,0.931863,0.066122,0.000013
2022-01-13 07:15:24.179996+00:00,0.002000,0.931799,0.066188,0.000013


In [9]:
def create_snoring_df(start_date, end_date):
    mydf_client.switch_database('SNORING')   
    helper_df=pd.DataFrame()
    helper_df = mydf_client.query(query_string(str(start_time_dt_ntz),str(end_time_dt_ntz)))
    snoring_df =helper_df['my_snoring']
    return snoring_df
create_snoring_df(str(my_startdate), str(my_enddate))    

NameError: name 'query_string' is not defined

## 1. Data inspection, cleaning and visualisation


In [76]:
#show colums
snoring_df.columns

Index(['sheets', 'silence', 'snoring', 'softtalking'], dtype='object')

In [77]:
#describe data
snoring_df.describe()

Unnamed: 0,sheets,silence,snoring,softtalking
count,42820.0,42820.0,42820.0,42820.0
mean,0.002159285,0.905751,0.09207,1.932633e-05
std,0.003598248,0.111158,0.110618,6.067356e-05
min,1.246198e-18,5.1e-05,0.004744,4.902169e-08
25%,0.00185222,0.931751,0.06527,1.213752e-05
50%,0.001945502,0.931964,0.066072,1.271249e-05
75%,0.001996274,0.932816,0.066285,1.299891e-05
max,0.2114756,0.979211,0.999949,0.003605354


## 2. Statistical analysis

In [78]:
# add a column 'bins' to the df and create bins with labels
snoring_df['bins'] = pd.cut(snoring_df['snoring'], bins=[0.0, 0.10, 0.50, 1.00],
                    labels=['no snoring', 'light snoring', 'loud snoring']) 
snoring_df.tail() #show the df

Unnamed: 0,sheets,silence,snoring,softtalking,bins
2022-01-13 07:15:22.559873+00:00,0.001867,0.933768,0.064353,1.2e-05,no snoring
2022-01-13 07:15:23.099598+00:00,0.001981,0.932255,0.065752,1.3e-05,no snoring
2022-01-13 07:15:23.638667+00:00,0.002002,0.931863,0.066122,1.3e-05,no snoring
2022-01-13 07:15:24.179996+00:00,0.002,0.931799,0.066188,1.3e-05,no snoring
2022-01-13 07:15:24.733040+00:00,0.001928,0.933299,0.064761,1.2e-05,no snoring


In [79]:
total_length = snoring_df['bins'].count()
total_length

42820

In [95]:
stats_df = pd.DataFrame()
stats_df['count'] =snoring_df['bins'].value_counts()
stats_df['minutes']= round(snoring_df['bins'].value_counts()/60,1)
stats_df

Unnamed: 0,count,minutes
no snoring,38935,648.9
light snoring,2991,49.8
loud snoring,894,14.9


In [97]:
#define parameters
no_snoring =stats_df['count'][0]
light_snoring=stats_df['count'][1]
loud_snoring=stats_df['count'][2]
#print results
time_snoring = round(((light_snoring+loud_snoring)/60),1)
print("You've snored " +str(time_snoring) + " minutes")
percentage = ((loud_snoring+light_snoring) / total_length)*100
print ("Total snoring was " + str(round(percentage,2)) + "% the measured time")
loud_perc = round(loud_snoring/total_length*100,1)
print ("Loud snoring was " + str(loud_perc) + "% of the measured time")
stats_df

You've snored 64.8 minutes
Total snoring was 9.07% the measured time
Loud snoring was 2.1% of the measured time


Unnamed: 0,count,minutes
no snoring,38935,648.9
light snoring,2991,49.8
loud snoring,894,14.9


### 2b. Make a graph

In [54]:
#visualise the data. To do: larger plot and mean.values
#snoring_df.plot.scatter(x="Time", y="my_snoring.mean")


## 3. Store the results

## 4. Scripts as a function

In [1]:
start_date = '2022-01-12'
end_date = '2022-01-13'

In [2]:
import notebooks_config
from oura import OuraClient
#I store all my credentials in a notebooks_config.py file. 
#This file is then included in the .gitignore list so I can share the notebooks on GitHub.  

def oura_load(start_date, end_date):
    oura_pat = notebooks_config.oura_pat #
    oura_client_id = notebooks_config.oura_client_id
    oura_client_secret = notebooks_config.oura_client_secret

    client = OuraClient(personal_access_token=oura_pat)
    oura = OuraClient(oura_client_id, oura_client_secret, oura_pat)
    result = oura.sleep_summary(start= start_date, end= end_date)
    data = result['sleep'][0] #select the first item from the list
    return data
print(oura_load(start_date,end_date))

{'summary_date': '2022-01-12', 'period_id': 2, 'is_longest': 1, 'timezone': 60, 'bedtime_end': '2022-01-13T07:58:25+01:00', 'bedtime_start': '2022-01-13T00:49:25+01:00', 'breath_average': 14.875, 'duration': 25740, 'total': 22380, 'awake': 3360, 'rem': 5610, 'deep': 5850, 'light': 10920, 'midpoint_time': 13500, 'efficiency': 87, 'restless': 24, 'onset_latency': 1020, 'hr_5min': [73, 70, 67, 66, 65, 64, 63, 62, 64, 64, 63, 60, 60, 61, 61, 62, 61, 65, 64, 65, 64, 64, 63, 60, 0, 59, 58, 66, 61, 62, 65, 0, 61, 59, 58, 59, 58, 67, 63, 59, 59, 58, 57, 58, 57, 57, 59, 58, 61, 0, 62, 60, 61, 61, 61, 60, 60, 61, 62, 62, 62, 62, 61, 62, 62, 59, 61, 0, 63, 63, 67, 66, 64, 63, 59, 59, 60, 61, 61, 60, 0, 0, 58, 59, 58, 61], 'hr_average': 61.64, 'hr_lowest': 57.0, 'hypnogram_5min': '44422211112111122222442333442242333224332223112333322211111111222233333222222113332224', 'rmssd': 34, 'rmssd_5min': [9, 12, 15, 25, 20, 25, 35, 37, 35, 33, 32, 29, 31, 31, 34, 35, 41, 24, 23, 35, 36, 28, 30, 22, 0, 43, 4

In [3]:
from dateutil import parser
def startend_time(start_date, end_date):
    #get the start and endtime as strings
    data = oura_load(start_date, end_date)
    start_time = data['bedtime_start']
    end_time = data['bedtime_end']
    # convert strings to datetime using dateutil
    start_time_dt = parser.parse(start_time)
    end_time_dt = parser.parse(end_time)
    #remove timezones
    start_time_dt = start_time_dt.replace(tzinfo=None)
    end_time_dt = end_time_dt.replace(tzinfo=None)
    return start_time_dt, end_time_dt
startend_time(start_date, end_date)

(datetime.datetime(2022, 1, 13, 0, 49, 25),
 datetime.datetime(2022, 1, 13, 7, 58, 25))

In [4]:
def query_string(start_date, end_date):
    start_time_dt = startend_time(start_date, end_date)[0]
    end_time_dt = startend_time(start_date, end_date)[1]
    base_string = "SELECT * FROM my_snoring WHERE time >= '"
    middle_string = "' AND time <='"
    end_string = "'"
    query_string = base_string + str(start_time_dt) + middle_string + str(end_time_dt)+end_string
    return query_string
query_string(start_date, end_date)

"SELECT * FROM my_snoring WHERE time >= '2022-01-13 00:49:25' AND time <='2022-01-13 07:58:25'"

In [7]:
#double!!!
# instantiate a client and assign the RPI and database to it and test the connection
import pandas as pd
from influxdb import DataFrameClient
import notebooks_config
def influxclient():
    try:
        host = notebooks_config.red_rpi_ip
        mydf_client = DataFrameClient(host=host, port=8086, database='SNORING')# this is my raspberry ip on a local network
        return mydf_client
    except:
        return print('RPI not available')
influxclient()

<influxdb._dataframe_client.DataFrameClient at 0x1fc37267310>

In [8]:
import pandas as pd
from influxdb import DataFrameClient
def create_snoring_df(start_date, end_date):
    mydf_client = influxclient()
    mydf_client.switch_database('SNORING')   
    helper_df=pd.DataFrame()
    my_query_string = query_string(start_date, end_date)
    helper_df = mydf_client.query(my_query_string)
    snoring_df =helper_df['my_snoring']
    return snoring_df
create_snoring_df(start_date, end_date) 

Unnamed: 0,sheets,silence,snoring,softtalking
2022-01-13 00:49:25.085352+00:00,0.001894,0.932387,0.065706,0.000012
2022-01-13 00:49:25.611770+00:00,0.001892,0.931350,0.066747,0.000012
2022-01-13 00:49:26.142804+00:00,0.001979,0.931551,0.066457,0.000013
2022-01-13 00:49:26.692581+00:00,0.001967,0.932555,0.065465,0.000013
2022-01-13 00:49:27.241872+00:00,0.001881,0.931388,0.066719,0.000012
...,...,...,...,...
2022-01-13 07:15:22.559873+00:00,0.001867,0.933768,0.064353,0.000012
2022-01-13 07:15:23.099598+00:00,0.001981,0.932255,0.065752,0.000013
2022-01-13 07:15:23.638667+00:00,0.002002,0.931863,0.066122,0.000013
2022-01-13 07:15:24.179996+00:00,0.002000,0.931799,0.066188,0.000013
