# Sleep analysis

I have been tracking my sleep data for 6 months. I bought a Samsung Galaxy Watch Active 2, which allows me to track every stage of my sleep, every night.

The goal of this project is to analyse my sleep and see if I can find any correlations between my sleep quality and other external factors.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime

In [2]:
# Load the semicolon-separated sleep export, skipping the first line of the file.
sleep_data = pd.read_csv(
    'sleep_data_2021_02_09.csv',
    sep=';',
    skiprows=1,
)

In [3]:
sleep_data.head(5)

Unnamed: 0,start_time,sleep_id,custom,update_time,create_time,stage,time_offset,deviceuuid,pkg_name,end_time,datauuid
0,2020-09-01 23:22:00.000,b953ce09-66f0-5b6f-5b56-83c7dcd10e3b,,02.09.20 07:28,02.09.20 07:28,40001,UTC+0200,0CI+8SNNP+,com.sec.android.app.shealth,2020-09-01 23:26:00.000,3713d761-0e35-bae5-ae2e-bdb42c2e1536
1,2020-09-01 23:49:00.000,b953ce09-66f0-5b6f-5b56-83c7dcd10e3b,,02.09.20 07:28,02.09.20 07:28,40002,UTC+0200,0CI+8SNNP+,com.sec.android.app.shealth,2020-09-02 00:06:00.000,72ec0096-b482-f885-2580-a6f95f91af15
2,2020-09-02 00:42:00.000,b953ce09-66f0-5b6f-5b56-83c7dcd10e3b,,02.09.20 07:28,02.09.20 07:28,40004,UTC+0200,0CI+8SNNP+,com.sec.android.app.shealth,2020-09-02 00:49:00.000,a25927cb-e04c-04d4-6231-032a60a218dc
3,2020-09-02 01:09:00.000,b953ce09-66f0-5b6f-5b56-83c7dcd10e3b,,02.09.20 07:28,02.09.20 07:28,40004,UTC+0200,0CI+8SNNP+,com.sec.android.app.shealth,2020-09-02 01:23:00.000,3a97ac3d-a55b-d34e-db1c-4b594e055823
4,2020-09-02 02:25:00.000,b953ce09-66f0-5b6f-5b56-83c7dcd10e3b,,02.09.20 07:28,02.09.20 07:28,40002,UTC+0200,0CI+8SNNP+,com.sec.android.app.shealth,2020-09-02 02:58:00.000,f12df4f9-97aa-6a4b-dd17-27f2d70b8c06


In [4]:
# Discard the identifier/metadata columns; none of them is used further down.
unused_columns = ['custom', 'sleep_id', 'deviceuuid', 'pkg_name', 'datauuid']
sleep_data = sleep_data.drop(columns=unused_columns)

In [5]:
sleep_data

Unnamed: 0,start_time,update_time,create_time,stage,time_offset,end_time
0,2020-09-01 23:22:00.000,02.09.20 07:28,02.09.20 07:28,40001,UTC+0200,2020-09-01 23:26:00.000
1,2020-09-01 23:49:00.000,02.09.20 07:28,02.09.20 07:28,40002,UTC+0200,2020-09-02 00:06:00.000
2,2020-09-02 00:42:00.000,02.09.20 07:28,02.09.20 07:28,40004,UTC+0200,2020-09-02 00:49:00.000
3,2020-09-02 01:09:00.000,02.09.20 07:28,02.09.20 07:28,40004,UTC+0200,2020-09-02 01:23:00.000
4,2020-09-02 02:25:00.000,02.09.20 07:28,02.09.20 07:28,40002,UTC+0200,2020-09-02 02:58:00.000
...,...,...,...,...,...,...
12499,2021-02-09 15:14:00.000,09.02.21 17:27,09.02.21 17:27,40004,UTC+0100,2021-02-09 15:20:00.000
12500,2021-02-09 15:20:00.000,09.02.21 17:27,09.02.21 17:27,40002,UTC+0100,2021-02-09 15:26:00.000
12501,2021-02-09 15:26:00.000,09.02.21 17:27,09.02.21 17:27,40003,UTC+0100,2021-02-09 15:55:00.000
12502,2021-02-09 15:55:00.000,09.02.21 17:27,09.02.21 17:27,40002,UTC+0100,2021-02-09 16:00:00.000


In [6]:
# update_time and create_time are dropped: unlike start_time and end_time,
# they do not look actionable for the analysis.
sleep_data = sleep_data.drop(columns=['update_time', 'create_time'])

In [7]:
sleep_data

Unnamed: 0,start_time,stage,time_offset,end_time
0,2020-09-01 23:22:00.000,40001,UTC+0200,2020-09-01 23:26:00.000
1,2020-09-01 23:49:00.000,40002,UTC+0200,2020-09-02 00:06:00.000
2,2020-09-02 00:42:00.000,40004,UTC+0200,2020-09-02 00:49:00.000
3,2020-09-02 01:09:00.000,40004,UTC+0200,2020-09-02 01:23:00.000
4,2020-09-02 02:25:00.000,40002,UTC+0200,2020-09-02 02:58:00.000
...,...,...,...,...
12499,2021-02-09 15:14:00.000,40004,UTC+0100,2021-02-09 15:20:00.000
12500,2021-02-09 15:20:00.000,40002,UTC+0100,2021-02-09 15:26:00.000
12501,2021-02-09 15:26:00.000,40003,UTC+0100,2021-02-09 15:55:00.000
12502,2021-02-09 15:55:00.000,40002,UTC+0100,2021-02-09 16:00:00.000


| Stage        | Meaning           | Description |
| ------------- | :-------------| :------------- |
| 40001      | **Awaken stage of sleep.** | Eyes open. Responsive to external stimuli. |
| 40002      | **Light stage of sleep.** | Breathing slows down and heartbeat becomes regular. Typically lasts between 1 and 20 minutes after falling asleep. |
| 40003 | **Deep stage of sleep.** | Brain waves slow down and become larger. Typically starts 35 - 45 minutes after falling asleep. |
| 40004 | **REM (Rapid Eye Movement) stage of sleep.** | Brain waves similar to waking. Most vivid dreams happen in this stage. Body does not move. |

*https://developer.samsung.com/health/server/partner-only/api-reference/data-types/sleep-stage.html*

In [8]:
# The export is not in chronological order, so order the rows by start time
# and renumber them from 0.
sleep_data = sleep_data.sort_values(by='start_time').reset_index(drop=True)

In [9]:
sleep_data

Unnamed: 0,start_time,stage,time_offset,end_time
0,2020-09-01 23:22:00.000,40001,UTC+0200,2020-09-01 23:26:00.000
1,2020-09-01 23:26:00.000,40003,UTC+0200,2020-09-01 23:29:00.000
2,2020-09-01 23:29:00.000,40002,UTC+0200,2020-09-01 23:30:00.000
3,2020-09-01 23:30:00.000,40003,UTC+0200,2020-09-01 23:33:00.000
4,2020-09-01 23:33:00.000,40002,UTC+0200,2020-09-01 23:43:00.000
...,...,...,...,...
12499,2021-02-09 17:18:00.000,40001,UTC+0100,2021-02-09 17:19:00.000
12500,2021-02-09 17:19:00.000,40002,UTC+0100,2021-02-09 17:20:00.000
12501,2021-02-09 17:20:00.000,40001,UTC+0100,2021-02-09 17:21:00.000
12502,2021-02-09 17:21:00.000,40002,UTC+0100,2021-02-09 17:23:00.000


In [10]:
def _date_part(stamp):
    """Return the 'YYYY-MM-DD' part of a 'YYYY-MM-DD HH:MM:SS.fff' string."""
    return stamp.split(' ')[0]


def _clock_part(stamp):
    """Return the 'HH:MM:SS' part of a 'YYYY-MM-DD HH:MM:SS.fff' string."""
    return stamp.split(' ')[1].split('.')[0]


# Split both timestamp columns into a date string and a time-of-day string.
for timestamp_column in ('start_time', 'end_time'):
    sleep_data[timestamp_column + '_date'] = sleep_data[timestamp_column].map(_date_part)
    sleep_data[timestamp_column + '_hour'] = sleep_data[timestamp_column].map(_clock_part)

In [11]:
sleep_data

Unnamed: 0,start_time,stage,time_offset,end_time,start_time_date,start_time_hour,end_time_date,end_time_hour
0,2020-09-01 23:22:00.000,40001,UTC+0200,2020-09-01 23:26:00.000,2020-09-01,23:22:00,2020-09-01,23:26:00
1,2020-09-01 23:26:00.000,40003,UTC+0200,2020-09-01 23:29:00.000,2020-09-01,23:26:00,2020-09-01,23:29:00
2,2020-09-01 23:29:00.000,40002,UTC+0200,2020-09-01 23:30:00.000,2020-09-01,23:29:00,2020-09-01,23:30:00
3,2020-09-01 23:30:00.000,40003,UTC+0200,2020-09-01 23:33:00.000,2020-09-01,23:30:00,2020-09-01,23:33:00
4,2020-09-01 23:33:00.000,40002,UTC+0200,2020-09-01 23:43:00.000,2020-09-01,23:33:00,2020-09-01,23:43:00
...,...,...,...,...,...,...,...,...
12499,2021-02-09 17:18:00.000,40001,UTC+0100,2021-02-09 17:19:00.000,2021-02-09,17:18:00,2021-02-09,17:19:00
12500,2021-02-09 17:19:00.000,40002,UTC+0100,2021-02-09 17:20:00.000,2021-02-09,17:19:00,2021-02-09,17:20:00
12501,2021-02-09 17:20:00.000,40001,UTC+0100,2021-02-09 17:21:00.000,2021-02-09,17:20:00,2021-02-09,17:21:00
12502,2021-02-09 17:21:00.000,40002,UTC+0100,2021-02-09 17:23:00.000,2021-02-09,17:21:00,2021-02-09,17:23:00


In [12]:
def stage_pairing(x):
    """Translate a sleep-stage code into a human-readable label.

    Parameters
    ----------
    x : int
        Stage code: 40001, 40002, 40003 or 40004.

    Returns
    -------
    str
        'Awaken', 'Light', 'Deep' or 'REM' respectively.

    Raises
    ------
    ValueError
        If ``x`` is not one of the four known codes. Unknown codes used to be
        labelled 'REM' silently, which would inflate the REM totals whenever
        the export contained a missing or unexpected value.
    """
    stage_labels = {
        40001: 'Awaken',
        40002: 'Light',
        40003: 'Deep',
        40004: 'REM',
    }
    try:
        return stage_labels[x]
    except (KeyError, TypeError):
        raise ValueError('Unknown sleep stage code: {!r}'.format(x)) from None

# Add a readable label next to each numeric stage code.
sleep_data['stage_of_sleep'] = sleep_data['stage'].map(stage_pairing)

In [13]:
sleep_data

Unnamed: 0,start_time,stage,time_offset,end_time,start_time_date,start_time_hour,end_time_date,end_time_hour,stage_of_sleep
0,2020-09-01 23:22:00.000,40001,UTC+0200,2020-09-01 23:26:00.000,2020-09-01,23:22:00,2020-09-01,23:26:00,Awaken
1,2020-09-01 23:26:00.000,40003,UTC+0200,2020-09-01 23:29:00.000,2020-09-01,23:26:00,2020-09-01,23:29:00,Deep
2,2020-09-01 23:29:00.000,40002,UTC+0200,2020-09-01 23:30:00.000,2020-09-01,23:29:00,2020-09-01,23:30:00,Light
3,2020-09-01 23:30:00.000,40003,UTC+0200,2020-09-01 23:33:00.000,2020-09-01,23:30:00,2020-09-01,23:33:00,Deep
4,2020-09-01 23:33:00.000,40002,UTC+0200,2020-09-01 23:43:00.000,2020-09-01,23:33:00,2020-09-01,23:43:00,Light
...,...,...,...,...,...,...,...,...,...
12499,2021-02-09 17:18:00.000,40001,UTC+0100,2021-02-09 17:19:00.000,2021-02-09,17:18:00,2021-02-09,17:19:00,Awaken
12500,2021-02-09 17:19:00.000,40002,UTC+0100,2021-02-09 17:20:00.000,2021-02-09,17:19:00,2021-02-09,17:20:00,Light
12501,2021-02-09 17:20:00.000,40001,UTC+0100,2021-02-09 17:21:00.000,2021-02-09,17:20:00,2021-02-09,17:21:00,Awaken
12502,2021-02-09 17:21:00.000,40002,UTC+0100,2021-02-09 17:23:00.000,2021-02-09,17:21:00,2021-02-09,17:23:00,Light


In [14]:
# The raw timestamp, stage-code and offset columns have been replaced by the
# derived date/hour/label columns, so remove them.
superseded_columns = ['start_time', 'stage', 'time_offset', 'end_time']
sleep_data = sleep_data.drop(columns=superseded_columns)

In [15]:
sleep_data.head()

Unnamed: 0,start_time_date,start_time_hour,end_time_date,end_time_hour,stage_of_sleep
0,2020-09-01,23:22:00,2020-09-01,23:26:00,Awaken
1,2020-09-01,23:26:00,2020-09-01,23:29:00,Deep
2,2020-09-01,23:29:00,2020-09-01,23:30:00,Light
3,2020-09-01,23:30:00,2020-09-01,23:33:00,Deep
4,2020-09-01,23:33:00,2020-09-01,23:43:00,Light


In [16]:
#Dates and hours are strings. I need to find a way to translate them into time values. I want to create a new
#column "Duration" to calculate the duration of each sleeping phase.

In [17]:
# Duration of each sleep phase, computed for the whole column at once.
# Subtracting the full start/end timestamps (date + time of day) gives the
# right length for a phase that crosses midnight without any string
# post-processing. Assigning the whole column also avoids the chained
# `df[col].loc[i] = ...` writes that trigger SettingWithCopyWarning.
timestamp_format = '%Y-%m-%d %H:%M:%S'
phase_start = pd.to_datetime(
    sleep_data['start_time_date'] + ' ' + sleep_data['start_time_hour'],
    format=timestamp_format,
)
phase_end = pd.to_datetime(
    sleep_data['end_time_date'] + ' ' + sleep_data['end_time_hour'],
    format=timestamp_format,
)
sleep_data['Duration'] = phase_end - phase_start

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_with_indexer(indexer, value)


In [18]:
sleep_data.head()

Unnamed: 0,start_time_date,start_time_hour,end_time_date,end_time_hour,stage_of_sleep,Duration
0,2020-09-01,23:22:00,2020-09-01,23:26:00,Awaken,00:04:00
1,2020-09-01,23:26:00,2020-09-01,23:29:00,Deep,00:03:00
2,2020-09-01,23:29:00,2020-09-01,23:30:00,Light,00:01:00
3,2020-09-01,23:30:00,2020-09-01,23:33:00,Deep,00:03:00
4,2020-09-01,23:33:00,2020-09-01,23:43:00,Light,00:10:00


In [19]:
# Assign every sleep phase to the evening on which its night started, so that a
# whole night groups under a single date:
#   * start after 21:00:00             -> the phase's own calendar date
#   * start before 13:00:00            -> the previous calendar date
#   * start within 13:00:00 - 21:00:00 -> row removed
# The hours are zero-padded 'HH:MM:SS' strings, so comparing them as text
# orders them chronologically.
# Everything is computed column-wise: no chained `.loc[i]` writes, no rows
# dropped while iterating, and the cell can be re-run safely.
EXCLUDED_WINDOW_START = '13:00:00'
EXCLUDED_WINDOW_END = '21:00:00'

start_hour = sleep_data['start_time_hour']
in_excluded_window = (start_hour >= EXCLUDED_WINDOW_START) & (start_hour <= EXCLUDED_WINDOW_END)
# Original index labels are kept (with gaps), as the row-by-row drop did.
sleep_data = sleep_data.loc[~in_excluded_window].copy()

calendar_date = pd.to_datetime(sleep_data['start_time_date'], format='%Y-%m-%d')
started_after_midnight = sleep_data['start_time_hour'] < EXCLUDED_WINDOW_START
sleep_data['start_time_date_reworked'] = calendar_date.where(
    ~started_after_midnight,
    calendar_date - pd.Timedelta(days=1),
)

In [20]:
# Peek at the first rows, which span midnight, to check the reworked date.
# A bounded head() keeps the output readable and leaves the global
# display.max_rows option untouched for the rest of the notebook.
sleep_data.head(10)

Unnamed: 0,start_time_date,start_time_hour,end_time_date,end_time_hour,stage_of_sleep,Duration,start_time_date_reworked
0,2020-09-01,23:22:00,2020-09-01,23:26:00,Awaken,00:04:00,2020-09-01 00:00:00
1,2020-09-01,23:26:00,2020-09-01,23:29:00,Deep,00:03:00,2020-09-01 00:00:00
2,2020-09-01,23:29:00,2020-09-01,23:30:00,Light,00:01:00,2020-09-01 00:00:00
3,2020-09-01,23:30:00,2020-09-01,23:33:00,Deep,00:03:00,2020-09-01 00:00:00
4,2020-09-01,23:33:00,2020-09-01,23:43:00,Light,00:10:00,2020-09-01 00:00:00
5,2020-09-01,23:43:00,2020-09-01,23:49:00,Awaken,00:06:00,2020-09-01 00:00:00
6,2020-09-01,23:49:00,2020-09-02,00:06:00,Light,00:17:00,2020-09-01 00:00:00
7,2020-09-02,00:06:00,2020-09-02,00:07:00,Awaken,00:01:00,2020-09-01 00:00:00
8,2020-09-02,00:07:00,2020-09-02,00:21:00,Deep,00:14:00,2020-09-01 00:00:00
9,2020-09-02,00:21:00,2020-09-02,00:33:00,Light,00:12:00,2020-09-01 00:00:00


In [21]:
sleep_data.head()

Unnamed: 0,start_time_date,start_time_hour,end_time_date,end_time_hour,stage_of_sleep,Duration,start_time_date_reworked
0,2020-09-01,23:22:00,2020-09-01,23:26:00,Awaken,00:04:00,2020-09-01 00:00:00
1,2020-09-01,23:26:00,2020-09-01,23:29:00,Deep,00:03:00,2020-09-01 00:00:00
2,2020-09-01,23:29:00,2020-09-01,23:30:00,Light,00:01:00,2020-09-01 00:00:00
3,2020-09-01,23:30:00,2020-09-01,23:33:00,Deep,00:03:00,2020-09-01 00:00:00
4,2020-09-01,23:33:00,2020-09-01,23:43:00,Light,00:10:00,2020-09-01 00:00:00


In [22]:
# Keep only the columns needed to aggregate stage durations per night.
grouping_columns = ['start_time_date_reworked', 'stage_of_sleep', 'Duration']
sleep_data_gb = sleep_data.loc[:, grouping_columns]

In [23]:
sleep_data_gb.head()

Unnamed: 0,start_time_date_reworked,stage_of_sleep,Duration
0,2020-09-01 00:00:00,Awaken,00:04:00
1,2020-09-01 00:00:00,Deep,00:03:00
2,2020-09-01 00:00:00,Light,00:01:00
3,2020-09-01 00:00:00,Deep,00:03:00
4,2020-09-01 00:00:00,Light,00:10:00


In [24]:
# One row per night, one column per sleep stage, each cell holding the total
# time spent in that stage.
# * Selecting 'Duration' before summing keeps the columns single-level after
#   unstack(), so they already carry the stage names and need no positional
#   renaming.
# * A stage that never occurred during a night is filled with a zero Timedelta
#   rather than the integer 0, so the columns stay timedelta-typed and can be
#   added together afterwards.
sleep_data_final = (
    sleep_data_gb
    .groupby(['start_time_date_reworked', 'stage_of_sleep'])['Duration']
    .sum()
    .unstack()
    .fillna(pd.Timedelta(0))
    .rename_axis(index='Date', columns=None)
    .reset_index()
)

  


In [25]:
# Total time recorded per night: the four stage durations added together.
stage_columns = ['Awaken', 'Deep', 'Light', 'REM']
night_total = sleep_data_final[stage_columns[0]]
for stage_name in stage_columns[1:]:
    night_total = night_total + sleep_data_final[stage_name]
sleep_data_final['Total'] = night_total

In [26]:
sleep_data_final.head()

Unnamed: 0,Date,Awaken,Deep,Light,REM,Total
0,2020-09-01,00:45:00,00:42:00,04:52:00,01:46:00,08:05:00
1,2020-09-02,00:55:00,00:47:00,05:28:00,00:57:00,08:07:00
2,2020-09-03,00:32:00,00:49:00,02:52:00,01:34:00,05:47:00
3,2020-09-04,00:27:00,01:04:00,04:40:00,02:40:00,08:51:00
4,2020-09-05,00:38:00,00:46:00,06:05:00,01:07:00,08:36:00


In [27]:
# Flag nights whose date falls on a Saturday or Sunday (1) versus a weekday (0).
# dayofweek counts Monday as 0, so 5 and 6 are the weekend days.
# The whole column is assigned at once instead of writing row by row through
# chained `df[col].loc[i] = ...` indexing.
night_date = pd.to_datetime(sleep_data_final['Date'])
sleep_data_final['Weekend'] = (night_date.dt.dayofweek >= 5).astype(int)

In [28]:
sleep_data_final.head()

Unnamed: 0,Date,Awaken,Deep,Light,REM,Total,Weekend
0,2020-09-01,00:45:00,00:42:00,04:52:00,01:46:00,08:05:00,0
1,2020-09-02,00:55:00,00:47:00,05:28:00,00:57:00,08:07:00,0
2,2020-09-03,00:32:00,00:49:00,02:52:00,01:34:00,05:47:00,0
3,2020-09-04,00:27:00,01:04:00,04:40:00,02:40:00,08:51:00,0
4,2020-09-05,00:38:00,00:46:00,06:05:00,01:07:00,08:36:00,1


In [29]:
sleep_data_final['Total'].min()

Timedelta('0 days 04:30:00')

In [30]:
sleep_data_final['Total'].max()

Timedelta('0 days 11:02:00')

In [31]:
# Average total time per night on weekdays (Weekend flag == 0).
is_weekday = sleep_data_final['Weekend'] == 0
sleep_data_final.loc[is_weekday, 'Total'].mean()

Timedelta('0 days 07:45:09.913043')

In [32]:
# Average total time per night on weekends (Weekend flag == 1).
is_weekend = sleep_data_final['Weekend'] == 1
sleep_data_final.loc[is_weekend, 'Total'].mean()

Timedelta('0 days 07:33:24')

In [33]:
sleep_data_final['Awaken'].mean()

Timedelta('0 days 00:49:31.125000')

In [34]:
sleep_data_final['Deep'].mean()

Timedelta('0 days 00:41:52.125000')

In [35]:
sleep_data_final['Light'].mean()

Timedelta('0 days 04:58:55.125000')

In [36]:
sleep_data_final['REM'].mean()

Timedelta('0 days 01:11:33')