In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# Apply matplotlib's 'fivethirtyeight' style sheet to figures drawn after this point.
plt.style.use('fivethirtyeight')

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import os

In [None]:
# Read the WORKSPACE_CDR environment variable through the IPython %env magic;
# the value is interpolated into the BigQuery table references (`{dataset}`.<table>) below.
dataset = %env WORKSPACE_CDR

In [None]:
import subprocess

name_of_file_in_bucket = 'n3c_aou_cohort.csv'

# Bucket location comes from the WORKSPACE_BUCKET environment variable.
my_bucket = os.getenv('WORKSPACE_BUCKET')

# Copy the CSV from the bucket into the current working directory.
# An argument list (no shell) avoids quoting problems, and check=True raises
# CalledProcessError when gsutil fails, so the success message below is only
# printed after a real download and a stale local copy is never read by accident.
subprocess.run(
    ["gsutil", "cp", f"{my_bucket}/data/{name_of_file_in_bucket}", "."],
    check=True,
)

print(f'[INFO] {name_of_file_in_bucket} is successfully downloaded into your working space')

# Load the downloaded CSV into a DataFrame and preview the first rows.
cohort = pd.read_csv(name_of_file_in_bucket)
cohort.head()


In [None]:
# Human-readable labels for the Fitbit data categories ('Minute-level' typo fixed).
categories = [
    'Activity summary',
    'Minute-level heart rate',
    'Heart rate summary',
    'Steps intraday',
]
# Table names that are appended to `{dataset}`. in the SQL queries of this notebook.
tables = [
    'activity_summary',
    'heart_rate_minute_level',
    'heart_rate_summary',
    'steps_intraday',
    'sleep_level',
    'sleep_daily_summary',
]

In [None]:
#cohort = pd.read_csv('n3c_aou_cohort.csv')

In [None]:
import subprocess

name_of_file_in_bucket = 'n3c_aou_cohort_ft.csv'

# Bucket location comes from the WORKSPACE_BUCKET environment variable.
my_bucket = os.getenv('WORKSPACE_BUCKET')

# Copy the CSV from the bucket into the current working directory.
# check=True raises CalledProcessError when gsutil fails, so the success
# message is only printed after a real download and a stale local copy is
# never read by accident.
subprocess.run(
    ["gsutil", "cp", f"{my_bucket}/data/{name_of_file_in_bucket}", "."],
    check=True,
)

print(f'[INFO] {name_of_file_in_bucket} is successfully downloaded into your working space')

# Load the downloaded CSV (it carries the min_covid_dt column used below) and preview it.
cohort_with_covid_dt = pd.read_csv(name_of_file_in_bucket)
cohort_with_covid_dt.head()


In [None]:
cohort.shape

In [None]:
cohort_with_covid_dt.shape

In [None]:
cohort_with_covid_dt = cohort_with_covid_dt.merge(cohort[['person_id']], left_on='person_id', right_on='person_id',
          suffixes=('_left', '_right'))

In [None]:
cohort_with_covid_dt.shape

In [None]:
min(cohort_with_covid_dt['min_covid_dt'])

In [None]:
people = tuple(cohort['person_id'])

In [None]:
people_50 = tuple(list(cohort['person_id'])[0:50])

In [None]:
len(people_50)

In [None]:
steps_summary_sql = f"""select * from `{dataset}`.activity_summary where person_id in {people} """

In [None]:
steps_summary_df = pd.read_gbq(steps_summary_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
steps_summary_df.head(10)

In [None]:
# Parse both date columns to datetimes so they can be compared in the loops below.
steps_summary_df = steps_summary_df.assign(
    date=pd.to_datetime(steps_summary_df['date'])
)

cohort_with_covid_dt = cohort_with_covid_dt.assign(
    min_covid_dt=pd.to_datetime(cohort_with_covid_dt['min_covid_dt'])
)

In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
from statistics import median

# One [person_id, min_covid_dt, median steps before, median steps after] record
# per participant that has at least one activity_summary row.
steps_stat = []

for _, row in cohort_with_covid_dt.iterrows():
    person_id = row['person_id']
    min_covid_dt = row['min_covid_dt']

    # "Before" = rows dated strictly earlier than min_covid_dt - 7 days;
    # "after"  = rows dated strictly later than min_covid_dt + 28 days.
    min_covid_dt_minus_7 = min_covid_dt - relativedelta(days=7)
    min_covid_dt_plus_28 = min_covid_dt + relativedelta(days=28)

    # .copy() yields an independent frame, so the date conversion below does not
    # assign into a slice of steps_summary_df (avoids SettingWithCopyWarning).
    p_data = steps_summary_df[steps_summary_df['person_id'] == person_id].copy()
    if p_data.empty:
        continue

    p_data['date'] = pd.to_datetime(p_data['date'])

    prev_steps = list(p_data.loc[p_data['date'] < min_covid_dt_minus_7, 'steps'])
    next_steps = list(p_data.loc[p_data['date'] > min_covid_dt_plus_28, 'steps'])

    # 0 is the placeholder when a window has no rows; median() sorts its input
    # itself, so no explicit sort is needed.
    p_m = median(prev_steps) if prev_steps else 0
    n_m = median(next_steps) if next_steps else 0

    steps_stat.append([person_id, min_covid_dt, p_m, n_m])

In [None]:
steps_stat_df = pd.DataFrame(steps_stat, columns=['person_id', 'min_covid_dt', 'before_covid_median_steps', 'after_covid_median_steps'])

In [None]:
steps_stat_df

In [None]:
destination_filename = 'steps_median.csv'

# save dataframe in a csv file in the same workspace as the notebook
steps_stat_df.to_csv(destination_filename, index=False)

# get the bucket name
my_bucket = os.getenv('WORKSPACE_BUCKET')

# copy csv file to the bucket
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)

# print output from gsutil
output.stderr


In [None]:
#steps_stat_df.to_csv('steps_median.csv', index=False)

In [None]:
hrml_sql = f"""select person_id, date(datetime) as date, avg(heart_rate_value) as average_heart_rate from `{dataset}`.heart_rate_minute_level where person_id in {people} group by 1,2"""

In [None]:
hrml_df = pd.read_gbq(hrml_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
hrml_df.head()

In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
from statistics import median

# One [person_id, min_covid_dt, median before, median after] record per
# participant that has at least one row in hrml_df. The medians are taken over
# the daily average_heart_rate values.
heart_rate_minute_level_stat = []

for _, row in cohort_with_covid_dt.iterrows():
    person_id = row['person_id']
    min_covid_dt = row['min_covid_dt']

    # "Before" = rows dated strictly earlier than min_covid_dt - 7 days;
    # "after"  = rows dated strictly later than min_covid_dt + 28 days.
    min_covid_dt_minus_7 = min_covid_dt - relativedelta(days=7)
    min_covid_dt_plus_28 = min_covid_dt + relativedelta(days=28)

    # .copy() yields an independent frame, so the date conversion below does not
    # assign into a slice of hrml_df (avoids SettingWithCopyWarning).
    p_data = hrml_df[hrml_df['person_id'] == person_id].copy()
    if p_data.empty:
        continue

    p_data['date'] = pd.to_datetime(p_data['date'])

    prev_hr = list(p_data.loc[p_data['date'] < min_covid_dt_minus_7, 'average_heart_rate'])
    next_hr = list(p_data.loc[p_data['date'] > min_covid_dt_plus_28, 'average_heart_rate'])

    # 0 is the placeholder when a window has no rows; median() sorts its input
    # itself, so no explicit sort is needed.
    p_m = median(prev_hr) if prev_hr else 0
    n_m = median(next_hr) if next_hr else 0

    heart_rate_minute_level_stat.append([person_id, min_covid_dt, p_m, n_m])

In [None]:
heart_rate_minute_level_df = pd.DataFrame(heart_rate_minute_level_stat, columns=['person_id', 'min_covid_dt', 'before_covid_avg_heart_rate', 'after_covid_avg_heart_rate'])

In [None]:
destination_filename = 'avg_heart_rate_median.csv'

# save dataframe in a csv file in the same workspace as the notebook
heart_rate_minute_level_df.to_csv(destination_filename, index=False)

# get the bucket name
my_bucket = os.getenv('WORKSPACE_BUCKET')

# copy csv file to the bucket
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)

# print output from gsutil
output.stderr


#heart_rate_minute_level_df.to_csv('avg_heart_rate_median.csv', index=False)

In [None]:
hr_summary_sql = f"""select * from `{dataset}`.heart_rate_summary where person_id in {people} """

In [None]:
hr_summary_df = pd.read_gbq(hr_summary_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
hr_summary_df.head()

In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
from statistics import median

# One [person_id, min_covid_dt, median min HR before, median max HR before,
# median min HR after, median max HR after] record per participant that has at
# least one heart_rate_summary row.
heart_rate_min_max_stat = []

for _, row in cohort_with_covid_dt.iterrows():
    person_id = row['person_id']
    min_covid_dt = row['min_covid_dt']

    # "Before" = rows dated strictly earlier than min_covid_dt - 7 days;
    # "after"  = rows dated strictly later than min_covid_dt + 28 days.
    min_covid_dt_minus_7 = min_covid_dt - relativedelta(days=7)
    min_covid_dt_plus_28 = min_covid_dt + relativedelta(days=28)

    # .copy() yields an independent frame, so the date conversion below does not
    # assign into a slice of hr_summary_df (avoids SettingWithCopyWarning).
    p_data = hr_summary_df[hr_summary_df['person_id'] == person_id].copy()
    if p_data.empty:
        continue

    p_data['date'] = pd.to_datetime(p_data['date'])

    prev_df = p_data[p_data['date'] < min_covid_dt_minus_7]
    next_df = p_data[p_data['date'] > min_covid_dt_plus_28]

    prev_min_hr = list(prev_df['min_heart_rate'])
    prev_max_hr = list(prev_df['max_heart_rate'])
    next_min_hr = list(next_df['min_heart_rate'])
    next_max_hr = list(next_df['max_heart_rate'])

    # Every median gets an explicit 0 placeholder for an empty window. Without
    # it, an empty window either raised NameError (first participant) or
    # silently reused the previous participant's value.
    p_min_m = median(prev_min_hr) if prev_min_hr else 0
    p_max_m = median(prev_max_hr) if prev_max_hr else 0
    n_min_m = median(next_min_hr) if next_min_hr else 0
    n_max_m = median(next_max_hr) if next_max_hr else 0

    heart_rate_min_max_stat.append([person_id, min_covid_dt, p_min_m, p_max_m, n_min_m, n_max_m])

In [None]:
heart_rate_min_max_df = pd.DataFrame(heart_rate_min_max_stat, columns=['person_id', 'min_covid_dt', 'before_covid_min_heart_rate', 'before_covid_max_heart_rate', 'after_covid_min_heart_rate', 'after_covid_max_heart_rate'])

In [None]:
heart_rate_min_max_df

In [None]:
destination_filename = 'heart_rate_min_max_median.csv'

# save dataframe in a csv file in the same workspace as the notebook
heart_rate_min_max_df.to_csv(destination_filename, index=False)

# get the bucket name
my_bucket = os.getenv('WORKSPACE_BUCKET')

# copy csv file to the bucket
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)


#heart_rate_min_max_df.to_csv('heart_rate_min_max_median.csv', index=False)

In [None]:
sleep_level_sql = f"""select * from `{dataset}`.sleep_level where person_id in {people}"""

In [None]:
sleep_level_df = pd.read_gbq(sleep_level_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
sleep_level_df.head()

In [None]:
sleep_summary_sql = f"""select * from `{dataset}`.sleep_daily_summary where person_id in {people} """

In [None]:
sleep_summary_df = pd.read_gbq(sleep_summary_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
sleep_summary_df.head()

In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
from statistics import median

# One [person_id, min_covid_dt, median minute_asleep before, median after]
# record per participant that has at least one sleep_daily_summary row.
sleep_summary_stat = []

for _, row in cohort_with_covid_dt.iterrows():
    person_id = row['person_id']
    min_covid_dt = row['min_covid_dt']

    # "Before" = rows dated strictly earlier than min_covid_dt - 7 days;
    # "after"  = rows dated strictly later than min_covid_dt + 28 days.
    min_covid_dt_minus_7 = min_covid_dt - relativedelta(days=7)
    min_covid_dt_plus_28 = min_covid_dt + relativedelta(days=28)

    # .copy() yields an independent frame, so the date conversion below does not
    # assign into a slice of sleep_summary_df (avoids SettingWithCopyWarning).
    p_data = sleep_summary_df[sleep_summary_df['person_id'] == person_id].copy()
    if p_data.empty:
        continue

    p_data['sleep_date'] = pd.to_datetime(p_data['sleep_date'])

    prev_asleep = list(p_data.loc[p_data['sleep_date'] < min_covid_dt_minus_7, 'minute_asleep'])
    next_asleep = list(p_data.loc[p_data['sleep_date'] > min_covid_dt_plus_28, 'minute_asleep'])

    # 0 is the placeholder when a window has no rows; median() sorts its input
    # itself, so no explicit sort is needed.
    # NOTE(review): missing minute_asleep values are not filled in this cell -
    # confirm the column has no nulls before trusting these medians.
    p_m = median(prev_asleep) if prev_asleep else 0
    n_m = median(next_asleep) if next_asleep else 0

    sleep_summary_stat.append([person_id, min_covid_dt, p_m, n_m])

In [None]:
sleep_summary_stat_df = pd.DataFrame(sleep_summary_stat, columns=['person_id', 'min_covid_dt', 'before_covid_median_minutes_asleep', 'after_covid_median_minutes_asleep'])

In [None]:
destination_filename = 'sleep_summary_stat_df.csv'

# save dataframe in a csv file in the same workspace as the notebook
sleep_summary_stat_df.to_csv(destination_filename, index=False)

# get the bucket name
my_bucket = os.getenv('WORKSPACE_BUCKET')

# copy csv file to the bucket
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)

#sleep_summary_stat_df.to_csv('sleep_summary_stat_df.csv', index=False)

In [None]:
sleep_summary_df[sleep_summary_df['person_id'] == 2114885]

In [None]:
fitbit_wore_time_sql = f"""SELECT person_id, date, SUM(has_hour) AS hours_with_fitbit FROM (SELECT person_id, CAST(datetime AS DATE) AS date, IF(SUM(steps)>0, 1, 0) AS has_hour FROM `{dataset}`.steps_intraday where person_id in {people} GROUP BY CAST(datetime AS DATE), EXTRACT(HOUR FROM datetime), person_id) t GROUP BY date, person_id"""

fitbit_wore_time_df = pd.read_gbq(fitbit_wore_time_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
fitbit_wore_time_df.head()

In [None]:
cohort_with_covid_dt['min_covid_dt']= pd.to_datetime(cohort_with_covid_dt['min_covid_dt'])

In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
from statistics import median

# One [person_id, min_covid_dt, median hours_with_fitbit before, median after]
# record per participant that has at least one row in fitbit_wore_time_df.
wear_time_stat = []

for _, row in cohort_with_covid_dt.iterrows():
    person_id = row['person_id']
    min_covid_dt = row['min_covid_dt']

    # "Before" = rows dated strictly earlier than min_covid_dt - 7 days;
    # "after"  = rows dated strictly later than min_covid_dt + 28 days.
    min_covid_dt_minus_7 = min_covid_dt - relativedelta(days=7)
    min_covid_dt_plus_28 = min_covid_dt + relativedelta(days=28)

    # .copy() yields an independent frame, so the date conversion below does not
    # assign into a slice of fitbit_wore_time_df (avoids SettingWithCopyWarning).
    p_data = fitbit_wore_time_df[fitbit_wore_time_df['person_id'] == person_id].copy()
    if p_data.empty:
        continue

    p_data['date'] = pd.to_datetime(p_data['date'])

    prev_hours = list(p_data.loc[p_data['date'] < min_covid_dt_minus_7, 'hours_with_fitbit'])
    next_hours = list(p_data.loc[p_data['date'] > min_covid_dt_plus_28, 'hours_with_fitbit'])

    # 0 is the placeholder when a window has no rows; median() sorts its input
    # itself, so no explicit sort is needed.
    p_m = median(prev_hours) if prev_hours else 0
    n_m = median(next_hours) if next_hours else 0

    wear_time_stat.append([person_id, min_covid_dt, p_m, n_m])

In [None]:
wear_time_median_df = pd.DataFrame(wear_time_stat, columns=['person_id', 'min_covid_dt', 'before_covid_median_wear_time', 'after_covid_median_wear_time'])

In [None]:
destination_filename = 'median_weartime_before_after_longcovid.csv'

# save dataframe in a csv file in the same workspace as the notebook
wear_time_median_df.to_csv(destination_filename, index=False)

# get the bucket name
my_bucket = os.getenv('WORKSPACE_BUCKET')

# copy csv file to the bucket
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)

#wear_time_median_df.to_csv('median_weartime_before_after_longcovid.csv', index=False)

In [None]:
destination_filename = 'fitbit_wore_time_df.csv'

# save dataframe in a csv file in the same workspace as the notebook
fitbit_wore_time_df.to_csv(destination_filename, index=False)

# get the bucket name
my_bucket = os.getenv('WORKSPACE_BUCKET')

# copy csv file to the bucket
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)

#fitbit_wore_time_df.to_csv('fitbit_wore_time_df.csv', index=False)

In [None]:
fitbit_wore_time_df.shape

In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
from statistics import median

fitbit_weartime_stat = []

for index, row in cohort_with_covid_dt.iterrows():
    person_id = row['person_id']
    min_covid_dt = row['min_covid_dt']
    
    
    p_data = fitbit_wore_time_df[fitbit_wore_time_df['person_id'] == person_id]
    
    p_data['date']= pd.to_datetime(p_data['date'])
    
    
    if not p_data.empty:
        
        avg_weartime = list(p_data['hours_with_fitbit'])
        avg_wt = round(sum(avg_weartime)/len(avg_weartime), 2)


        fitbit_wore_time_df.append([person_id, min_covid_dt, avg_wt])

In [None]:
fitbit_wore_time_stat_df = pd.DataFrame(fitbit_wore_time_df, columns=['person_id', 'min_covid_dt', 'avg_weartime'])

In [None]:
fitbit_wore_time_stat_df.head()

In [None]:
fitbit_wore_time_sql = f"""SELECT person_id, AVG(hours_with_fitbit) as average_weartime from
(SELECT person_id, date, SUM(has_hour) AS hours_with_fitbit FROM (SELECT person_id, CAST(datetime AS DATE) AS date, 
IF(SUM(steps)>0, 1, 0) AS has_hour FROM `{dataset}`.steps_intraday where person_id in {people} GROUP BY CAST(datetime AS DATE), EXTRACT(HOUR FROM datetime), person_id) t GROUP BY date, person_id)
GROUP BY person_id"""

fitbit_wore_time_df = pd.read_gbq(fitbit_wore_time_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
fitbit_wore_time_df.head()

In [None]:
destination_filename = 'fitbit_avg_weartime.csv'

# Write the per-participant average wear time to a CSV in the working directory.
fitbit_wore_time_df.to_csv(destination_filename, index=False)

# Bucket location comes from the WORKSPACE_BUCKET environment variable.
my_bucket = os.getenv('WORKSPACE_BUCKET')

# Copy the CSV into the bucket's data/ folder.
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)

# stderr is captured and not displayed in this cell, so a failed upload would
# otherwise pass silently; raise on a non-zero exit status instead.
if output.returncode != 0:
    raise RuntimeError(
        f"gsutil cp of {destination_filename} failed: "
        f"{output.stderr.decode(errors='replace')}"
    )

In [None]:
# Human-readable labels for the Fitbit data categories ('Minute-level' typo fixed).
categories = [
    'Activity summary',
    'Minute-level heart rate',
    'Heart rate summary',
    'Steps intraday',
]
# Table names that are appended to `{dataset}`. in the SQL queries of this notebook.
tables = [
    'activity_summary',
    'heart_rate_minute_level',
    'heart_rate_summary',
    'steps_intraday',
    'sleep_level',
    'sleep_daily_summary',
]

In [None]:
expl_sql = f"""SELECT * from `{dataset}`.activity_summary limit 1000"""

expl_df = pd.read_gbq(expl_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
expl_df

In [None]:
expl_sql = f"""SELECT * from `{dataset}`.heart_rate_summary limit 1000"""

expl_df = pd.read_gbq(expl_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
expl_df.head()

In [None]:
expl_sql = f"""SELECT *
from `{dataset}`.heart_rate_minute_level where person_id in {people} limit 1000"""

expl_df = pd.read_gbq(expl_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
expl_df.head()

In [None]:
heart_rate_minute_level_median_sql = f"""SELECT DISTINCT person_id, date(datetime) as date, PERCENTILE_DISC(heart_rate_value, 0.5) OVER (PARTITION BY person_id, date(datetime)) as median_heart_rate
from `{dataset}`.heart_rate_minute_level where person_id in {people} """

heart_rate_minute_level_median_df = pd.read_gbq(heart_rate_minute_level_median_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
heart_rate_minute_level_median_df.shape

In [None]:
heart_rate_minute_level_median_df.head(2)

In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
from statistics import median

# One [person_id, min_covid_dt, median before, median after] record per
# participant that has at least one row in heart_rate_minute_level_median_df.
# The medians are taken over the daily median_heart_rate values.
hrml_median_stat = []

# Make sure min_covid_dt is a datetime column before the comparisons below.
cohort_with_covid_dt['min_covid_dt'] = pd.to_datetime(cohort_with_covid_dt['min_covid_dt'])

for _, row in cohort_with_covid_dt.iterrows():
    person_id = row['person_id']
    min_covid_dt = row['min_covid_dt']

    # "Before" = rows dated strictly earlier than min_covid_dt - 7 days;
    # "after"  = rows dated strictly later than min_covid_dt + 28 days.
    min_covid_dt_minus_7 = min_covid_dt - relativedelta(days=7)
    min_covid_dt_plus_28 = min_covid_dt + relativedelta(days=28)

    # .copy() yields an independent frame, so the date conversion below does not
    # assign into a slice of the query result (avoids SettingWithCopyWarning).
    p_data = heart_rate_minute_level_median_df[
        heart_rate_minute_level_median_df['person_id'] == person_id
    ].copy()
    if p_data.empty:
        continue

    p_data['date'] = pd.to_datetime(p_data['date'])

    prev_hr = list(p_data.loc[p_data['date'] < min_covid_dt_minus_7, 'median_heart_rate'])
    next_hr = list(p_data.loc[p_data['date'] > min_covid_dt_plus_28, 'median_heart_rate'])

    # 0 is the placeholder when a window has no rows; median() sorts its input
    # itself, so no explicit sort is needed.
    p_m = median(prev_hr) if prev_hr else 0
    n_m = median(next_hr) if next_hr else 0

    hrml_median_stat.append([person_id, min_covid_dt, p_m, n_m])

In [None]:
hrml_median_stat_df = pd.DataFrame(hrml_median_stat, columns=['person_id', 'min_covid_dt', 'heart_rate_median_before_covid', 'heart_rate_median_after_covid'])

In [None]:
destination_filename = 'heart_rate_minute_level_median.csv'

# save dataframe in a csv file in the same workspace as the notebook
hrml_median_stat_df.to_csv(destination_filename, index=False)

# get the bucket name
my_bucket = os.getenv('WORKSPACE_BUCKET')

# copy csv file to the bucket
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)

#hrml_median_stat_df.to_csv('heart_rate_minute_level_median.csv', index=False)

In [None]:
sleep_summary_sql = f"""SELECT * from `{dataset}`.sleep_daily_summary where person_id in {people} """

sleep_summary_df = pd.read_gbq(sleep_summary_sql, dialect="standard", use_bqstorage_api=("BIGQUERY_STORAGE_API_ENABLED" in os.environ),  progress_bar_type="tqdm_notebook")

In [None]:
sleep_summary_df.head()

In [None]:
from datetime import datetime
from dateutil.relativedelta import relativedelta
from statistics import median

# One [person_id, min_covid_dt, median minute_asleep before, median after]
# record per participant that has at least one sleep_daily_summary row.
dsm_stat = []

# Make sure min_covid_dt is a datetime column before the comparisons below.
cohort_with_covid_dt['min_covid_dt'] = pd.to_datetime(cohort_with_covid_dt['min_covid_dt'])

# Missing minute_asleep values are counted as 0 minutes in the medians.
sleep_summary_df['minute_asleep'] = sleep_summary_df['minute_asleep'].fillna(0)

for _, row in cohort_with_covid_dt.iterrows():
    person_id = row['person_id']
    min_covid_dt = row['min_covid_dt']

    # "Before" = rows dated strictly earlier than min_covid_dt - 7 days;
    # "after"  = rows dated strictly later than min_covid_dt + 28 days.
    min_covid_dt_minus_7 = min_covid_dt - relativedelta(days=7)
    min_covid_dt_plus_28 = min_covid_dt + relativedelta(days=28)

    # .copy() yields an independent frame, so the date conversion below does not
    # assign into a slice of sleep_summary_df (avoids SettingWithCopyWarning).
    p_data = sleep_summary_df[sleep_summary_df['person_id'] == person_id].copy()
    if p_data.empty:
        continue

    p_data['sleep_date'] = pd.to_datetime(p_data['sleep_date'])

    prev_asleep = list(p_data.loc[p_data['sleep_date'] < min_covid_dt_minus_7, 'minute_asleep'])
    next_asleep = list(p_data.loc[p_data['sleep_date'] > min_covid_dt_plus_28, 'minute_asleep'])

    # 0 is the placeholder when a window has no rows; median() sorts its input
    # itself, so no explicit sort is needed.
    p_m = median(prev_asleep) if prev_asleep else 0
    n_m = median(next_asleep) if next_asleep else 0

    dsm_stat.append([person_id, min_covid_dt, p_m, n_m])

In [None]:
dsm_stat_df = pd.DataFrame(dsm_stat, columns=['person_id', 'min_covid_dt', 'asleep_min_median_before_covid', 'asleep_min_median_after_covid'])

In [None]:
destination_filename = 'asleep_minutes_median.csv'

# save dataframe in a csv file in the same workspace as the notebook
dsm_stat_df.to_csv(destination_filename, index=False)

# get the bucket name
my_bucket = os.getenv('WORKSPACE_BUCKET')

# copy csv file to the bucket
args = ["gsutil", "cp", f"./{destination_filename}", f"{my_bucket}/data/"]
output = subprocess.run(args, capture_output=True)

#dsm_stat_df.to_csv('asleep_minutes_median.csv', index=False)

In [None]:
dsm_stat_df