In [1]:
import pandas as pd
import ast
import json

# Load the three raw CSV inputs; the relative paths resolve against the
# notebook's current working directory.
slp_score_data, sleep_data, wellness_data = (
    pd.read_csv("slp_score_data.csv"),
    pd.read_csv("sleep_data.csv"),
    pd.read_csv("wellness_data.csv"),
)

In [9]:
# Flatten the nested `levels` payload of every row in `sleep_data` into three
# lists of per-record DataFrames, each tagged with the row's participant_id:
#   all_summaries  - one row per sleep level from the record's 'summary' dict
#   all_data       - the record's 'data' entries (empty frame when absent)
#   all_short_data - the record's 'shortData' entries (only when present)
all_summaries = []
all_data = []
all_short_data = []

for row_idx, row in sleep_data.iterrows():
    user_id = row['participant_id']

    try:
        # `levels` is parsed with literal_eval, i.e. it is expected to hold the
        # text of a Python literal (a dict); nothing in it is executed.
        sleep_record = ast.literal_eval(row['levels'])

        # Summary: transpose so each level becomes a row, then expose the
        # level name (previously the index) as a regular 'level' column.
        summary_df = (
            pd.DataFrame(sleep_record['summary'])
            .T
            .reset_index()
            .rename(columns={'index': 'level'})
        )
        summary_df['participant_id'] = user_id

        # Data
        data_df = pd.DataFrame(sleep_record.get('data', []))
        data_df['participant_id'] = user_id

        # ShortData (optional key)
        short_df = None
        if 'shortData' in sleep_record:
            short_df = pd.DataFrame(sleep_record['shortData'])
            short_df['participant_id'] = user_id

    except Exception as e:
        # Best-effort: report the failing row by its index (not the whole row,
        # whose `levels` payload would flood the output) and move on.
        print(f"Error processing row {row_idx} for user {user_id}: {e}")
        continue

    # Append only after the whole record parsed, so a failure part-way through
    # a record can never leave the three lists out of step with each other.
    all_summaries.append(summary_df)
    all_data.append(data_df)
    if short_df is not None:
        all_short_data.append(short_df)

In [11]:
# Step 3: Combine
# Stack the per-record frames into one table each, renumbering the index.
# pd.concat raises ValueError when given an empty list, so every table falls
# back to an empty DataFrame if no frames were collected for it.
summary_combined = pd.concat(all_summaries, ignore_index=True) if all_summaries else pd.DataFrame()
data_combined = pd.concat(all_data, ignore_index=True) if all_data else pd.DataFrame()
short_data_combined = pd.concat(all_short_data, ignore_index=True) if all_short_data else pd.DataFrame()


In [13]:
# Display the combined per-level summary table (one row per level per record).
summary_combined

Unnamed: 0,level,count,minutes,thirtyDayAvgMinutes,participant_id
0,deep,2,36,40.0,p01
1,wake,26,52,42.0,p01
2,light,30,259,249.0,p01
3,rem,4,83,55.0,p01
4,deep,2,38,40.0,p01
...,...,...,...,...,...
8098,rem,12,94,86.0,p16
8099,deep,3,79,110.0,p16
8100,wake,32,56,66.0,p16
8101,light,28,219,246.0,p16


In [15]:
# Display the combined table built from every record's 'data' entries.
data_combined

Unnamed: 0,dateTime,level,seconds,participant_id
0,2019-11-02T00:09:30.000,wake,30,p01
1,2019-11-02T00:10:00.000,light,3570,p01
2,2019-11-02T01:09:30.000,deep,1140,p01
3,2019-11-02T01:28:30.000,light,210,p01
4,2019-11-02T01:32:00.000,rem,930,p01
...,...,...,...,...
49102,2020-03-31T11:59:00.000,wake,450,p16
49103,2020-03-31T12:06:30.000,light,2160,p16
49104,2020-03-31T12:42:30.000,wake,270,p16
49105,2020-03-31T12:47:00.000,light,210,p16


In [17]:
# Display the combined table built from every record's 'shortData' entries.
short_data_combined

Unnamed: 0,dateTime,level,seconds,participant_id
0,2019-11-02T00:09:30.000,wake,150,p01
1,2019-11-02T00:15:30.000,wake,150,p01
2,2019-11-02T00:20:30.000,wake,90,p01
3,2019-11-02T00:40:00.000,wake,150,p01
4,2019-11-02T00:57:30.000,wake,30,p01
...,...,...,...,...
44549,2020-03-31T11:27:30.000,wake,30,p16
44550,2020-03-31T11:33:30.000,wake,30,p16
44551,2020-03-31T11:37:00.000,wake,30,p16
44552,2020-03-31T11:40:30.000,wake,60,p16


In [35]:
# Write each combined table to its own CSV in the working directory.
# to_csv keeps its default index=True, so every file starts with an unnamed
# index column.
for frame, csv_path in (
    (summary_combined, 'sleep_processed_summary.csv'),
    (data_combined, 'sleep_processed_data.csv'),
    (short_data_combined, 'sleep_processed_shortdata.csv'),
):
    frame.to_csv(csv_path)