In [1]:
import os
import numpy as np
import pandas as pd

In [2]:
import json
from datetime import datetime

stress_path = '../MentalHealth/dataset/dataset/EMA/response/Stress/'
stress = []

# Collect one record per stress response from every participant's JSON file.
for user_file in os.listdir(stress_path):
    if not user_file.endswith('.json'):
        continue

    # The participant id is the text between the first '_' and the first '.'
    # of the second '_'-separated token of the file name.
    uid = user_file.split('_')[1].split('.')[0]
    with open(os.path.join(stress_path, user_file), 'r') as f:
        data = json.load(f)

    for entry in data:
        # Entries lacking either field cannot be scored or dated; skip them.
        if 'level' in entry and 'resp_time' in entry:
            stress.append({
                'uid': uid,
                'stress_level': int(entry['level']),
                'time': int(entry['resp_time']),
            })

# Pin the column list so the frame keeps its schema (and `sort_values('uid')`
# keeps working) even when no usable response was found.
df_stress = pd.DataFrame(stress, columns=['uid', 'stress_level', 'time'])

# Derive the calendar day with the same epoch-seconds conversion the sensor
# cells use (`pd.to_datetime(..., unit='s')`, no timezone shift). The previous
# `datetime.fromtimestamp(...)` used the machine's local timezone, so on a
# non-UTC machine responses near midnight fell on a different `date` than the
# sensor rows they are later merged with on ['uid', 'date'].
df_stress['date'] = pd.to_datetime(df_stress['time'].astype('int64'), unit='s').dt.date

df_stress = df_stress.sort_values('uid').reset_index(drop=True)

In [3]:
from datetime import timedelta

pl_path = '../MentalHealth/dataset/dataset/sensing/phonelock'
pl_daily = []

# One per-day summary frame is produced for every participant in df_stress
# that has a phonelock CSV on disk; participants without a file are skipped.
for uid in df_stress['uid'].unique():
    lock_csv = f"{pl_path}/phonelock_{uid}.csv"
    if os.path.exists(lock_csv):
        # 'start'/'end' are read as epoch seconds; no timezone shift is applied.
        locks = pd.read_csv(lock_csv).assign(
            uid=uid,
            start=lambda d: pd.to_datetime(d['start'], unit='s'),
            end=lambda d: pd.to_datetime(d['end'], unit='s'),
        )
        began = locks['start']
        locks['duration_min'] = (locks['end'] - began).dt.total_seconds() / 60
        locks['date'] = began.dt.date
        locks['hour'] = began.dt.hour
        # An interval counts as "night" when it starts in hour 23 or hours 0-5.
        locks['night'] = locks['hour'].eq(23) | locks['hour'].between(0, 5)

        # Each interval is attributed to the day on which it starts.
        per_day = locks.groupby(['uid', 'date']).agg(
            total_unlocks=('duration_min', 'size'),
            total_scr_time=('duration_min', 'sum'),
            avg_scr_time=('duration_min', 'mean'),
            night_unlocks=('night', 'sum'),
        ).reset_index()
        pl_daily.append(per_day)

# Stack every participant's daily rows into a single frame.
df_pl = pd.concat(pl_daily, ignore_index=True)

In [4]:
from datetime import timedelta
import os, pandas as pd

convo_path = '../MentalHealth/dataset/dataset/sensing/conversation'
convo_daily = []

# One per-day summary frame is produced for every participant in df_stress
# that has a conversation CSV on disk; participants without a file are skipped.
for uid in df_stress['uid'].unique():
    convo_csv = f"{convo_path}/conversation_{uid}.csv"
    if os.path.exists(convo_csv):
        raw = pd.read_csv(convo_csv)

        # Strip surrounding whitespace from header names before looking up
        # columns, then discard rows missing either timestamp.
        raw.columns = raw.columns.str.strip()
        convos = raw.dropna(subset=['start_timestamp', 'end_timestamp']).assign(
            uid=uid,
            start=lambda d: pd.to_datetime(d['start_timestamp'], unit='s'),
            end=lambda d: pd.to_datetime(d['end_timestamp'], unit='s'),
        )

        began = convos['start']
        convos['duration_min'] = (convos['end'] - began).dt.total_seconds() / 60
        convos['date'] = began.dt.date
        convos['hour'] = began.dt.hour
        # A conversation counts as "night" when it starts in hour 23 or hours 0-5.
        convos['night'] = convos['hour'].eq(23) | convos['hour'].between(0, 5)

        # Each conversation is attributed to the day on which it starts.
        per_day = convos.groupby(['uid', 'date']).agg(
            total_convos=('duration_min', 'size'),
            total_convo_time=('duration_min', 'sum'),
            avg_convo_time=('duration_min', 'mean'),
            night_convos=('night', 'sum'),
        ).reset_index()
        convo_daily.append(per_day)

# Stack every participant's daily rows into a single frame.
df_convo = pd.concat(convo_daily, ignore_index=True)

In [5]:
dark_path = '../MentalHealth/dataset/dataset/sensing/dark'
dark_daily = []

# Summarise each participant's dark intervals into one row per (uid, date).
for uid in df_stress['uid'].unique():
    user_file = f"{dark_path}/dark_{uid}.csv"
    if not os.path.exists(user_file):
        # No dark CSV for this participant; they contribute no rows.
        continue

    df = pd.read_csv(user_file)
    df['uid'] = uid
    # 'start'/'end' are read as epoch seconds; no timezone shift is applied.
    df['start'] = pd.to_datetime(df['start'], unit='s')
    df['end'] = pd.to_datetime(df['end'], unit='s')
    df['duration_min'] = (df['end'] - df['start']).dt.total_seconds() / 60
    # Each interval is attributed to the day (and hour) on which it starts.
    df['date'] = df['start'].dt.date
    df['hour'] = df['start'].dt.hour
    # Night = starts at hour 23 or before hour 6. Computed as a vectorized
    # boolean expression rather than a per-row Python lambda, matching how
    # the phonelock and conversation cells build the same flag.
    df['night'] = (df['hour'] >= 23) | (df['hour'] < 6)

    daily = (
        df.groupby(['uid', 'date'])
          .agg(total_dark=('duration_min', 'size'),       # number of intervals
               total_dark_time=('duration_min', 'sum'),   # minutes, summed
               avg_dark_time=('duration_min', 'mean'),    # minutes, mean
               night_dark=('night', 'sum'))               # count of night-flagged intervals
          .reset_index()
    )

    dark_daily.append(daily)

# Stack every participant's daily rows into a single frame.
df_dark = pd.concat(dark_daily, ignore_index=True)

In [6]:
from functools import reduce

# Join the stress responses with each daily sensor table, left to right, on
# participant and day. Inner joins keep only (uid, date) pairs that appear in
# every table.
dfs = [df_stress, df_pl, df_convo, df_dark]
df_final = dfs[0]
for sensor_daily in dfs[1:]:
    df_final = df_final.merge(sensor_daily, on=['uid', 'date'], how='inner')

# Persist the merged table without the positional index.
df_final.to_csv('stress_levels.csv', index=False)