In [4]:
import pandas as pd
import json
from collections import Counter

# Input location and the CSV files to combine.
filedirectory = '../../data/2017_2020_NHANES/'
filenames = ['P_ALQ.csv', 'P_DPQ.csv', 'P_SLQ.csv', 'P_SMQ.csv']

# Start from the first file, then join each remaining file on the shared SEQN column.
# merge() defaults to an inner join, so only SEQN values found in every file remain.
first_file, *other_files = filenames
df = pd.read_csv(f'{filedirectory}{first_file}')
for filename in other_files:
    df = df.merge(pd.read_csv(f'{filedirectory}{filename}'), on='SEQN')

# Variables used in the study, with their descriptions.
studyColumns = [
    'SLD012',  # Sleep hours - weekdays or workdays
    'SLD013',  # Sleep hours - weekends
    'ALQ121',  # Past 12 mo how often drink alcoholic beverages
    'DPQ020',  # Feeling down, depressed, or hopeless
    'DPQ030',  # Trouble sleeping or sleeping too much
    'DPQ060',  # Feeling bad about yourself
    'SMQ040',  # Do you now smoke cigarettes?
]

# Keep only the rows that have a value in every study column.
# NOTE(review): only NaN is removed here; if these variables use numeric codes for
# "refused" / "don't know", such rows are kept -- confirm against the codebook.
df = df.dropna(subset=studyColumns, how='any')

print("Data size: ", df.shape)

df.head(5)

Data size:  (3266, 45)


Unnamed: 0,SEQN,ALQ111,ALQ121,ALQ130,ALQ142,ALQ270,ALQ280,ALQ290,ALQ151,ALQ170,...,SMD057,SMQ078,SMD641,SMD650,SMD100FL,SMD100MN,SMQ670,SMQ621,SMD630,SMAQUEX2
1,109271.0,1.0,0.0,,,,,,1.0,,...,,2.0,30.0,20.0,,,1.0,,,1.0
2,109273.0,1.0,0.0,,,,,,2.0,,...,,1.0,30.0,15.0,1.0,1.0,1.0,,,1.0
4,109282.0,1.0,0.0,,,,,,2.0,,...,10.0,,,,,,,,,1.0
17,109307.0,1.0,9.0,1.0,0.0,,,,2.0,0.0,...,5.0,,,,,,,,,1.0
22,109317.0,1.0,3.0,3.0,4.0,5.0,6.0,8.0,2.0,10.0,...,3.0,,,,,,,,,1.0


In [7]:
# Export the merged, filtered frame to CSV.
# The derived 'AvgSleep' column is added to `df` further down in this same cell, so
# re-running the cell would otherwise write a file with an extra column compared to
# a fresh run. Dropping it here (if present) keeps the export identical on every run.
df.drop(columns=['AvgSleep'], errors='ignore').to_csv('../../data/2017_2020_NHANES/merged_data.csv', index=False)

# compute the average nightly sleep per row, weighting weekday/workday and weekend hours,
# then count how many rows fall on each (rounded) duration
# Avg hours = (5*SLD012 + 2*SLD013) / 7
def round_to_nearest_half(x):
    """Round ``x`` to the nearest multiple of 0.5.

    The value is doubled, rounded to a whole number with the built-in
    ``round`` and then halved again. Exact ties follow ``round``'s
    round-half-to-even rule, e.g. 6.25 -> 6.0 while 6.75 -> 7.0.
    """
    half_steps = round(2 * x)
    return half_steps / 2

# Weighted average nightly sleep per row: SLD012 (weekdays/workdays) counts five
# times and SLD013 (weekends) twice; the result is snapped to the nearest half hour.
weighted_hours = (df['SLD012'] * 5 + df['SLD013'] * 2) / 7
df['AvgSleep'] = weighted_hours.apply(round_to_nearest_half)

# Count rows per rounded duration, order the counts by duration, and store them
# in a JSON file as a list of {'hour', 'count'} records.
avgSleepMap = dict(sorted(Counter(df['AvgSleep']).items()))
avgSleepList = [{'hour': hour, 'count': avgSleepMap[hour]} for hour in avgSleepMap]
dataDict = {'records': avgSleepList}
with open('../../data/2017_2020_NHANES/avgSleepMap.json', 'w') as f:
    json.dump(dataDict, f)
    