# Capstone Project

Health and Nutrition Data from CDC and NHANES

In [None]:
import pandas as pd     # Import pandas for better viewing of Dataframes

In [None]:
pwd                     # Useful for loading data

'c:\\Users\\srnwn\\Documents\\neueFische\\Capstone\\scripts'

In [4]:
demo = pd.read_csv("../data/NHANES_2013-2014/demographics.csv")
demo_var = pd.read_excel("../data/NHANES_2013-2014/demographics_variables.xlsx").iloc[:,0:2]
diet = pd.read_csv("../data/NHANES_2013-2014/dietary.csv")
diet_var = pd.read_excel("../data/NHANES_2013-2014/dietary_variables.xlsx").iloc[:,0:2]
exam = pd.read_csv("../data/NHANES_2013-2014/examination.csv")
exam_var = pd.read_excel("../data/NHANES_2013-2014/examination_variables.xlsx")
lab = pd.read_csv("../data/NHANES_2013-2014/laboratory.csv")
lab_var = pd.read_excel("../data/NHANES_2013-2014/laboratory_variables.xlsx")
quest = pd.read_csv("../data/NHANES_2013-2014/questionnaire.csv")
quest_var = pd.read_excel("../data/NHANES_2013-2014/questionnaire_variables.xlsx")


# Data Clean-up

## Demographic

We want only certain variables! Filter for those :)

In [None]:
demo_clean = demo.loc[:,[
    "SEQN",                 # ID
    "DMDBORN4",             # Country of Birth
    "DMDCITZN",             # US-Citizenship
    "DMDEDUC2",             # Highschool Grad?
    "DMDEDUC3",             # College Degree?
    "DMDHHSIZ",             # Household Size
    "DMDHHSZA",             # No of Kids < 6 yr in Household?
    "DMDHHSZB",             # No of Kids between 6 and 17
    "RIAGENDR",             # Gender
    "INDFMIN2",             # Total family Income
    "RIDEXPRG",             # Pregnancy status
    "RIDAGEYR"              # Age
    ]]

In [19]:
demo_clean.head()

Unnamed: 0,SEQN,DMDBORN4,DMDCITZN,DMDEDUC2,DMDEDUC3,DMDHHSIZ,DMDHHSZA,DMDHHSZB,RIAGENDR,INDFMIN2,RIDEXPRG,RIDAGEYR
0,73557.0,1.0,1.0,3.0,,3.0,0.0,0.0,1.0,4.0,,69.0
1,73558.0,1.0,1.0,3.0,,4.0,0.0,2.0,1.0,7.0,,54.0
2,73559.0,1.0,1.0,4.0,,2.0,0.0,0.0,1.0,10.0,,72.0
3,73560.0,1.0,1.0,,3.0,4.0,0.0,2.0,1.0,9.0,,9.0
4,73561.0,1.0,1.0,5.0,,2.0,0.0,0.0,2.0,15.0,,73.0


## Dietary Data Filtering

In [24]:
diet_clean = diet.loc[:,[
    'SEQN',                 # ID
    'DR1TKCAL',             # Total Calories
    'DR1TPROT',             # Total Protein
    'DR1TCARB',             # Total Carbohydrates
    'DR1TSUGR',             # Total Sugar
    'DR1TTFAT',             # Total Fat
    'DR1TMFAT',             # MUFA
    'DR1TPFAT',             # PUFA
    'DR1TCHOL',             # Cholesterin
    'DR1TFIBE',             # Total Fiber
    'DR1TALCO',             # Total Alcohol
    'DR1TCALC',             # Calcium
    'DR1TIRON',             # Iron
    'DR1TVB12',             # Vitamin B12
    'DR1TVB6',              # Vitamin B6
    'DR1TVB1',              # Vit B1
    'DR1TVB2',              # Vit B2
    'DR1TVARA',             # Vit A
    'DR1TVC',               # Vit C
    'DR1TVD',               # Vit D
    'DR1TFA',               # Folic Acid
    'DR1TNIAC',             # Niacin
    'DR1TZINC',             # Zinc
    'DR1TSODI',             # Sodium
    'DR1TPOTA',             # Potassium
    'DR1TMOIS'              # Watercontent
    ]]

## Examination Data Filtering

In [None]:
exam_clean = exam.loc[:,[
    'SEQN',                 # ID
    'BMXWT',                # Weight
    'BMXHT',                # Height
    'BMXBMI',               # BMI
    'BMXWAIST',             # Waist circumference
    'BPXSY1',               # Systolic BP
    'BPXDI1',               # Diastolic BP
    'BPXPULS'               # Pulse
    ]]

## Questionaire Data Filtering

In [None]:
quest_clean = quest.loc[:,[
    'SEQN',                 # ID
    'PAQ620',               # Work with physical activity?
    'PAD680',               # Time spent sitting
    'PAD675',               # Time spent doing physical activities / day
    'PAQ640',               # Days on which you bike or walk more than 10 mins/week
    'MCQ010',               # Asthma?
    'MCQ080',               # Has a Doctor ever said: "you are overweight!" ?
    'MCQ220',               # Has a Doctor ever said: "You have cancer" ?
    'SMQ040',               # Do you smoke?
    'ALQ130',               # On avg how many drinks on days you drank alcohol?
    'SLQ050'                # Ever told a doctor: Trouble sleeping?           
    ]]

## Laboratory Data Filtering

In [None]:
lab_clean = lab.loc[:,[
    "SEQN",             # ID
    "LBXGLU",           # Glucose in Blood mg/dL
    "LBDINSI",          # Insulin pmol/L
    "LBXTC",            # Cholesterol mg/dL
    "LBXTR",            # Triglyceride mg/dL
    "LBDLDL",           # LDL-cholesterol mg/dL
#   "LBXSAL",           # Albumin
#   "LBXSATSI",         # Alanine Aminotransferase ALT U/L
    "LBXSGTSI",         # Gamma glutamyl transferase
    "LBXSAPSI",         # Alkaline phosphatase (IU/L)
    "LBXVIDMS",         # vitamin D2 + D3
    "URXUCR",           # Urinary creatinine (mg/dL)
    "URXUMA",           # Albumin, urine (ug/mL)
    "URDACT"            # Albumin creatinine ratio (mg/g)
]]


In [62]:
mapping = dict(zip(demo_var["Variable Name"], demo_var["Variable Description"]))

demo_clean.rename(columns=mapping, inplace = True)

In [63]:
demo_clean

Unnamed: 0,Respondent sequence number.,In what country {were you/was SP} born?,{Are you/Is SP} a citizen of the United States? [Information about citizenship is being collected by the U.S. Public Health Service to perform health related research. Providing this information is voluntary and is collected under the authority of the Public Health Service Act. There will be no effect on pending immigration or citizenship petitions.],What is the highest grade or level of school {you have/SP has} completed or the highest degree {you have/s/he has} received?,What is the highest grade or level of school {you have/SP has} completed or the highest degree {you have/s/he has} received?.1,Total number of people in the Household,Gender of the participant.,Total family income (reported as a range value in dollars),Pregnancy status for females between 20 and 44 years of age at the time of MEC exam.
0,73557.0,1.0,1.0,3.0,,3.0,1.0,4.0,
1,73558.0,1.0,1.0,3.0,,4.0,1.0,7.0,
2,73559.0,1.0,1.0,4.0,,2.0,1.0,10.0,
3,73560.0,1.0,1.0,,3.0,4.0,1.0,9.0,
4,73561.0,1.0,1.0,5.0,,2.0,2.0,15.0,
...,...,...,...,...,...,...,...,...,...
10170,83727.0,1.0,1.0,5.0,,5.0,1.0,77.0,
10171,83728.0,1.0,1.0,,,4.0,2.0,8.0,
10172,83729.0,2.0,1.0,5.0,,1.0,2.0,7.0,2.0
10173,83730.0,1.0,1.0,,0.0,4.0,1.0,6.0,
