In [2]:
import numpy as np
import pandas as pd

In [None]:
# convert XPT files to pandas dataframe
def xpt_to_df(file_path):
    """Read a SAS transport (XPORT) file into a pandas DataFrame.

    Parameters
    ----------
    file_path
        Location of the XPT file; handed unchanged to ``pd.read_sas``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``pd.read_sas`` produces for the file when parsed as
        ``format='xport'`` with text decoded as UTF-8.
    """
    return pd.read_sas(file_path, format='xport', encoding='utf-8')

In [10]:
# Lookup from NHANES variable codes to readable snake_case column names.
# The entries are grouped by topic and then merged into one mapping; the
# key sets do not overlap, so the merged order is demographics first,
# body measurements second.

# Demographics and Status
_demographic_names = {
    'SEQN': 'respondent_sequence_number',
    'SDDSRVYR': 'data_release_cycle',
    'RIDSTATR': 'interview_examination_status',
    'RIAGENDR': 'gender',
    'RIDAGEYR': 'age_years_screening',
    'RIDAGEMN': 'age_months_screening_0_to_24',
    'RIDRETH1': 'race_hispanic_origin',
    'RIDRETH3': 'race_hispanic_origin_with_asian',
    'RIDEXMON': 'six_month_exam_period',
    'RIDEXAGM': 'age_months_exam_0_to_19_years',
    'DMQMILIZ': 'served_active_duty_armed_forces',
    'DMDBORN4': 'country_of_birth',
    'DMDYRUSR': 'length_time_in_us',
    'DMDEDUC2': 'education_level_adults_20_plus',
    'DMDMARTZ': 'marital_status',
    'RIDEXPRG': 'pregnancy_status_at_exam',
    'DMDHHSIZ': 'total_people_in_household',
    'DMDHRGND': 'household_ref_person_gender',
    'DMDHRAGZ': 'household_ref_person_age',
    'DMDHREDZ': 'household_ref_person_education',
    'DMDHRMAZ': 'household_ref_person_marital_status',
    'DMDHSEDZ': 'household_ref_person_spouse_education',
    'WTINT2YR': 'full_sample_2year_interview_weight',
    'WTMEC2YR': 'full_sample_2year_mec_exam_weight',
    'SDMVSTRA': 'masked_variance_pseudo_stratum',
    'SDMVPSU': 'masked_variance_pseudo_psu',
    'INDFMPIR': 'ratio_family_income_to_poverty',
}

# Body Measurements
_body_measure_names = {
    'BMDSTATS': 'body_measures_component_status',
    'BMXWT': 'weight_kg',
    'BMIWT': 'weight_comment',
    'BMXRECUM': 'recumbent_length_cm',
    'BMIRECUM': 'recumbent_length_comment',
    'BMXHEAD': 'head_circumference_cm',
    'BMIHEAD': 'head_circumference_comment',
    'BMXHT': 'standing_height_cm',
    'BMIHT': 'standing_height_comment',
    'BMXBMI': 'body_mass_index',
    'BMDBMIC': 'bmi_category_children_youth',
    'BMXLEG': 'upper_leg_length_cm',
    'BMILEG': 'upper_leg_length_comment',
    'BMXARML': 'upper_arm_length_cm',
    'BMIARML': 'upper_arm_length_comment',
    'BMXARMC': 'arm_circumference_cm',
    'BMIARMC': 'arm_circumference_comment',
    'BMXWAIST': 'waist_circumference_cm',
    'BMIWAIST': 'waist_circumference_comment',
    'BMXHIP': 'hip_circumference_cm',
    'BMIHIP': 'hip_circumference_comment',
}

nhanes_variable_mapping = {**_demographic_names, **_body_measure_names}

In [11]:
# Load the BMX_L transport file; the untouched frame stays in BMX_df_raw.
BMX_df_raw = xpt_to_df('2021-2023/BMX_L.XPT')

# Replace variable codes with readable column names. Columns that have no
# entry in the mapping keep their original label.
BMX_df = BMX_df_raw.rename(nhanes_variable_mapping, axis='columns')
BMX_df.head()

Unnamed: 0,respondent_sequence_number,body_measures_component_status,weight_kg,weight_comment,recumbent_length_cm,recumbent_length_comment,head_circumference_cm,head_circumference_comment,standing_height_cm,standing_height_comment,...,upper_leg_length_cm,upper_leg_length_comment,upper_arm_length_cm,upper_arm_length_comment,arm_circumference_cm,arm_circumference_comment,waist_circumference_cm,waist_circumference_comment,hip_circumference_cm,hip_circumference_comment
0,130378.0,1.0,86.9,,,,,,179.5,,...,42.8,,42.0,,35.7,,98.3,,102.9,
1,130379.0,1.0,101.8,,,,,,174.2,,...,38.5,,38.7,,33.7,,114.7,,112.4,
2,130380.0,1.0,69.4,,,,,,152.9,,...,38.5,,35.5,,36.3,,93.5,,98.0,
3,130381.0,1.0,34.3,,,,,,120.1,,...,,,25.4,,23.4,,70.4,,,
4,130382.0,3.0,13.6,,,1.0,,,,1.0,...,,,,1.0,,1.0,,1.0,,
