## Import Libraries

In [None]:
import os

import numpy as np
import pandas as pd
import requests

# Root directory under which the download cell creates one sub-folder per NHANES cycle.
root_folder = "../Dataset"
# Presumably the destination for processed outputs; not used in the visible cells — confirm.
output_folder = "../Dataset/Processed"

## Download Data

In [6]:
# Data-file name prefixes requested for every cycle, in download order.
_nhanes_components = ("DEMO", "BIOPRO", "HEPC", "MCQ", "DUQ", "ALQ")

# One row per cycle: (cycle label, file-name suffix letter, file extension).
# The 2017-2018 row uses a lowercase extension, exactly as in the original
# hand-written listing; whether the server requires that casing is not
# verifiable from this notebook, so it is preserved unchanged.
_nhanes_cycle_specs = (
    ("2005-2006", "D", "XPT"),
    ("2007-2008", "E", "XPT"),
    ("2009-2010", "F", "XPT"),
    ("2011-2012", "G", "XPT"),
    ("2013-2014", "H", "XPT"),
    ("2015-2016", "I", "XPT"),
    ("2017-2018", "J", "xpt"),
)

# Maps each cycle label to its download base URL and the list of file names to
# fetch. The URL path segment is the cycle's starting year (first four
# characters of the label).
nhanes_cycles = {
    label: {
        "base_url": f"https://wwwn.cdc.gov/Nchs/Data/Nhanes/Public/{label[:4]}/DataFiles/",
        "files": [
            f"{component}_{suffix}.{extension}" for component in _nhanes_components
        ],
    }
    for label, suffix, extension in _nhanes_cycle_specs
}

In [7]:
# Download every file listed in `nhanes_cycles` into `root_folder`, one
# sub-folder per cycle (the cycle label with "-" replaced by "_").
#
# A failure on one file is reported and the loop moves on to the next file,
# so a single bad URL or network hiccup does not abort the whole run.

# Seconds to wait while connecting and between received bytes before giving up
# on a file. Without a timeout, requests waits indefinitely on a stalled
# connection and the cell never finishes.
request_timeout_s = 60

os.makedirs(root_folder, exist_ok=True)

for cycle, info in nhanes_cycles.items():
    print(f"\n=== Downloading NHANES {cycle} ===")
    cycle_folder = os.path.join(root_folder, cycle.replace("-", "_"))
    os.makedirs(cycle_folder, exist_ok=True)
    for file_name in info["files"]:
        url = info["base_url"] + file_name
        dest_path = os.path.join(cycle_folder, file_name)

        # Keep re-runs cheap: a non-empty file left by an earlier run is
        # reused. Delete the file on disk to force a fresh download.
        if os.path.isfile(dest_path) and os.path.getsize(dest_path) > 0:
            print(f"⏭️ Already present, skipping {dest_path}")
            continue

        print(f"Downloading {file_name}...")
        try:
            response = requests.get(url, timeout=request_timeout_s)
            response.raise_for_status()
        except requests.HTTPError as e:
            # Raised by raise_for_status(), so a response is always attached.
            print(f"❌ Failed to download {file_name} ({e.response.status_code})")
            continue
        except requests.RequestException as e:
            # Connection errors, timeouts, etc. carry no HTTP status code.
            print(f"❌ Failed to download {file_name} ({type(e).__name__}: {e})")
            continue

        # Written only after a complete, successful response, so a failed
        # request never leaves a file behind.
        with open(dest_path, "wb") as out_file:
            out_file.write(response.content)
        print(f"✅ Saved to {dest_path}")


=== Downloading NHANES 2005-2006 ===
Downloading DEMO_D.XPT...
✅ Saved to ../Dataset\2005_2006\DEMO_D.XPT
Downloading BIOPRO_D.XPT...
✅ Saved to ../Dataset\2005_2006\BIOPRO_D.XPT
Downloading HEPC_D.XPT...
✅ Saved to ../Dataset\2005_2006\HEPC_D.XPT
Downloading MCQ_D.XPT...
✅ Saved to ../Dataset\2005_2006\MCQ_D.XPT
Downloading DUQ_D.XPT...
✅ Saved to ../Dataset\2005_2006\DUQ_D.XPT
Downloading ALQ_D.XPT...
✅ Saved to ../Dataset\2005_2006\ALQ_D.XPT

=== Downloading NHANES 2007-2008 ===
Downloading DEMO_E.XPT...
✅ Saved to ../Dataset\2007_2008\DEMO_E.XPT
Downloading BIOPRO_E.XPT...
✅ Saved to ../Dataset\2007_2008\BIOPRO_E.XPT
Downloading HEPC_E.XPT...
✅ Saved to ../Dataset\2007_2008\HEPC_E.XPT
Downloading MCQ_E.XPT...
✅ Saved to ../Dataset\2007_2008\MCQ_E.XPT
Downloading DUQ_E.XPT...
✅ Saved to ../Dataset\2007_2008\DUQ_E.XPT
Downloading ALQ_E.XPT...
✅ Saved to ../Dataset\2007_2008\ALQ_E.XPT

=== Downloading NHANES 2009-2010 ===
Downloading DEMO_F.XPT...
✅ Saved to ../Dataset\2009_2010\DEMO