In [1]:
import requests
import pandas as pd

# Optional: Add your API key here
api_key = ''  # Replace with your key if you have one

# Base API URL
base_url = "https://api.census.gov/data/2025/cps/basic/feb"

# Variables to retrieve
variables = [
    "PWSSWGT", "PESEX", "PRPERTYP", "PEIO1COW", "PESCHENR", "PESCHFT", "PEEDUCA", "PEMARITL", "PRDTOCC1",
    "PENATVTY", "PTDTRACE", "PTERNWA", "PTERN2", "PTERNHLY", "PTERNH1O", "PTERNH2", "PTERNH1C"
]

# Construct query parameters
params = {
    "get": ",".join(variables)
}

# Add API key if available
if api_key:
    params["key"] = api_key

# Make the API request
response = requests.get(base_url, params=params)

# Check if the request was successful
if response.status_code == 200:
    data = response.json()
    # The first row contains column names
    columns = data[0]
    rows = data[1:]
    df = pd.DataFrame(rows, columns=columns)
    display(df.head())  # Display the first few rows
else:
    print(f"Error {response.status_code}: {response.text}")

Unnamed: 0,PWSSWGT,PESEX,PRPERTYP,PEIO1COW,PESCHENR,PESCHFT,PEEDUCA,PEMARITL,PRDTOCC1,PENATVTY,PTDTRACE,PTERNWA,PTERN2,PTERNHLY,PTERNH1O,PTERNH2,PTERNH1C
0,1351.3346,2,2,-1,-1,-1,39,1,-1,57,1,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01
1,1581.1002,1,2,1,-1,-1,40,1,17,57,1,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01
2,1390.5738,2,2,-1,2,-1,43,6,-1,57,1,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01
3,1351.3346,2,2,-1,-1,-1,39,4,-1,57,1,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01
4,1499.3271,1,2,4,2,-1,39,5,19,57,1,-0.01,-0.01,-0.01,-0.01,-0.01,-0.01


In [2]:
import pandas as pd

# ------------------------------------------
# PESEX → PESEX_BIN
# ------------------------------------------
df["PESEX_BIN"] = df["PESEX"].map({
    "1": "Male",
    "2": "Female"
})

# ------------------------------------------
# PEMARITL → PEMARITL_BIN
# ------------------------------------------
df["PEMARITL_BIN"] = df["PEMARITL"].map({
    "1": "Married - spouse present",
    "2": "Married - spouse absent",
    "3": "Widowed",
    "4": "Divorced",
    "5": "Separated",
    "6": "Never married"
})

# ------------------------------------------
# PEEDUCA → PEEDUCA_BIN
# ------------------------------------------
df["PEEDUCA_BIN"] = df["PEEDUCA"].map({
    "31": "Less than 1st grade",
    "32": "1st-4th grade",
    "33": "5th-6th grade",
    "34": "7th-8th grade",
    "35": "9th grade",
    "36": "10th grade",
    "37": "11th grade",
    "38": "12th grade, no diploma",
    "39": "High school graduate",
    "40": "Some college, no degree",
    "41": "Associate degree - occupational",
    "42": "Associate degree - academic",
    "43": "Bachelor's degree",
    "44": "Master's degree",
    "45": "Professional degree",
    "46": "Doctorate degree"
})

# ------------------------------------------
# PRPERTYP → PRPERTYP_BIN
# ------------------------------------------
df["PRPERTYP_BIN"] = df["PRPERTYP"].map({
    "1": "Child household member",
    "2": "Adult civilian household member",
    "3": "Adult armed forces household member"
})

# ------------------------------------------
# PENATVTY → PENATVTY_BIN
# ------------------------------------------
df["PENATVTY_BIN"] = df["PENATVTY"].map({
    "57": "United States",
    "60": "American Samoa",
    "66": "Guam",
    "69": "Northern Marianas",
    "73": "Puerto Rico",
    "78": "U.S. Virgin Islands",
    "100": "Albania",
    "102": "Austria",
    "103": "Belgium",
    "104": "Bulgaria",
    "105": "Czechoslovakia",
    "106": "Denmark",
    "108": "Finland",
    "109": "France",
    "110": "Germany",
    "116": "Greece",
    "117": "Hungary",
    "118": "Iceland",
    "119": "Ireland",
    "120": "Italy",
    "126": "Netherlands",
    "127": "Norway",
    "128": "Poland",
    "129": "Portugal",
    "130": "Azores",
    "132": "Romania",
    "134": "Spain",
    "136": "Sweden",
    "137": "Switzerland",
    "138": "United Kingdom",
    "139": "England",
    "140": "Scotland",
    "142": "Northern Ireland",
    "147": "Yugoslavia",
    "148": "Czech Republic",
    "149": "Slovakia",
    "150": "Bosnia & Herzegovina",
    "151": "Croatia",
    "152": "Macedonia",
    "154": "Serbia",
    "155": "Estonia",
    "156": "Latvia",
    "157": "Lithuania",
    "158": "Armenia",
    "159": "Azerbaijan",
    "160": "Belarus",
    "161": "Georgia",
    "162": "Moldova",
    "163": "Russia",
    "164": "Ukraine",
    "165": "USSR",
    "166": "Europe, not specified",
    "168": "Montenegro",
    "200": "Afghanistan",
    "202": "Bangladesh",
    "203": "Bhutan",
    "205": "Myanmar (Burma)",
    "206": "Cambodia",
    "207": "China",
    "209": "Hong Kong",
    "210": "India",
    "211": "Indonesia",
    "212": "Iran",
    "213": "Iraq",
    "214": "Israel",
    "215": "Japan",
    "216": "Jordan",
    "217": "Korea",
    "218": "Kazakhstan",
    "220": "South Korea",
    "222": "Kuwait",
    "223": "Laos",
    "224": "Lebanon",
    "226": "Malaysia",
    "228": "Mongolia",
    "229": "Nepal",
    "231": "Pakistan",
    "233": "Philippines",
    "235": "Saudi Arabia",
    "236": "Singapore",
    "238": "Sri Lanka",
    "239": "Syria",
    "240": "Taiwan",
    "242": "Thailand",
    "243": "Turkey",
    "245": "United Arab Emirates",
    "246": "Uzbekistan",
    "247": "Vietnam",
    "248": "Yemen",
    "249": "Asia, not specified",
    "300": "Bermuda",
    "301": "Canada",
    "303": "Mexico",
    "310": "Belize",
    "311": "Costa Rica",
    "312": "El Salvador",
    "313": "Guatemala",
    "314": "Honduras",
    "315": "Nicaragua",
    "316": "Panama",
    "321": "Antigua and Barbuda",
    "323": "Bahamas",
    "324": "Barbados",
    "327": "Cuba",
    "328": "Dominica",
    "329": "Dominican Republic",
    "330": "Grenada",
    "332": "Haiti",
    "333": "Jamaica",
    "338": "St. Kitts--Nevis",
    "339": "St. Lucia",
    "340": "St. Vincent and the Grenadines",
    "341": "Trinidad and Tobago",
    "343": "West Indies, not specified",
    "360": "Argentina",
    "361": "Bolivia",
    "362": "Brazil",
    "363": "Chile",
    "364": "Columbia",
    "365": "Ecuador",
    "368": "Guyana",
    "369": "Paraguay",
    "370": "Peru",
    "372": "Uruguay",
    "373": "Venezuela",
    "374": "South America, not specified",
    "399": "Americas, not specified",
    "400": "Algeria",
    "407": "Cameroon",
    "408": "Cape Verde",
    "412": "Congo",
    "414": "Egypt",
    "416": "Ethiopia",
    "417": "Eritrea",
    "421": "Ghana",
    "423": "Guinea",
    "425": "Ivory Coast",
    "427": "Kenya",
    "429": "Liberia",
    "430": "Libya",
    "436": "Morocco",
    "440": "Nigeria",
    "444": "Senegal",
    "447": "Sierra Leone",
    "448": "Somalia",
    "449": "South Africa",
    "451": "Sudan",
    "453": "Tanzania",
    "454": "Togo",
    "457": "Uganda",
    "459": "Zaire",
    "460": "Zambia",
    "461": "Zimbabwe",
    "462": "Africa, Not Specified",
    "501": "Australia",
    "508": "Fiji",
    "511": "Marshall Islands",
    "512": "Micronesia",
    "515": "New Zealand",
    "523": "Tonga",
    "527": "Samoa",
    "555": "Elsewhere"
})

# ------------------------------------------
# PTDTRACE → PTDTRACE_BIN
# ------------------------------------------
df["PTDTRACE_BIN"] = df["PTDTRACE"].map({
    "1": "White only",
    "2": "Black only",
    "3": "American Indian or Alaskan Native only",
    "4": "Asian only",
    "5": "Hawaiian/Pacific Islander only",
    "6": "White-Black",
    "7": "White-AI",
    "8": "White-Asian",
    "9": "White-HP",
    "10": "Black-AI",
    "11": "Black-Asian",
    "12": "Black-HP",
    "13": "AI-Asian",
    "14": "AI-HP",
    "15": "Asian-HP",
    "16": "W-B-AI",
    "17": "W-B-A",
    "18": "W-B-HP",
    "19": "W-AI-A",
    "20": "W-AI-HP",
    "21": "W-A-HP",
    "22": "B-AI-A",
    "23": "W-B-AI-A",
    "24": "W-AI-A-HP",
    "25": "Other 3 Race Combinations",
    "26": "Other 4 and 5 Race Combinations"
})

# ------------------------------------------
# PEIO1COW → PEIO1COW_BIN
# ------------------------------------------
df["PEIO1COW_BIN"] = df["PEIO1COW"].map({
    "1": "Private for-profit",
    "2": "Private nonprofit",
    "3": "Local government",
    "4": "State government",
    "5": "Federal government",
    "6": "Self-employed, unincorporated",
    "7": "Self-employed, incorporated",
    "8": "Unpaid family worker",
    "9": "Never worked"
})

# ------------------------------------------
# PESCHENR → PESCHENR_BIN
# ------------------------------------------
df["PESCHENR_BIN"] = df["PESCHENR"].map({
    "1": "Enrolled",
    "2": "Not enrolled"
})

# ------------------------------------------
# PESCHFT → PESCHFT_BIN
# ------------------------------------------
df["PESCHFT_BIN"] = df["PESCHFT"].map({
    "1": "Full time",
    "2": "Part time"
})

# ------------------------------------------
# PTERNHLY → PTERNHLY_BIN
# ------------------------------------------
df["PTERNHLY_BIN"] = df["PTERNHLY"].map({
    "1": "Hourly",
    "2": "Non-hourly"
})

# ------------------------------------------
# PRDTOCC1 → PRDTOCC1_BIN (Detailed Occupation Recode - Job 1)
# ------------------------------------------
df["PRDTOCC1_BIN"] = df["PRDTOCC1"].map({
    "1": "Management occupations",
    "2": "Business and financial operations occupations",
    "3": "Computer and mathematical occupations",
    "4": "Architecture and engineering occupations",
    "5": "Life, physical, and social science occupations",
    "6": "Community and social service occupations",
    "7": "Legal occupations",
    "8": "Education instruction and library occupations",
    "9": "Arts, design, entertainment, sports, and media occupations",
    "10": "Healthcare practitioner and technical occupations",
    "11": "Healthcare support occupations",
    "12": "Protective service occupations",
    "13": "Food preparation and serving related occupations",
    "14": "Building and grounds cleaning and maintenance occupations",
    "15": "Personal care and service occupations",
    "16": "Sales and related occupations",
    "17": "Office and administrative support occupations",
    "18": "Farming, fishing, and forestry occupations",
    "19": "Construction and extraction occupations",
    "20": "Installation, maintenance, and repair occupations",
    "21": "Production occupations",
    "22": "Transportation and material moving occupations",
    "23": "Armed Forces"
})

# ------------------------------------------
# PTERNWA → PTERNWA_BIN (Weekly earnings)
# ------------------------------------------
df["PTERNWA"] = pd.to_numeric(df["PTERNWA"], errors='coerce')
df["PTERNWA_BIN"] = pd.cut(
    df["PTERNWA"],
    bins=[0, 250, 500, 750, 1000, 1250, 1500, 2000, float('inf')],
    labels=["<250", "250-499", "500-749", "750-999", "1000-1249", "1250-1499", "1500-1999", "2000+"],
    right=False
)

# ------------------------------------------
# PTERN2 → PTERN2_BIN (Number of hours worked)
# ------------------------------------------
df["PTERN2"] = pd.to_numeric(df["PTERN2"], errors='coerce')
df["PTERN2_BIN"] = pd.cut(
    df["PTERN2"],
    bins=[0, 10, 20, 30, 40, 50, 60, float('inf')],
    labels=["0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60+"],
    right=False
)

# ------------------------------------------
# PTERNH1O → PTERNH1O_BIN (Hourly pay rate)
# ------------------------------------------
df["PTERNH1O"] = pd.to_numeric(df["PTERNH1O"], errors='coerce')
df["PTERNH1O_BIN"] = pd.cut(
    df["PTERNH1O"],
    bins=[0, 250, 500, 750, 1000, 1250, 1500, 2000, float('inf')],
    labels=["<250", "250-499", "500-749", "750-999", "1000-1249", "1250-1499", "1500-1999", "2000+"],
    right=False
)

# ------------------------------------------
# PTERNH2 → PTERNH2_BIN (Usual hours per week)
# ------------------------------------------
df["PTERNH2"] = pd.to_numeric(df["PTERNH2"], errors='coerce')
df["PTERNH2_BIN"] = pd.cut(
    df["PTERNH2"],
    bins=[0, 10, 20, 30, 40, 50, 60, float('inf')],
    labels=["0-9", "10-19", "20-29", "30-39", "40-49", "50-59", "60+"],
    right=False
)

# ------------------------------------------
# PTERNH1C → PTERNH1C_BIN (Usual hourly earnings)
# ------------------------------------------
df["PTERNH1C"] = pd.to_numeric(df["PTERNH1C"], errors='coerce')
df["PTERNH1C_BIN"] = pd.cut(
    df["PTERNH1C"],
    bins=[0, 250, 500, 750, 1000, 1250, 1500, 2000, float('inf')],
    labels=["<250", "250-499", "500-749", "750-999", "1000-1249", "1250-1499", "1500-1999", "2000+"],
    right=False
)

# ------------------------------------------
# Done! Your DataFrame `df` now contains all binned columns.
# ------------------------------------------
display(df.head())

Unnamed: 0,PWSSWGT,PESEX,PRPERTYP,PEIO1COW,PESCHENR,PESCHFT,PEEDUCA,PEMARITL,PRDTOCC1,PENATVTY,...,PEIO1COW_BIN,PESCHENR_BIN,PESCHFT_BIN,PTERNHLY_BIN,PRDTOCC1_BIN,PTERNWA_BIN,PTERN2_BIN,PTERNH1O_BIN,PTERNH2_BIN,PTERNH1C_BIN
0,1351.3346,2,2,-1,-1,-1,39,1,-1,57,...,,,,,,,,,,
1,1581.1002,1,2,1,-1,-1,40,1,17,57,...,Private for-profit,,,,Office and administrative support occupations,,,,,
2,1390.5738,2,2,-1,2,-1,43,6,-1,57,...,,Not enrolled,,,,,,,,
3,1351.3346,2,2,-1,-1,-1,39,4,-1,57,...,,,,,,,,,,
4,1499.3271,1,2,4,2,-1,39,5,19,57,...,State government,Not enrolled,,,Construction and extraction occupations,,,,,


In [3]:
# Define the original columns and their associated bin columns
column_pairs = [
    ("PESEX", "PESEX_BIN"),
    ("PRPERTYP", "PRPERTYP_BIN"),
    ("PEIO1COW", "PEIO1COW_BIN"),
    ("PESCHENR", "PESCHENR_BIN"),
    ("PESCHFT", "PESCHFT_BIN"),
    ("PEEDUCA", "PEEDUCA_BIN"),
    ("PEMARITL", "PEMARITL_BIN"),
    ("PTDTRACE", "PTDTRACE_BIN"),
    ("PENATVTY", "PENATVTY_BIN"),
    ("PTERNWA", "PTERNWA_BIN"),
    ("PTERN2", "PTERN2_BIN"),
    ("PTERNH1O", "PTERNH1O_BIN"),
    ("PTERNH2", "PTERNH2_BIN"),
    ("PTERNH1C", "PTERNH1C_BIN"),
    ("PTERNHLY", "PTERNHLY_BIN"),
    ("PRDTOCC1", "PRDTOCC1_BIN")
]

# Build a new column order
new_column_order = []
for col, bin_col in column_pairs:
    if col in df.columns:
        new_column_order.append(col)
        if bin_col in df.columns:
            new_column_order.append(bin_col)

# Add any columns not explicitly reordered (e.g., PWSSWGT, PRDTOCC1, etc.)
remaining_cols = [col for col in df.columns if col not in new_column_order]
new_column_order = new_column_order + remaining_cols

# Reorder the DataFrame
df = df[new_column_order]

# Display result
display(df.head())

Unnamed: 0,PESEX,PESEX_BIN,PRPERTYP,PRPERTYP_BIN,PEIO1COW,PEIO1COW_BIN,PESCHENR,PESCHENR_BIN,PESCHFT,PESCHFT_BIN,...,PTERNH1O_BIN,PTERNH2,PTERNH2_BIN,PTERNH1C,PTERNH1C_BIN,PTERNHLY,PTERNHLY_BIN,PRDTOCC1,PRDTOCC1_BIN,PWSSWGT
0,2,Female,2,Adult civilian household member,-1,,-1,,-1,,...,,-0.01,,-0.01,,-0.01,,-1,,1351.3346
1,1,Male,2,Adult civilian household member,1,Private for-profit,-1,,-1,,...,,-0.01,,-0.01,,-0.01,,17,Office and administrative support occupations,1581.1002
2,2,Female,2,Adult civilian household member,-1,,2,Not enrolled,-1,,...,,-0.01,,-0.01,,-0.01,,-1,,1390.5738
3,2,Female,2,Adult civilian household member,-1,,-1,,-1,,...,,-0.01,,-0.01,,-0.01,,-1,,1351.3346
4,1,Male,2,Adult civilian household member,4,State government,2,Not enrolled,-1,,...,,-0.01,,-0.01,,-0.01,,19,Construction and extraction occupations,1499.3271


In [4]:
# ------------------------------------------
# Drop original columns that have been binned
# and create a cleaned copy of the DataFrame
# ------------------------------------------
columns_to_drop = [
    "PESEX", "PRPERTYP", "PEIO1COW", "PESCHENR", "PESCHFT",
    "PEEDUCA", "PEMARITL", "PTDTRACE", "PENATVTY", "PRDTOCC1"
]

# Create a cleaned copy before dropping
df_cleaned = df.copy()

# Drop the specified columns from the cleaned copy
df_cleaned.drop(columns=columns_to_drop, inplace=True)

# Preview the cleaned DataFrame
display(df_cleaned.head())

Unnamed: 0,PESEX_BIN,PRPERTYP_BIN,PEIO1COW_BIN,PESCHENR_BIN,PESCHFT_BIN,PEEDUCA_BIN,PEMARITL_BIN,PTDTRACE_BIN,PENATVTY_BIN,PTERNWA,...,PTERNH1O,PTERNH1O_BIN,PTERNH2,PTERNH2_BIN,PTERNH1C,PTERNH1C_BIN,PTERNHLY,PTERNHLY_BIN,PRDTOCC1_BIN,PWSSWGT
0,Female,Adult civilian household member,,,,High school graduate,Married - spouse present,White only,United States,-0.01,...,-0.01,,-0.01,,-0.01,,-0.01,,,1351.3346
1,Male,Adult civilian household member,Private for-profit,,,"Some college, no degree",Married - spouse present,White only,United States,-0.01,...,-0.01,,-0.01,,-0.01,,-0.01,,Office and administrative support occupations,1581.1002
2,Female,Adult civilian household member,,Not enrolled,,Bachelor's degree,Never married,White only,United States,-0.01,...,-0.01,,-0.01,,-0.01,,-0.01,,,1390.5738
3,Female,Adult civilian household member,,,,High school graduate,Divorced,White only,United States,-0.01,...,-0.01,,-0.01,,-0.01,,-0.01,,,1351.3346
4,Male,Adult civilian household member,State government,Not enrolled,,High school graduate,Separated,White only,United States,-0.01,...,-0.01,,-0.01,,-0.01,,-0.01,,Construction and extraction occupations,1499.3271


In [5]:
# ------------------------------------------
# Create a filtered copy and remove rows where PTERNWA == -0.01
# ------------------------------------------
df_filtered = df_cleaned.copy()

# Ensure PTERNWA is numeric (if not already)
df_filtered["PTERNWA"] = pd.to_numeric(df_filtered["PTERNWA"], errors="coerce")

# Drop rows with PTERNWA == -0.01
df_filtered = df_filtered[df_filtered["PTERNWA"] != -0.01]

# Preview the filtered DataFrame
display(df_filtered)

Unnamed: 0,PESEX_BIN,PRPERTYP_BIN,PEIO1COW_BIN,PESCHENR_BIN,PESCHFT_BIN,PEEDUCA_BIN,PEMARITL_BIN,PTDTRACE_BIN,PENATVTY_BIN,PTERNWA,...,PTERNH1O,PTERNH1O_BIN,PTERNH2,PTERNH2_BIN,PTERNH1C,PTERNH1C_BIN,PTERNHLY,PTERNHLY_BIN,PRDTOCC1_BIN,PWSSWGT
33,Male,Adult civilian household member,State government,Not enrolled,,"Some college, no degree",Married - spouse present,White only,United States,1500.0,...,-0.01,,22.00,20-29,-0.01,,22.00,,Construction and extraction occupations,3131.1945
53,Female,Adult civilian household member,State government,,,Bachelor's degree,Never married,White only,United States,866.0,...,-0.01,,-0.01,,-0.01,,-0.01,,Management occupations,3414.4259
77,Female,Adult civilian household member,State government,,,High school graduate,Married - spouse present,Asian only,Malaysia,800.0,...,20.00,<250,-0.01,,-0.01,,20.00,,Production occupations,3356.7695
83,Female,Adult civilian household member,Local government,Not enrolled,,Master's degree,Married - spouse present,White only,United States,866.0,...,-0.01,,-0.01,,-0.01,,-0.01,,Education instruction and library occupations,4638.1003
84,Male,Adult civilian household member,State government,Not enrolled,,High school graduate,Married - spouse present,White only,United States,1480.0,...,37.00,<250,-0.01,,-0.01,,37.00,,Sales and related occupations,2885.3200
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
96720,Female,Adult civilian household member,State government,,,Associate degree - academic,Married - spouse present,White only,United States,1630.0,...,-0.01,,-0.01,,-0.01,,-0.01,,Office and administrative support occupations,1550.0935
96746,Male,Adult civilian household member,State government,Not enrolled,,High school graduate,Married - spouse present,White only,United States,720.0,...,18.00,<250,-0.01,,-0.01,,18.00,,Production occupations,3738.4843
96778,Female,Adult civilian household member,State government,,,"Some college, no degree",Never married,White only,Cuba,462.0,...,-0.01,,17.60,10-19,-0.01,,17.60,,Sales and related occupations,2265.9486
96798,Female,Adult civilian household member,Private nonprofit,Not enrolled,,Associate degree - academic,Divorced,White only,United States,950.0,...,-0.01,,30.00,30-39,-0.01,,30.00,,Production occupations,3958.7897


In [6]:
# ------------------------------------------
# Rename columns to more descriptive names
# ------------------------------------------
df_filtered.rename(columns={
    "PESEX_BIN": "Sex",
    "PRPERTYP_BIN": "Household Member Status",
    "PEIO1COW_BIN": "Industry",
    "PESCHENR_BIN": "Education Enrollment",
    "PESCHFT_BIN": "FT/PT Enrollment",
    "PEEDUCA_BIN": "Education Level Attained",
    "PEMARITL_BIN": "Marital Status",
    "PTDTRACE_BIN": "Race",
    "PENATVTY_BIN": "Native Country",
    "PTERNWA": "Weekly Earnings",
    "PTERNWA_BIN": "Weekly Earnings Categories",
    "PTERNH1O": "Hourly Pay Rate",
    "PTERNH1O_BIN": "Hourly Pay Rate Categories",
    "PTERNH2": "Hourly Pay Rate (Main Job)",
    "PTERNH2_BIN": "Hourly Pay Rate (Main Job) Categories",
    "PTERNH1C": "Hourly Pay Rate (excluding OT)",
    "PTERNH1C_BIN": "Hourly Pay Rate (excluding OT) Categories",
    "PTERNHLY": "Hourly Pay Rate Recode",
    "PTERNHLY_BIN": "Hourly Pay Rate Recode Categories",
    "PRDTOCC1_BIN": "Occupation"
}, inplace=True)

# Preview to confirm
display(df_filtered.head())

Unnamed: 0,Sex,Household Member Status,Industry,Education Enrollment,FT/PT Enrollment,Education Level Attained,Marital Status,Race,Native Country,Weekly Earnings,...,Hourly Pay Rate,Hourly Pay Rate Categories,Hourly Pay Rate (Main Job),Hourly Pay Rate (Main Job) Categories,Hourly Pay Rate (excluding OT),Hourly Pay Rate (excluding OT) Categories,Hourly Pay Rate Recode,Hourly Pay Rate Recode Categories,Occupation,PWSSWGT
33,Male,Adult civilian household member,State government,Not enrolled,,"Some college, no degree",Married - spouse present,White only,United States,1500.0,...,-0.01,,22.0,20-29,-0.01,,22.0,,Construction and extraction occupations,3131.1945
53,Female,Adult civilian household member,State government,,,Bachelor's degree,Never married,White only,United States,866.0,...,-0.01,,-0.01,,-0.01,,-0.01,,Management occupations,3414.4259
77,Female,Adult civilian household member,State government,,,High school graduate,Married - spouse present,Asian only,Malaysia,800.0,...,20.0,<250,-0.01,,-0.01,,20.0,,Production occupations,3356.7695
83,Female,Adult civilian household member,Local government,Not enrolled,,Master's degree,Married - spouse present,White only,United States,866.0,...,-0.01,,-0.01,,-0.01,,-0.01,,Education instruction and library occupations,4638.1003
84,Male,Adult civilian household member,State government,Not enrolled,,High school graduate,Married - spouse present,White only,United States,1480.0,...,37.0,<250,-0.01,,-0.01,,37.0,,Sales and related occupations,2885.32


In [8]:
# ------------------------------------------
# Create another copy and clean/reorder columns
# ------------------------------------------
df_final = df_filtered.copy()

# Drop specified hourly pay-related columns
columns_to_remove = [
    "Hourly Pay Rate",
    "Hourly Pay Rate Categories",
    "Hourly Pay Rate (Main Job)",
    "Hourly Pay Rate (Main Job) Categories",
    "Hourly Pay Rate (excluding OT)",
    "Hourly Pay Rate (excluding OT) Categories",
    "Hourly Pay Rate Recode",
    "Hourly Pay Rate Recode Categories"
]

df_final.drop(columns=columns_to_remove, inplace=True, errors="ignore")

# Rearrange columns
column_order = [
    "Sex",
    "Marital Status",
    "Race",
    "Native Country",
    "Industry",
    "Occupation",
    "Education Enrollment",
    "FT/PT Enrollment",
    "Education Level Attained",
    "Household Member Status",
    "Weekly Earnings",
    "Weekly Earnings Categories",
    "PWSSWGT"
]

# Apply new column order
df_final = df_final[[col for col in column_order if col in df_final.columns]]

# Preview the final DataFrame
display(df_final.head())

Unnamed: 0,Sex,Marital Status,Race,Native Country,Industry,Occupation,Education Enrollment,FT/PT Enrollment,Education Level Attained,Household Member Status,Weekly Earnings,Weekly Earnings Categories,PWSSWGT
33,Male,Married - spouse present,White only,United States,State government,Construction and extraction occupations,Not enrolled,,"Some college, no degree",Adult civilian household member,1500.0,1500-1999,3131.1945
53,Female,Never married,White only,United States,State government,Management occupations,,,Bachelor's degree,Adult civilian household member,866.0,750-999,3414.4259
77,Female,Married - spouse present,Asian only,Malaysia,State government,Production occupations,,,High school graduate,Adult civilian household member,800.0,750-999,3356.7695
83,Female,Married - spouse present,White only,United States,Local government,Education instruction and library occupations,Not enrolled,,Master's degree,Adult civilian household member,866.0,750-999,4638.1003
84,Male,Married - spouse present,White only,United States,State government,Sales and related occupations,Not enrolled,,High school graduate,Adult civilian household member,1480.0,1250-1499,2885.32


In [9]:
df_final.dtypes

Sex                             object
Marital Status                  object
Race                            object
Native Country                  object
Industry                        object
Occupation                      object
Education Enrollment            object
FT/PT Enrollment                object
Education Level Attained        object
Household Member Status         object
Weekly Earnings                float64
Weekly Earnings Categories    category
PWSSWGT                         object
dtype: object

In [10]:
df_final["PWSSWGT"] = pd.to_numeric(df_final["PWSSWGT"], errors="coerce")

In [11]:
df_final.dtypes

Sex                             object
Marital Status                  object
Race                            object
Native Country                  object
Industry                        object
Occupation                      object
Education Enrollment            object
FT/PT Enrollment                object
Education Level Attained        object
Household Member Status         object
Weekly Earnings                float64
Weekly Earnings Categories    category
PWSSWGT                        float64
dtype: object

In [12]:
df_final.to_csv("outputs/bls_data.csv", index=False)