In [9]:
pip install requests pandas


Note: you may need to restart the kernel to use updated packages.


In [17]:
import requests
import pandas as pd
import math
import os

#  DOWNLOAD COUNTY DATA AUTOMATICALLY

def download_county_data(csv_path="us_counties.csv"):
    """Return a county-level income/population table, downloading it if needed.

    If ``csv_path`` already exists it is read and returned as-is. Otherwise the
    2022 ACS 5-year county estimates (median household income ``B19013_001E``
    and total population ``B01003_001E``) are fetched from the Census API,
    cleaned, written to ``csv_path`` and returned.

    Parameters
    ----------
    csv_path : str
        Location of the local CSV cache.

    Returns
    -------
    pandas.DataFrame
        Columns ``state``, ``county``, ``median_income``, ``population``.

    Raises
    ------
    requests.HTTPError
        If the Census API answers with an error status.
    requests.Timeout
        If the API does not respond within the timeout.
    """
    if os.path.exists(csv_path):
        return pd.read_csv(csv_path)

    print("Downloading county data from Census API...")

    url = (
        "https://api.census.gov/data/2022/acs/acs5"
        "?get=NAME,B19013_001E,B01003_001E"
        "&for=county:*"
    )

    # Without a timeout a stalled connection would hang the notebook forever,
    # and without the status check an error page would be parsed as data.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    data = response.json()

    rows = []
    # data[0] is the header row; every later row is one county.
    for row in data[1:]:
        name, median_income, population, state_fips, county_fips = row

        try:
            median_income = int(median_income)
            population = int(population)
        except (TypeError, ValueError):
            # Null / "None" / non-numeric estimate: nothing usable for this county.
            continue

        # The ACS reports unavailable estimates as large negative sentinel
        # values (e.g. -666666666); those are not real incomes.
        if median_income < 0 or population < 0:
            continue

        parts = name.split(", ")
        if len(parts) < 2:
            # NAME is expected to look like "<county>, <state>"; skip anything else.
            continue
        county = parts[0].replace(" County", "")
        state = parts[1]

        rows.append([state, county, median_income, population])

    df = pd.DataFrame(rows, columns=["state", "county", "median_income", "population"])
    df.to_csv(csv_path, index=False)

    print(f"Saved dataset to {csv_path}")
    return df


# Load the county table once (from the CSV cache when present). It is kept at
# module level because county_probabilities() reads this `df` directly.
df = download_county_data()


#  INCOME CLASS FUNCTION

def income_class(salary):
    """Map an annual salary to its income-class label.

    Each bracket is bounded above by an exclusive limit; a salary that is not
    below any limit is labelled "Upper Class".
    """
    brackets = (
        (40000, "Lower Class"),
        (65000, "Working Class"),
        (150000, "Middle Class"),
        (350000, "Upper-Middle Class"),
    )
    # Brackets are ordered ascending, so the first limit above the salary wins.
    for upper_limit, label in brackets:
        if salary < upper_limit:
            return label
    return "Upper Class"


#  COUNTY PROBABILITY MODEL

def county_probabilities(salary, sigma=25000):
    """Rank counties by how likely a person with ``salary`` is to live there.

    Each county in the module-level ``df`` gets a weight of
    ``population * exp(-(salary - median_income)^2 / (2 * sigma^2))`` and the
    weights are normalised so that they sum to 1 over all counties.

    The weights are computed in log space and shifted by the largest
    log-weight before exponentiating. This yields the same probabilities but
    cannot underflow to an all-zero total, which previously raised
    ZeroDivisionError for salaries far from every county median.

    Parameters
    ----------
    salary : int or float
        Annual salary in dollars.
    sigma : int or float
        Width of the Gaussian kernel in dollars; must be positive.

    Returns
    -------
    list of (state, county, probability) tuples
        The (at most) 10 most probable counties, highest first. Empty if the
        table has no rows or no county has a positive population.

    Raises
    ------
    ValueError
        If ``sigma`` is not positive.
    """
    if sigma <= 0:
        raise ValueError("sigma must be positive")

    neg_inf = float("-inf")
    two_sigma_sq = 2.0 * float(sigma) * float(sigma)

    log_scores = []
    for state, county, median, pop in zip(
        df["state"], df["county"], df["median_income"], df["population"]
    ):
        # Float arithmetic avoids silent int64 wrap-around when squaring.
        diff = float(salary) - float(median)
        pop = float(pop)
        # A county with no residents keeps its slot but gets zero weight.
        log_pop = math.log(pop) if pop > 0 else neg_inf
        log_scores.append((state, county, log_pop - (diff * diff) / two_sigma_sq))

    if not log_scores:
        return []

    peak = max(log_weight for _, _, log_weight in log_scores)
    if peak == neg_inf:
        # Every county has zero weight; there is nothing to normalise.
        return []

    # After the shift the best county contributes exp(0) == 1, so total >= 1.
    scores = [
        (state, county, math.exp(log_weight - peak))
        for state, county, log_weight in log_scores
    ]
    total = sum(score for _, _, score in scores)
    prob_list = [(state, county, score / total) for state, county, score in scores]

    prob_list.sort(key=lambda x: x[2], reverse=True)
    return prob_list[:10]


#  MAIN PROGRAM

if __name__ == "__main__":
    # Interactive entry point: ask for a salary, report its income class, then
    # list the ten counties the probability model ranks highest for it.
    raw_salary = input("Enter annual salary: $ ")

    # The "$" prompt invites entries such as "$75,000"; drop the currency
    # symbol and thousands separators before converting.
    cleaned_salary = raw_salary.strip().lstrip("$").replace(",", "").strip()
    try:
        salary = int(cleaned_salary)
    except ValueError:
        raise ValueError(
            f"Could not read {raw_salary!r} as a salary; "
            "enter a whole-dollar amount such as 75000"
        ) from None

    # NEW: Show user's income class
    classification = income_class(salary)
    print(f"\nIncome Class: {classification}")

    # County prediction
    results = county_probabilities(salary)

    print("\nTop 10 Most Likely U.S. Counties")
    for state, county, prob in results:
        print(f"{county}, {state}: {prob * 100:.4f}%")


Enter annual salary: $  500000



Income Class: Upper Class

Top 10 Most Likely U.S. Counties
Loudoun, Virginia: 99.7941%
Falls Church city, Virginia: 0.1477%
Santa Clara, California: 0.0554%
San Mateo, California: 0.0025%
Fairfax, Virginia: 0.0003%
Marin, California: 0.0000%
Howard, Maryland: 0.0000%
Nassau, New York: 0.0000%
Douglas, Colorado: 0.0000%
San Francisco, California: 0.0000%
