In [None]:
import os

import pandas as pd
import requests

def load_state_fips():
    """Read ``../statefips.csv`` and return its ``STATE_FIPS`` column as a plain list."""
    fips_column = pd.read_csv("../statefips.csv").loc[:, "STATE_FIPS"]
    return fips_column.tolist()

In [None]:
def fetch_census_data_tableS(api_key, geography, colnamedict, years, var):
    """Download one ACS 5-year subject-table group per year and return the selected columns.

    For every entry in ``years`` the function requests ``group(var)`` from the
    ``/acs/acs5/subject`` endpoint of api.census.gov, stacks the yearly
    responses, renames columns through ``colnamedict`` and converts the feature
    columns to nullable integers.

    Parameters
    ----------
    api_key : str
        Census API key appended to the request URL.
    geography : str
        Geography level used in the ``for=`` clause. The returned column list
        always contains ``"place"``, so other levels presumably fail at the
        column selection -- TODO confirm before reusing with another level.
    colnamedict : dict
        Maps Census column codes to output names. The first entry is expected
        to be the ``GEO_ID`` mapping (a ``GEO_ID`` column must exist after the
        rename); every later value is treated as a numeric feature.
    years : iterable of str
        Survey years. Each one is concatenated into the URL, so they must be
        strings.
    var : str
        Group identifier placed inside ``group(...)``.

    Returns
    -------
    pandas.DataFrame
        Columns are the values of ``colnamedict`` followed by ``state``,
        ``place``, ``NAME``, ``year`` and ``fips``. Feature columns have dtype
        ``Int64`` with ``<NA>`` for placeholder or unparsable values. The index
        runs 0..n-1. When ``years`` is empty, an empty frame with these columns
        is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    HOST = 'https://api.census.gov/data/'
    data = f"/acs/acs5/subject?get=group({var})"

    coloutput = list(colnamedict.values())
    # Everything after the first (GEO_ID) entry is a numeric feature column.
    featurelist = coloutput[1:]
    default = ["state", "place", "NAME", "year", "fips"]
    varnames = coloutput + default

    # Placeholder strings that stand for "no value"; kept identical to the list
    # handled by fetch_census_data_tableB so both tables agree on what is missing.
    missing_markers = (
        "-666666666", "-666666666.0",
        "-999999999", "-999999999.0",
        "-",
    )

    # Collect the yearly frames and concatenate once instead of growing a frame
    # inside the loop.
    frames = []
    for year in years:
        url = HOST + year + data + '&for=' + geography + ":*&in=state:*" + "&in=county:*&key=" + api_key
        resp = requests.get(url)
        # Fail with the HTTP error itself rather than a confusing JSON decode error.
        resp.raise_for_status()
        unit = resp.json()
        # The first row of the payload holds the column names.
        df1 = pd.DataFrame(unit[1:], columns=unit[0])
        df1["year"] = year
        frames.append(df1)

    if not frames:
        return pd.DataFrame(columns=varnames)

    # ignore_index gives every row a unique label even when several years are stacked.
    df = pd.concat(frames, ignore_index=True).rename(columns=colnamedict)
    # Numeric identifier: the characters of GEO_ID after the first nine, parsed as int.
    df["fips"] = df["GEO_ID"].str[9:].apply(int)
    # Work on an explicit copy so the assignments below never target a view.
    df = df[varnames].copy()

    for feature in featurelist:
        raw = df[feature]
        without_markers = raw.mask(raw.isin(missing_markers))
        # Strip embedded spaces, then coerce anything non-numeric to missing.
        numeric = pd.to_numeric(
            without_markers.str.replace(" ", "", regex=False), errors='coerce'
        )
        df[feature] = numeric.astype('Int64')

    return df

In [None]:
def fetch_census_data_tableB(api_key, geography, colnamedict, years, include_year=False):
    """Download ACS 5-year detailed-table variables per year and return them as floats.

    For every entry in ``years`` the function requests the keys of
    ``colnamedict`` from the ``/acs/acs5`` endpoint of api.census.gov, stacks
    the yearly responses and renames the columns through ``colnamedict``.

    Parameters
    ----------
    api_key : str
        Census API key appended to the request URL.
    geography : str
        Geography level used in the ``for=`` clause.
    colnamedict : dict
        Maps Census variable codes (sent to the API) to output column names
        (the variable names to be recorded in csv files). Every value after the
        first entry is treated as a numeric feature.
    years : iterable of str
        Survey years. Each one is concatenated into the URL, so they must be
        strings.
    include_year : bool, default False
        When True, a ``year`` column is appended so rows from different years
        stay distinguishable. The default keeps the original output, which
        contains only the columns named in ``colnamedict``.

    Returns
    -------
    pandas.DataFrame
        Columns are the values of ``colnamedict`` (plus ``year`` when
        requested). Feature columns have dtype ``float`` with ``NaN`` for
        placeholder or unparsable values. The index runs 0..n-1. When ``years``
        is empty, an empty frame with these columns is returned.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status code.
    """
    HOST = 'https://api.census.gov/data/'
    data = '/acs/acs5?get='  # ACS 5-Year Estimates Detailed Tables

    # output variables (variable names to be recorded in csv files)
    coloutput = list(colnamedict.values())

    # input variables (US Census code)
    colinput = list(colnamedict.keys())

    # feature names except for the first entry (the "GEO_ID" mapping)
    featurelist = coloutput[1:]
    varnames = coloutput + (["year"] if include_year else [])

    # Placeholder strings that stand for "no value" in the API response.
    missing_markers = (
        "-666666666", "-666666666.0",
        "-999999999", "-999999999.0",
        "-",
    )

    # Collect the yearly frames and concatenate once instead of growing a frame
    # inside the loop.
    frames = []
    for year in years:
        url = HOST + year + data + ','.join(colinput) + '&for=' + geography + ":*&in=state:*" + "&in=county:*&key=" + api_key
        resp = requests.get(url)
        # Fail with the HTTP error itself rather than a confusing JSON decode error.
        resp.raise_for_status()
        unit = resp.json()
        # The first row of the payload holds the column names.
        df1 = pd.DataFrame(unit[1:], columns=unit[0])
        df1["year"] = year
        frames.append(df1)

    if not frames:
        return pd.DataFrame(columns=varnames)

    df = pd.concat(frames, ignore_index=True).rename(columns=colnamedict)
    # No "fips" column is derived here: it was never part of the returned
    # columns, so computing it only added work and a possible failure point.
    # Work on an explicit copy so the assignments below never target a view.
    df = df[varnames].copy()

    for feature in featurelist:
        raw = df[feature]
        without_markers = raw.mask(raw.isin(missing_markers))
        # Strip embedded spaces, then coerce anything non-numeric to missing.
        numeric = pd.to_numeric(
            without_markers.str.replace(" ", "", regex=False), errors='coerce'
        )
        df[feature] = numeric.astype('float')

    return df

In [None]:
def main():
    """Fetch 2018 place-level income and population figures and return them merged.

    The function downloads column ``S1903_C03_015E`` (output name ``MHIncome``)
    through ``fetch_census_data_tableS`` and column ``B15003_001E`` (output name
    ``Population``) through ``fetch_census_data_tableB``, inner-joins the two
    frames on ``GEO_ID``, removes rows for state codes 72 and 78, drops rows
    lacking either value and sorts by population, largest first.

    The API key is read from the ``CENSUS_API_KEY`` environment variable; when
    it is not set, the original placeholder string is used.

    Returns
    -------
    pandas.DataFrame
        One row per remaining place, sorted by ``Population`` descending with a
        fresh 0..n-1 index.
    """
    # Read the key from the environment so a real credential never has to be
    # typed into the notebook; the placeholder remains the fallback.
    api_key = os.environ.get("CENSUS_API_KEY", "Your API KEY")
    geography = "place"
    years = ["2018"]
    var_S = "S1903"  # income

    # Column name dictionaries for Table S and Table B
    tableS_colnamedict = {
        "GEO_ID": "GEO_ID",
        "S1903_C03_015E": "MHIncome"
    }

    tableB_colnamedict = {
        "GEO_ID": "GEO_ID",
        "B15003_001E": "Population"
    }

    # The state FIPS list used to be loaded here but was never used, which made
    # main() depend on ../statefips.csv for no reason; the call is dropped.

    # Fetch census data for Table S and Table B
    df_census_dataS = fetch_census_data_tableS(api_key, geography, tableS_colnamedict, years, var_S)
    df_census_dataB = fetch_census_data_tableB(api_key, geography, tableB_colnamedict, years)

    # Merge df_census_dataS and df_census_dataB on the 'GEO_ID' column.
    # NOTE: the join key is GEO_ID only. That is fine while a single year is
    # requested; with several years the same GEO_ID would match across years.
    merged_df = pd.merge(df_census_dataS, df_census_dataB, on="GEO_ID", how='inner')

    # Exclude Puerto Rico (state code 72) and Virgin Islands (state code 78)
    outside_territories = ~merged_df["state"].isin(["72", "78"])

    df_place = (
        merged_df[outside_territories]
        # Drop rows with missing values in MHIncome and Population columns
        .dropna(subset=["MHIncome", "Population"])
        .sort_values(by=["Population"], ascending=False, ignore_index=True)
    )

    return df_place

In [None]:
if __name__ == "__main__":
    df_result = main()

# A bare expression inside the `if` body is never rendered by the notebook;
# only the last top-level expression of a cell is. Preview the first rows here
# instead of dumping the whole frame.
df_result.head()

In [None]:
# Keep the places whose population is at least 100 and write them to CSV
# without the index column.
df_place_100 = df_result.loc[df_result["Population"].ge(100)]
df_place_100.to_csv("../us-place-over100pop.csv", index=False)