In [17]:
# Install pip
!pip install us



In [18]:
# Dependencies 
import requests
import pandas as pd
from census import Census
import us
from us import states
# Import API Key
from config import api_key

In [19]:
# Run Census Search to retrieve data on congressional districts

years = [2017, 2018, 2019, 2020, 2021]
variables = [
    "NAME",
    "B19013_001E",  # population
    "B01003_001E",  # median age total
    "B01002_002E",  # median age (male)
    "B01002_003E",  # median age (female)
    "B01002_001E",  # household income
    "B19301_001E",  # Per capital income
    "B17001_002E",  # poverty count
    "B23022_001E",  # total work force
    "B23001_088E",  # total female workforce
    "B23022_049E",  # total female did not work
    "B23022_002E",  # total male workforce
    "B23022_025E",  # total male did not work
    "B15003_023E",  # masters degrees
    "B25058_001E",  # median rent
]

common_params = {'for': 'state:*', 'for': 'congressional district:*'}

for year in years:
    c = Census(api_key, year=year)
    census_data = c.acs5.get(variables, common_params)
    
    # Initialize empty list to store dataframes
    census_dfs = []

    # Convert to DataFrame and add "year" column
    census_pd = pd.DataFrame(census_data)
    census_pd['Year'] = year

    # Append the DataFrame to the list
    census_dfs.append(census_pd)

In [20]:

# Concatenate all dataframes in list
census_pd_combined = pd.concat(census_dfs, ignore_index=True)

In [21]:
# Rename columns
census_pd_combined = census_pd_combined.rename(
    columns = {
        "NAME":        "Name",
        "B01003_001E": "Population",
        "B01002_001E": "Total Median Age",
        "B01002_002E":  "Median Age (male)",
        "B01002_003E":  "Median age (female)",
        "B19013_001E": "Household Income",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "B23022_001E": "Total Work Force",
        "B23001_088E": "Total Female Workforce",
        "B23022_049E": "Total Female Did Not Work",
        "B23022_002E": "Total Male Workforce",
        "B23022_025E": "Total Male Did Not Work",
        "B15003_023E": "Masters Degree Obtained",
        "B25058_001E": "Median Rent"
        
    }
        )

In [22]:
# Show combined dataframe
census_pd_combined.head()

Unnamed: 0,Name,Household Income,Population,Median Age (male),Median age (female),Total Median Age,Per Capita Income,Poverty Count,Total Work Force,Total Female Workforce,Total Female Did Not Work,Total Male Workforce,Total Male Did Not Work,Masters Degree Obtained,Median Rent,state,congressional district,Year
0,"Congressional District 1 (116th Congress), Ala...",53606.0,722794.0,38.9,41.4,40.3,29688.0,109032.0,447518.0,301800.0,77217.0,217406.0,55953.0,33551.0,707.0,1,1,2021
1,"Congressional District 2 (116th Congress), Ala...",52194.0,689681.0,37.5,40.7,39.0,28319.0,115310.0,435275.0,287843.0,76285.0,213201.0,54707.0,31270.0,619.0,1,2,2021
2,"Congressional District 3 (116th Congress), Ala...",53090.0,731482.0,37.6,39.9,38.8,28451.0,121511.0,468161.0,304646.0,77313.0,230635.0,57921.0,35868.0,590.0,1,3,2021
3,"Congressional District 4 (116th Congress), Ala...",50931.0,702255.0,39.7,42.2,40.9,27056.0,109196.0,431360.0,285978.0,76826.0,216170.0,50766.0,24306.0,469.0,1,4,2021
4,"Congressional District 5 (116th Congress), Ala...",63180.0,751690.0,38.7,40.8,39.8,35350.0,90719.0,483297.0,310448.0,75739.0,241259.0,48404.0,52340.0,643.0,1,5,2021


In [23]:
# Calculate Male Did Not Work Rate
census_pd_combined = census_pd_combined.assign(
    **{
        "Male DNW %": lambda df: 100 * df["Total Male Did Not Work"] / df["Total Male Workforce"],
        "Female DNW %": lambda df: 100 * df["Total Female Did Not Work"] / df["Total Female Workforce"],
    }
)

# Display DataFrame length and sample data
print(f"Number of rows in DataFrame: {len(census_pd_combined)}")
census_pd_combined.head()

# Display unique states
unique_states = census_pd_combined['state'].unique()
print(unique_states)

Number of rows in DataFrame: 440
['01' '02' '04' '05' '06' '08' '09' '10' '11' '12' '13' '15' '16' '17'
 '18' '19' '20' '21' '22' '23' '24' '25' '26' '27' '28' '29' '30' '31'
 '32' '33' '34' '35' '36' '37' '38' '39' '40' '41' '42' '44' '45' '46'
 '47' '48' '49' '50' '51' '53' '54' '55' '56' '72']


In [24]:
# Save DataFrame to CSV file
census_pd.to_csv("census_data.csv", encoding="utf-8", index=False)

# Display the head of the DataFrame from the saved CSV file
census_pd_from_file = pd.read_csv("census_data.csv")
census_pd_from_file.head()

Unnamed: 0,NAME,B19013_001E,B01003_001E,B01002_002E,B01002_003E,B01002_001E,B19301_001E,B17001_002E,B23022_001E,B23001_088E,B23022_049E,B23022_002E,B23022_025E,B15003_023E,B25058_001E,state,congressional district,Year
0,"Congressional District 1 (116th Congress), Ala...",53606.0,722794.0,38.9,41.4,40.3,29688.0,109032.0,447518.0,301800.0,77217.0,217406.0,55953.0,33551.0,707.0,1,1,2021
1,"Congressional District 2 (116th Congress), Ala...",52194.0,689681.0,37.5,40.7,39.0,28319.0,115310.0,435275.0,287843.0,76285.0,213201.0,54707.0,31270.0,619.0,1,2,2021
2,"Congressional District 3 (116th Congress), Ala...",53090.0,731482.0,37.6,39.9,38.8,28451.0,121511.0,468161.0,304646.0,77313.0,230635.0,57921.0,35868.0,590.0,1,3,2021
3,"Congressional District 4 (116th Congress), Ala...",50931.0,702255.0,39.7,42.2,40.9,27056.0,109196.0,431360.0,285978.0,76826.0,216170.0,50766.0,24306.0,469.0,1,4,2021
4,"Congressional District 5 (116th Congress), Ala...",63180.0,751690.0,38.7,40.8,39.8,35350.0,90719.0,483297.0,310448.0,75739.0,241259.0,48404.0,52340.0,643.0,1,5,2021


In [25]:
# Save combined dataframe to CSV
census_pd_combined.to_csv("Data-Stella/census_data.csv", encoding="utf-8", index=False)