In [1]:
import pandas as pd
import os
import sys
import requests
from census import Census
from us import states

# The notebook expects config.py to live one directory above the current
# working directory, so that directory is put on sys.path before the import.
# Append the path of the parent directory to the system path
path = os.path.join(os.getcwd(), '..')
sys.path.append(path)

# Import the api_key from the config module
from config import api_key


# Default Census client, pinned to the 2021 data year.
c = Census(api_key, year=2021)



# State Economic Data

In [12]:
def extract_data(year: int):
    """Fetch state-level ACS 5-year estimates for ``year`` as a DataFrame.

    Every state is queried (``state:*``) for the variables listed below. The
    Census variable codes are then renamed to readable column names and a
    ``Year`` column holding the requested year is appended.

    Args:
        year: Data year handed to the ``Census`` client.

    Returns:
        DataFrame with one row per record returned by the API, using the
        renamed columns plus ``Year``.
    """
    # Variables requested from the API; "B23025_005E" is the unemployment count.
    acs_fields = (
        "NAME",
        "B19013_001E",
        "B01003_001E",
        "B01002_001E",
        "B19301_001E",
        "B17001_002E",
        "B23025_005E",
    )

    # Census code -> human-readable column name (a rename, column order is kept).
    readable_names = {
        "B01003_001E": "Population",
        "B01002_001E": "Median Age",
        "B19013_001E": "Household Income",
        "B19301_001E": "Per Capita Income",
        "B17001_002E": "Poverty Count",
        "B23025_005E": "Unemployment Count",
        "NAME": "Name",
        "state": "State",
    }

    # A fresh client per call so the query runs against the requested year.
    client = Census(api_key, year=year)
    records = client.acs5.get(acs_fields, {'for': 'state:*'})

    return (
        pd.DataFrame(records)
        .rename(columns=readable_names)
        .assign(Year=year)
    )



In [13]:
# 2013 state table with two derived percentage columns.
# NOTE(review): both rates use total population as the denominator, not the
# poverty-universe / labor-force totals — confirm that this is intended.
state_df = extract_data(2013).assign(**{
    # Poverty Rate (Poverty Count / Population), as a percentage
    "Poverty Rate": lambda frame: (
        100 * frame["Poverty Count"].astype(int) / frame["Population"].astype(int)
    ),
    # Unemployment Rate (Unemployment Count / Population), as a percentage
    "Unemployment Rate": lambda frame: (
        100 * frame["Unemployment Count"].astype(int) / frame["Population"].astype(int)
    ),
})

# Keep only the reporting columns, in display order
state_df = state_df[[
    "Year",
    "State",
    "Name",
    "Population",
    "Median Age",
    "Household Income",
    "Per Capita Income",
    "Poverty Count",
    "Poverty Rate",
    "Unemployment Rate",
]]

state_df.head()

Unnamed: 0,Year,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,2013,1,Alabama,4799277.0,38.1,43253.0,23680.0,870631.0,18.140878,5.040968
1,2013,2,Alaska,720316.0,33.6,70760.0,32651.0,69514.0,9.650487,4.572854
2,2013,4,Arizona,6479703.0,36.3,49774.0,25358.0,1131901.0,17.468409,4.882323
3,2013,5,Arkansas,2933369.0,37.5,40768.0,22170.0,547328.0,18.658682,4.132961
4,2013,6,California,37659181.0,35.4,61094.0,29527.0,5885417.0,15.628107,5.758662


In [4]:
# 2021 state-level table.
# The query and column renaming are identical to extract_data(), so reuse the
# helper instead of repeating them inline (the extra "Year" column it adds is
# dropped by the final column selection below).
census_pd = extract_data(2021)

# NOTE(review): both rates use total population as the denominator, not the
# poverty-universe / labor-force totals — confirm that this is intended.

# Add in Poverty Rate (Poverty Count / Population), as a percentage
census_pd["Poverty Rate"] = (
    100 * census_pd["Poverty Count"].astype(int) / census_pd["Population"].astype(int)
)

# Add in Unemployment Rate (Unemployment Count / Population), as a percentage
census_pd["Unemployment Rate"] = (
    100 * census_pd["Unemployment Count"].astype(int) / census_pd["Population"].astype(int)
)

# Final DataFrame: keep only the reporting columns, in display order
census_pd = census_pd[["State", "Name", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate", "Unemployment Rate"]]

census_pd.head()

Unnamed: 0,State,Name,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Unemployment Rate
0,1,Alabama,4997675.0,39.3,54943.0,30458.0,769819.0,15.403543,2.489478
1,2,Alaska,735951.0,35.0,80287.0,39236.0,75016.0,10.19307,3.404303
2,4,Arizona,7079203.0,38.1,65913.0,34644.0,934911.0,13.206444,2.699668
3,5,Arkansas,3006309.0,38.3,52123.0,29210.0,468113.0,15.571021,2.452609
4,6,California,39455353.0,37.0,84097.0,41276.0,4741175.0,12.016557,3.304345


In [None]:
# Collect the "Name" column of census_pd into a plain Python list
state_list = census_pd['Name'].tolist()

In [None]:
# Inspect the dtype of each census_pd column
census_pd.dtypes

In [None]:
# Show how many names were collected in state_list
print(len(state_list))

# City Economic Data

In [None]:
# Do not commit the Census API key to the notebook. Prefer the CENSUS_API_KEY
# environment variable; when it is unset, keep the key already imported from
# the config module in the first cell.
# NOTE(review): the literal key that used to be hard-coded on this line has
# been exposed in the notebook history and should be rotated.
api_key = os.environ.get("CENSUS_API_KEY") or api_key

In [None]:
# Module-level client used by retrieve_city_data below; no year is fixed here.
census_client = Census(api_key)

def retrieve_city_data(state, year=2021):
    """Query ACS 5-year place-level economic variables for one state.

    Args:
        state: State identifier accepted by ``states.lookup`` (for example the
            full name ``"California"``).
        year: Data year forwarded to the Census client. Defaults to 2021, the
            value that used to be hard-coded in the call.

    Returns:
        Whatever ``census_client.acs5.state_place`` returns for every place
        (``Census.ALL``) in the state.

    Raises:
        ValueError: If ``states.lookup`` cannot resolve ``state``.
    """
    variables = [
        "NAME",  # City name
        "B01003_001E",  # Total population
        "B01002_001E",  # Median age
        "B19013_001E",  # Median household income
        "B19301_001E",  # Per capita income
        "B17001_002E",  # Poverty count
        "B23025_005E",  # Unemployment count
        "B23025_004E",  # Employment count
    ]

    # states.lookup() yields None for an unrecognised state; fail with a clear
    # message instead of an AttributeError on `None.fips`.
    state_info = states.lookup(state)
    if state_info is None:
        raise ValueError(f"Unknown state: {state!r}")

    return census_client.acs5.state_place(
        variables,
        state_info.fips,
        Census.ALL,
        year=year,
    )

state = "California"  # Replace with your desired state

# Fetch the place-level records for that state
city_data = retrieve_city_data(state)

# Number of records, and number of fields in the first record (0 when empty)
data_count = len(city_data)
if city_data:
    data_length = len(city_data[0])
else:
    data_length = 0

print("Data Count:", data_count)
print("Data Length:", data_length)

# Output column headers, in the same order as the variable codes read below
column_names = [
    "City",
    "Population",
    "Median Age",
    "Median Household Income",
    "Per Capita Income",
    "Poverty Count",
    "Unemployment Count",
    "Employment Count"
]

# One row per record, pulling the fields in column order
data = [
    [
        city[code]
        for code in (
            "NAME",
            "B01003_001E",
            "B01002_001E",
            "B19013_001E",
            "B19301_001E",
            "B17001_002E",
            "B23025_005E",
            "B23025_004E",
        )
    ]
    for city in city_data
]

# Assemble the rows into a DataFrame with the readable headers
df_city = pd.DataFrame(data, columns=column_names)

# Display the DataFrame
df_city



# Zip Code Economic Data


In [None]:


# Client used by retrieve_zipcode_data below (constructed the same way as in the city section).
census_client = Census(api_key)

def retrieve_zipcode_data(state, year=2021):
    """Query ACS 5-year ZIP-code-tabulation-area variables for one state.

    Args:
        state: State identifier accepted by ``states.lookup`` (for example the
            full name ``"California"``).
        year: Data year forwarded to the Census client. Defaults to 2021, the
            value that used to be hard-coded in the call.

    Returns:
        Whatever ``census_client.acs5.state_zipcode`` returns for all ZCTAs
        (``zcta="*"``).

    Raises:
        ValueError: If ``states.lookup`` cannot resolve ``state``.
    """
    variables = [
        "NAME",  # ZIP code
        "B01003_001E",  # Total population
        "B19013_001E",  # Median household income
        "B17001_002E",  # Poverty count
        "B23025_005E",  # Unemployment count
        "B23025_004E",  # Employment count
    ]

    # states.lookup() yields None for an unrecognised state; fail with a clear
    # message instead of an AttributeError on `None.fips`.
    state_info = states.lookup(state)
    if state_info is None:
        raise ValueError(f"Unknown state: {state!r}")

    # NOTE(review): for 2020+ vintages the Census API reportedly no longer
    # nests ZCTAs under states, so the result may not be limited to `state` —
    # confirm against the installed `census` package.
    return census_client.acs5.state_zipcode(
        variables,
        state_fips=state_info.fips,
        zcta="*",
        year=year,
    )



state = "California"  # Replace with your desired state

# Fetch the ZIP-code-level records for that state
zipcode_data = retrieve_zipcode_data(state)

# Census variable code -> readable column header
column_names = {
    "NAME": "ZIP Code",
    "B01003_001E": "Population",
    "B19013_001E": "Median Household Income",
    "B17001_002E": "Poverty Count",
    "B23025_005E": "Unemployment Count",
    "B23025_004E": "Employment Count"
}

# Build the frame from the raw records and apply the readable headers
df = pd.DataFrame(zipcode_data).rename(columns=column_names)

df



# State Social Data

In [None]:

# Variables requested by retrieve_social_data().
# NOTE(review): every code below is a table's _001E estimate, which is
# presumably the table total/universe rather than a specific category —
# verify against the ACS variable list before interpreting the numbers.
variables = [
    "NAME",  # Geographic name
    "B18101_001E",  # Disability status of the civilian noninstitutionalized population
    "B27001_001E",  # Health insurance coverage by type of coverage and age
    "B07013_001E",  # Geographical mobility in the past year for current residence
    "B08006_001E",  # Means of transportation to work by selected characteristics
    "B28002_001E",  # Presence and types of internet subscriptions in households
    # Add more social variables as per your requirements
]


def retrieve_social_data(state, year=2021):
    """Query the module-level ``variables`` for one state from ACS 5-year data.

    Args:
        state: State identifier accepted by ``states.lookup`` (for example the
            full name ``"California"``).
        year: Data year forwarded to the Census client. Defaults to 2021, the
            value that used to be hard-coded in the call.

    Returns:
        Whatever ``census_client.acs5.state`` returns for the state.

    Raises:
        ValueError: If ``states.lookup`` cannot resolve ``state``.
    """
    # states.lookup() yields None for an unrecognised state; fail with a clear
    # message instead of an AttributeError on `None.fips`.
    state_info = states.lookup(state)
    if state_info is None:
        raise ValueError(f"Unknown state: {state!r}")

    # Perform the Census API query
    return census_client.acs5.state(
        variables,
        state_info.fips,
        year=year,
    )



state = "California"  # Replace with your desired state

# Fetch the social-characteristic record(s) for that state
social_data = retrieve_social_data(state)

# Build the frame and rename the Census codes to readable headers
# (this is a rename; column order is unchanged)
df = pd.DataFrame(social_data).rename(
    columns={
        "NAME": "Geographic Name",
        "B18101_001E": "Disability Status",
        "B27001_001E": "Health Insurance Coverage",
        "B07013_001E": "Geographical Mobility",
        "B08006_001E": "Means of Transportation to Work",
        "B28002_001E": "Internet Subscriptions in Households",
        # Add more column names for additional social characteristics
    }
)

# Display the DataFrame
df

# County Demographic Data

In [None]:


# Client used by retrieve_demographic_data below; no year is fixed here.
census_client = Census(api_key)

def retrieve_demographic_data(state, year=None):
    """Query ACS 5-year age and commuting variables for every county in a state.

    Args:
        state: State identifier accepted by ``states.lookup`` (for example the
            full name ``"California"``).
        year: Optional data year. When omitted no ``year`` argument is sent,
            so the client's own default applies (the original behavior).

    Returns:
        Whatever ``census_client.acs5.state_county`` returns for all counties
        (``Census.ALL``) in the state.

    Raises:
        ValueError: If ``states.lookup`` cannot resolve ``state``.
    """
    # NOTE(review): the descriptions below follow the ACS B01001 / B08006
    # table shells; the earlier comments (ages 17-19 / 20-24, public
    # transportation, walked, bicycle, other) did not match these codes.
    # Confirm against the variable list for the vintage being queried.
    variables = [
        "NAME",  # Name of the geographic area
        "B01001_001E",  # Total population
        "B01001_024E",  # Male, 80 to 84 years
        "B01001_025E",  # Male, 85 years and over
        # Add more age groups as needed
        "B08006_001E",  # Total workers (all means of transportation)
        "B08006_002E",  # Car, truck, or van
        "B08006_003E",  # Car, truck, or van: drove alone
        "B08006_004E",  # Car, truck, or van: carpooled
        "B08006_009E",  # Public transportation: bus
        "B08006_014E",  # Bicycle
    ]

    # states.lookup() yields None for an unrecognised state; fail with a clear
    # message instead of an AttributeError on `None.fips`.
    state_info = states.lookup(state)
    if state_info is None:
        raise ValueError(f"Unknown state: {state!r}")

    # Only forward `year` when the caller supplied one.
    query_options = {} if year is None else {"year": year}

    return census_client.acs5.state_county(
        variables,
        state_info.fips,
        Census.ALL,
        **query_options,
    )

state = "California"  # Replace with your desired state

# Retrieve county-level demographic data for the specified state
demographic_data = retrieve_demographic_data(state)

# Census variable code -> column label, kept in one mapping so a label cannot
# drift away from the code it describes.
# NOTE(review): labels follow the ACS B01001 / B08006 table shells. The earlier
# labels ("Aged 17-19", "Aged 20-24", "Public Transportation", "Walked",
# "Bicycle", "Other Means of Transportation") did not describe these codes.
# Confirm against the variable list for the vintage being queried.
column_labels = {
    "NAME": "Name",
    "B01001_001E": "Total Population",
    "B01001_024E": "Male Aged 80-84",
    "B01001_025E": "Male Aged 85+",
    "B08006_001E": "Total Means of Transportation",
    "B08006_002E": "Car, Truck, or Van",
    "B08006_003E": "Drove Alone",
    "B08006_004E": "Carpooled",
    "B08006_009E": "Bus",
    "B08006_014E": "Bicycle",
}

# Kept as separate lists under their previous names for any later use
columns = list(column_labels)
column_names = list(column_labels.values())

# Select the relevant columns, then apply the readable labels
df = pd.DataFrame(demographic_data)[columns].rename(columns=column_labels)

df
