###### Census API

This was created to request total population for the top 201 cities in the US. We already have the list of 201 cities but with this data we will also get the home ownership for detached 1 unit, as well as attached 1 unit, 2unit duplexes, 3-4 units (tri and fourplexes). 

Reason we are getting this information vs just using the population is because major cities like New York has a huge population but the home ownership rate is low. The main target market for solar company's target market is going to be home owners. 




In [10]:
import requests
import pandas as pd
import time

#KEY
api_key = "275b5f346b7d21848c7d080e4d742ee5c7c45d9c"

# Variables found on Census documentation. Might not need all of them.
variables = [
    "B01003_001E",  # Total Population
    "B19013_001E",  # Median Household Income
    "B25077_001E",  # Median Home Value
    "B25024_002E",  # 1-unit, detached
    "B25024_003E",  # 1-unit, attached
    "B25024_004E",  # 2 units
    "B25024_005E",  # 3 or 4 units
    "B25024_006E",  # 5 to 9 units
    "B25024_007E",  # 10 to 19 units
    "B25024_008E",  # 20 or more units
    "B25024_001E",  # Total housing units
]

# Cities of interest
cities_of_interest = [

"New York",
"Los Angeles",
"Chicago",
"Houston",
"Phoenix",
"Philadelphia",
"San Antonio",
"San Diego",
"Dallas",
"Jacksonville",
"Fort Worth",
"Austin",
"San Jose",
"Charlotte",
"Columbus",
"Indianapolis",
"San Francisco",
"Seattle",
"Denver",
"Oklahoma City",
"Nashville",
"Washington",
"El Paso",
"Las Vegas",
"Boston",
"Detroit",
"Portland",
"Louisville",
"Memphis",
"Baltimore",
"Albuquerque",
"Milwaukee",
"Tucson",
"Fresno",
"Sacramento",
"Atlanta",
"Mesa",
"Kansas City",
"Colorado Springs",
"Raleigh",
"Omaha",
"Miami",
"Virginia Beach",
"Long Beach",
"Oakland",
"Minneapolis",
"Bakersfield",
"Tulsa",
"Tampa",
"Arlington",
"Aurora",
"Wichita",
"Cleveland",
"New Orleans",
"Henderson",
"Honolulu",
"Anaheim",
"Orlando",
"Riverside",
"Lexington",
"Stockton",
"Irvine",
"Corpus Christi",
"Cincinnati",
"Santa Ana",
"Greensboro",
"Pittsburgh",
"Newark",
"St. Paul",
"Durham",
"Lincoln",
"North Las Vegas",
"Jersey City",
"Plano",
"Anchorage",
"Madison",
"Chandler",
"Reno",
"Gilbert",
"St. Louis",
"Chula Vista",
"Buffalo",
"Fort Wayne",
"Lubbock",
"St. Petersburg",
"Toledo",
"Laredo",
"Port St. Lucie",
"Glendale",
"Chesapeake",
"Winston",
"Irving",
"Scottsdale",
"Garland",
"Enterprise",
"Arlington",
"Boise",
"Cape Coral",
"Frisco",
"Richmond",
"Spokane",
"Norfolk",
"Huntsville",
"Fremont",
"San Bernardino",
"Tacoma",
"Spring Valley",
"Santa Clarita",
"Hialeah",
"Modesto",
"McKinney",
"Fontana",
"Baton Rouge",
"Moreno Valley",
"Salt Lake City",
"Sioux Falls",
"Fayetteville",
"Des Moines",
"Worcester",
"Yonkers",
"Rochester",
"Little Rock",
"Grand Prairie",
"Tallahassee",
"Amarillo",
"Peoria",
"Augusta",
"Knoxville",
"Columbus",
"Sunrise Manor",
"Vancouver",
"Oxnard",
"Overland Park",
"Grand Rapids",
"Birmingham",
"Montgomery",
"Tempe",
"Brownsville",
"Providence",
"Huntington Beach",
"Paradise",
"Chattanooga",
"Akron",
"Clarksville",
"Fort Lauderdale",
"Ontario",
"Glendale",
"Newport News",
"Cary",
"Mobile",
"Elk Grove",
"Eugene",
"Salem",
"Aurora",
"Santa Rosa",
"Shreveport",
"Rancho Cucamonga",
"Pembroke Pines",
"Springfield",
"Fort Collins",
"Murfreesboro",
"Oceanside",
"Garden Grove",
"Denton",
"Lancaster",
"Surprise",
"Roseville",
"Killeen",
"Corona",
"Palmdale",
"Salinas",
"Charleston",
"Macon",
"Lakewood",
"Paterson",
"Hollywood",
"Alexandria",
"Hayward",
"Springfield",
"Kansas City",
"Bellevue",
"Sunnyvale",
"Joliet",
"Naperville",
"Olathe",
"Bridgeport",
"McAllen",
"Savannah",
"Escondido",
"Gainesville",
"Mesquite",
"Waco",
"Visalia",
"Thornton",
"Rockford",
"Syracuse",
"Pasadena",
"Columbia",
"Pomona",
"Palm Bay",


]


combined_df = pd.DataFrame()

# Loop for each city again
for city in cities_of_interest:
    print(f"Fetching data for {city}...")
    
    # URL
    base_url = f"https://api.census.gov/data/2021/acs/acs5?get=NAME,{','.join(variables)}&for=place:*&in=state:*&key={api_key}"
    
    response = requests.get(base_url)
    
    if response.status_code == 200:
        data = response.json()
        columns = data[0]
        records = data[1:]
        

        df = pd.DataFrame(records, columns=columns)
        
        filtered_df = df[df['NAME'].str.contains(city, case=False, na=False)]
        
        for col in variables:
            filtered_df.loc[:, col] = pd.to_numeric(filtered_df[col], errors='coerce')
        
        combined_df = pd.concat([combined_df, filtered_df], ignore_index=True)
    
    else:
        print(f"Error: {response.status_code} - {response.text}")
    
    # Added Sleep. Note this request took about 9 hours to complete.
    time.sleep(120)


combined_df.to_csv("CompleteFinal_combined_cities_census_data.csv", index=False)
print("Data fetching complete. Combined data saved to 'combined_cities_census_data.csv'.")


Fetching data for New York...
Fetching data for Los Angeles...
Fetching data for Chicago...
Fetching data for Houston...
Fetching data for Phoenix...
Fetching data for Philadelphia...
Fetching data for San Antonio...
Fetching data for San Diego...
Fetching data for Dallas...
Fetching data for Jacksonville...
Fetching data for Fort Worth...
Fetching data for Austin...
Fetching data for San Jose...
Fetching data for Charlotte...
Fetching data for Columbus...
Fetching data for Indianapolis...
Fetching data for San Francisco...
Fetching data for Seattle...
Fetching data for Denver...
Fetching data for Oklahoma City...
Fetching data for Nashville...
Fetching data for Washington...
Fetching data for El Paso...
Fetching data for Las Vegas...
Fetching data for Boston...
Fetching data for Detroit...
Fetching data for Portland...
Fetching data for Louisville...
Fetching data for Memphis...
Fetching data for Baltimore...
Fetching data for Albuquerque...
Fetching data for Milwaukee...
Fetching dat