# Creating DataFrames for Each State

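This notebook builds one DataFrame per state (Alaska is skipped as a special case). Each DataFrame is indexed by county and combines 2020 presidential election results, county population, and total COVID-19 cases as of mid-December.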

In [1]:
# Imports - these are provided for you. Do not import any other packages.
import pandas as pd
import requests
import bs4
import time
from bs4 import BeautifulSoup

### Creating initial DataFrame with election data

In [2]:
# List of states to iterate through. Each entry is used twice: lower-cased as
# part of the scrape URL, and verbatim as the key into df_dictionary.
# NOTE: 'West Virginia' is the only entry written with a space; it is kept
# as-is so existing df_dictionary['West Virginia'] lookups keep working, and
# the space is converted to a hyphen only when the URL is built.
state_list = ['Alabama','Alaska','Arizona','Arkansas','California','Colorado','Connecticut','Delaware','Florida','Georgia','Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas','Kentucky','Louisiana','Maine','Maryland','Massachusetts','Michigan','Minnesota','Mississippi','Missouri','Montana','Nebraska','Nevada','New-Hampshire','New-Jersey','New-Mexico','New-York','North-Carolina','North-Dakota','Ohio','Oklahoma','Oregon','Pennsylvania','Rhode-Island','South-Carolina','South-Dakota','Tennessee','Texas','Utah','Vermont','Virginia','Washington','West Virginia','Wisconsin','Wyoming']

# Dictionary to store all DataFrames for each state (key: state name, value: DataFrame)
df_dictionary = {}


def _party_results(county_div, party):
    """Return (votes, percent) as ints for one party inside a county result block.

    county_div: the BeautifulSoup element for one county's results.
    party: suffix of the party row's CSS class, 'democrats' or 'republicans'.
    """
    party_row = county_div.find('tr', class_='ElectionsTable__Row ResultsTable--counting ResultsTable__row--' + party)
    votes = int(party_row.find('td', class_='ElectionsTable__Cell ResultsTable__votes').text.replace(',',''))
    percent = int(party_row.find('td', class_='ElectionsTable__Cell ResultsTable__percent').text.replace('%',''))
    return votes, percent


# Iterate through state list
for state in state_list:

    # Skip the special case
    if state == 'Alaska':
        continue

    # Create lists to hold county names, democrat votes and percent, republican votes and percent
    county_list, dem_per, dem_votes, rep_per, rep_votes = [], [], [], [], []

    # URL slug: lower-case, with any space replaced by a hyphen so that
    # 'West Virginia' produces 'west-virginia' like the other multi-word states
    state_slug = state.lower().replace(' ', '-')

    # Build URL to scrape data about election
    election_page = 'https://abcnews.go.com/Elections/' + state_slug + '-county-presidential-election-results-2020'
    page = requests.get(election_page).text
    soup = BeautifulSoup(page, 'html.parser')
    divs = soup.find_all('div', class_='CountyResults_result')

    # Temporary 1 second sleep timer to not overwhelm website with requests
    time.sleep(1)

    # Iterate through html and find data for each county
    for div in divs:

        # Save county name
        name = div.find('h2').text

        # Special case for named county in Louisiana: rename so the spelling
        # matches the index used when the population data is joined later
        if state == 'Louisiana' and name == 'La Salle Parish':
            name = 'LaSalle Parish'
        county_list.append(name)

        # Democrats info
        dems_vote, dems_per = _party_results(div, 'democrats')
        dem_votes.append(dems_vote)
        dem_per.append(dems_per)

        # Republicans info
        reps_vote, reps_per = _party_results(div, 'republicans')
        rep_votes.append(reps_vote)
        rep_per.append(reps_per)

    # Create DataFrame from lists for current state, indexed by county name
    df = pd.DataFrame({
        'County': county_list,
        'Democrat Votes': dem_votes,
        'Democrat %': dem_per,
        'Republican Votes': rep_votes,
        'Republican %': rep_per,
    }).set_index('County')

    # Save to dictionary with Key as state and Value as DataFrame
    df_dictionary[state] = df

    print('Saving dataframe for ' + state)

Saving dataframe for Alabama
Saving dataframe for Arizona
Saving dataframe for Arkansas
Saving dataframe for California
Saving dataframe for Colorado
Saving dataframe for Connecticut
Saving dataframe for Delaware
Saving dataframe for Florida
Saving dataframe for Georgia
Saving dataframe for Hawaii
Saving dataframe for Idaho
Saving dataframe for Illinois
Saving dataframe for Indiana
Saving dataframe for Iowa
Saving dataframe for Kansas
Saving dataframe for Kentucky
Saving dataframe for Louisiana
Saving dataframe for Maine
Saving dataframe for Maryland
Saving dataframe for Massachusetts
Saving dataframe for Michigan
Saving dataframe for Minnesota
Saving dataframe for Mississippi
Saving dataframe for Missouri
Saving dataframe for Montana
Saving dataframe for Nebraska
Saving dataframe for Nevada
Saving dataframe for New-Hampshire
Saving dataframe for New-Jersey
Saving dataframe for New-Mexico
Saving dataframe for New-York
Saving dataframe for North-Carolina
Saving dataframe for North-Dakot

### Appending population data to existing DataFrames

In [3]:
# List of state abbreviations to iterate through.
# Must stay in the same order as state_list: the two are paired positionally.
state_abrv = ['AL','AK','AZ','AR','CA','CO','CT','DE','FL','GA','HI','ID','IL','IN','IA','KS','KY','LA','ME','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','OH','OK','OR','PA','RI','SC','SD','TN','TX','UT','VT','VA','WA','WV','WI','WY']

# Iterate through state names and abbreviations together
for state_name, abrv in zip(state_list, state_abrv):

    # Skip the special case
    if abrv == 'AK':
        continue

    # Temporary 1 second sleep timer to not overwhelm website with requests
    time.sleep(1)

    # Build URL to scrape data about population
    population_page = 'https://worldpopulationreview.com/us-counties/states/' + abrv.lower()
    page = requests.get(population_page).text
    soup = BeautifulSoup(page, 'html.parser')
    table_pop = soup.find_all('tr')

    # Create lists to hold county names and population
    county_list, pop = [], []

    # Iterate through html and find info for each county.
    # The first <tr> is the table heading, so it is skipped.
    for table_row in table_pop[1:]:

        # Find each column in the row
        cells = table_row.find_all('td')

        # Save county name
        county_name = cells[0].find('a').text

        # Special case for accented county in New Mexico. Both the correctly
        # decoded name and its mis-decoded (UTF-8 read as Latin-1) form are
        # accepted, since which one appears depends on how the response was decoded.
        if abrv == 'NM' and county_name in ('DoÃ±a Ana County', 'Doña Ana County'):
            county_name = 'Dona Ana County'
        county_list.append(county_name)

        # Save population info (first text node of the second column)
        pop.append(int(cells[1].find(string=True).replace(',','')))

    # Create DataFrame for population for each county, indexed by county name
    pops_df = pd.DataFrame({'County': county_list, 'Population': pop}).set_index('County')

    # Drop any 'Population' column left by a previous run of this cell, so
    # re-running it does not produce duplicate columns
    election_df = df_dictionary[state_name].drop(columns='Population', errors='ignore')

    # Combine population data with the existing voter information, aligned on county name
    df_dictionary[state_name] = pd.concat([election_df, pops_df], axis=1, sort=True)
    print('Adding population to ' + state_name)

Adding population to Alabama
Adding population to Arizona
Adding population to Arkansas
Adding population to California
Adding population to Colorado
Adding population to Connecticut
Adding population to Delaware
Adding population to Florida
Adding population to Georgia
Adding population to Hawaii
Adding population to Idaho
Adding population to Illinois
Adding population to Indiana
Adding population to Iowa
Adding population to Kansas
Adding population to Kentucky
Adding population to Louisiana
Adding population to Maine
Adding population to Maryland
Adding population to Massachusetts
Adding population to Michigan
Adding population to Minnesota
Adding population to Mississippi
Adding population to Missouri
Adding population to Montana
Adding population to Nebraska
Adding population to Nevada
Adding population to New-Hampshire
Adding population to New-Jersey
Adding population to New-Mexico
Adding population to New-York
Adding population to North-Carolina
Adding population to North-Dakot

### Appending COVID-19 data to existing DataFrames 

In [4]:
# Iterate through list of states used earlier for election data
for state in state_list:

    # Skip the special case (checked before sleeping so no delay is spent on a
    # state that is never requested)
    if state == 'Alaska':
        continue

    # Temporary 1 second sleep timer to not overwhelm website with requests
    time.sleep(1)

    # URL slug: lower-case, with any space replaced by a hyphen so that
    # 'West Virginia' produces 'west-virginia' like the other multi-word states
    state_slug = state.lower().replace(' ', '-')

    # Build URL to scrape data about total COVID-19 cases
    covid_page = 'https://usafacts.org/visualizations/coronavirus-covid-19-spread-map/state/' + state_slug
    page = requests.get(covid_page).text
    soup = BeautifulSoup(page, 'html.parser')
    covid_table = soup.find('tbody').find_all('tr')

    # Total cases for each county: the first <td> of every row in the table body
    total_cases = [int(table_row.find('td').text.replace(',','')) for table_row in covid_table]

    # Add new column 'Total COVID-19 Cases' to existing DataFrames for each state.
    # NOTE(review): values are assigned by position, not by county name, so this
    # assumes the scraped rows are in the same order (and number) as the
    # DataFrame's index - confirm against the page.
    df_dictionary[state]['Total COVID-19 Cases'] = total_cases
    print('Adding COVID-19 data to ' + state)

Adding COVID-19 data to Alabama
Adding COVID-19 data to Arizona
Adding COVID-19 data to Arkansas
Adding COVID-19 data to California
Adding COVID-19 data to Colorado
Adding COVID-19 data to Connecticut
Adding COVID-19 data to Delaware
Adding COVID-19 data to Florida
Adding COVID-19 data to Georgia
Adding COVID-19 data to Hawaii
Adding COVID-19 data to Idaho
Adding COVID-19 data to Illinois
Adding COVID-19 data to Indiana
Adding COVID-19 data to Iowa
Adding COVID-19 data to Kansas
Adding COVID-19 data to Kentucky
Adding COVID-19 data to Louisiana
Adding COVID-19 data to Maine
Adding COVID-19 data to Maryland
Adding COVID-19 data to Massachusetts
Adding COVID-19 data to Michigan
Adding COVID-19 data to Minnesota
Adding COVID-19 data to Mississippi
Adding COVID-19 data to Missouri
Adding COVID-19 data to Montana
Adding COVID-19 data to Nebraska
Adding COVID-19 data to Nevada
Adding COVID-19 data to New-Hampshire
Adding COVID-19 data to New-Jersey
Adding COVID-19 data to New-Mexico
Adding C

# Examples of DataFrames

In [5]:
# Sanity check: display the combined DataFrame for one state to show the layout
# shared by every entry in df_dictionary (county index; election, population
# and COVID-19 columns).
df_dictionary['New-Jersey']

Unnamed: 0,Democrat Votes,Democrat %,Republican Votes,Republican %,Population,Total COVID-19 Cases
Atlantic County,73808,53,64438,46,263351,9654
Bergen County,285967,58,204417,41,931588,40483
Burlington County,154595,59,103345,39,445953,16874
Camden County,175065,66,86207,33,506589,23633
Cape May County,23941,41,33158,57,91632,1970
Cumberland County,32742,52,28952,46,148419,6218
Essex County,266820,77,75475,22,799380,41089
Gloucester County,86702,50,83340,48,291747,11472
Hudson County,181452,72,65698,26,672851,37187
Hunterdon County,39457,47,43153,51,123935,3040


In [6]:
# Second example: display the combined DataFrame for California
df_dictionary['California']

Unnamed: 0,Democrat Votes,Democrat %,Republican Votes,Republican %,Population,Total COVID-19 Cases
Alameda County,617659,80,136309,18,1675900,36185
Alpine County,476,64,244,33,1169,5
Amador County,8153,36,13585,61,40099,706
Butte County,50426,49,48730,48,208033,4912
Calaveras County,10046,37,16518,61,46112,642
Colusa County,3234,41,4554,57,21676,921
Contra Costa County,416386,72,152877,26,1156530,29760
Del Norte County,4677,41,6461,56,27884,536
El Dorado County,51621,44,61838,53,194940,3754
Fresno County,193025,53,164464,45,1006250,43071


In [7]:
# Check special case for Louisiana: 'La Salle Parish' is renamed to
# 'LaSalle Parish' when the election data is scraped, so it should appear as a
# single row here.
# Raise the row display limit to 200 rows so the frame is not truncated.
# This pandas option is global and stays in effect for later cells.
pd.set_option('display.max_rows', 200)
display(df_dictionary['Louisiana'])

Unnamed: 0,Democrat Votes,Democrat %,Republican Votes,Republican %,Population,Total COVID-19 Cases
Acadia Parish,5443,19,22596,80,61876,4522
Allen Parish,2108,22,7574,77,25687,2234
Ascension Parish,20399,33,40687,65,128350,6734
Assumption Parish,3833,34,7271,65,21503,1155
Avoyelles Parish,4979,29,12028,70,39886,2352
Beauregard Parish,2542,16,13575,83,37625,1582
Bienville Parish,3067,44,3891,55,13139,1002
Bossier Parish,15662,29,38074,70,127056,7384
Caddo Parish,55110,53,48021,46,237575,15245
Calcasieu Parish,25982,31,55066,67,203695,11822


In [8]:
# Check special case for New Mexico: the accented county name is normalised to
# 'Dona Ana County' when the population data is scraped, so it should appear as
# a single row with both election and population values.
df_dictionary['New-Mexico']

Unnamed: 0,Democrat Votes,Democrat %,Republican Votes,Republican %,Population,Total COVID-19 Cases
Bernalillo County,193757,61,116135,37,680208,34149
Catron County,595,26,1698,73,3500,51
Chaves County,6381,28,15656,70,64676,5814
Cibola County,4745,53,3975,45,26584,1905
Colfax County,2611,43,3271,54,11811,396
Curry County,4307,29,10444,69,48570,3626
De Baca County,231,26,656,73,1717,70
Dona Ana County,47957,58,32802,40,219112,15384
Eddy County,5424,23,17454,75,59191,3611
Grant County,7590,53,6553,45,26693,639
