In [1]:
# Imports - these are provided for you. Do not import any other packages.
import pandas as pd
import requests
import bs4
import time
from bs4 import BeautifulSoup

In [2]:
# List of states to iterate through. Multi-word names are hyphenated the way the
# election URLs spell them; 'West Virginia' keeps its space so that its key in
# df_dictionary does not change, and the URL slug is normalised inside the loop.
state_list = ['Alabama','Alaska','Arizona','Arkansas','California','Colorado','Connecticut','Delaware','Florida','Georgia','Hawaii','Idaho','Illinois','Indiana','Iowa','Kansas','Kentucky','Louisiana','Maine','Maryland','Massachusetts','Michigan','Minnesota','Mississippi','Missouri','Montana','Nebraska','Nevada','New-Hampshire','New-Jersey','New-Mexico','New-York','North-Carolina','North-Dakota','Ohio','Oklahoma','Oregon','Pennsylvania','Rhode-Island','South-Carolina','South-Dakota','Tennessee','Texas','Utah','Vermont','Virginia','Washington','West Virginia','Wisconsin','Wyoming']

# Dictionary to store all DataFrames for each state (key: the name from state_list)
df_dictionary = {}

# Column layout of every per-state DataFrame; 'County' becomes the index
RESULT_COLUMNS = ['County', 'Democrat Votes', 'Democrat Percent', 'Republican Votes', 'Republican Percent']


def _party_votes_and_percent(county_div, party):
    """Return (votes, percent) as ints for one party's row of a county result block.

    county_div: BeautifulSoup tag of a single 'CountyResults_result' div.
    party: suffix of the row's CSS class, i.e. 'democrats' or 'republicans'.

    Raises AttributeError if the row or one of its cells is not found, and
    ValueError if a cell's text is not an integer once ',' / '%' are stripped.
    """
    row = county_div.find('tr', class_='ElectionsTable__Row ResultsTable--counting ResultsTable__row--' + party)
    votes = int(row.find('td', class_='ElectionsTable__Cell ResultsTable__votes').text.replace(',', ''))
    percent = int(row.find('td', class_='ElectionsTable__Cell ResultsTable__percent').text.replace('%', ''))
    return votes, percent


# Iterate through state list
for state in state_list:
    # Build URL to scrape data about election. The slug is lower case and
    # hyphenated, so any space left in the state name is replaced as well.
    state_slug = state.lower().replace(' ', '-')
    election_page = 'https://abcnews.go.com/Elections/' + state_slug + '-county-presidential-election-results-2020'
    page = requests.get(election_page).text
    soup = BeautifulSoup(page, 'html.parser')
    divs = soup.find_all('div', class_='CountyResults_result')

    # Temporary 1 second sleep timer to not overwhelm website with requests
    time.sleep(1)

    # Collect one record per county found in the html
    records = []
    for div in divs:
        county_name = div.find('h2').text
        dems_vote, dems_per = _party_votes_and_percent(div, 'democrats')
        reps_vote, reps_per = _party_votes_and_percent(div, 'republicans')
        records.append({
            'County': county_name,
            'Democrat Votes': dems_vote,
            'Democrat Percent': dems_per,
            'Republican Votes': reps_vote,
            'Republican Percent': reps_per,
        })

    # Create the DataFrame for the current state in one step, indexed by county.
    # Passing the columns explicitly keeps the layout even when no county was found.
    df = pd.DataFrame(records, columns=RESULT_COLUMNS).set_index('County')

    # Save to dictionary with Key as state and Value as DataFrame
    df_dictionary[state] = df

    print('Saving dataframe for ' + state)

Saving dataframe for Alabama
Saving dataframe for Alaska
Saving dataframe for Arizona
Saving dataframe for Arkansas
Saving dataframe for California
Saving dataframe for Colorado
Saving dataframe for Connecticut
Saving dataframe for Delaware
Saving dataframe for Florida
Saving dataframe for Georgia
Saving dataframe for Hawaii
Saving dataframe for Idaho
Saving dataframe for Illinois
Saving dataframe for Indiana
Saving dataframe for Iowa
Saving dataframe for Kansas
Saving dataframe for Kentucky
Saving dataframe for Louisiana
Saving dataframe for Maine
Saving dataframe for Maryland
Saving dataframe for Massachusetts
Saving dataframe for Michigan
Saving dataframe for Minnesota
Saving dataframe for Mississippi
Saving dataframe for Missouri
Saving dataframe for Montana
Saving dataframe for Nebraska
Saving dataframe for Nevada
Saving dataframe for New-Hampshire
Saving dataframe for New-Jersey
Saving dataframe for New-Mexico
Saving dataframe for New-York
Saving dataframe for North-Carolina
Savi

In [3]:
# List of state abbreviations, in the same order as state_list
state_abrv = ['AL','AK','AZ','AR','CA','CO','CT','DE','FL','GA','HI','ID','IL','IN','IA','KS','KY','LA','ME','MD','MA','MI','MN','MS','MO','MT','NE','NV','NH','NJ','NM','NY','NC','ND','OH','OK','OR','PA','RI','SC','SD','TN','TX','UT','VT','VA','WA','WV','WI','WY']

# Walk the state names and their abbreviations in lockstep
for state_name, abrv in zip(state_list, state_abrv):

    # Temporary 1 second sleep timer to not overwhelm website with requests
    time.sleep(1)

    # Build URL to scrape data about population
    population_page = 'https://worldpopulationreview.com/us-counties/states/' + abrv.lower()
    page = requests.get(population_page).text
    soup = BeautifulSoup(page, 'html.parser')
    table_pop = soup.find_all('tr')

    # Create lists to hold county names and population
    county_list, pop = [], []

    # Iterate through html and find info for each county.
    # The first <tr> is the table heading, so it is skipped.
    for table_row in table_pop[1:]:
        # Find each column in the row
        cells = table_row.find_all('td')

        # Save county name (text of the link in the first column)
        county_list.append(cells[0].find('a').text)

        # Save population (first text node of the second column, thousands separators removed)
        pop.append(int(cells[1].find(string=True).replace(',', '')))

    # Create DataFrame for population for each county, indexed by county name
    pops_df = pd.DataFrame({'County': county_list, 'Population': pop}).set_index('County')

    # Combine population data to existing DataFrames with voter information.
    # Any 'Population' column left by an earlier run of this cell is dropped first,
    # so running the cell again does not add a duplicate column.
    votes_df = df_dictionary[state_name].drop(columns='Population', errors='ignore')
    df_dictionary[state_name] = pd.concat([votes_df, pops_df], axis=1, sort=True)
    print('Adding population to ' + state_name)

Adding population to Alabama
Adding population to Alaska
Adding population to Arizona
Adding population to Arkansas
Adding population to California
Adding population to Colorado
Adding population to Connecticut
Adding population to Delaware
Adding population to Florida
Adding population to Georgia
Adding population to Hawaii
Adding population to Idaho
Adding population to Illinois
Adding population to Indiana
Adding population to Iowa
Adding population to Kansas
Adding population to Kentucky
Adding population to Louisiana
Adding population to Maine
Adding population to Maryland
Adding population to Massachusetts
Adding population to Michigan
Adding population to Minnesota
Adding population to Mississippi
Adding population to Missouri
Adding population to Montana
Adding population to Nebraska
Adding population to Nevada
Adding population to New-Hampshire
Adding population to New-Jersey
Adding population to New-Mexico
Adding population to New-York
Adding population to North-Carolina
Addi

In [4]:
# Sanity check: display the DataFrame stored for one state (New Jersey) as an
# example of what every entry in df_dictionary looks like. Rows are indexed by
# county; columns hold the Democrat/Republican votes and percentages plus the
# population that the previous cell concatenated on.
df_dictionary['New-Jersey']

Unnamed: 0,Democrat Votes,Democrat Percent,Republican Votes,Republican Percent,Population
Atlantic County,73808,53,64438,46,263351
Bergen County,285967,58,204417,41,931588
Burlington County,154595,59,103345,39,445953
Camden County,175065,66,86207,33,506589
Cape May County,23941,41,33158,57,91632
Cumberland County,32742,52,28952,46,148419
Essex County,266820,77,75475,22,799380
Gloucester County,86702,50,83340,48,291747
Hudson County,181452,72,65698,26,672851
Hunterdon County,39457,47,43153,51,123935


In [5]:
# Second sanity check: display the combined votes + population DataFrame for California
df_dictionary['California']

Unnamed: 0,Democrat Votes,Democrat Percent,Republican Votes,Republican Percent,Population
Alameda County,617659,80,136309,18,1675900
Alpine County,476,64,244,33,1169
Amador County,8153,36,13585,61,40099
Butte County,50426,49,48730,48,208033
Calaveras County,10046,37,16518,61,46112
Colusa County,3234,41,4554,57,21676
Contra Costa County,416386,72,152877,26,1156530
Del Norte County,4677,41,6461,56,27884
El Dorado County,51621,44,61838,53,194940
Fresno County,193025,53,164464,45,1006250
