In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
# Load the House election returns and report the raw (rows, columns) shape.
house_df = pd.read_csv('../Data/1976-2022-house.csv')
print(house_df.shape)

# Keep only the columns used in the analysis. The explicit .copy() makes the
# result an independent frame, so later column assignments do not raise
# SettingWithCopyWarning or depend on view-vs-copy semantics.
house_columns = ['year', 'state', 'state_po', 'party', 'candidatevotes',
                 'totalvotes', 'unofficial', 'district']
house_df = house_df[house_columns].copy()
house_df.head()

(32452, 20)


Unnamed: 0,year,state,state_po,party,candidatevotes,totalvotes,unofficial,district
0,1976,ALABAMA,AL,DEMOCRAT,58906,157170,False,1
1,1976,ALABAMA,AL,REPUBLICAN,98257,157170,False,1
2,1976,ALABAMA,AL,,7,157170,False,1
3,1976,ALABAMA,AL,DEMOCRAT,66288,156362,False,2
4,1976,ALABAMA,AL,REPUBLICAN,90069,156362,False,2


In [5]:
# Rows with a NaN party would be dropped by a groupby on 'party' (NaN keys are
# excluded by default), so label them OTHER to keep their votes in the totals.
# The result is assigned back rather than calling fillna(inplace=True) on the
# house_df['party'] selection: that chained in-place form is deprecated and
# does not modify house_df when pandas copy-on-write is active.
house_df = house_df.fillna({'party': 'OTHER'})
house_df.head()

Unnamed: 0,year,state,state_po,party,candidatevotes,totalvotes,unofficial,district
0,1976,ALABAMA,AL,DEMOCRAT,58906,157170,False,1
1,1976,ALABAMA,AL,REPUBLICAN,98257,157170,False,1
2,1976,ALABAMA,AL,OTHER,7,157170,False,1
3,1976,ALABAMA,AL,DEMOCRAT,66288,156362,False,2
4,1976,ALABAMA,AL,REPUBLICAN,90069,156362,False,2


In [6]:
# List every distinct postal code present in the House data.
print(house_df.loc[:, 'state_po'].unique())

['AL' 'AK' 'AZ' 'AR' 'CA' 'CO' 'CT' 'DE' 'FL' 'GA' 'HI' 'ID' 'IL' 'IN'
 'IA' 'KS' 'KY' 'LA' 'ME' 'MD' 'MA' 'MI' 'MN' 'MS' 'MO' 'MT' 'NE' 'NV'
 'NH' 'NJ' 'NM' 'NY' 'NC' 'ND' 'OH' 'OK' 'OR' 'PA' 'RI' 'SC' 'SD' 'TN'
 'TX' 'UT' 'VT' 'VA' 'WA' 'WV' 'WI' 'WY' 'DC']


In [7]:
# Restrict to elections from 2020 onward and keep only official results.
is_2020_or_later = house_df['year'] >= 2020
is_official = house_df['unofficial'] == False  # elementwise comparison, not identity
house_df_2020_on = house_df[is_2020_or_later & is_official]
print(house_df_2020_on.shape)
house_df_2020_on.head()

(2816, 8)


Unnamed: 0,year,state,state_po,party,candidatevotes,totalvotes,unofficial,district
29636,2020,ALABAMA,AL,DEMOCRAT,116949,329075,False,1
29637,2020,ALABAMA,AL,REPUBLICAN,211825,329075,False,1
29638,2020,ALABAMA,AL,OTHER,301,329075,False,1
29639,2020,ALABAMA,AL,REPUBLICAN,197996,303569,False,2
29640,2020,ALABAMA,AL,DEMOCRAT,105286,303569,False,2


In [8]:
# Total candidate votes for every (year, state, party) combination.
party_vote_groups = house_df_2020_on.groupby(['year', 'state_po', 'party'])
votes_by_year_state_party = party_vote_groups['candidatevotes'].sum()

# Sanity check: per-party totals for Colorado in 2020.
votes_by_year_state_party.loc[(2020, 'CO')]

party
APPROVAL VOTING PARTY         1441
DEMOCRAT                   1679052
LIBERTARIAN                  79100
REPUBLICAN                 1378248
UNAFFILIATED                  3708
UNITY PARTY OF COLORADO      23401
Name: candidatevotes, dtype: int64

In [9]:
# Election years covered by the analysis.
years = [2020, 2022]

# Postal codes of the 50 states, in the order used for all per-state results.
# DC is not included in this list.
states = """
    AL AK AZ AR CA CO CT DE FL GA HI ID IL IN
    IA KS KY LA ME MD MA MI MN MS MO MT NE NV
    NH NJ NM NY NC ND OH OK OR PA RI SC SD TN
    TX UT VT VA WA WV WI WY
""".split()

# Parties tallied under their own name; every other party is pooled as OTHER.
main_parties = ['DEMOCRAT', 'REPUBLICAN']

In [10]:
# Build a nested lookup: votes[year][state][party] -> total House votes.
#   - outer keys are the entries of `years`
#   - middle keys are the postal codes in `states`
#   - inner keys are DEMOCRAT, REPUBLICAN, or OTHER (all remaining parties pooled)
# A state with no minor-party rows in a given year gets no 'OTHER' key.
votes = {}
for year in years:
    votes[year] = {}
    for state in states:
        state_votes = {}
        # Per-party totals for this (year, state); the index holds party names.
        party_totals = votes_by_year_state_party[year][state]
        for party, count in party_totals.items():
            bucket = party if party in main_parties else 'OTHER'
            # dict.get supplies the starting 0, so no try/except is needed to
            # create the bucket and unrelated errors are no longer swallowed.
            # int() stores plain Python ints rather than NumPy scalars.
            state_votes[bucket] = state_votes.get(bucket, 0) + int(count)
        votes[year][state] = state_votes

print(votes[2020])

{'AL': {'DEMOCRAT': 608809, 'OTHER': 26838, 'REPUBLICAN': 1416012}, 'AK': {'DEMOCRAT': 159856, 'OTHER': 1183, 'REPUBLICAN': 192126}, 'AZ': {'DEMOCRAT': 1629318, 'REPUBLICAN': 1638516, 'OTHER': 415}, 'AR': {'DEMOCRAT': 330485, 'OTHER': 20645, 'REPUBLICAN': 828266}, 'CA': {'DEMOCRAT': 11084234, 'REPUBLICAN': 5640667}, 'CO': {'OTHER': 107650, 'DEMOCRAT': 1679052, 'REPUBLICAN': 1378248}, 'CT': {'DEMOCRAT': 1022792, 'OTHER': 73485, 'REPUBLICAN': 676650}, 'DE': {'DEMOCRAT': 281382, 'OTHER': 10496, 'REPUBLICAN': 196392}, 'FL': {'DEMOCRAT': 4942287, 'OTHER': 53340, 'REPUBLICAN': 5469163}, 'GA': {'DEMOCRAT': 2393089, 'REPUBLICAN': 2490396, 'OTHER': 126}, 'HI': {'OTHER': 69807, 'DEMOCRAT': 354762, 'REPUBLICAN': 155215}, 'ID': {'OTHER': 32973, 'DEMOCRAT': 255531, 'REPUBLICAN': 561405}, 'IL': {'DEMOCRAT': 3355487, 'OTHER': 104403, 'REPUBLICAN': 2416929}, 'IN': {'DEMOCRAT': 1194901, 'OTHER': 62798, 'REPUBLICAN': 1738745}, 'IA': {'DEMOCRAT': 762271, 'OTHER': 78579, 'REPUBLICAN': 859418}, 'KS': {'DEM

In [11]:
# Spot-check the tallies for a single state and year.
spot_year, spot_state = 2020, 'AL'
print(votes[spot_year][spot_state])

{'DEMOCRAT': 608809, 'OTHER': 26838, 'REPUBLICAN': 1416012}


# COVID-19 Data

In [19]:
# Load the COVID-19 reporting file and show its raw (rows, columns) shape.
covid_df = pd.read_csv('../Data/COVID-19_Reported.csv')
print(covid_df.shape)

# Keep only the columns needed for the death counts. The explicit .copy()
# makes this an independent frame, so adding columns to it later does not
# raise SettingWithCopyWarning.
covid_df = covid_df[['state', 'date', 'deaths_covid']].copy()

# Preview as the cell's final expression so it is actually displayed; in the
# middle of the cell the head() result was computed and thrown away.
covid_df.head()

(79823, 135)


In [20]:
# Split each date string on '/' once and reuse the pieces: the first piece
# becomes 'month' and the third becomes 'year'. Both columns stay strings.
# NOTE(review): this presumes dates are formatted month/day/year - confirm.
date_parts = covid_df['date'].str.split('/')
covid_df['month'] = date_parts.str[0]
covid_df['year'] = date_parts.str[2]

In [21]:
# 'month' and 'year' hold strings, and min() over strings compares them
# lexicographically (e.g. '10' sorts before '3'). Convert to integers first so
# the earliest month and year are found numerically.
print(covid_df['month'].astype(int).min())
print(covid_df['year'].astype(int).min())

1
2020


In [42]:
# Sum reported COVID deaths for each (year, month, state) combination.
# The year and month index levels are strings, so lookups use string keys.
death_groups = covid_df.groupby(['year', 'month', 'state'])
deaths_by_year_month_state = death_groups['deaths_covid'].sum()

# Show which state codes have entries for year '2020', month '4'.
print(deaths_by_year_month_state.loc[('2020', '4')].index)

Index(['AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DC', 'DE', 'FL', 'GA', 'HI',
       'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD', 'ME', 'MI', 'MN',
       'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH', 'NJ', 'NM', 'NV', 'NY', 'OH',
       'OK', 'OR', 'PA', 'PR', 'RI', 'SC', 'SD', 'TN', 'TX', 'UT', 'VA', 'VI',
       'VT', 'WA', 'WI', 'WV', 'WY'],
      dtype='object', name='state')


In [40]:
# Number of state entries present in each month (1 through 12) of 2023.
len_states = [
    len(deaths_by_year_month_state['2023'][str(month)].keys())
    for month in range(1, 13)
]

len_states

[54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54, 54]