In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.chrome.options import Options

import scipy
import pandas
import numpy
from numpy import nan
from collections import defaultdict
import re


In [3]:

states_scores = defaultdict(list)
all_state_lists = []
def predict_next_election(state, state_list):
    new_state_list = []
    for item in state_list:
        new_list = item.tolist()
        new_state_list.append(new_list[0])
    new_state_list = [x for x in new_state_list if not numpy.isnan(x)]
    num_of_prev_elections = len(new_state_list)
    x_cords = list(range(1,num_of_prev_elections+1))
    
    slope, intercept, r_value, p_value, std_err = scipy.stats.linregress(x_cords, new_state_list)
    
    
    if r_value > 0:
        print(f"Democrats will win in {state} next time")
        all_state_lists.append([state,"Democrats"])
    else:
        print(f"Republicans will win in {state} next time")
        all_state_lists.append([state, "Republicans"])

def calculate_country_swing(prev_election, election):
    total_votes = prev_election['total_votes'].sum()
    rep_votes = prev_election['rep_votes'].sum()
    dem_votes = prev_election['dem_votes'].sum()
    curr_total_votes = election['total_votes'].sum()
    curr_rep_votes = election['rep_votes'].sum()
    curr_dem_votes = election['dem_votes'].sum()

    rep_percent = (rep_votes/total_votes)*100
    dem_percent = (dem_votes/total_votes)*100
    curr_rep_percent = (curr_rep_votes/curr_total_votes)*100
    curr_dem_percent = (curr_dem_votes/curr_total_votes)*100
    return (curr_dem_percent - curr_rep_percent) - (dem_percent - rep_percent)

def calculate_state_swing_score(prev_election, election):
    country_swing = calculate_country_swing(prev_election, election)
    for index, row in prev_election.iterrows():
        state_name = row['state']
        prev_state = prev_election.loc[prev_election['state'] == state_name]
        state = election.loc[election['state'] == state_name]
        state_swing = (state['dem_percent'] - state['rep_percent']) - (prev_state['dem_percent'] - prev_state['rep_percent'])
        states_scores[state_name].append(state_swing-country_swing)

def process_row(row):
    i = 1
    for word in row[1:]:
        if word is not nan and word.replace(',','').isnumeric():
            row[i] = int(word.replace(',',''))
        if word == '--' or word == '' or word == '*':
            row[i] = 0
        if word.find('.') != -1 and word not in ['Col.','Col.*']:
            if word.find('%') != -1:
                word = word[0:-1]
            row[i] = float(word)
        i += 1

    return row

def get_index_of_first_int(row):
    i = 0
    for word in row:
        if word.replace(',','').isnumeric():
            return i
        i += 1

    return i

def get_specific_election(year):
    return f"https://www.presidency.ucsb.edu/statistics/elections/{year}"

def get_election_dataframe(year):
    driver_exe = 'chromedriver'
    options = Options()
    options.add_argument("--headless")
    driver = webdriver.Chrome(driver_exe, options=options)
    URL = get_specific_election(year)
    driver.get(URL)
    a = driver.find_elements(By.XPATH, "//table[@class='table table-responsive']")
    results = a[0].text
    republican_indx = results.find('Republican')
    democratic_indx = results.find('Democratic')
    only_votes = results[results.find('Alabama'):]
    state_per_row = only_votes.split('\n')
    parse_rows = [row.split(' ') for row in state_per_row]
    dataframe_list = []
    for row in parse_rows:
        row = list(dict.fromkeys(row))
        indx = get_index_of_first_int(row)
        row = process_row(row)
        if republican_indx < democratic_indx:
            metrics = row[indx:]
        else:
            metrics = [row[indx]]+ row[indx+4:indx+7] + row[indx+1:indx+4] + row[indx+7:]
        state = row[0:indx]
        state = ' '.join(state)
        pattern = r'[^A-Za-z ]+'
        state = re.sub(pattern, '', state)
        metrics.insert(0, state)
        metrics_len = len(metrics)
        if metrics_len < 14:
            buffer = 14 - metrics_len
            buffer_lst = [nan] * buffer
            metrics += buffer_lst
        dataframe_list.append(metrics)
        if state == 'Wyoming':
            break

    columns = ['state','total_votes','rep_votes','rep_percent','rep_ev', 'dem_votes','dem_percent', 'dem_ev',
               'other_votes', 'other_percent', 'other_ev','socialist_vote', 'socialist_percent', 'socialist_ev']

    table = pandas.DataFrame(dataframe_list, columns=columns)
    return table

def iterate_over_elections():
    states_df = []
    for y in range(1900,2000,4):
 
        if y == 1976:
            continue
        states_df.append(get_election_dataframe(y))

    for i in range(1, len(states_df)):
        calculate_state_swing_score(states_df[i-1],states_df[i])


def main():
    iterate_over_elections()
    for state_name, state_score in states_scores.items():
        predict_next_election(state_name, state_score)
        

main()


pred_df = pandas.DataFrame(all_state_lists,['state','winner'])
print(pred_df)

Republicans will win in Alabama next time
Republicans will win in Arkansas next time
Democrats will win in California next time
Democrats will win in Colorado next time
Democrats will win in Connecticut next time
Democrats will win in Delaware next time
Democrats will win in Florida next time
Democrats will win in Georgia next time
Democrats will win in Idaho next time
Democrats will win in Illinois next time
Republicans will win in Indiana next time
Republicans will win in Iowa next time
Republicans will win in Kansas next time
Republicans will win in Kentucky next time
Republicans will win in Louisiana next time
Democrats will win in Maine next time
Republicans will win in Maryland next time
Republicans will win in Massachusetts next time
Democrats will win in Michigan next time
Republicans will win in Minnesota next time
Democrats will win in Mississippi next time
Democrats will win in Missouri next time
Democrats will win in Montana next time
Republicans will win in Nebraska next t

ValueError: Length of values (51) does not match length of index (2)