In [1]:
import pandas as pd
import numpy as np
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler

# Load the datasets
economic_data = pd.read_csv('Economic Data.csv')
election_data = pd.read_csv('Election Data.csv')
polling_data = pd.read_csv('Polling Data.csv')


In [2]:
# Step 1: Preprocess Economic Data
economic_data['Date'] = pd.to_datetime(economic_data['Date'], format='%b-%y')
economic_data['Year'] = economic_data['Date'].dt.year
yearly_economic_data = economic_data.groupby('Year').agg({
    'Inflation Rate': 'mean',
    'UMCSI': 'mean',
    'Unemployment Rate': 'mean',
    'Wage Growth Rate': 'mean'
}).reset_index()

# Step 2: Define recession years based on historical data and add the Recession indicator
recession_years = [2008, 2020]
election_data['Recession'] = election_data['Year'].apply(lambda x: 1 if x in recession_years else 0)

# Step 3: Define incumbent party for each year (1 for Democrat, 0 for Republican) and add Incumbent_Party
election_data['Incumbent_Party'] = election_data['Year'].apply(lambda x: 1 if x in [2008, 2012, 2020] else 0)

# Step 4: Add Recession_Incumbent_Impact as a product of Recession and Incumbent_Party
election_data['Recession_Incumbent_Impact'] = election_data['Recession'] * election_data['Incumbent_Party']

# Step 5: Merge economic data and election data
election_with_economic_data = pd.merge(election_data, yearly_economic_data, on="Year", how="left")


In [3]:
# Step 6: Preprocess Polling Data
polling_data['Year'] = pd.to_datetime(polling_data['start_date'], errors='coerce').dt.year
polling_data[['GOP Pct', 'DEM Pct']] = polling_data[['GOP Pct', 'DEM Pct']].apply(pd.to_numeric, errors='coerce')

# Ensure that `Recession` and `Incumbent_Party` are in polling_data by merging from election_data
polling_data = polling_data.merge(election_data[['Year', 'Recession', 'Incumbent_Party']], on='Year', how='left')

# Set default values for 2024: no recession and Democrats as the incumbent
polling_data.loc[polling_data['Year'] == 2024, 'Recession'] = 0
polling_data.loc[polling_data['Year'] == 2024, 'Incumbent_Party'] = 1

# Fill any remaining missing values for Recession and Incumbent_Party for other years if necessary
polling_data['Recession'].fillna(0, inplace=True)
polling_data['Incumbent_Party'].fillna(0, inplace=True)

# Adjust polling values based on recession impact on the incumbent party
polling_data['Adjusted_GOP_Pct'] = polling_data.apply(
    lambda row: row['GOP Pct'] - 2 if row['Recession'] == 1 and row['Incumbent_Party'] == 1 else row['GOP Pct'], axis=1
)
polling_data['Adjusted_DEM_Pct'] = polling_data.apply(
    lambda row: row['DEM Pct'] - 2 if row['Recession'] == 1 and row['Incumbent_Party'] == 0 else row['DEM Pct'], axis=1
)

# Fill any missing values in Adjusted_GOP_Pct and Adjusted_DEM_Pct with original polling values
polling_data[['Adjusted_GOP_Pct', 'Adjusted_DEM_Pct']] = polling_data[['Adjusted_GOP_Pct', 'Adjusted_DEM_Pct']].fillna(
    polling_data[['GOP Pct', 'DEM Pct']]
)

# Create adjusted polling values for recession years based on incumbent impact
polling_data['Adjusted_GOP_Pct'] = polling_data.apply(
    lambda row: row['GOP Pct'] - 2 if row['Recession'] == 1 and row['Incumbent_Party'] == 1 else row['GOP Pct'], axis=1
)
polling_data['Adjusted_DEM_Pct'] = polling_data.apply(
    lambda row: row['DEM Pct'] - 2 if row['Recession'] == 1 and row['Incumbent_Party'] == 0 else row['DEM Pct'], axis=1
)
polling_data[['Adjusted_GOP_Pct', 'Adjusted_DEM_Pct']] = polling_data[['Adjusted_GOP_Pct', 'Adjusted_DEM_Pct']].fillna(
    polling_data[['GOP Pct', 'DEM Pct']]
)

# Group adjusted polling data by year and state
yearly_polling_data = polling_data.groupby(['state', 'Year']).agg({
    'Adjusted_GOP_Pct': 'mean',
    'Adjusted_DEM_Pct': 'mean'
}).reset_index().rename(columns={'state': 'State'})

# Step 7: Merge all datasets together
election_with_economic_polling_data = pd.merge(election_with_economic_data, yearly_polling_data, on=['State', 'Year'], how='left')



In [4]:
# Step 8: Define features and prepare training data
features = ['Recession_Incumbent_Impact', 'Inflation Rate', 'UMCSI', 'Unemployment Rate', 'Wage Growth Rate', 'Adjusted_GOP_Pct', 'Adjusted_DEM_Pct', 'GOP', 'DEM', 'Year']
model_data = election_with_economic_polling_data.dropna(subset=features)
X = model_data[features]
y_gop = model_data['GOP']  # Target for GOP percentage
y_dem = model_data['DEM']  # Target for DEM percentage

# Step 9: Scale data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Step 10: Train regression models
rf_gop_pct_model = RandomForestRegressor(random_state=42, max_depth=5, n_estimators=100)
rf_dem_pct_model = RandomForestRegressor(random_state=42, max_depth=5, n_estimators=100)
rf_gop_pct_model.fit(X_scaled, y_gop)
rf_dem_pct_model.fit(X_scaled, y_dem)

# Step 11: Prepare 2024 data for prediction
polling_data_2024_avg = yearly_polling_data[yearly_polling_data['Year'] == 2024]
predict_2024_data = election_with_economic_polling_data[election_with_economic_polling_data['Year'] == 2020].copy()
predict_2024_data = pd.merge(predict_2024_data, polling_data_2024_avg, on='State', how='left', suffixes=('', '_2024'))

# Add 2024 recession impact
predict_2024_data['Recession'] = 1 if 2024 in recession_years else 0
predict_2024_data['Recession_Incumbent_Impact'] = predict_2024_data['Recession'] * predict_2024_data['Incumbent_Party']

# Step 12: Impute missing values and scale prediction data
imputer = SimpleImputer(strategy='mean')
X_predict_2024_final_imputed = imputer.fit_transform(predict_2024_data[features])
X_predict_2024_final_scaled = scaler.transform(X_predict_2024_final_imputed)

# Step 13: Predict 2024 GOP and DEM percentages by state
predicted_gop_pct_2024 = rf_gop_pct_model.predict(X_predict_2024_final_scaled)
predicted_dem_pct_2024 = rf_dem_pct_model.predict(X_predict_2024_final_scaled)

# Step 14: Update DataFrame with predictions
predict_2024_data['Predicted_GOP_Pct'] = predicted_gop_pct_2024
predict_2024_data['Predicted_DEM_Pct'] = predicted_dem_pct_2024
predict_2024_data['Predicted_Winning_Party_Final'] = np.where(
    predict_2024_data['Predicted_DEM_Pct'] > predict_2024_data['Predicted_GOP_Pct'], 'Democratic', 'Republican'
)

# Calculate total electoral votes by party
projected_electoral_votes = predict_2024_data.groupby('Predicted_Winning_Party_Final')['EV'].sum()
total_dem_votes = projected_electoral_votes.get('Democratic', 0)
total_gop_votes = projected_electoral_votes.get('Republican', 0)

print(f"Total projected electoral votes for Democrats: {total_dem_votes}")
print(f"Total projected electoral votes for Republicans: {total_gop_votes}")

# Step 18: Save final predictions to CSV
output_path = '2024 Election Predictions.csv'
predict_2024_data[['State', 'EV', 'Predicted_GOP_Pct', 'Predicted_DEM_Pct', 'Predicted_Winning_Party_Final']].to_csv(output_path, index=False)

Total projected electoral votes for Democrats: 303
Total projected electoral votes for Republicans: 235


