In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from census import Census
from us import states

# Load the dataset
file_path = '../data/project_data.csv'  # Replace with your dataset path
data = pd.read_csv(file_path)

# Prepare features and target
X = data.drop(columns=['Presidential_Winner', 'Turnout', 'District', 'Representative_Winner', 'Year'])
y = data['Presidential_Winner']

# Encode target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)  # Encode 'D', 'R', etc., to integers

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Define the final neural network model
def build_final_model():
    model = Sequential()
    model.add(Dense(128, input_dim=X_scaled.shape[1], activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))
    model.add(Dense(1, activation='sigmoid'))  # Sigmoid for binary classification
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

final_model = build_final_model()

# Train the model on the entire dataset
final_model.fit(X_scaled, y_encoded, epochs=100, batch_size=32, verbose=1)

# Function to fetch current district data
API_KEY = '8206bc1789ae1bb3348218afaa09fa0f680744f5'
c = Census(API_KEY)

def get_demographics_data(state, district, year):
    state_fips = states.lookup(state).fips
    data = c.acs5.state_congressional_district(
        ('NAME', 'B01003_001E', 'B02001_002E', 'B02001_003E', 'B02001_004E',
         'B02001_005E', 'B02001_006E', 'B02001_007E', 'B02001_008E', 'B01001_007E',
         'B01001_008E', 'B01001_009E', 'B01001_010E', 'B01001_011E', 'B01001_012E',
         'B01001_013E', 'B01001_014E', 'B01001_015E', 'B01001_016E', 'B01001_017E',
         'B01001_018E', 'B01001_019E', 'B01001_020E', 'B01001_021E', 'B01001_022E',
         'B01001_023E', 'B01001_024E', 'B01001_025E', 'B01001_002E', 'B01001_026E',
         'B19013_001E', 'B15003_001E', 'B23025_005E'), state_fips, district, year=year)

    df = pd.DataFrame(data)
    df = df.rename(columns={
        'NAME': 'District', 'B01003_001E': 'Total_Population',
        'B02001_002E': 'White_Alone', 'B02001_003E': 'Black_or_African_American_Alone',
        'B02001_004E': 'American_Indian_and_Alaska_Native_Alone', 'B02001_005E': 'Asian_Alone',
        'B02001_006E': 'Native_Hawaiian_and_Other_Pacific_Islander_Alone',
        'B02001_007E': 'Some_Other_Race_Alone', 'B02001_008E': 'Two_or_More_Races',
        'B01001_002E': 'Male_Population', 'B01001_026E': 'Female_Population',
        'B19013_001E': 'Median_Household_Income', 'B15003_001E': 'Educational_Attainment',
        'B23025_005E': 'Unemployment'
    })

    df['18-34'] = (df['B01001_007E'] + df['B01001_008E'] + df['B01001_009E'] +
                   df['B01001_010E'] + df['B01001_011E'] + df['B01001_012E'])
    df['35-64'] = (df['B01001_013E'] + df['B01001_014E'] + df['B01001_015E'] +
                   df['B01001_016E'] + df['B01001_017E'] + df['B01001_018E'] +
                   df['B01001_019E'])
    df['65 and older'] = (df['B01001_020E'] + df['B01001_021E'] + df['B01001_022E'] +
                          df['B01001_023E'] + df['B01001_024E'] + df['B01001_025E'])
    df['Male_Percentage'] = (df['Male_Population'] / df['Total_Population']) * 100
    df['Female_Percentage'] = (df['Female_Population'] / df['Total_Population']) * 100
    df['White_Percentage'] = (df['White_Alone'] / df['Total_Population']) * 100
    df['Black_Percentage'] = (df['Black_or_African_American_Alone'] / df['Total_Population']) * 100
    df['American_Indian_Percentage'] = (df['American_Indian_and_Alaska_Native_Alone'] / df['Total_Population']) * 100
    df['Asian_Percentage'] = (df['Asian_Alone'] / df['Total_Population']) * 100
    df['Native_Hawaiian_Percentage'] = (df['Native_Hawaiian_and_Other_Pacific_Islander_Alone'] / df['Total_Population']) * 100
    df['Other_Race_Percentage'] = (df['Some_Other_Race_Alone'] / df['Total_Population']) * 100
    df['Two_or_More_Races_Percentage'] = (df['Two_or_More_Races'] / df['Total_Population']) * 100

    df = df[['Total_Population', '18-34', '35-64', '65 and older', 'Male_Population',
             'Female_Population', 'Male_Percentage', 'Female_Percentage', 'White_Percentage',
             'Black_Percentage', 'American_Indian_Percentage', 'Asian_Percentage',
             'Native_Hawaiian_Percentage', 'Other_Race_Percentage', 'Two_or_More_Races_Percentage',
             'Median_Household_Income', 'Educational_Attainment', 'Unemployment']]

    return df

# Fetch current district data
current_data = get_demographics_data('IA', '01', 2022)

# Scale the current district data
current_data_scaled = scaler.transform(current_data)

# Predict the presidential winner for the current district
current_prediction = final_model.predict(current_data_scaled)
predicted_class = (current_prediction > 0.5).astype("int32")
predicted_label = label_encoder.inverse_transform(predicted_class.flatten())

print("Predicted Presidential Winner:", predicted_label[0])


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 457ms/step - accuracy: 0.5000 - loss: 0.6960
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6250 - loss: 0.6370
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6250 - loss: 0.6458
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6875 - loss: 0.6288
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.8750 - loss: 0.4830
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.8125 - loss: 0.5133
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.8125 - loss: 0.4843
Epoch 8/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.8750 - loss: 0.4451
Epoch 9/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m