In [41]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
import pandas as pd
from matplotlib import style
style.use("ggplot")
import plotly.express as px
from pandas import DataFrame

In [10]:
# Names of the model input columns. The strings are used verbatim as DataFrame
# column labels, so spelling, spacing and order must stay exactly as they are.
features = [
    # Sex
    "females", "males",
    # Ethnicity
    "hispanic", "not hispanic",
    # Race
    "White Alone",
    "Black or African American Alone",
    "American Indian or Alaska Native Alone",
    "Asian Alone",
    "Native Hawaiian and Other Pacific Islander Alone",
    "Two or more races",
    # Age brackets
    "Age: [0-10]", "Age: [11-20]", "Age: [21-30]", "Age: [31-40]", "Age: [41-50]",
    "Age: [51-60]", "Age: [61-70]", "Age: [71-80]", "Age: [81-84]", "Age: [85]+",
    # Economic indicators
    "GDP Per Year (Normalized)",
    "Personal income per capita (Normalized)",
    "Personal Consumption expenditure per capita (Normalized)",
    # Political climate before the election
    "Presidential Approval Rating",
    "GOP/Total Senate Seats pre-election",
    "GOP/Total House Seats pre-election",
]

In [4]:
# Names of the target columns. Later cells address them by position
# (labels[0], labels[1], labels[2]), so the order here matters.
labels = [
    # labels[0]: GOP share of the presidential vote
    "GOP Votes/Total Votes for Presidential Election",
    # labels[1]: DNC share of the presidential vote
    "DNC Votes/Total Votes for Presidential Election",
    # labels[2]: binary outcome flag used as the classifier target
    "1 = Voted GOP, 0 = Voted DNC"
]

In [35]:
# Full state names, one per US state (50 entries).
# The order follows the two-letter postal abbreviations (AK, AL, AR, AZ, ...),
# not the names themselves, so that each entry lines up position-by-position
# with `states_abbrevs`.
# NOTE(review): the test CSV carries no state column, so its rows are presumably
# in this same order — confirm against the data-preparation step.
states = [
    "Alaska", "Alabama", "Arkansas", "Arizona", "California",
    "Colorado", "Connecticut", "Delaware", "Florida", "Georgia",
    "Hawaii", "Iowa", "Idaho", "Illinois", "Indiana",
    "Kansas", "Kentucky", "Louisiana", "Massachusetts", "Maryland",
    "Maine", "Michigan", "Minnesota", "Missouri", "Mississippi",
    "Montana", "North Carolina", "North Dakota", "Nebraska", "New Hampshire",
    "New Jersey", "New Mexico", "Nevada", "New York", "Ohio",
    "Oklahoma", "Oregon", "Pennsylvania", "Rhode Island", "South Carolina",
    "South Dakota", "Tennessee", "Texas", "Utah", "Virginia",
    "Vermont", "Washington", "Wisconsin", "West Virginia", "Wyoming",
]

In [104]:
# Two-letter postal codes in alphabetical order; entry k corresponds to
# states[k]. Written as one whitespace-separated string and split into a list.
states_abbrevs = (
    "AK AL AR AZ CA CO CT DE FL GA "
    "HI IA ID IL IN KS KY LA MA MD "
    "ME MI MN MO MS MT NC ND NE NH "
    "NJ NM NV NY OH OK OR PA RI SC "
    "SD TN TX UT VA VT WA WI WV WY"
).split()

In [7]:
# Load the training split. The path is relative, so the CSV must sit in the
# notebook's working directory.
# NOTE(review): "no-states" in the file name suggests the state column was
# removed upstream — confirm; nothing in this notebook re-attaches it for training.
data_df_train = pd.read_csv("combined-data_no-states_train.csv")

In [8]:
# Load the held-out test split (relative path, same directory as the notebook).
# NOTE(review): later cells pair test rows with the hard-coded `states` list by
# position, so this file presumably has one row per state in that order — confirm.
data_df_test = pd.read_csv("combined-data_no-states_test.csv")

In [23]:
# Training feature matrix: one column per entry in `features`, in that order.
X_train = np.array(data_df_train[features].to_numpy())

# Training targets as plain Python lists, unpacked in the order of `labels`:
# GOP vote share, DNC vote share, binary GOP/DNC outcome.
y_gop_train, y_dnc_train, y_binary_train = (
    data_df_train[target_column].to_list() for target_column in labels
)

In [24]:
# Test feature matrix, built with the same column selection as the training one.
X_test = np.array(data_df_test[features].to_numpy())

# Test targets as plain Python lists, unpacked in the order of `labels`:
# GOP vote share, DNC vote share, binary GOP/DNC outcome.
y_gop_test, y_dnc_test, y_binary_test = (
    data_df_test[target_column].to_list() for target_column in labels
)

In [31]:
from sklearn import preprocessing
from sklearn import utils

# A single LabelEncoder is refit on each target list in turn, so the integer
# codes of the four outputs are independent of one another and `lab_enc`
# ends up fitted on `y_dnc_test` only.
# NOTE(review): these targets are named as vote-share ratios; label-encoding
# them would turn every distinct ratio into its own class id, and train/test
# codes would not be comparable. If they are meant to be predicted, a
# regressor on the raw ratios is presumably what is wanted — confirm intent.
lab_enc = preprocessing.LabelEncoder()

(
    encoded_gop_train,
    encoded_dnc_train,
    encoded_gop_test,
    encoded_dnc_test,
) = (
    lab_enc.fit_transform(target_values)
    for target_values in (y_gop_train, y_dnc_train, y_gop_test, y_dnc_test)
)

In [47]:
from sklearn.svm import SVC
# Linear-kernel support vector classifier trained on the binary GOP/DNC target.
# NOTE(review): the features are passed in as loaded, with no scaling step in
# this notebook; only some column names say "(Normalized)" — confirm whether the
# rest are already on comparable scales, since SVMs are scale-sensitive.
svclassifier = SVC(kernel='linear')
# Kept as the cell's last expression so the fitted estimator is echoed as output.
svclassifier.fit(X_train, y_binary_train)

SVC(kernel='linear')

In [48]:
# Predicted class for every row of the test feature matrix, in row order.
y_pred = svclassifier.predict(X_test)

In [49]:
from sklearn.metrics import classification_report, confusion_matrix
# Both helpers take the true labels first and the predictions second.
# Per scikit-learn's documented convention the confusion matrix has true
# classes on the rows and predicted classes on the columns.
print(confusion_matrix(y_binary_test,y_pred))
# Per-class precision / recall / F1 plus overall accuracy.
print(classification_report(y_binary_test,y_pred))

[[19  6]
 [ 5 20]]
              precision    recall  f1-score   support

           0       0.79      0.76      0.78        25
           1       0.77      0.80      0.78        25

    accuracy                           0.78        50
   macro avg       0.78      0.78      0.78        50
weighted avg       0.78      0.78      0.78        50



In [105]:
# Wrap each per-state sequence in a single-column frame so they can be joined
# side by side later. All four get a default 0..n-1 row index.
df_states = pd.DataFrame({"State": states})
df_states_abbrevs = pd.DataFrame({"states_abbrevs": states_abbrevs})
df_y_pred = pd.DataFrame({"y_pred": y_pred})
df_y_binary_test = pd.DataFrame({"y_binary_test": y_binary_test})

In [61]:
# Flag every test row: 1 when the predicted class equals the true class, else 0.
#
# Both columns are cast to int before comparing because the predictions are
# floats (1.0 / 0.0) while the true labels are ints. The comparison is done on
# the underlying arrays (position by position) instead of looping over rows
# with the private `DataFrame._get_value` accessor.
predicted_labels = df_y_pred["y_pred"].to_numpy().astype(int)
actual_labels = df_y_binary_test["y_binary_test"].to_numpy().astype(int)

# A length mismatch means predictions and ground truth come from different
# runs or splits; fail loudly rather than compare misaligned rows.
if predicted_labels.shape != actual_labels.shape:
    raise ValueError(
        "y_pred and y_binary_test differ in length: "
        f"{predicted_labels.shape[0]} vs {actual_labels.shape[0]}"
    )

# `array5` is kept as a plain list of 0/1 ints, as before.
array5 = (predicted_labels == actual_labels).astype(int).tolist()

correct = pd.DataFrame(data=array5, columns=["correct"])

In [107]:
# Assemble the per-state table: name, abbreviation, prediction, truth, hit flag.
# The pieces are matched purely by row position. pd.concat(axis=1) aligns on
# the index and pads shorter inputs with NaN without complaint, which would
# silently attach predictions to the wrong states — so check lengths first.
result_parts = [df_states, df_states_abbrevs, df_y_pred, df_y_binary_test, correct]
part_lengths = [len(part) for part in result_parts]
if len(set(part_lengths)) > 1:
    raise ValueError(
        "Cannot align per-state columns; row counts differ "
        f"(State, states_abbrevs, y_pred, y_binary_test, correct): {part_lengths}"
    )

result = pd.concat(result_parts, axis=1)
result.head()

Unnamed: 0,State,states_abbrevs,y_pred,y_binary_test,correct
0,Alaska,AK,1.0,1,1
1,Alabama,AL,1.0,1,1
2,Arkansas,AR,0.0,1,0
3,Arizona,AZ,1.0,0,0
4,California,CA,0.0,0,1


In [90]:
import json

# Read the state-boundary GeoJSON into a Python dict.
# The encoding is given explicitly: JSON files are UTF-8, whereas open() would
# otherwise fall back to the platform's locale encoding (e.g. cp1252 on
# Windows) and could fail or mis-decode non-ASCII characters.
# NOTE(review): the choropleth cell in this notebook uses the built-in
# "USA-states" location mode and does not read `data` — confirm it is still needed.
with open('states_geojson.json', encoding='utf-8') as f:
    data = json.load(f)

In [111]:
# US map coloured by whether the classifier got each state right:
# `correct` is 0 (miss) or 1 (hit), drawn on the YlGn scale pinned to [0, 1].
# States are located via their two-letter codes using plotly's built-in
# "USA-states" mode.
fig = px.choropleth(
    result,
    locations=result["states_abbrevs"],
    locationmode="USA-states",
    scope="usa",
    color="correct",
    range_color=[0, 1],
    color_continuous_scale="YlGn",
    hover_name="State",
)

# Zoom to the plotted states and hide the base-map layers.
fig.update_geos(fitbounds="locations", visible=False)

# Remove the outer margins so the map fills the output area.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))

fig.show()