# Clean and Link datasets for 7th district of Illinois
### Load the datasets

In [68]:
import pandas as pd

people_labels_df = pd.read_csv('./Raw Datasets/Census/DECENNIALPL2020.P1-Column-Metadata.csv')
housing_labels_df = pd.read_csv('./Raw Datasets/Census/DECENNIALPL2020.H1-Column-Metadata.csv')
people_raw_df = pd.read_csv('./Raw Datasets/Census/DECENNIALPL2020.P1-Data.csv')
housing_raw_df = pd.read_csv('./Raw Datasets/Census/DECENNIALPL2020.H1-Data.csv')
votes_raw_df = pd.read_csv('./Raw Datasets/il_2020_2020_vtd.csv')
turnout_raw_df = pd.read_csv('./Raw Datasets/IL_l2_turnout_stats_vtd20.csv')

precinct_df = pd.read_csv('./Raw Datasets/IL7_precincts.csv')

### Precinct of 7th district of Illinois

Cleaning of the precincts that are not all in the same format.

In [69]:
precinct_list = precinct_df['Precinct'].tolist()
precinct_list = [str(x).replace("WARD", "Wd").replace("PRECINCT", "Pct") if "WARD" in str(x) else str(x)[:4] + str(x)[5:] for x in precinct_list]
print(len(precinct_list), "precincts")
print(precinct_list[:10])

393 precincts
['780026', '850033', '890025', '890030', '890071', 'Wd 01 Pct 21', 'Wd 02 Pct 23', 'Wd 03 Pct 02', 'Wd 03 Pct 07', 'Wd 03 Pct 19']


### Clean Census Labels

In [88]:
for label in people_labels_df['Label']:
    people_labels_df['Label'] = people_labels_df['Label'].replace(label, label.split('!!')[-1].replace(':', ''))

print("People Labels Shape: ", people_labels_df.shape)
people_labels_df.head()

People Labels Shape:  (73, 2)


Unnamed: 0,Column Name,Label
0,GEO_ID,Geography
1,NAME,Geographic Area Name
2,P1_001N,Total
3,P1_002N,Population of one race
4,P1_003N,White alone


In [71]:
for label in housing_labels_df['Label']:
    housing_labels_df['Label'] = housing_labels_df['Label'].replace(label, label.split('!!')[-1].replace(':', ''))

print("Housing Labels Shape: ", housing_labels_df.shape)
housing_labels_df.head()

Housing Labels Shape:  (5, 2)


Unnamed: 0,Column Name,Label
0,GEO_ID,Geography
1,NAME,Geographic Area Name
2,H1_001N,Total
3,H1_002N,Occupied
4,H1_003N,Vacant


### Clean Census Datasets

In [72]:
def clean_census_data(df):
    df = df.drop([0]) # Drop labels row
    for col in df.columns:
        if "Unnamed" in col:
            df = df.drop(columns=[col])

    df['GEO_ID'] = df['GEO_ID'].apply(lambda x: x.split('US')[1] if 'US' in x else x)
    df['NAME'] = df['NAME'].apply(lambda x: x.split(', Cook')[0] if 'Cook' in x else x)
    return df

def assign_labels(df, labels_df):
    for i in range(2, len(df.columns)):
        df = df.rename(columns={df.columns[i]: labels_df['Label'].iloc[i]})
    return df

def convert_to_num(df):
    for col in df.columns:
        if df[col].dtype == object and col != 'GEO_ID' and col != 'NAME':
            df[col] = df[col].astype(int)
    return df

def geoid_reduced(df):
    df['GEO_ID_REDUCED'] = df['GEO_ID'].apply(lambda x: str(x)[5:])
    return df

def only_precincts(df, precinct_list):
    df = df[df['NAME'].isin(precinct_list) | df['GEO_ID_REDUCED'].isin(precinct_list)]
    return df

In [73]:
people_df = clean_census_data(people_raw_df)
people_df = assign_labels(people_df, people_labels_df)
people_df = convert_to_num(people_df)
people_df = geoid_reduced(people_df)
people_df = only_precincts(people_df, precinct_list)
people_df = people_df.iloc[:, :27]
print("People Data Shape: ", people_df.shape)
print(people_df.dtypes)
people_df.head()

People Data Shape:  (392, 27)
GEO_ID                                                                           object
NAME                                                                             object
Total                                                                             int32
Population of one race                                                            int32
White alone                                                                       int32
Black or African American alone                                                   int32
American Indian and Alaska Native alone                                           int32
Asian alone                                                                       int32
Native Hawaiian and Other Pacific Islander alone                                  int32
Some Other Race alone                                                             int32
Population of two or more races                                                   int32
Po

Unnamed: 0,GEO_ID,NAME,Total,Population of one race,White alone,Black or African American alone,American Indian and Alaska Native alone,Asian alone,Native Hawaiian and Other Pacific Islander alone,Some Other Race alone,...,Black or African American; American Indian and Alaska Native,Black or African American; Asian,Black or African American; Native Hawaiian and Other Pacific Islander,Black or African American; Some Other Race,American Indian and Alaska Native; Asian,American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander,American Indian and Alaska Native; Some Other Race,Asian; Native Hawaiian and Other Pacific Islander,Asian; Some Other Race,Native Hawaiian and Other Pacific Islander; Some Other Race
12,17031001012,Wd 01 Pct 12,1748,1600,1409,43,1,64,0,83,...,1,4,0,0,1,0,3,0,1,0
13,17031001013,Wd 01 Pct 13,909,792,436,28,12,27,0,289,...,2,1,0,8,0,0,0,1,0,0
14,17031001014,Wd 01 Pct 14,1143,1033,841,52,6,96,0,38,...,1,3,0,3,0,0,0,0,0,0
15,17031001015,Wd 01 Pct 15,1250,1098,762,113,3,49,0,171,...,2,0,0,14,0,0,0,0,0,0
16,17031001016,Wd 01 Pct 16,1494,1330,882,31,27,102,5,283,...,3,2,0,5,0,0,6,0,0,0


In [74]:
# Manually compute the percentage by race
people_percent_df = pd.DataFrame()
people_percent_df['GEO_ID'] = people_df['GEO_ID']
people_percent_df['White alone'] = people_df['White alone'] / people_df['Population of one race']
people_percent_df['Black or African American alone'] = people_df['Black or African American alone'] / people_df['Population of one race']
people_percent_df['American Indian and Alaska Native alone'] = people_df['American Indian and Alaska Native alone'] / people_df['Population of one race']
people_percent_df['Asian alone'] = people_df['Asian alone'] / people_df['Population of one race']
people_percent_df['Native Hawaiian and Other Pacific Islander alone'] = people_df['Native Hawaiian and Other Pacific Islander alone'] / people_df['Population of one race']
people_percent_df['Some Other Race alone'] = people_df['Some Other Race alone'] / people_df['Population of one race']
people_percent_df['White; Black or African American'] = people_df['White; Black or African American'] / people_df['Population of two races']
people_percent_df['Black or African American; American Indian and Alaska Native'] = people_df['Black or African American; American Indian and Alaska Native'] / people_df['Population of two races']
people_percent_df['White; American Indian and Alaska Native'] = people_df['White; American Indian and Alaska Native'] / people_df['Population of two races']
people_percent_df['White; Asian'] = people_df['White; Asian'] / people_df['Population of two races']
people_percent_df['White; Native Hawaiian and Other Pacific Islander'] = people_df['White; Native Hawaiian and Other Pacific Islander'] / people_df['Population of two races']
people_percent_df['White; Some Other Race'] = people_df['White; Some Other Race'] / people_df['Population of two races']
people_percent_df['Black or African American; American Indian and Alaska Native'] = people_df['Black or African American; American Indian and Alaska Native'] / people_df['Population of two races']
people_percent_df['Black or African American; Asian'] = people_df['Black or African American; Asian'] / people_df['Population of two races']
people_percent_df['Black or African American; Native Hawaiian and Other Pacific Islander'] = people_df['Black or African American; Native Hawaiian and Other Pacific Islander'] / people_df['Population of two races']
people_percent_df['Black or African American; Some Other Race'] = people_df['Black or African American; Some Other Race'] / people_df['Population of two races']
people_percent_df['American Indian and Alaska Native; Asian'] = people_df['American Indian and Alaska Native; Asian'] / people_df['Population of two races']
people_percent_df['American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander'] = people_df['American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander'] / people_df['Population of two races']
people_percent_df['American Indian and Alaska Native; Some Other Race'] = people_df['American Indian and Alaska Native; Some Other Race'] / people_df['Population of two races']
people_percent_df['Asian; Native Hawaiian and Other Pacific Islander'] = people_df['Asian; Native Hawaiian and Other Pacific Islander'] / people_df['Population of two races']
people_percent_df['Asian; Some Other Race'] = people_df['Asian; Some Other Race'] / people_df['Population of two races']
people_percent_df['Native Hawaiian and Other Pacific Islander; Some Other Race'] = people_df['Native Hawaiian and Other Pacific Islander; Some Other Race'] / people_df['Population of two races']

people_percent_df.head()

Unnamed: 0,GEO_ID,White alone,Black or African American alone,American Indian and Alaska Native alone,Asian alone,Native Hawaiian and Other Pacific Islander alone,Some Other Race alone,White; Black or African American,Black or African American; American Indian and Alaska Native,White; American Indian and Alaska Native,...,White; Some Other Race,Black or African American; Asian,Black or African American; Native Hawaiian and Other Pacific Islander,Black or African American; Some Other Race,American Indian and Alaska Native; Asian,American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander,American Indian and Alaska Native; Some Other Race,Asian; Native Hawaiian and Other Pacific Islander,Asian; Some Other Race,Native Hawaiian and Other Pacific Islander; Some Other Race
12,17031001012,0.880625,0.026875,0.000625,0.04,0.0,0.051875,0.121429,0.007143,0.121429,...,0.45,0.028571,0.0,0.0,0.007143,0.0,0.021429,0.0,0.007143,0.0
13,17031001013,0.550505,0.035354,0.015152,0.034091,0.0,0.364899,0.070796,0.017699,0.026549,...,0.646018,0.00885,0.0,0.070796,0.0,0.0,0.0,0.00885,0.0,0.0
14,17031001014,0.814134,0.050339,0.005808,0.092933,0.0,0.036786,0.088235,0.009804,0.058824,...,0.607843,0.029412,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0
15,17031001015,0.693989,0.102914,0.002732,0.044627,0.0,0.155738,0.137681,0.014493,0.036232,...,0.536232,0.0,0.0,0.101449,0.0,0.0,0.0,0.0,0.0,0.0
16,17031001016,0.663158,0.023308,0.020301,0.076692,0.003759,0.212782,0.046358,0.019868,0.059603,...,0.655629,0.013245,0.0,0.033113,0.0,0.0,0.039735,0.0,0.0,0.0


In [75]:
housing_df = clean_census_data(housing_raw_df)
housing_df = assign_labels(housing_df, housing_labels_df)
housing_df = geoid_reduced(housing_df)
housing_df = only_precincts(housing_df, precinct_list)
housing_df = convert_to_num(housing_df)
print("Housing Data Shape: ", housing_df.shape)
print(housing_df.dtypes)
housing_df.head()

Housing Data Shape:  (392, 6)
GEO_ID            object
NAME              object
Total              int32
Occupied           int32
Vacant             int32
GEO_ID_REDUCED     int32
dtype: object


Unnamed: 0,GEO_ID,NAME,Total,Occupied,Vacant,GEO_ID_REDUCED
12,17031001012,Wd 01 Pct 12,946,896,50,1012
13,17031001013,Wd 01 Pct 13,404,378,26,1013
14,17031001014,Wd 01 Pct 14,624,573,51,1014
15,17031001015,Wd 01 Pct 15,622,578,44,1015
16,17031001016,Wd 01 Pct 16,785,736,49,1016


In [76]:
housing_percent_df = pd.DataFrame()
housing_percent_df['GEO_ID'] = housing_df['GEO_ID']
housing_percent_df['Occupied_percent'] = housing_df['Occupied'] / housing_df['Total']
housing_percent_df.head()

Unnamed: 0,GEO_ID,Occupied_percent
12,17031001012,0.947146
13,17031001013,0.935644
14,17031001014,0.918269
15,17031001015,0.92926
16,17031001016,0.93758


### Save clean IL7 GEOID

In [77]:
IL7_GEOID = merged_df["GEO_ID"].tolist()
print(IL7_GEOID[:10])

['17031001012', '17031001013', '17031001014', '17031001015', '17031001016', '17031001021', '17031001022', '17031001023', '17031001024', '17031001025']


### Merge Census Datasets

In [78]:
def merge_on_geo_id(df1, df2):
    return df1.merge(df2, on='GEO_ID')

In [79]:
people_housing_percent_df = merge_on_geo_id(people_percent_df, housing_percent_df)
people_housing_percent_df.head()

Unnamed: 0,GEO_ID,White alone,Black or African American alone,American Indian and Alaska Native alone,Asian alone,Native Hawaiian and Other Pacific Islander alone,Some Other Race alone,White; Black or African American,Black or African American; American Indian and Alaska Native,White; American Indian and Alaska Native,...,Black or African American; Asian,Black or African American; Native Hawaiian and Other Pacific Islander,Black or African American; Some Other Race,American Indian and Alaska Native; Asian,American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander,American Indian and Alaska Native; Some Other Race,Asian; Native Hawaiian and Other Pacific Islander,Asian; Some Other Race,Native Hawaiian and Other Pacific Islander; Some Other Race,Occupied_percent
0,17031001012,0.880625,0.026875,0.000625,0.04,0.0,0.051875,0.121429,0.007143,0.121429,...,0.028571,0.0,0.0,0.007143,0.0,0.021429,0.0,0.007143,0.0,0.947146
1,17031001013,0.550505,0.035354,0.015152,0.034091,0.0,0.364899,0.070796,0.017699,0.026549,...,0.00885,0.0,0.070796,0.0,0.0,0.0,0.00885,0.0,0.0,0.935644
2,17031001014,0.814134,0.050339,0.005808,0.092933,0.0,0.036786,0.088235,0.009804,0.058824,...,0.029412,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.918269
3,17031001015,0.693989,0.102914,0.002732,0.044627,0.0,0.155738,0.137681,0.014493,0.036232,...,0.0,0.0,0.101449,0.0,0.0,0.0,0.0,0.0,0.0,0.92926
4,17031001016,0.663158,0.023308,0.020301,0.076692,0.003759,0.212782,0.046358,0.019868,0.059603,...,0.013245,0.0,0.033113,0.0,0.0,0.039735,0.0,0.0,0.0,0.93758


### Clean Election Datasets

In [80]:
def clean_vtd(df):
    df = df.drop(columns=['STATEFP', 'DISTRICT'])
    df.rename(columns={'GEOID20': 'GEO_ID'}, inplace=True)
    df = df[df['COUNTYFP'] == 31] # Only keep Cook County
    df = df.drop(columns=['COUNTYFP'])
    return df

In [81]:
votes_df = clean_vtd(votes_raw_df)
votes_df = votes_df[:-1]

print('Votes data shape:', votes_df.shape)
votes_df.head()

Votes data shape: (3668, 12)


Unnamed: 0,GEO_ID,G20PREDBID,G20PRERTRU,G20PRELJOR,G20PREGHAW,G20PREACAR,G20PRESLAR,G20USSDDUR,G20USSRCUR,G20USSIWIL,G20USSLMAL,G20USSGBLA
534,17031001001,692.1,81.0,6.2,8.3,1.0,6.1,641.7,79.4,37.5,10.2,18.6
535,17031001002,540.0,53.0,5.0,7.0,0.0,6.0,514.0,53.0,25.0,6.0,12.0
536,17031001003,766.0,69.0,9.0,3.0,1.0,0.0,704.0,96.0,22.0,12.0,9.0
537,17031001004,733.0,95.0,8.0,3.0,1.0,2.0,673.0,107.0,21.0,13.0,14.0
538,17031001005,696.0,72.0,9.0,3.0,0.0,2.0,628.0,110.0,13.0,10.0,11.0


In [82]:
votes_percent_df = pd.DataFrame()
votes_percent_df['GEO_ID'] = votes_df['GEO_ID']
votes_percent_df['democrat_percent'] = votes_df['G20PREDBID'] / (votes_df['G20PREDBID'] + votes_df['G20PRERTRU'])
print('Votes percent data shape:', votes_percent_df.shape)
votes_percent_df.head()

Votes percent data shape: (3668, 2)


Unnamed: 0,GEO_ID,democrat_percent
534,17031001001,0.895227
535,17031001002,0.910624
536,17031001003,0.917365
537,17031001004,0.885266
538,17031001005,0.90625


### Merge Election Datasets with Census Datasets

In [83]:
merged_df = merge_on_geo_id(people_housing_percent_df, votes_percent_df)
merged_df = merged_df.dropna()
print('Merged data shape:', merged_df.shape)
merged_df.head()

Merged data shape: (392, 24)


Unnamed: 0,GEO_ID,White alone,Black or African American alone,American Indian and Alaska Native alone,Asian alone,Native Hawaiian and Other Pacific Islander alone,Some Other Race alone,White; Black or African American,Black or African American; American Indian and Alaska Native,White; American Indian and Alaska Native,...,Black or African American; Native Hawaiian and Other Pacific Islander,Black or African American; Some Other Race,American Indian and Alaska Native; Asian,American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander,American Indian and Alaska Native; Some Other Race,Asian; Native Hawaiian and Other Pacific Islander,Asian; Some Other Race,Native Hawaiian and Other Pacific Islander; Some Other Race,Occupied_percent,democrat_percent
0,17031001012,0.880625,0.026875,0.000625,0.04,0.0,0.051875,0.121429,0.007143,0.121429,...,0.0,0.0,0.007143,0.0,0.021429,0.0,0.007143,0.0,0.947146,0.842166
1,17031001013,0.550505,0.035354,0.015152,0.034091,0.0,0.364899,0.070796,0.017699,0.026549,...,0.0,0.070796,0.0,0.0,0.0,0.00885,0.0,0.0,0.935644,0.913545
2,17031001014,0.814134,0.050339,0.005808,0.092933,0.0,0.036786,0.088235,0.009804,0.058824,...,0.0,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.918269,0.902913
3,17031001015,0.693989,0.102914,0.002732,0.044627,0.0,0.155738,0.137681,0.014493,0.036232,...,0.0,0.101449,0.0,0.0,0.0,0.0,0.0,0.0,0.92926,0.855556
4,17031001016,0.663158,0.023308,0.020301,0.076692,0.003759,0.212782,0.046358,0.019868,0.059603,...,0.0,0.033113,0.0,0.0,0.039735,0.0,0.0,0.0,0.93758,0.89384


### Turnout

In [84]:
print('Turnout raw data shape:', turnout_raw_df.shape)
turnout = pd.DataFrame()
turnout = turnout_raw_df[turnout_raw_df['vtd_geoid20'].isin(IL7_GEOID)]
print('Turnout in IL7 data shape:', turnout.shape)
print("Number of registered voters: ", turnout['total_reg'].sum())
turnout = turnout.rename(columns={'vtd_geoid20': 'GEO_ID'})
turnout["Turnout"] = turnout["g20201103_voted_all"] / turnout["total_reg"]
print('Turnout mean:', turnout['Turnout'].mean())
turnout = turnout[['GEO_ID', 'Turnout']]
print('Turnout data shape:', turnout.shape)
turnout.head()

Turnout raw data shape: (10081, 65)
Turnout in IL7 data shape: (392, 65)
Number of registered voters:  352908
Turnout mean: 0.623442484513053
Turnout data shape: (392, 2)


Unnamed: 0,GEO_ID,Turnout
40,17031001024,0.583015
52,17031011001,0.630346
53,17031011013,0.66879
80,17031001023,0.748949
83,17031003021,0.809683


In [85]:
merged_df = merge_on_geo_id(merged_df, turnout)
merged_df = merged_df.dropna()
print('Merged turnout data shape:', merged_df.shape)
merged_df.to_csv('./Training Datasets/training_pres.csv', index=False)
print('Data saved to training_pres.csv')
merged_df.head()

Merged turnout data shape: (392, 25)
Data saved to training_pres.csv


Unnamed: 0,GEO_ID,White alone,Black or African American alone,American Indian and Alaska Native alone,Asian alone,Native Hawaiian and Other Pacific Islander alone,Some Other Race alone,White; Black or African American,Black or African American; American Indian and Alaska Native,White; American Indian and Alaska Native,...,Black or African American; Some Other Race,American Indian and Alaska Native; Asian,American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander,American Indian and Alaska Native; Some Other Race,Asian; Native Hawaiian and Other Pacific Islander,Asian; Some Other Race,Native Hawaiian and Other Pacific Islander; Some Other Race,Occupied_percent,democrat_percent,Turnout
0,17031001012,0.880625,0.026875,0.000625,0.04,0.0,0.051875,0.121429,0.007143,0.121429,...,0.0,0.007143,0.0,0.021429,0.0,0.007143,0.0,0.947146,0.842166,0.717904
1,17031001013,0.550505,0.035354,0.015152,0.034091,0.0,0.364899,0.070796,0.017699,0.026549,...,0.070796,0.0,0.0,0.0,0.00885,0.0,0.0,0.935644,0.913545,0.719925
2,17031001014,0.814134,0.050339,0.005808,0.092933,0.0,0.036786,0.088235,0.009804,0.058824,...,0.029412,0.0,0.0,0.0,0.0,0.0,0.0,0.918269,0.902913,0.762898
3,17031001015,0.693989,0.102914,0.002732,0.044627,0.0,0.155738,0.137681,0.014493,0.036232,...,0.101449,0.0,0.0,0.0,0.0,0.0,0.0,0.92926,0.855556,0.71079
4,17031001016,0.663158,0.023308,0.020301,0.076692,0.003759,0.212782,0.046358,0.019868,0.059603,...,0.033113,0.0,0.0,0.039735,0.0,0.0,0.0,0.93758,0.89384,0.782276


## Analysis using different Machine Learning Models


#### Analysis of the PCA components

In [86]:
import numpy as np
import pandas as pd

components = pca.components_

explained_variance = pca.explained_variance_ratio_

pca_df = pd.DataFrame(components, columns=merged_df.drop(columns=['GEO_ID', 'democrat_percent']).columns)
pca_df['explained_variance'] = explained_variance

pca_df = pca_df.sort_values(by='explained_variance', ascending=False)

print(pca_df.head())

important_vars = {}
for i, component in enumerate(components):
    important_vars[f'PC{i+1}'] = merged_df.drop(columns=['GEO_ID', 'democrat_percent']).columns[np.argmax(np.abs(component))]

print("Important variables for each principal component:")
for pc, var in important_vars.items():
    print(f"{pc}: {var}")

ValueError: Shape of passed values is (5, 22), indices imply (5, 23)

### Linear Regression

In [None]:
# White alone                                                                  
# Black or African American alone                                              
# American Indian and Alaska Native alone                                      
# Asian alone                                                                  
# Native Hawaiian and Other Pacific Islander alone                             
# Some Other Race alone                                                        
# White; Black or African American                                             
# Black or African American; American Indian and Alaska Native                 
# White; American Indian and Alaska Native                                     
# White; Asian                                                                 
# White; Native Hawaiian and Other Pacific Islander                            
# White; Some Other Race                                                       
# Black or African American; Asian                                             
# Black or African American; Native Hawaiian and Other Pacific Islander        
# Black or African American; Some Other Race                                   
# American Indian and Alaska Native; Asian                                     
# American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander
# American Indian and Alaska Native; Some Other Race                           
# Asian; Native Hawaiian and Other Pacific Islander                            
# Asian; Some Other Race                                                       
# Native Hawaiian and Other Pacific Islander; Some Other Race                  

new_data_point = [[0.03, 0.74, 0.01, 0.0, 0.0, 0.21, 0.22, 0.0, 0.0, 0.07, 0.0, 0.63, 0.03, 0.0, 0.03, 0, 0, 0, 0, 0, 0, 0.9]]

feature_names = ['White alone', 'Black or African American alone', 'American Indian and Alaska Native alone', 
                 'Asian alone', 'Native Hawaiian and Other Pacific Islander alone', 'Some Other Race alone', 
                 'White; Black or African American', 'Black or African American; American Indian and Alaska Native', 
                 'White; American Indian and Alaska Native', 'White; Asian', 
                 'White; Native Hawaiian and Other Pacific Islander', 'White; Some Other Race', 
                 'Black or African American; Asian', 'Black or African American; Native Hawaiian and Other Pacific Islander', 
                 'Black or African American; Some Other Race', 'American Indian and Alaska Native; Asian', 
                 'American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander', 
                 'American Indian and Alaska Native; Some Other Race', 'Asian; Native Hawaiian and Other Pacific Islander', 
                 'Asian; Some Other Race', 'Native Hawaiian and Other Pacific Islander; Some Other Race', 
                 'Occupied_percent']

new_data_point_df = pd.DataFrame(new_data_point, columns=feature_names)

prediction = model.predict(new_data_point)

print(f'Predicted democrat_percent: {prediction[0]}')

Predicted democrat_percent: 0.961344218160662


