In [1]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf

# Read final_database.csv into a DataFrame.
# (pandas is already imported as `pd` at the top of this cell.)
covid_df = pd.read_csv("final_database.csv")

# A notebook cell renders only its last expression, so report the
# (rows, columns) shape here as a quick sanity check of the load.
covid_df.shape

(16649, 28)

In [2]:
# Remove the columns that are not beneficial here: the row id, both state
# columns, the county and the date.
unused_columns = ['id', 'state.1', 'state', 'county', 'date']
covid_df = covid_df.drop(columns=unused_columns)
covid_df.head()

Unnamed: 0,fips,cases,deaths,stay_at_home_announced,stay_at_home_effective,total_population,population_density_per_sqmi,percent_single_parent_households_CHR,percent_adults_with_diabetes,percent_food_insecure,...,percent_age_65_and_older,percent_age_17_and_younger,percent_disabled,percent_multi_unit_housing,percent_fair_or_poor_health,percent_not_proficient_in_english,percent_female,percent_rural,percent_minorities,Winning_party
0,53061,1,0,no,no,758649,363.586159,25.4,8.2,10.3,...,12.1,23.2,11.9,14.6,14.4,3.5,49.8,10.8,28.0,Democrat Win
1,53061,1,0,no,no,758649,363.586159,25.4,8.2,10.3,...,12.1,23.2,11.9,14.6,14.4,3.5,49.8,10.8,28.0,Democrat Win
2,53061,1,0,no,no,758649,363.586159,25.4,8.2,10.3,...,12.1,23.2,11.9,14.6,14.4,3.5,49.8,10.8,28.0,Democrat Win
3,17031,1,0,no,no,5227575,5531.878539,37.2,9.5,12.0,...,13.0,22.7,10.3,23.0,17.1,7.0,51.4,0.0,57.1,Democrat Win
4,53061,1,0,no,no,758649,363.586159,25.4,8.2,10.3,...,12.1,23.2,11.9,14.6,14.4,3.5,49.8,10.8,28.0,Democrat Win


In [3]:
# Count the distinct values held by each column of covid_df and show
# the resulting per-column Series.
unique_counts = covid_df.nunique()
unique_counts

fips                                    1805
cases                                    440
deaths                                    48
stay_at_home_announced                     2
stay_at_home_effective                     2
total_population                        1795
population_density_per_sqmi             1805
percent_single_parent_households_CHR     426
percent_adults_with_diabetes             205
percent_food_insecure                    212
percent_insufficient_sleep               200
percent_unemployed_CDC                   162
percent_no_highschool_diploma            274
percent_age_65_and_older                 217
percent_age_17_and_younger               186
percent_disabled                         203
percent_multi_unit_housing               229
percent_fair_or_poor_health              212
percent_not_proficient_in_english        124
percent_female                           113
percent_rural                            747
percent_minorities                       595
Winning_pa

In [4]:
# Collect, in column order, the names of every column whose dtype is
# "object"; these are the categorical variables to encode.
covid_cat = [
    column_name
    for column_name, column_dtype in covid_df.dtypes.items()
    if column_dtype == "object"
]
covid_cat

['stay_at_home_announced', 'stay_at_home_effective', 'Winning_party']

In [5]:
# Create a OneHotEncoder instance that returns a dense array.
# scikit-learn 1.2 renamed the `sparse` keyword to `sparse_output` and 1.4
# removed the old spelling, so try the current name first and fall back to
# the legacy one on older installations.
try:
    enc = OneHotEncoder(sparse_output=False)
except TypeError:
    enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list.
# Reusing covid_df's index keeps every encoded row aligned with its source
# row when the two frames are merged on index afterwards.
encode_df = pd.DataFrame(
    enc.fit_transform(covid_df[covid_cat]),
    index=covid_df.index,
)

# Add the encoded variable names to the dataframe.
# `get_feature_names` was removed in scikit-learn 1.2 in favour of
# `get_feature_names_out`; use whichever this installation provides.
if hasattr(enc, "get_feature_names_out"):
    encode_df.columns = enc.get_feature_names_out(covid_cat)
else:
    encode_df.columns = enc.get_feature_names(covid_cat)
encode_df.head()

Unnamed: 0,stay_at_home_announced_no,stay_at_home_announced_yes,stay_at_home_effective_no,stay_at_home_effective_yes,Winning_party_Democrat Win,Winning_party_Republican Win
0,1.0,0.0,1.0,0.0,1.0,0.0
1,1.0,0.0,1.0,0.0,1.0,0.0
2,1.0,0.0,1.0,0.0,1.0,0.0
3,1.0,0.0,1.0,0.0,1.0,0.0
4,1.0,0.0,1.0,0.0,1.0,0.0


In [6]:
# Merge the one-hot encoded features onto the original rows (matched by
# index), then drop the raw categorical columns they replace.
covid_df = covid_df.merge(encode_df, left_index=True, right_index=True)
# Name the axis through `columns=`: passing it positionally, as in
# `drop(cols, 1)`, is rejected by pandas 2.0 and later.
covid_df = covid_df.drop(columns=covid_cat)

covid_df.head()

Unnamed: 0,fips,cases,deaths,total_population,population_density_per_sqmi,percent_single_parent_households_CHR,percent_adults_with_diabetes,percent_food_insecure,percent_insufficient_sleep,percent_unemployed_CDC,...,percent_not_proficient_in_english,percent_female,percent_rural,percent_minorities,stay_at_home_announced_no,stay_at_home_announced_yes,stay_at_home_effective_no,stay_at_home_effective_yes,Winning_party_Democrat Win,Winning_party_Republican Win
0,53061,1,0,758649,363.586159,25.4,8.2,10.3,34.8,6.2,...,3.5,49.8,10.8,28.0,1.0,0.0,1.0,0.0,1.0,0.0
1,53061,1,0,758649,363.586159,25.4,8.2,10.3,34.8,6.2,...,3.5,49.8,10.8,28.0,1.0,0.0,1.0,0.0,1.0,0.0
2,53061,1,0,758649,363.586159,25.4,8.2,10.3,34.8,6.2,...,3.5,49.8,10.8,28.0,1.0,0.0,1.0,0.0,1.0,0.0
3,17031,1,0,5227575,5531.878539,37.2,9.5,12.0,33.4,9.7,...,7.0,51.4,0.0,57.1,1.0,0.0,1.0,0.0,1.0,0.0
4,53061,1,0,758649,363.586159,25.4,8.2,10.3,34.8,6.2,...,3.5,49.8,10.8,28.0,1.0,0.0,1.0,0.0,1.0,0.0
