In [1]:
import pandas as pd
import datetime as dt
df = "Resources/us-counties.csv"
df1 = pd.read_csv(df)
file = "Resources/county_mask_mandate_data.xlsx"
df2 = pd.read_excel(file)

## Read in Both Datasets

In [2]:
df1.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0.0
1,2020-01-22,Snohomish,Washington,53061.0,1,0.0
2,2020-01-23,Snohomish,Washington,53061.0,1,0.0
3,2020-01-24,Cook,Illinois,17031.0,1,0.0
4,2020-01-24,Snohomish,Washington,53061.0,1,0.0


In [3]:
#Shows the data types for the df1
df1.dtypes

date       object
county     object
state      object
fips      float64
cases       int64
deaths    float64
dtype: object

In [4]:
# count the number of rows with empty columns
count = df1['fips'].isna().sum()
print(count)

19744


In [5]:
#count the number of rows with data in each column
df1.count()

date      2135147
county    2135147
state     2135147
fips      2115403
cases     2135147
deaths    2086356
dtype: int64

In [6]:
# Drop the rows with blank data creating a new dataframe
df1_clean = df1.dropna()

In [7]:
# count the number of rows with data in each column from the clean dataframe
df1_clean.count()

date      2066612
county    2066612
state     2066612
fips      2066612
cases     2066612
deaths    2066612
dtype: int64

In [8]:
#change fips from float64 to int64 to match datatype of df1_clean "county_fips"
df1_clean['fips'].astype('int64')

0          53061
1          53061
2          53061
3          17031
4          53061
           ...  
2135142    56037
2135143    56039
2135144    56041
2135145    56043
2135146    56045
Name: fips, Length: 2066612, dtype: int64

In [9]:
# change deaths from float64 to int64 to match datatype of df1_clean "deaths"
df1_clean['deaths'].astype('int64')

0            0
1            0
2            0
3            0
4            0
          ... 
2135142    114
2135143     14
2135144     34
2135145     41
2135146     16
Name: deaths, Length: 2066612, dtype: int64

In [10]:
df1_clean['cases'].astype('int64')

0             1
1             1
2             1
3             1
4             1
           ... 
2135142    9241
2135143    8741
2135144    4827
2135145    2022
2135146    1341
Name: cases, Length: 2066612, dtype: int64

In [11]:
# Group by county to get the total_cases for each county, read the last day of 2020 as the total number of cases and deaths
df1_total = df1_clean.loc[df1_clean['date'] == "2020-12-31"]
df1_total.head()

Unnamed: 0,date,county,state,fips,cases,deaths
881492,2020-12-31,Autauga,Alabama,1001.0,4190,48.0
881493,2020-12-31,Baldwin,Alabama,1003.0,13601,161.0
881494,2020-12-31,Barbour,Alabama,1005.0,1514,32.0
881495,2020-12-31,Bibb,Alabama,1007.0,1834,46.0
881496,2020-12-31,Blount,Alabama,1009.0,4641,63.0


In [12]:
# Drop duplicates so only county shows once
df1_total = df1_total.drop_duplicates(subset=["fips"])
df1_total.head()

Unnamed: 0,date,county,state,fips,cases,deaths
881492,2020-12-31,Autauga,Alabama,1001.0,4190,48.0
881493,2020-12-31,Baldwin,Alabama,1003.0,13601,161.0
881494,2020-12-31,Barbour,Alabama,1005.0,1514,32.0
881495,2020-12-31,Bibb,Alabama,1007.0,1834,46.0
881496,2020-12-31,Blount,Alabama,1009.0,4641,63.0


In [13]:
df1_total.describe

<bound method NDFrame.describe of               date      county    state     fips  cases  deaths
881492  2020-12-31     Autauga  Alabama   1001.0   4190    48.0
881493  2020-12-31     Baldwin  Alabama   1003.0  13601   161.0
881494  2020-12-31     Barbour  Alabama   1005.0   1514    32.0
881495  2020-12-31        Bibb  Alabama   1007.0   1834    46.0
881496  2020-12-31      Blount  Alabama   1009.0   4641    63.0
...            ...         ...      ...      ...    ...     ...
884732  2020-12-31  Sweetwater  Wyoming  56037.0   2966    16.0
884733  2020-12-31       Teton  Wyoming  56039.0   2138     4.0
884734  2020-12-31       Uinta  Wyoming  56041.0   1558     7.0
884735  2020-12-31    Washakie  Wyoming  56043.0    780    19.0
884736  2020-12-31      Weston  Wyoming  56045.0    476     2.0

[3140 rows x 6 columns]>

## DataSet 2 Mask Mandate

In [14]:
df2.head()

Unnamed: 0,state_fips,state_name,county_fips,county_name,county_start_date,county_end_date,county_conditions,county_source,escalation,defiance,county_start_edate,state_start_date,state_end_date,state_conditions,state_source,state_start_edate,earliest_start_edate,county_fips_string,earliest_start_date
0,1,ALABAMA,1067,Henry County,NaT,,,,,,,07/16/2020,07/31/2020,all public places,https://www.alabamapublichealth.gov/legal/asse...,22112.0,22112.0,1067,7-16-2020
1,1,ALABAMA,1071,Jackson County,2020-07-01,,all public places,https://www.examiner.net/news/20200629/jackson...,,,22097.0,07/16/2020,07/31/2020,all public places,https://www.alabamapublichealth.gov/legal/asse...,22112.0,22097.0,1071,7-1-2020
2,1,ALABAMA,1093,Marion County,2020-07-09,,retail only,https://fox59.com/news/coronavirus/starting-to...,,,22105.0,07/16/2020,07/31/2020,all public places,https://www.alabamapublichealth.gov/legal/asse...,22112.0,22105.0,1093,7-9-2020
3,1,ALABAMA,1073,Jefferson County,2020-06-29,,all public places,https://www.wbrc.com/2020/06/29/jefferson-coun...,,,22095.0,07/16/2020,07/31/2020,all public places,https://www.alabamapublichealth.gov/legal/asse...,22112.0,22095.0,1073,6-29-2020
4,1,ALABAMA,1099,Monroe County,NaT,,,,,,,07/16/2020,07/31/2020,all public places,https://www.alabamapublichealth.gov/legal/asse...,22112.0,22112.0,1099,7-16-2020


In [15]:
df2.dtypes

state_fips                       int64
state_name                      object
county_fips                      int64
county_name                     object
county_start_date       datetime64[ns]
county_end_date                 object
county_conditions               object
county_source                   object
escalation                      object
defiance                        object
county_start_edate             float64
state_start_date                object
state_end_date                  object
state_conditions                object
state_source                    object
state_start_edate              float64
earliest_start_edate           float64
county_fips_string               int64
earliest_start_date             object
dtype: object

In [16]:
df2.count()

state_fips              3150
state_name              3150
county_fips             3150
county_name             3149
county_start_date       1500
county_end_date          137
county_conditions       1377
county_source           1778
escalation                34
defiance                 203
county_start_edate      1500
state_start_date        1652
state_end_date           504
state_conditions        2045
state_source            3150
state_start_edate       1652
earliest_start_edate    2103
county_fips_string      3150
earliest_start_date     2103
dtype: int64

In [17]:
# Drop the unneeded columns 
df2_clean1 = df2.drop(['escalation', 'defiance', 'county_conditions', 'county_source', 'county_start_edate', 'state_start_edate', 'earliest_start_edate', 'county_fips_string', 'state_conditions', 'state_source'], axis=1)

df2_clean1.head()

Unnamed: 0,state_fips,state_name,county_fips,county_name,county_start_date,county_end_date,state_start_date,state_end_date,earliest_start_date
0,1,ALABAMA,1067,Henry County,NaT,,07/16/2020,07/31/2020,7-16-2020
1,1,ALABAMA,1071,Jackson County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020
2,1,ALABAMA,1093,Marion County,2020-07-09,,07/16/2020,07/31/2020,7-9-2020
3,1,ALABAMA,1073,Jefferson County,2020-06-29,,07/16/2020,07/31/2020,6-29-2020
4,1,ALABAMA,1099,Monroe County,NaT,,07/16/2020,07/31/2020,7-16-2020


In [18]:
# Create our mask_mandate column with binary values Yes or No
no_mandate_df = df2_clean1[df2_clean1.county_start_date.isnull()]
mandate_df = df2_clean1[df2_clean1.county_start_date.notnull()]

In [19]:
# Create mask_mandate column
no_mandate_df["mask_mandate"] = "0"
mandate_df["mask_mandate"] = "1"
merged_df = no_mandate_df.append(mandate_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  no_mandate_df["mask_mandate"] = "0"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mandate_df["mask_mandate"] = "1"


Show duration of mask mandate in 2020 only

In [20]:
# Create a null and not null dataset based on county start date
duration_df = merged_df[merged_df.county_start_date.notnull()]
duration_df1 = merged_df[merged_df.county_start_date.isnull()]
# Create a column that keeps up with month started 
duration_df['month_started'] = pd.DatetimeIndex(duration_df['county_start_date']).month
duration_df1['month_started'] = "0"


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  duration_df['month_started'] = pd.DatetimeIndex(duration_df['county_start_date']).month
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  duration_df1['month_started'] = "0"


In [21]:
# Create a new column that is the difference between month started and total months in a year
duration_df["mandate_duration_months"] = (12 - duration_df.month_started)
duration_df1["mandate_duration_months"] = 0
final_df = duration_df.append(duration_df1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  duration_df["mandate_duration_months"] = (12 - duration_df.month_started)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  duration_df1["mandate_duration_months"] = 0


In [22]:
final_df

Unnamed: 0,state_fips,state_name,county_fips,county_name,county_start_date,county_end_date,state_start_date,state_end_date,earliest_start_date,mask_mandate,month_started,mandate_duration_months
1,1,ALABAMA,1071,Jackson County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,7,5
2,1,ALABAMA,1093,Marion County,2020-07-09,,07/16/2020,07/31/2020,7-9-2020,1,7,5
3,1,ALABAMA,1073,Jefferson County,2020-06-29,,07/16/2020,07/31/2020,6-29-2020,1,6,6
10,1,ALABAMA,1063,Greene County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,7,5
23,1,ALABAMA,1089,Madison County,2020-07-07,,07/16/2020,07/31/2020,7-7-2020,1,7,5
...,...,...,...,...,...,...,...,...,...,...,...,...
3144,56,WYOMING,56011,Crook County,NaT,,,,,0,0,0
3145,56,WYOMING,56027,Niobrara County,NaT,,,,,0,0,0
3146,56,WYOMING,56015,Goshen County,NaT,,,,,0,0,0
3147,56,WYOMING,56029,Park County,NaT,,,,,0,0,0


Attempting Outer Merge on FIPS number

In [23]:
final_df_fips = final_df.rename(columns={'county_fips': 'fips'})

final_df_fips.dtypes

state_fips                          int64
state_name                         object
fips                                int64
county_name                        object
county_start_date          datetime64[ns]
county_end_date                    object
state_start_date                   object
state_end_date                     object
earliest_start_date                object
mask_mandate                       object
month_started                      object
mandate_duration_months             int64
dtype: object

In [24]:
final_df_fips.duplicated(subset='fips')

1       False
2       False
3       False
10      False
23      False
        ...  
3144    False
3145    False
3146    False
3147    False
3149    False
Length: 3150, dtype: bool

In [25]:
final_df_fips.dtypes

state_fips                          int64
state_name                         object
fips                                int64
county_name                        object
county_start_date          datetime64[ns]
county_end_date                    object
state_start_date                   object
state_end_date                     object
earliest_start_date                object
mask_mandate                       object
month_started                      object
mandate_duration_months             int64
dtype: object

In [26]:
df1_total.dtypes

date       object
county     object
state      object
fips      float64
cases       int64
deaths    float64
dtype: object

In [27]:
df1_total.astype({'fips': 'int64'}).dtypes

date       object
county     object
state      object
fips        int64
cases       int64
deaths    float64
dtype: object

In [28]:
final_df.head

<bound method NDFrame.head of       state_fips state_name  county_fips            county_name  \
1              1  ALABAMA           1071     Jackson County       
2              1  ALABAMA           1093      Marion County       
3              1  ALABAMA           1073   Jefferson County       
10             1  ALABAMA           1063      Greene County       
23             1  ALABAMA           1089     Madison County       
...          ...        ...          ...                    ...   
3144          56  WYOMING          56011       Crook County       
3145          56  WYOMING          56027    Niobrara County       
3146          56  WYOMING          56015      Goshen County       
3147          56  WYOMING          56029        Park County       
3149          56  WYOMING          56017  Hot Springs County      

     county_start_date county_end_date state_start_date state_end_date  \
1           2020-07-01             NaN       07/16/2020     07/31/2020   
2           2020-

In [29]:
## full_merged_df = final_df_fips.join(df1_total, on='fips')

full_merged_df = pd.merge(final_df_fips, df1_total, on='fips', how='outer')
full_merged_df


Unnamed: 0,state_fips,state_name,fips,county_name,county_start_date,county_end_date,state_start_date,state_end_date,earliest_start_date,mask_mandate,month_started,mandate_duration_months,date,county,state,cases,deaths
0,1.0,ALABAMA,1071.0,Jackson County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,7,5.0,2020-12-31,Jackson,Alabama,5097.0,34.0
1,1.0,ALABAMA,1093.0,Marion County,2020-07-09,,07/16/2020,07/31/2020,7-9-2020,1,7,5.0,2020-12-31,Marion,Alabama,2087.0,43.0
2,1.0,ALABAMA,1073.0,Jefferson County,2020-06-29,,07/16/2020,07/31/2020,6-29-2020,1,6,6.0,2020-12-31,Jefferson,Alabama,52339.0,697.0
3,1.0,ALABAMA,1063.0,Greene County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,7,5.0,2020-12-31,Greene,Alabama,672.0,20.0
4,1.0,ALABAMA,1089.0,Madison County,2020-07-07,,07/16/2020,07/31/2020,7-7-2020,1,7,5.0,2020-12-31,Madison,Alabama,22197.0,178.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3160,,,69120.0,,NaT,,,,,,,,2020-12-31,Tinian,Northern Mariana Islands,2.0,0.0
3161,,,46102.0,,NaT,,,,,,,,2020-12-31,Oglala Lakota,South Dakota,1927.0,35.0
3162,,,78010.0,,NaT,,,,,,,,2020-12-31,St. Croix,Virgin Islands,829.0,7.0
3163,,,78020.0,,NaT,,,,,,,,2020-12-31,St. John,Virgin Islands,174.0,1.0


In [30]:
##full_merged_df.to_csv('full_merged_df.csv', index=False)

Joining Full Df with Population csv

In [31]:
population_df = pd.read_csv('POPULATION_TEST.csv')
population_df.head()


Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,FIPS,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,...,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE042020,POPESTIMATE2020
0,40,3,6,1,0,1000,Alabama,Alabama,4779736,4780118,...,4816632,4831586,4843737,4854803,4866824,4877989,4891628,4907965,4920706,4921532
1,50,3,6,1,1,1001,Alabama,Autauga County,54571,54582,...,54970,54747,54922,54903,55302,55448,55533,55769,56130,56145
2,50,3,6,1,3,1003,Alabama,Baldwin County,182265,182263,...,190203,194978,199306,203101,207787,212737,218071,223565,227989,229287
3,50,3,6,1,5,1005,Alabama,Barbour County,27457,27454,...,27172,26946,26768,26300,25828,25169,24887,24657,24652,24589
4,50,3,6,1,7,1007,Alabama,Bibb County,22915,22904,...,22657,22510,22541,22553,22590,22532,22300,22313,22199,22136


In [32]:
population_df = population_df.rename(columns={'FIPS': 'fips'})

In [33]:
##NEED TO ENABLE THIS MERGE
##full_merged_df['fips'].isin(population_df['fips']).value_counts()

In [34]:
## Join the population df with the merged_final for a mega_dataframe

mega_merged_df = pd.merge(full_merged_df, population_df, on='fips', how='outer')



In [35]:
mega_merged_df.head()

Unnamed: 0,state_fips,state_name,fips,county_name,county_start_date,county_end_date,state_start_date,state_end_date,earliest_start_date,mask_mandate,...,POPESTIMATE2012,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE042020,POPESTIMATE2020
0,1.0,ALABAMA,1071.0,Jackson County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,...,53107.0,52986.0,52592.0,52229.0,52058.0,51865.0,51649.0,51672.0,51642.0,51582.0
1,1.0,ALABAMA,1093.0,Marion County,2020-07-09,,07/16/2020,07/31/2020,7-9-2020,1,...,30495.0,30236.0,30215.0,30137.0,29958.0,29825.0,29776.0,29826.0,29749.0,29703.0
2,1.0,ALABAMA,1073.0,Jefferson County,2020-06-29,,07/16/2020,07/31/2020,6-29-2020,1,...,658183.0,659434.0,660185.0,660717.0,660507.0,659892.0,659672.0,658539.0,656529.0,655342.0
3,1.0,ALABAMA,1063.0,Greene County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,...,8849.0,8749.0,8585.0,8509.0,8485.0,8312.0,8212.0,8104.0,8034.0,7990.0
4,1.0,ALABAMA,1089.0,Madison County,2020-07-07,,07/16/2020,07/31/2020,7-7-2020,1,...,342823.0,346764.0,349973.0,353194.0,356985.0,362061.0,367386.0,372547.0,377894.0,379453.0


In [36]:
mega_merged_df.to_csv('mega_merged_df.csv')

## Join ELECTION CSV

In [37]:
# Read in election csv
csv =  "Resources/elec_results_2020.csv"
elec_df = pd.read_csv(csv)
elec_df.head()

Unnamed: 0,state_fips,state_name,red_blue_2020
0,1,ALABAMA,1
1,2,ALASKA,1
2,4,ARIZONA,0
3,5,ARKANSAS,1
4,6,CALIFORNIA,0


In [38]:
elec_df.state_name.dtype

dtype('O')

In [39]:
# Turn dataframe into a dictionary
election_series = elec_df[["state_name", "red_blue_2020"]]
key_value = election_series.set_index('state_name')['red_blue_2020'].to_dict()
print(key_value)

{'ALABAMA': 1, 'ALASKA': 1, 'ARIZONA': 0, 'ARKANSAS': 1, 'CALIFORNIA': 0, 'COLORADO': 0, 'CONNECTICUT': 0, 'DELAWARE': 0, 'DISTRICT OF COLUMBIA': 0, 'FLORIDA': 1, 'GEORGIA': 0, 'HAWAII': 0, 'IDAHO': 1, 'ILLINOIS': 0, 'INDIANA': 1, 'IOWA': 1, 'KANSAS': 1, 'KENTUCKY': 1, 'LOUISIANA': 1, 'MAINE': 0, 'MARYLAND': 0, 'MASSACHUSETTS': 0, 'MICHIGAN': 0, 'MINNESOTA': 0, 'MISSISSIPPI': 1, 'MISSOURI': 1, 'MONTANA': 1, 'NEBRASKA': 1, 'NEVADA': 0, 'NEW HAMPSHIRE': 0, 'NEW JERSEY': 0, 'NEW MEXICO': 0, 'NEW YORK': 0, 'NORTH CAROLINA': 1, 'NORTH DAKOTA': 1, 'OHIO': 1, 'OKLAHOMA': 1, 'OREGON': 0, 'PENNSYLVANIA': 0, 'RHODE ISLAND': 0, 'SOUTH CAROLINA': 1, 'SOUTH DAKOTA': 1, 'TENNESSEE': 1, 'TEXAS': 1, 'UTAH': 1, 'VERMONT': 0, 'VIRGINIA': 0, 'WASHINGTON': 0, 'WEST VIRGINIA': 1, 'WISCONSIN': 0, 'WYOMING': 1}


In [40]:
# Map the value of dictionary to mega_merged_df 1 for Rep 0 for Dem
mega_merged_df["elec_result"] = mega_merged_df["state_name"].map(key_value)

In [41]:
# Show merged_df with elec results
mega_merged_df.head()

Unnamed: 0,state_fips,state_name,fips,county_name,county_start_date,county_end_date,state_start_date,state_end_date,earliest_start_date,mask_mandate,...,POPESTIMATE2013,POPESTIMATE2014,POPESTIMATE2015,POPESTIMATE2016,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE042020,POPESTIMATE2020,elec_result
0,1.0,ALABAMA,1071.0,Jackson County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,...,52986.0,52592.0,52229.0,52058.0,51865.0,51649.0,51672.0,51642.0,51582.0,
1,1.0,ALABAMA,1093.0,Marion County,2020-07-09,,07/16/2020,07/31/2020,7-9-2020,1,...,30236.0,30215.0,30137.0,29958.0,29825.0,29776.0,29826.0,29749.0,29703.0,
2,1.0,ALABAMA,1073.0,Jefferson County,2020-06-29,,07/16/2020,07/31/2020,6-29-2020,1,...,659434.0,660185.0,660717.0,660507.0,659892.0,659672.0,658539.0,656529.0,655342.0,
3,1.0,ALABAMA,1063.0,Greene County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,...,8749.0,8585.0,8509.0,8485.0,8312.0,8212.0,8104.0,8034.0,7990.0,
4,1.0,ALABAMA,1089.0,Madison County,2020-07-07,,07/16/2020,07/31/2020,7-7-2020,1,...,346764.0,349973.0,353194.0,356985.0,362061.0,367386.0,372547.0,377894.0,379453.0,


In [42]:
# Split the county name into two columns NEEDS SOME WORK BECAUSE THERE ARE ARE SOME WITH MORE THAN ONE WORD AND END IN OTHER THINGS THAN COUNTY (BOROUGH, PARISH, CITY, CENSUS AREA)
# may not be needed if using FIP as the identifier

#df2_clean1['county'] = df2_clean1['county_name'].str.split(' ', expand=True)[0]
#df2_clean1['name'] = df2_clean1['county_name'].str.split(' ', expand=True)[1]

#df2_clean1.head()

Merge Lat Long for Tableau

In [43]:
file = "Resources/uscounties_lat_long.csv"
dfe = pd.read_csv(file)

dfe.head()

Unnamed: 0,county_name,fips,state_name,lat,lng
0,Los Angeles,6037,California,34.3207,-118.2248
1,Cook,17031,Illinois,41.8401,-87.8168
2,Harris,48201,Texas,29.8577,-95.3936
3,Maricopa,4013,Arizona,33.349,-112.4915
4,San Diego,6073,California,33.0341,-116.7353


In [46]:
merged_latlong = pd.merge(mega_merged_df, dfe, how="left", on=["fips", "fips"])
merged_latlong.head()

Unnamed: 0,state_fips,state_name_x,fips,county_name_x,county_start_date,county_end_date,state_start_date,state_end_date,earliest_start_date,mask_mandate,...,POPESTIMATE2017,POPESTIMATE2018,POPESTIMATE2019,POPESTIMATE042020,POPESTIMATE2020,elec_result,county_name_y,state_name_y,lat,lng
0,1.0,ALABAMA,1071.0,Jackson County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,...,51865.0,51649.0,51672.0,51642.0,51582.0,,Jackson,Alabama,34.7795,-85.9994
1,1.0,ALABAMA,1093.0,Marion County,2020-07-09,,07/16/2020,07/31/2020,7-9-2020,1,...,29825.0,29776.0,29826.0,29749.0,29703.0,,Marion,Alabama,34.1366,-87.8871
2,1.0,ALABAMA,1073.0,Jefferson County,2020-06-29,,07/16/2020,07/31/2020,6-29-2020,1,...,659892.0,659672.0,658539.0,656529.0,655342.0,,Jefferson,Alabama,33.5543,-86.8964
3,1.0,ALABAMA,1063.0,Greene County,2020-07-01,,07/16/2020,07/31/2020,7-1-2020,1,...,8312.0,8212.0,8104.0,8034.0,7990.0,,Greene,Alabama,32.8531,-87.9522
4,1.0,ALABAMA,1089.0,Madison County,2020-07-07,,07/16/2020,07/31/2020,7-7-2020,1,...,362061.0,367386.0,372547.0,377894.0,379453.0,,Madison,Alabama,34.7631,-86.5502


In [None]:
merged_latlong.to_csv('Resources/tableau_data.csv', index=False)

In [45]:
#df2_clean2.count()