# **Environment Setup**

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import datetime
import re
import numpy as np

# **Data Clean-up**

In [3]:
# read in the data
vacc = pd.read_csv('/content/drive/MyDrive/COVID-19_Vaccinations_in_the_United_States_County.csv')

In [4]:
# check the data
print(vacc.shape)
vacc.head(10)

(1621285, 66)


Unnamed: 0,Date,FIPS,MMWR_week,Recip_County,Recip_State,Completeness_pct,Administered_Dose1_Recip,Administered_Dose1_Pop_Pct,Administered_Dose1_Recip_5Plus,Administered_Dose1_Recip_5PlusPop_Pct,...,Booster_Doses_Vax_Pct_UR_Equity,Booster_Doses_12PlusVax_Pct_UR_Equity,Booster_Doses_18PlusVax_Pct_UR_Equity,Booster_Doses_65PlusVax_Pct_UR_Equity,Census2019,Census2019_5PlusPop,Census2019_5to17Pop,Census2019_12PlusPop,Census2019_18PlusPop,Census2019_65PlusPop
0,04/20/2022,53033,16,King County,WA,96.0,2000651.0,88.8,1999897.0,94.1,...,4.0,4.0,4.0,4.0,2252782.0,2125383.0,324217.0,1946808.0,1801166.0,304315.0
1,04/20/2022,55023,16,Crawford County,WI,96.8,10011.0,62.1,10011.0,65.2,...,8.0,8.0,8.0,8.0,16131.0,15362.0,2464.0,14166.0,12898.0,3867.0
2,04/20/2022,12131,16,Walton County,FL,98.7,43580.0,58.8,43570.0,62.3,...,2.0,2.0,3.0,2.0,74071.0,69930.0,10943.0,63887.0,58987.0,14977.0
3,04/20/2022,13001,16,Appling County,GA,89.5,7558.0,41.1,7558.0,43.9,...,6.0,6.0,6.0,6.0,18386.0,17209.0,3336.0,15376.0,13873.0,3257.0
4,04/20/2022,13035,16,Butts County,GA,89.5,9891.0,39.7,9891.0,42.0,...,2.0,2.0,2.0,2.0,24936.0,23565.0,3732.0,21552.0,19833.0,3942.0
5,04/20/2022,13105,16,Elbert County,GA,89.5,9105.0,47.4,9105.0,50.4,...,6.0,6.0,6.0,6.0,19194.0,18075.0,3041.0,16369.0,15034.0,4087.0
6,04/20/2022,13239,16,Quitman County,GA,89.5,915.0,39.8,915.0,42.3,...,6.0,6.0,6.0,5.0,2299.0,2165.0,296.0,2022.0,1869.0,705.0
7,04/20/2022,13275,16,Thomas County,GA,89.5,24803.0,55.8,24802.0,59.5,...,6.0,7.0,7.0,6.0,44451.0,41675.0,7743.0,37494.0,33932.0,8304.0
8,04/20/2022,13301,16,Warren County,GA,89.5,2861.0,54.5,2861.0,57.4,...,6.0,6.0,7.0,6.0,5254.0,4981.0,785.0,4590.0,4196.0,1216.0
9,04/20/2022,13303,16,Washington County,GA,89.5,10194.0,50.0,10193.0,53.0,...,6.0,6.0,7.0,6.0,20374.0,19231.0,3254.0,17480.0,15977.0,3627.0


In [5]:
# delete rows with county == unknown county
clean_vacc = vacc[vacc.Recip_County != 'Unknown County']
clean_vacc.shape

(1592656, 66)

In [6]:
# delete rows with Date == null
clean_vacc = clean_vacc[clean_vacc.Date.notnull()]
clean_vacc.shape

(1592656, 66)

In [7]:
# delete rows with Recip_County == null
clean_vacc = clean_vacc[clean_vacc.Recip_County.notnull()]
clean_vacc.shape

(1592656, 66)

In [8]:
# delete rows with Recip_State == null
clean_vacc = clean_vacc[clean_vacc.Recip_State.notnull()]
clean_vacc.shape

(1592656, 66)

In [9]:
# filter out Texas counties
clean_vacc = clean_vacc[clean_vacc.Recip_State == 'TX']
clean_vacc.shape

(125476, 66)

In [10]:
# separate into three dfs -- first does, second dose, and booster dose
first_dose = clean_vacc[['Date', 'Recip_County', 'Recip_State', 'Administered_Dose1_Recip']]
second_dose = clean_vacc[['Date', 'Recip_County', 'Recip_State', 'Series_Complete_Yes']]
third_dose = clean_vacc[['Date', 'Recip_County', 'Recip_State', 'Booster_Doses']]

In [11]:
print(first_dose.shape)
first_dose.head(3)

(125476, 4)


Unnamed: 0,Date,Recip_County,Recip_State,Administered_Dose1_Recip
47,04/20/2022,Eastland County,TX,7955.0
48,04/20/2022,Hays County,TX,168998.0
49,04/20/2022,Hunt County,TX,48645.0


In [12]:
print(second_dose.shape)
second_dose.head(3)

(125476, 4)


Unnamed: 0,Date,Recip_County,Recip_State,Series_Complete_Yes
47,04/20/2022,Eastland County,TX,6899.0
48,04/20/2022,Hays County,TX,144118.0
49,04/20/2022,Hunt County,TX,42276.0


In [13]:
print(third_dose.shape)
third_dose.head(3)

(125476, 4)


Unnamed: 0,Date,Recip_County,Recip_State,Booster_Doses
47,04/20/2022,Eastland County,TX,2606.0
48,04/20/2022,Hays County,TX,57363.0
49,04/20/2022,Hunt County,TX,14446.0


In [14]:
clean_first_dose = first_dose[first_dose.Administered_Dose1_Recip.notnull()]
print(clean_first_dose.shape)

(125460, 4)


In [15]:
clean_second_dose = second_dose[second_dose.Series_Complete_Yes.notnull()]
print(clean_second_dose.shape)

(125460, 4)


In [16]:
clean_third_dose = third_dose[third_dose.Booster_Doses.notnull()]
print(clean_third_dose.shape)

(32242, 4)


# **1. Get new table for the first dose**

In [17]:
# extract unique county
all_county = pd.unique(clean_first_dose[['Recip_County']].values.ravel())
print(all_county[:10])
print(len(all_county))

# extract unique dates
all_date = pd.unique(clean_first_dose[['Date']].values.ravel())
all_date = np.flip(all_date)
print(all_date[:10])
print(len(all_date))

# 254 rows
# 494 + 2 cols

['Eastland County' 'Hays County' 'Hunt County' 'Hutchinson County'
 'Newton County' 'Nolan County' 'Wharton County' 'Clay County'
 'Crane County' 'Crosby County']
254
['12/13/2020' '12/14/2020' '12/15/2020' '12/16/2020' '12/17/2020'
 '12/18/2020' '12/19/2020' '12/20/2020' '12/21/2020' '12/22/2020']
494


In [18]:
col_names = ['Recip_County', 'Recip_State'] + list(all_date)
print(col_names)
print(len(col_names))

['Recip_County', 'Recip_State', '12/13/2020', '12/14/2020', '12/15/2020', '12/16/2020', '12/17/2020', '12/18/2020', '12/19/2020', '12/20/2020', '12/21/2020', '12/22/2020', '12/23/2020', '12/24/2020', '12/25/2020', '12/26/2020', '12/27/2020', '12/28/2020', '12/29/2020', '12/30/2020', '12/31/2020', '01/01/2021', '01/02/2021', '01/03/2021', '01/04/2021', '01/05/2021', '01/06/2021', '01/07/2021', '01/08/2021', '01/09/2021', '01/10/2021', '01/11/2021', '01/12/2021', '01/13/2021', '01/14/2021', '01/15/2021', '01/16/2021', '01/17/2021', '01/18/2021', '01/19/2021', '01/20/2021', '01/21/2021', '01/22/2021', '01/23/2021', '01/24/2021', '01/25/2021', '01/26/2021', '01/27/2021', '01/28/2021', '01/29/2021', '01/30/2021', '01/31/2021', '02/01/2021', '02/02/2021', '02/03/2021', '02/04/2021', '02/05/2021', '02/06/2021', '02/07/2021', '02/08/2021', '02/09/2021', '02/10/2021', '02/11/2021', '02/12/2021', '02/13/2021', '02/14/2021', '02/15/2021', '02/16/2021', '02/17/2021', '02/18/2021', '02/19/2021', '0

In [19]:
# build a new df
new_first_dose = pd.DataFrame(columns = col_names, index = all_county)
print(new_first_dose.shape)
new_first_dose.head(10)

(254, 496)


Unnamed: 0,Recip_County,Recip_State,12/13/2020,12/14/2020,12/15/2020,12/16/2020,12/17/2020,12/18/2020,12/19/2020,12/20/2020,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,,,,,,,,,,,...,,,,,,,,,,
Hays County,,,,,,,,,,,...,,,,,,,,,,
Hunt County,,,,,,,,,,,...,,,,,,,,,,
Hutchinson County,,,,,,,,,,,...,,,,,,,,,,
Newton County,,,,,,,,,,,...,,,,,,,,,,
Nolan County,,,,,,,,,,,...,,,,,,,,,,
Wharton County,,,,,,,,,,,...,,,,,,,,,,
Clay County,,,,,,,,,,,...,,,,,,,,,,
Crane County,,,,,,,,,,,...,,,,,,,,,,
Crosby County,,,,,,,,,,,...,,,,,,,,,,


In [20]:
# put countys in
new_first_dose['Recip_County'] = all_county
print(new_first_dose.shape)
new_first_dose.head(10)

(254, 496)


Unnamed: 0,Recip_County,Recip_State,12/13/2020,12/14/2020,12/15/2020,12/16/2020,12/17/2020,12/18/2020,12/19/2020,12/20/2020,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,Eastland County,,,,,,,,,,...,,,,,,,,,,
Hays County,Hays County,,,,,,,,,,...,,,,,,,,,,
Hunt County,Hunt County,,,,,,,,,,...,,,,,,,,,,
Hutchinson County,Hutchinson County,,,,,,,,,,...,,,,,,,,,,
Newton County,Newton County,,,,,,,,,,...,,,,,,,,,,
Nolan County,Nolan County,,,,,,,,,,...,,,,,,,,,,
Wharton County,Wharton County,,,,,,,,,,...,,,,,,,,,,
Clay County,Clay County,,,,,,,,,,...,,,,,,,,,,
Crane County,Crane County,,,,,,,,,,...,,,,,,,,,,
Crosby County,Crosby County,,,,,,,,,,...,,,,,,,,,,


In [22]:
# put data in
for date in all_date:
  for county in all_county:
    # temp1 = clean_first_dose.loc[clean_first_dose['Date'] == date]
    # temp2 = temp1.loc[temp1['Recip_County'] == county]
    # temp2 = temp2.reset_index()
    # new_first_dose[date][county] = temp2['Administered_Dose1_Recip'][0]
    # temp = clean_first_dose[(clean_first_dose['Date'] == date) & (clean_first_dose['Recip_County'] == county)]
    temp = clean_first_dose.loc[(clean_first_dose['Date'] == date) & (clean_first_dose['Recip_County'] == county)]
    temp = temp.reset_index()
    if len(temp) > 0:
      new_first_dose[date][county] = temp['Administered_Dose1_Recip'][0]
    else:
      new_first_dose[date][county] = 0.0

print(new_first_dose.shape)
new_first_dose.head(10)

                        Recip_County Recip_State 12/13/2020 12/14/2020  \
Eastland County      Eastland County         NaN        0.0        0.0   
Hays County              Hays County         NaN        0.0        0.0   
Hunt County              Hunt County         NaN        0.0        0.0   
Hutchinson County  Hutchinson County         NaN        0.0        0.0   
Newton County          Newton County         NaN        0.0        0.0   
...                              ...         ...        ...        ...   
Concho County          Concho County         NaN        0.0        0.0   
Matagorda County    Matagorda County         NaN        0.0        0.0   
Montgomery County  Montgomery County         NaN        0.0        0.0   
Howard County          Howard County         NaN        0.0        0.0   
Lubbock County        Lubbock County         NaN        0.0        0.0   

                  12/15/2020 12/16/2020 12/17/2020 12/18/2020 12/19/2020  \
Eastland County          0.0       

Unnamed: 0,Recip_County,Recip_State,12/13/2020,12/14/2020,12/15/2020,12/16/2020,12/17/2020,12/18/2020,12/19/2020,12/20/2020,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,Eastland County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7942.0,7945.0,7946.0,7948.0,7949.0,7953.0,7954.0,7954.0,7955.0,7955.0
Hays County,Hays County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,168644.0,168674.0,168714.0,168776.0,168839.0,168908.0,168931.0,168978.0,168997.0,168998.0
Hunt County,Hunt County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,48568.0,48571.0,48580.0,48584.0,48600.0,48620.0,48632.0,48640.0,48644.0,48645.0
Hutchinson County,Hutchinson County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,8489.0,8491.0,8493.0,8495.0,8496.0,8498.0,8499.0,8497.0,8499.0,8499.0
Newton County,Newton County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3945.0,3947.0,3947.0,3946.0,3949.0,3949.0,3949.0,3949.0,3949.0,3949.0
Nolan County,Nolan County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6832.0,6831.0,6832.0,6835.0,6835.0,6836.0,6834.0,6834.0,6834.0,6834.0
Wharton County,Wharton County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,24622.0,24631.0,24637.0,24665.0,24672.0,24678.0,24680.0,24691.0,24706.0,24706.0
Clay County,Clay County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,5404.0,5404.0,5405.0,5404.0,5405.0,5405.0,5407.0,5410.0,5410.0,5410.0
Crane County,Crane County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2316.0,2316.0,2317.0,2318.0,2318.0,2319.0,2320.0,2320.0,2321.0,2321.0
Crosby County,Crosby County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2913.0,2913.0,2913.0,2913.0,2913.0,2913.0,2913.0,2913.0,2913.0,2913.0


In [27]:
# save as csv
new_first_dose.to_csv('/content/drive/MyDrive/first_dose_data.csv', index = False)

# **2. Get a new table for the second dose**

In [28]:
# extract unique county
all_county = pd.unique(clean_second_dose[['Recip_County']].values.ravel())
print(all_county[:10])
print(len(all_county))

# extract unique dates
all_date = pd.unique(clean_second_dose[['Date']].values.ravel())
all_date = np.flip(all_date)
print(all_date[:10])
print(len(all_date))

# 254 rows
# 494 + 2 cols

['Eastland County' 'Hays County' 'Hunt County' 'Hutchinson County'
 'Newton County' 'Nolan County' 'Wharton County' 'Clay County'
 'Crane County' 'Crosby County']
254
['12/13/2020' '12/14/2020' '12/15/2020' '12/16/2020' '12/17/2020'
 '12/18/2020' '12/19/2020' '12/20/2020' '12/21/2020' '12/22/2020']
494


In [29]:
col_names = ['Recip_County', 'Recip_State'] + list(all_date)
print(col_names)
print(len(col_names))

['Recip_County', 'Recip_State', '12/13/2020', '12/14/2020', '12/15/2020', '12/16/2020', '12/17/2020', '12/18/2020', '12/19/2020', '12/20/2020', '12/21/2020', '12/22/2020', '12/23/2020', '12/24/2020', '12/25/2020', '12/26/2020', '12/27/2020', '12/28/2020', '12/29/2020', '12/30/2020', '12/31/2020', '01/01/2021', '01/02/2021', '01/03/2021', '01/04/2021', '01/05/2021', '01/06/2021', '01/07/2021', '01/08/2021', '01/09/2021', '01/10/2021', '01/11/2021', '01/12/2021', '01/13/2021', '01/14/2021', '01/15/2021', '01/16/2021', '01/17/2021', '01/18/2021', '01/19/2021', '01/20/2021', '01/21/2021', '01/22/2021', '01/23/2021', '01/24/2021', '01/25/2021', '01/26/2021', '01/27/2021', '01/28/2021', '01/29/2021', '01/30/2021', '01/31/2021', '02/01/2021', '02/02/2021', '02/03/2021', '02/04/2021', '02/05/2021', '02/06/2021', '02/07/2021', '02/08/2021', '02/09/2021', '02/10/2021', '02/11/2021', '02/12/2021', '02/13/2021', '02/14/2021', '02/15/2021', '02/16/2021', '02/17/2021', '02/18/2021', '02/19/2021', '0

In [30]:
# build a new df
new_second_dose = pd.DataFrame(columns = col_names, index = all_county)
print(new_second_dose.shape)
new_second_dose.head(10)

(254, 496)


Unnamed: 0,Recip_County,Recip_State,12/13/2020,12/14/2020,12/15/2020,12/16/2020,12/17/2020,12/18/2020,12/19/2020,12/20/2020,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,,,,,,,,,,,...,,,,,,,,,,
Hays County,,,,,,,,,,,...,,,,,,,,,,
Hunt County,,,,,,,,,,,...,,,,,,,,,,
Hutchinson County,,,,,,,,,,,...,,,,,,,,,,
Newton County,,,,,,,,,,,...,,,,,,,,,,
Nolan County,,,,,,,,,,,...,,,,,,,,,,
Wharton County,,,,,,,,,,,...,,,,,,,,,,
Clay County,,,,,,,,,,,...,,,,,,,,,,
Crane County,,,,,,,,,,,...,,,,,,,,,,
Crosby County,,,,,,,,,,,...,,,,,,,,,,


In [31]:
# put countys in
new_second_dose['Recip_County'] = all_county
print(new_second_dose.shape)
new_second_dose.head(10)

(254, 496)


Unnamed: 0,Recip_County,Recip_State,12/13/2020,12/14/2020,12/15/2020,12/16/2020,12/17/2020,12/18/2020,12/19/2020,12/20/2020,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,Eastland County,,,,,,,,,,...,,,,,,,,,,
Hays County,Hays County,,,,,,,,,,...,,,,,,,,,,
Hunt County,Hunt County,,,,,,,,,,...,,,,,,,,,,
Hutchinson County,Hutchinson County,,,,,,,,,,...,,,,,,,,,,
Newton County,Newton County,,,,,,,,,,...,,,,,,,,,,
Nolan County,Nolan County,,,,,,,,,,...,,,,,,,,,,
Wharton County,Wharton County,,,,,,,,,,...,,,,,,,,,,
Clay County,Clay County,,,,,,,,,,...,,,,,,,,,,
Crane County,Crane County,,,,,,,,,,...,,,,,,,,,,
Crosby County,Crosby County,,,,,,,,,,...,,,,,,,,,,


In [32]:
# put data in
for date in all_date:
  for county in all_county:
    temp = clean_second_dose.loc[(clean_second_dose['Date'] == date) & (clean_second_dose['Recip_County'] == county)]
    temp = temp.reset_index()
    if len(temp) > 0:
      new_second_dose[date][county] = temp['Series_Complete_Yes'][0]
    else:
      new_second_dose[date][county] = 0.0

new_second_dose.head(10)

Unnamed: 0,Recip_County,Recip_State,12/13/2020,12/14/2020,12/15/2020,12/16/2020,12/17/2020,12/18/2020,12/19/2020,12/20/2020,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,Eastland County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6878.0,6881.0,6885.0,6888.0,6891.0,6895.0,6896.0,6898.0,6899.0,6899.0
Hays County,Hays County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,143805.0,143836.0,143894.0,143947.0,143980.0,144042.0,144068.0,144092.0,144118.0,144118.0
Hunt County,Hunt County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,42217.0,42221.0,42229.0,42236.0,42245.0,42262.0,42265.0,42266.0,42276.0,42276.0
Hutchinson County,Hutchinson County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7328.0,7330.0,7329.0,7330.0,7331.0,7332.0,7332.0,7333.0,7335.0,7335.0
Newton County,Newton County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,3430.0,3430.0,3431.0,3430.0,3430.0,3430.0,3431.0,3431.0,3431.0,3431.0
Nolan County,Nolan County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,6198.0,6197.0,6199.0,6203.0,6206.0,6208.0,6207.0,6207.0,6206.0,6206.0
Wharton County,Wharton County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,20963.0,20974.0,20976.0,20989.0,20997.0,21006.0,21010.0,21015.0,21022.0,21022.0
Clay County,Clay County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,4875.0,4874.0,4875.0,4875.0,4877.0,4875.0,4876.0,4877.0,4878.0,4878.0
Crane County,Crane County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1786.0,1786.0,1786.0,1787.0,1785.0,1785.0,1786.0,1787.0,1788.0,1788.0
Crosby County,Crosby County,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,2594.0,2594.0,2595.0,2595.0,2595.0,2595.0,2595.0,2597.0,2598.0,2598.0


In [33]:
# save as csv
new_second_dose.to_csv('/content/drive/MyDrive/second_dose_data.csv', index = False)

# **3. Get a new table for the booster dose**

In [35]:
# extract unique county
all_county = pd.unique(clean_third_dose[['Recip_County']].values.ravel())
print(all_county[:10])
print(len(all_county))

# extract unique dates
all_date = pd.unique(clean_third_dose[['Date']].values.ravel())
all_date = np.flip(all_date)
print(all_date[:10])
print(len(all_date))

# 254 rows
# 127 + 2 cols

['Eastland County' 'Hays County' 'Hunt County' 'Hutchinson County'
 'Newton County' 'Nolan County' 'Wharton County' 'Clay County'
 'Crane County' 'Crosby County']
254
['12/15/2021' '12/16/2021' '12/17/2021' '12/18/2021' '12/19/2021'
 '12/20/2021' '12/21/2021' '12/22/2021' '12/23/2021' '12/24/2021']
127


In [36]:
col_names = ['Recip_County', 'Recip_State'] + list(all_date)
print(col_names)
print(len(col_names))

['Recip_County', 'Recip_State', '12/15/2021', '12/16/2021', '12/17/2021', '12/18/2021', '12/19/2021', '12/20/2021', '12/21/2021', '12/22/2021', '12/23/2021', '12/24/2021', '12/25/2021', '12/26/2021', '12/27/2021', '12/28/2021', '12/29/2021', '12/30/2021', '12/31/2021', '01/01/2022', '01/02/2022', '01/03/2022', '01/04/2022', '01/05/2022', '01/06/2022', '01/07/2022', '01/08/2022', '01/09/2022', '01/10/2022', '01/11/2022', '01/12/2022', '01/13/2022', '01/14/2022', '01/15/2022', '01/16/2022', '01/17/2022', '01/18/2022', '01/19/2022', '01/20/2022', '01/21/2022', '01/22/2022', '01/23/2022', '01/24/2022', '01/25/2022', '01/26/2022', '01/27/2022', '01/28/2022', '01/29/2022', '01/30/2022', '01/31/2022', '02/01/2022', '02/02/2022', '02/03/2022', '02/04/2022', '02/05/2022', '02/06/2022', '02/07/2022', '02/08/2022', '02/09/2022', '02/10/2022', '02/11/2022', '02/12/2022', '02/13/2022', '02/14/2022', '02/15/2022', '02/16/2022', '02/17/2022', '02/18/2022', '02/19/2022', '02/20/2022', '02/21/2022', '0

In [37]:
# build a new df
new_third_dose = pd.DataFrame(columns = col_names, index = all_county)
print(new_third_dose.shape)
new_third_dose.head(10)

(254, 129)


Unnamed: 0,Recip_County,Recip_State,12/15/2021,12/16/2021,12/17/2021,12/18/2021,12/19/2021,12/20/2021,12/21/2021,12/22/2021,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,,,,,,,,,,,...,,,,,,,,,,
Hays County,,,,,,,,,,,...,,,,,,,,,,
Hunt County,,,,,,,,,,,...,,,,,,,,,,
Hutchinson County,,,,,,,,,,,...,,,,,,,,,,
Newton County,,,,,,,,,,,...,,,,,,,,,,
Nolan County,,,,,,,,,,,...,,,,,,,,,,
Wharton County,,,,,,,,,,,...,,,,,,,,,,
Clay County,,,,,,,,,,,...,,,,,,,,,,
Crane County,,,,,,,,,,,...,,,,,,,,,,
Crosby County,,,,,,,,,,,...,,,,,,,,,,


In [38]:
# put countys in
new_third_dose['Recip_County'] = all_county
print(new_third_dose.shape)
new_third_dose.head(10)

(254, 129)


Unnamed: 0,Recip_County,Recip_State,12/15/2021,12/16/2021,12/17/2021,12/18/2021,12/19/2021,12/20/2021,12/21/2021,12/22/2021,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,Eastland County,,,,,,,,,,...,,,,,,,,,,
Hays County,Hays County,,,,,,,,,,...,,,,,,,,,,
Hunt County,Hunt County,,,,,,,,,,...,,,,,,,,,,
Hutchinson County,Hutchinson County,,,,,,,,,,...,,,,,,,,,,
Newton County,Newton County,,,,,,,,,,...,,,,,,,,,,
Nolan County,Nolan County,,,,,,,,,,...,,,,,,,,,,
Wharton County,Wharton County,,,,,,,,,,...,,,,,,,,,,
Clay County,Clay County,,,,,,,,,,...,,,,,,,,,,
Crane County,Crane County,,,,,,,,,,...,,,,,,,,,,
Crosby County,Crosby County,,,,,,,,,,...,,,,,,,,,,


In [39]:
# put data in
for date in all_date:
  for county in all_county:
    temp = clean_third_dose.loc[(clean_third_dose['Date'] == date) & (clean_third_dose['Recip_County'] == county)]
    temp = temp.reset_index()
    if len(temp) > 0:
      new_third_dose[date][county] = temp['Booster_Doses'][0]
    else:
      new_third_dose[date][county] = 0.0

new_third_dose.head(10)

Unnamed: 0,Recip_County,Recip_State,12/15/2021,12/16/2021,12/17/2021,12/18/2021,12/19/2021,12/20/2021,12/21/2021,12/22/2021,...,04/11/2022,04/12/2022,04/13/2022,04/14/2022,04/15/2022,04/16/2022,04/17/2022,04/18/2022,04/19/2022,04/20/2022
Eastland County,Eastland County,,0.0,1681.0,1698.0,1785.0,1789.0,1794.0,1802.0,1815.0,...,2574.0,2578.0,2582.0,2587.0,2591.0,2599.0,2599.0,2600.0,2606.0,2606.0
Hays County,Hays County,,58.0,33294.0,34019.0,34733.0,35429.0,35623.0,35952.0,36748.0,...,56780.0,56856.0,56948.0,57054.0,57137.0,57226.0,57283.0,57320.0,57363.0,57363.0
Hunt County,Hunt County,,23.0,9236.0,9355.0,9495.0,9570.0,9631.0,9792.0,9942.0,...,14290.0,14311.0,14332.0,14354.0,14374.0,14403.0,14421.0,14430.0,14446.0,14446.0
Hutchinson County,Hutchinson County,,23.0,1691.0,1726.0,1767.0,1782.0,1785.0,1798.0,1858.0,...,2666.0,2667.0,2670.0,2676.0,2678.0,2683.0,2685.0,2685.0,2690.0,2690.0
Newton County,Newton County,,0.0,893.0,903.0,908.0,911.0,915.0,943.0,958.0,...,1336.0,1339.0,1341.0,1341.0,1343.0,1343.0,1344.0,1344.0,1346.0,1346.0
Nolan County,Nolan County,,0.0,1864.0,1902.0,1935.0,1941.0,1944.0,1962.0,1974.0,...,2571.0,2574.0,2576.0,2581.0,2583.0,2587.0,2588.0,2588.0,2596.0,2596.0
Wharton County,Wharton County,,0.0,4078.0,4151.0,4256.0,4358.0,4372.0,4498.0,4562.0,...,7137.0,7156.0,7162.0,7173.0,7181.0,7195.0,7199.0,7208.0,7231.0,7231.0
Clay County,Clay County,,0.0,1219.0,1239.0,1250.0,1287.0,1291.0,1302.0,1316.0,...,1816.0,1816.0,1824.0,1827.0,1830.0,1832.0,1834.0,1836.0,1839.0,1839.0
Crane County,Crane County,,0.0,382.0,384.0,385.0,386.0,386.0,386.0,387.0,...,506.0,507.0,509.0,509.0,512.0,513.0,513.0,517.0,518.0,518.0
Crosby County,Crosby County,,0.0,650.0,666.0,670.0,675.0,677.0,682.0,684.0,...,1010.0,1010.0,1011.0,1013.0,1014.0,1015.0,1017.0,1020.0,1020.0,1020.0


In [40]:
# save as csv
new_third_dose.to_csv('/content/drive/MyDrive/booster_dose_data.csv', index = False)