In [1]:
from shapely.geometry import Point
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster

In [4]:
# Columns kept from the raw weather export, in display order.
weather_columns = ['STATION', 'NAME', 'LATITUDE', 'LONGITUDE', 'DATE', 'TAVG', 'TMAX', 'TMIN']
# Friendlier names for the three temperature columns.
temp_renames = {'TAVG': 'temp_avg', 'TMAX': 'temp_max', 'TMIN': 'temp_min'}

# Load, trim, rename and parse the date column in one pipeline.
nash_weather = (
    pd.read_csv('../data/weather_stats.csv')
    .loc[:, weather_columns]
    .rename(columns=temp_renames)
    .assign(DATE=lambda frame: pd.to_datetime(frame['DATE']))
)

In [5]:
# Summarise dtypes and non-null counts per column; the temperature columns
# are sparsely populated compared with the station/date columns.
nash_weather.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 37056 entries, 0 to 37055
Data columns (total 8 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   STATION    37056 non-null  object        
 1   NAME       37056 non-null  object        
 2   LATITUDE   37056 non-null  float64       
 3   LONGITUDE  37056 non-null  float64       
 4   DATE       37056 non-null  datetime64[ns]
 5   temp_avg   808 non-null    float64       
 6   temp_max   7535 non-null   float64       
 7   temp_min   7535 non-null   float64       
dtypes: datetime64[ns](1), float64(5), object(2)
memory usage: 2.3+ MB


In [6]:
# Build one point geometry per observation from the station coordinates.
# points_from_xy is vectorized, so it avoids a Python-level row-by-row
# DataFrame.apply over every observation. x is longitude and y is latitude,
# the same (LONGITUDE, LATITUDE) order the points were built in before.
nash_weather['geometry'] = gpd.points_from_xy(nash_weather['LONGITUDE'],
                                              nash_weather['LATITUDE'])
nash_weather.head(50)

Unnamed: 0,STATION,NAME,LATITUDE,LONGITUDE,DATE,temp_avg,temp_max,temp_min,geometry
0,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-01,,,,POINT (-87.096259 36.098414)
1,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-02,,,,POINT (-87.096259 36.098414)
2,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-03,,,,POINT (-87.096259 36.098414)
3,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-04,,,,POINT (-87.096259 36.098414)
4,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-05,,,,POINT (-87.096259 36.098414)
5,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-06,,,,POINT (-87.096259 36.098414)
6,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-07,,,,POINT (-87.096259 36.098414)
7,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-08,,,,POINT (-87.096259 36.098414)
8,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-09,,,,POINT (-87.096259 36.098414)
9,US1TNCH0002,"KINGSTON SPRINGS 0.3 ENE, TN US",36.098414,-87.096259,2020-03-10,,,,POINT (-87.096259 36.098414)


In [7]:
# Count observations per station name to see how evenly the stations report.
nash_weather['NAME'].value_counts()

MOUNT JULIET 6.1 SSW, TN US               412
NASHVILLE INTERNATIONAL AIRPORT, TN US    412
FRANKLIN 4.4 W, TN US                     412
MURFREESBORO 5.5 NNW, TN US               412
SMYRNA 3.0 ESE, TN US                     412
                                         ... 
GREENBRIER 1.4 N, TN US                    34
NASHVILLE 6.7 SE, TN US                    11
PLEASANT VIEW 2.8 ESE, TN US                5
ARRINGTON 1.3 NNE, TN US                    4
OLD HICKORY 1.2 SSE, TN US                  1
Name: NAME, Length: 118, dtype: int64

In [8]:
# Read the zip code GeoJSON file into a GeoDataFrame.
zipcodes = gpd.read_file('../data/zipcodes.geojson')

In [13]:
# Keep only the join key and the shape; the other attributes are dropped.
zip_columns = ['zip', 'geometry']
zipcodes = zipcodes[zip_columns]
zipcodes.head()

Unnamed: 0,zip,geometry
0,37115,"MULTIPOLYGON (((-86.68725 36.31821, -86.68722 ..."
1,37216,"MULTIPOLYGON (((-86.73451 36.23774, -86.73425 ..."
2,37204,"MULTIPOLYGON (((-86.77914 36.13424, -86.77923 ..."
3,37027,"MULTIPOLYGON (((-86.81258 36.06319, -86.81263 ..."
4,37064,"MULTIPOLYGON (((-87.02197 36.01200, -87.02140 ..."


In [15]:
# Short, lowercase names for the columns of the case-count export.
covid_renames = {'Specimen Collection Date': 'date', 'Zip': 'zip', 'Cases': 'cases'}

# Load the per-zip, per-date case counts and normalise the column types:
# zip goes through nullable Int64 and then to str, and date is parsed to datetime.
covid_cases = (
    pd.read_csv('../data/COVID_CountByZipPerDate 03292021.csv')
    .rename(columns=covid_renames)
    .assign(
        zip=lambda frame: frame['zip'].astype('Int64').astype('str'),
        date=lambda frame: pd.to_datetime(frame['date']),
    )
)
covid_cases.head()

Unnamed: 0,date,zip,cases
0,2020-03-06,37217,1
1,2020-03-10,37214,1
2,2020-03-11,37013,1
3,2020-03-11,37204,2
4,2020-03-11,37205,1


In [16]:
# Attach each zip code's geometry to its case rows. The left join keeps
# every case row even when its zip has no matching entry in zipcodes.
covid_cases = pd.merge(covid_cases, zipcodes, how='left', on='zip')
covid_cases.head()

Unnamed: 0,date,zip,cases,geometry
0,2020-03-06,37217,1,"MULTIPOLYGON (((-86.67355 36.12729, -86.66687 ..."
1,2020-03-10,37214,1,"MULTIPOLYGON (((-86.65585 36.11824, -86.66410 ..."
2,2020-03-11,37013,1,"MULTIPOLYGON (((-86.66517 36.09108, -86.66526 ..."
3,2020-03-11,37204,2,"MULTIPOLYGON (((-86.77914 36.13424, -86.77923 ..."
4,2020-03-11,37205,1,"MULTIPOLYGON (((-86.81907 36.13351, -86.81688 ..."
