# COVID-19 Statistics for Indiana

## Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import geopandas as gp

## Constants

In [None]:
# County-by-date COVID-19 report workbook hosted on hub.mph.in.gov.
indianaCaseDataUrl = "https://hub.mph.in.gov/dataset/bd08cdd3-9ab1-4d70-b933-41f9ef7b809d/resource/afaa225d-ac4e-4e80-9190-f6800c366b58/download/covid_report_county_date.xlsx"
# Data-dictionary workbook published in the same dataset.
indianaCaseDataDictUrl = "https://hub.mph.in.gov/dataset/bd08cdd3-9ab1-4d70-b933-41f9ef7b809d/resource/5ff3931f-aa68-4ee6-ac1d-d6c5d6cca50a/download/covid_report_county_date_dictionary.xlsx"

## Data Import

In [None]:
# Load both Excel workbooks directly from their https URLs (needs network access).
df = pd.read_excel(indianaCaseDataUrl)
# Data dictionary that accompanies the report workbook.
dfDict = pd.read_excel(indianaCaseDataDictUrl)

## Data Assessment

In [None]:
# Show the report DataFrame as loaded.
df

### Missing Data

In [None]:
# Percentage of missing values per column: rows minus non-null count, over rows.
numberOfRows = len(df)
missingPerColumn = numberOfRows - df.count()
100 * missingPerColumn / numberOfRows

No missing data means we don't need to drop data or impute values.

### Dataframe column data types

In [None]:
# Column dtypes as produced by read_excel, before any conversion.
df.dtypes

#### Fix incorrect column data types

In [None]:
# Parse DATE into datetime64 and store COUNTY_NAME as a categorical.
df['DATE'] = pd.to_datetime(df['DATE'])
df['COUNTY_NAME'] = df['COUNTY_NAME'].astype('category')

### Updated column data types

In [None]:
# Re-check the dtypes now that DATE and COUNTY_NAME have been converted.
df.dtypes

In [None]:
# Sum of COVID_COUNT over every row of the report.
df['COVID_COUNT'].sum()

In [None]:
# Total COVID_COUNT per county.
# Select the column before aggregating: GroupBy.sum() takes no column argument,
# its first positional parameter is `numeric_only`.
df.groupby('COUNTY_NAME')[['COVID_COUNT']].sum()

In [None]:
# Per-date totals of COVID_COUNT and COVID_DEATHS across all rows sharing that date.
dailyCaseDeathIndiana = df.groupby('DATE')[['COVID_COUNT', 'COVID_DEATHS']].sum()

In [None]:
# Pairwise scatter/distribution grid of the daily case and death totals.
sns.pairplot(data=dailyCaseDeathIndiana)

In [None]:
# Preview the first rows of the report.
df.head()

In [None]:
# Running totals per county: sum each (county, date) group, then accumulate
# within each county, and finally flatten the index back into columns.
# NOTE(review): every non-key column of df is summed here — confirm all of them
# are numeric and meaningful to accumulate.
countyDateTotals = df.groupby(['COUNTY_NAME', 'DATE']).sum()
countyRunningTotals = countyDateTotals.groupby(level='COUNTY_NAME').cumsum()
dailyCaseDeathCountyIndiana = countyRunningTotals.reset_index()

In [None]:
# Cumulative COVID_DEATHS over time, one line per county.
plt.figure(figsize=(16, 9))
# legend=False is the documented way to suppress the per-hue legend.
sns.lineplot(x="DATE", y="COVID_DEATHS", hue="COUNTY_NAME", data=dailyCaseDeathCountyIndiana, legend=False)

In [None]:
# Indiana county boundaries file from the deldersveld/topojson GitHub repository.
indianaCountyGeoJSONURL = "https://raw.githubusercontent.com/deldersveld/topojson/master/countries/us-states/IN-18-indiana-counties.json"

Citation:

Eldersveld, D. (2020) TopoJSON Collection (Version 1.0) [Source Code]. https://github.com/deldersveld/topojson.

In [None]:
# Read the county boundaries over the network and draw them as a quick sanity check.
# NOTE(review): the URL points into a TopoJSON collection although the variable
# name says GeoJSON — confirm the reader handles this format as expected.
geoDataFrame = gp.read_file(indianaCountyGeoJSONURL)
geoDataFrame.plot()

In [None]:
# Preview the first rows of the county geometry table.
geoDataFrame.head()

## Create a county-level COVID-19 DataFrame

In [None]:
# Per-county totals over all dates, indexed by COUNTY_NAME.
# numeric_only=True keeps the datetime DATE column out of the sum; recent pandas
# versions raise on it instead of dropping it silently.
countyDataFrame = df.groupby(['COUNTY_NAME']).sum(numeric_only=True)

## Merge GeoJSON with Covid-19 DataFrame

In [None]:
# Attach the per-county totals to the county geometries by name.
# Default inner join: a county whose NAME has no exact COUNTY_NAME match is dropped.
geoDataFrameMerged = geoDataFrame.merge(
    countyDataFrame,
    left_on="NAME",
    right_on="COUNTY_NAME",
)

In [None]:
# Preview the first five merged rows (head() defaults to five).
geoDataFrameMerged.head()

## Add county centroids to the DataFrame so each county's label can be placed at its center

In [None]:
# Store each county polygon's centroid, then build a copy of the frame whose
# active geometry is that centroid so label coordinates can be read via .geometry.
# NOTE: despite its name, michiganCountyNames holds the Indiana counties; the name
# is kept because the plotting cells below reference it.
geoDataFrameMerged["center"] = geoDataFrameMerged["geometry"].centroid
michiganCountyNames = geoDataFrameMerged.copy().set_geometry("center")

## Plot Case County Data

In [None]:
# Choropleth coloured by the COVID_COUNT column, with county names as labels.
ax = geoDataFrameMerged.plot(column="COVID_COUNT", legend=True, figsize=(15, 10), cmap='YlOrRd')

# Write onto the map's own axes and anchor each label by its centre, so the text
# is centred on the county centroid instead of starting at it.
for point, countyName in zip(michiganCountyNames.geometry, michiganCountyNames["NAME"]):
    ax.text(point.x, point.y, countyName, fontsize=10, ha='center', va='center')

## Plot Death County Data

In [None]:
# Choropleth coloured by the COVID_DEATHS column, with county names as labels.
ax = geoDataFrameMerged.plot(column="COVID_DEATHS", legend=True, figsize=(15, 10), cmap='YlOrRd')

# Write onto the map's own axes and anchor each label by its centre, so the text
# is centred on the county centroid instead of starting at it.
for point, countyName in zip(michiganCountyNames.geometry, michiganCountyNames["NAME"]):
    ax.text(point.x, point.y, countyName, fontsize=10, ha='center', va='center')

## Plot Testing County Data

In [None]:
# Choropleth coloured by the COVID_TESTS column, with county names as labels.
ax = geoDataFrameMerged.plot(column="COVID_TESTS", legend=True, figsize=(15, 10), cmap='YlOrRd')

# Write onto the map's own axes and anchor each label by its centre, so the text
# is centred on the county centroid instead of starting at it.
for point, countyName in zip(michiganCountyNames.geometry, michiganCountyNames["NAME"]):
    ax.text(point.x, point.y, countyName, fontsize=10, ha='center', va='center')

## Plot Positive Test Rate County Data

In [None]:
# Choropleth coloured by the POSITIVE_TEST_RATE column, with county names as labels.
# NOTE(review): geoDataFrameMerged is built from per-county sums over all dates, so
# if POSITIVE_TEST_RATE is a per-row rate, its sum is not itself a rate — confirm
# this is the intended metric.
ax = geoDataFrameMerged.plot(column="POSITIVE_TEST_RATE", legend=True, figsize=(15, 10), cmap='YlOrRd')

# Write onto the map's own axes and anchor each label by its centre, so the text
# is centred on the county centroid instead of starting at it.
for point, countyName in zip(michiganCountyNames.geometry, michiganCountyNames["NAME"]):
    ax.text(point.x, point.y, countyName, fontsize=10, ha='center', va='center')

In [None]:
# Tag every row with the state name; this adds a constant string column to df in place.
df['State'] = 'Indiana'

In [None]:
# Keep only the columns needed for the per-county export, in output order.
countyExportColumns = ['DATE', 'State', 'COUNTY_NAME', 'COVID_COUNT', 'COVID_DEATHS']
byCountyDF = df.loc[:, countyExportColumns]

In [None]:
# Rename the source columns to the export column names ('State' already matches).
byCountyDF = byCountyDF.rename(columns={
    'DATE': 'Date',
    'COUNTY_NAME': 'County',
    'COVID_COUNT': 'Cases',
    'COVID_DEATHS': 'Deaths',
})

In [None]:
# Write the per-county table without the index column.
# The path is relative to the notebook's working directory, and to_csv does not
# create missing parent directories.
byCountyDF.to_csv('../data/processed/by_county/IN.csv', index=False)

In [None]:
# State-wide totals per date: sum the numeric columns over all rows sharing a DATE.
# numeric_only=True keeps the string State and categorical COUNTY_NAME columns out
# of the sum; recent pandas versions no longer drop such columns automatically.
byStateDF = df.groupby(['DATE']).sum(numeric_only=True).reset_index()
byStateDF['STATE'] = 'Indiana'
# Negative = tests administered minus positive tests administered.
byStateDF['NEGATIVE'] = byStateDF['COVID_TESTS_ADMINISTRATED'] - byStateDF['COVID_POSITIVE_TESTS_ADMIN']
# Keep only the export columns, in output order.
byStateDF = byStateDF.loc[:, ['DATE', 'STATE', 'COVID_COUNT', 'COVID_POSITIVE_TESTS_ADMIN', 'NEGATIVE', 'COVID_DEATHS']]

In [None]:
# Rename the source columns to the export column names.
byStateDF = byStateDF.rename(columns={
    'DATE': 'Date',
    'STATE': 'State',
    'COVID_COUNT': 'Cases',
    'COVID_POSITIVE_TESTS_ADMIN': 'Positive',
    'NEGATIVE': 'Negative',
    'COVID_DEATHS': 'Deaths',
})

In [None]:
# Write the state-level table without the index column.
# The path is relative to the notebook's working directory, and to_csv does not
# create missing parent directories.
byStateDF.to_csv('../data/processed/by_state/IN.csv', index=False)