# Michigan COVID-19 Data

## Imports

In [None]:
import glob
import os
from urllib.parse import urljoin

import geopandas as gp
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import seaborn as sns
from bs4 import BeautifulSoup

## Michigan COVID-19 Data
### Download the latest COVID-19 data

In [None]:
# Page that is scraped further down for the links to the downloadable data files.
michiganCovid19Url = 'https://www.michigan.gov/coronavirus/0,9753,7-406-98163_98173---,00.html'
# Use a timeout so a stalled connection cannot hang the notebook, and raise on
# an HTTP error instead of parsing an error page as if it were the data page.
michiganCovid19PageResponse = requests.get(michiganCovid19Url, timeout=30)
michiganCovid19PageResponse.raise_for_status()
# Name the parser explicitly: without it BeautifulSoup picks whichever parser is
# installed (and warns), so the parse tree could differ between machines.
michiganCovid19PageSoup = BeautifulSoup(michiganCovid19PageResponse.content, 'html.parser')

Create the Michigan external data directory to store the data, if it doesn't already exist.

In [None]:
# Folder that receives the files downloaded from michigan.gov.
externalPath = '../data/external/michiganCovid19Data/'
# exist_ok=True creates the folder (and any missing parents) only when needed,
# without a separate existence check and without shadowing the built-in `dir`.
os.makedirs(externalPath, exist_ok=True)

Download the data into the above folder.

In [None]:
# Delete the previously downloaded files: Michigan date-stamps its file names,
# so old downloads would otherwise remain next to the new ones.
# The pattern is built from externalPath so it cannot drift from the download folder.
michiganDataFileNames = glob.glob(os.path.join(externalPath, '*'))
for fileName in michiganDataFileNames:
    # os.remove cannot delete a directory; leave any sub-folder in place
    # rather than aborting the clean-up.
    if not os.path.isdir(fileName):
        os.remove(fileName)

In [None]:
# Download every file linked from the page element with id 'comp_115341'
# into externalPath, named after the last path segment of its URL.
michiganDownloadContainer = michiganCovid19PageSoup.find(id='comp_115341')
if michiganDownloadContainer is None:
    # find() returns None when the element is absent; report that clearly
    # instead of failing with an AttributeError on None.
    raise RuntimeError("Element with id 'comp_115341' was not found on the Michigan COVID-19 page")
# href=True skips anchors that carry no link target.
for urlHtml in michiganDownloadContainer.find_all('a', href=True):
    # urljoin copes with root-relative and absolute hrefs, where plain string
    # concatenation would produce a doubled slash or a malformed URL.
    url = urljoin('https://www.michigan.gov/', urlHtml['href'])
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    # Open the target only after the download succeeded so that a failed
    # request does not leave an empty or bogus file behind.
    with open(externalPath + url.split('/')[-1], "wb") as dataFile:
        dataFile.write(response.content)

### Read the day-by-day Michigan COVID-19 data

In [None]:
def _findDownloadedFile(namePrefix):
    """Return the path of the downloaded file whose name starts with *namePrefix*.

    Raises:
        FileNotFoundError: if no file in the download folder matches.
    """
    matches = sorted(glob.glob('../data/external/michiganCovid19Data/' + namePrefix + '*'))
    if not matches:
        raise FileNotFoundError(
            "No downloaded file starts with '" + namePrefix + "'; run the download cells first"
        )
    # Sorting makes the pick deterministic when several files match.
    # NOTE(review): the last name is presumably the newest because the names
    # carry a date stamp - confirm against the real file names.
    return matches[-1]


# Other workbooks seen in the download folder but not used here:
# Cases_and_Deaths_by_County, Cases_by_Demographics_Statewide,
# Covid-19_Tests_by_County.
michiganDayByDayDf = pd.read_excel(
    _findDownloadedFile('Cases_and_Deaths_by_County_and_by_Date_of_Symptom_Onset_or_by_Date_of_Death')
)
michiganDayByDayTestingDf = pd.read_excel(
    _findDownloadedFile('Diagnostic_Tests_by_Result_and_County')
)

In [None]:
# Show the loaded cases/deaths frame as the cell output for a first look.
michiganDayByDayDf

### Tidy data
Let's check whether any data is missing. It looks like some of the date values are missing.

In [None]:
# Fraction of non-missing values per column (1.0 means the column is complete).
totalRowCount = len(michiganDayByDayDf)
michiganDayByDayDf.count().div(totalRowCount)

Since only 3 dates are missing and they don't seem impactful, let's filter out the missing date values.

In [None]:
# Actually remove the rows without a Date: the bare indexing expression only
# displayed the filtered rows and left michiganDayByDayDf itself unchanged.
# .copy() makes the result an independent frame for the column assignments that follow.
michiganDayByDayDf = michiganDayByDayDf.dropna(subset=["Date"]).copy()
michiganDayByDayDf

Let's look at the datatypes. Wow, it got most of them right! Let's get the CASE_STATUS and COUNTY set as category.

In [None]:
# List the dtype pandas inferred for each column.
michiganDayByDayDf.dtypes

In [None]:
# Store the two label columns as pandas categoricals.
for categoryColumn in ('CASE_STATUS', 'COUNTY'):
    michiganDayByDayDf[categoryColumn] = michiganDayByDayDf[categoryColumn].astype('category')

In [None]:
# Re-check the dtypes to confirm CASE_STATUS and COUNTY are now categories.
michiganDayByDayDf.dtypes

In [None]:
# Keep only the rows whose CASE_STATUS is 'Confirmed'.
# .copy() turns the filtered selection into an independent frame, so adding a
# column to it later does not trigger pandas' SettingWithCopyWarning.
michiganDayByDayDf = michiganDayByDayDf[michiganDayByDayDf['CASE_STATUS'] == 'Confirmed'].copy()
michiganDayByDayDf

### Let's graphically view the data

Is there a correlation between cases and deaths?

In [None]:
# Statewide Cases and Deaths totals for each Date (Date becomes the index).
dailyCaseDeathMichigan = michiganDayByDayDf.groupby("Date")[["Cases", "Deaths"]].sum()

In [None]:
# Pairwise relationship plots of the daily Cases and Deaths totals.
sns.pairplot(dailyCaseDeathMichigan)

# Get Michigan GeoJSON

In [None]:
# Raw GitHub URL of the Michigan county boundary file (see the citation below).
michiganCountyGeoJSONURL = "https://raw.githubusercontent.com/deldersveld/topojson/master/countries/us-states/MI-26-michigan-counties.json"

Citation:

Eldersveld, D. (2020) TopoJSON Collection (Version 1.0) [Source Code]. https://github.com/deldersveld/topojson.

## Plot empty map to confirm data source fitness

In [None]:
# Load the county boundaries directly from the URL into a GeoDataFrame.
geoDataFrame = gp.read_file(michiganCountyGeoJSONURL)
# Draw the bare shapes to check that the file really describes Michigan's counties.
geoDataFrame.plot()

## Understanding the GeoJSON Data

In [None]:
# Peek at the first five rows to see which attribute columns the file provides.
geoDataFrame.head(5)

## Create a county organized Covid-19 DataFrame

In [None]:
# Total the numeric columns per county (COUNTY becomes the index).
# numeric_only=True keeps the Date and CASE_STATUS columns out of the sum:
# recent pandas raises a TypeError on them instead of silently dropping them.
# observed=False spells out the long-standing default of keeping every COUNTY
# category in the result.
countyDataFrame = michiganDayByDayDf.groupby(['COUNTY'], observed=False).sum(numeric_only=True)

## Merge GeoJSON with Covid-19 DataFrame

In [None]:
# Attach the per-county totals to the county shapes by matching names.
# This is an inner join (merge's default), so a county whose name does not
# appear on both sides is left out of the result.
geoDataFrameMerged = geoDataFrame.merge(
    countyDataFrame,
    how="inner",
    left_on="NAME",
    right_on="COUNTY",
)

In [None]:
# Peek at the first five merged rows to confirm the totals were attached to the shapes.
geoDataFrameMerged.head(5)

## Add centroid points to the DataFrame so the county labels can be centered

In [None]:
# Store each county's centroid in a new "center" column.
countyCentroids = geoDataFrameMerged["geometry"].centroid
geoDataFrameMerged["center"] = countyCentroids
# Labelling frame: a copy of the merged frame whose active geometry is the
# centroid column, so .geometry.x / .geometry.y give the label positions.
michiganCountyNames = geoDataFrameMerged.copy().set_geometry("center")

## Plot Case County Data

In [None]:
# Choropleth of the per-county "Cases" totals.
ax = geoDataFrameMerged.plot(column="Cases", cmap='YlOrRd', figsize=(15,10), legend=True)

# Write each county's name at the x/y position of its centroid point.
caseLabelXs = michiganCountyNames.geometry.x
caseLabelYs = michiganCountyNames.geometry.y
caseLabelTexts = michiganCountyNames["NAME"]
for labelX, labelY, labelText in zip(caseLabelXs, caseLabelYs, caseLabelTexts):
    plt.text(labelX, labelY, labelText, fontsize=10)

## Plot Death County Data

In [None]:
# Choropleth of the per-county "Deaths" totals.
ax = geoDataFrameMerged.plot(column="Deaths", cmap='YlOrRd', figsize=(15,10), legend=True)

# Write each county's name at the x/y position of its centroid point.
deathLabelXs = michiganCountyNames.geometry.x
deathLabelYs = michiganCountyNames.geometry.y
deathLabelTexts = michiganCountyNames["NAME"]
for labelX, labelY, labelText in zip(deathLabelXs, deathLabelYs, deathLabelTexts):
    plt.text(labelX, labelY, labelText, fontsize=10)

# Export Data for Final Project Notebook
## First export the data by county

In [None]:
# Tag every row with the state name for the combined exports.
# assign() returns a new frame, so this does not write into the filtered
# selection made earlier and cannot trigger SettingWithCopyWarning.
michiganDayByDayDf = michiganDayByDayDf.assign(State='Michigan')

In [None]:
# Keep only the export columns, in the order the by-county file uses.
michiganExportDataFrameByCounty = michiganDayByDayDf.loc[
    :, ["Date", "State", "COUNTY", "Cases", "Deaths"]
]

In [None]:
# Replace the column labels positionally (COUNTY becomes County).
michiganExportDataFrameByCounty = michiganExportDataFrameByCounty.set_axis(
    ["Date", "State", "County", "Cases", "Deaths"], axis=1
)

In [None]:
# Show the by-county export frame for a final check before writing it out.
michiganExportDataFrameByCounty

In [None]:
# Write the by-county data without the row index.
byCountyExportPath = "../data/processed/by_county/michigan.csv"
# to_csv does not create missing folders, so make sure the target folder exists.
os.makedirs(os.path.dirname(byCountyExportPath), exist_ok=True)
michiganExportDataFrameByCounty.to_csv(byCountyExportPath, index=False)

## Second export by state

In [None]:
# Relabel the testing columns positionally with the names used in the exports.
# NOTE(review): assumes the workbook has exactly these five columns in this
# order - confirm against the downloaded file.
michiganDayByDayTestingDf.columns=["County","Date", "Negative", "Positive","Total"]
# Daily totals across all counties. numeric_only=True keeps non-numeric
# columns such as County out of the sum instead of concatenating their text.
michiganDayByDayTestingDf.groupby(["Date"]).sum(numeric_only=True)

In [None]:
# Statewide Cases and Deaths per Date. Only these two columns are summed:
# State and County are labels, and recent pandas either raises on them or
# concatenates their text instead of silently dropping them.
michiganExportDataFrameByState = michiganExportDataFrameByCounty.groupby("Date")[["Cases", "Deaths"]].sum()

In [None]:
# Add the constant State label to every daily row.
michiganExportDataFrameByState.loc[:, "State"] = "Michigan"

In [None]:
# Show the daily statewide totals with the State label added.
michiganExportDataFrameByState

In [None]:
# Daily statewide test totals (Date becomes the index).
# numeric_only=True keeps non-numeric columns such as the county name out of
# the sum instead of concatenating their text.
groupedTestingByDate = michiganDayByDayTestingDf.groupby("Date").sum(numeric_only=True)

In [None]:
# Show the per-date testing totals.
groupedTestingByDate

In [None]:
# Combine the daily case/death totals with the daily testing totals on their
# shared Date index. This is an inner join, so only dates present in both
# frames are kept.
michiganExportDataFrameByState = pd.merge(
    michiganExportDataFrameByState,
    groupedTestingByDate,
    how="inner",
    left_index=True,
    right_index=True,
)

In [None]:
# Keep only the export columns, in the order the by-state file uses.
michiganExportDataFrameByState = michiganExportDataFrameByState.loc[
    :, ["State", "Cases", "Positive", "Negative", "Deaths"]
]

In [None]:
# Write the by-state data; the Date index is kept as the first CSV column.
byStateExportPath = "../data/processed/by_state/michigan.csv"
# to_csv does not create missing folders, so make sure the target folder exists.
os.makedirs(os.path.dirname(byStateExportPath), exist_ok=True)
michiganExportDataFrameByState.to_csv(byStateExportPath)