# Michigan COVID-19 Data

## Imports

In [None]:
import pandas as pd
import numpy as np

import os
import requests
from bs4 import BeautifulSoup

## Michigan COVID-19 Data
### Download the latest COVID-19 data

In [None]:
# Michigan coronavirus data page that this notebook scrapes.
michiganCovid19Url = 'https://www.michigan.gov/coronavirus/0,9753,7-406-98163_98173---,00.html'

# Use a timeout so a stalled connection cannot hang the notebook, and fail
# loudly on an HTTP error instead of parsing an error page.
michiganCovid19PageResponse = requests.get(michiganCovid19Url, timeout=30)
michiganCovid19PageResponse.raise_for_status()

# Name the parser explicitly: without it BeautifulSoup guesses one (and warns),
# so the parse could differ depending on which optional parsers are installed.
michiganCovid19PageSoup = BeautifulSoup(michiganCovid19PageResponse.content, 'html.parser')

Create the Michigan external data directory for the downloaded files if it does not already exist.

In [None]:
# Folder prefix (with trailing slash) under which the downloaded files are stored.
externalPath = '../data/external/michiganCovid19Data/'

# dirname() drops the trailing slash; exist_ok=True makes the call a no-op when
# the folder is already there, without a separate (race-prone) existence check.
# The name avoids shadowing the builtin dir().
externalDir = os.path.dirname(externalPath)
os.makedirs(externalDir, exist_ok=True)

Download the data into the folder created above.

In [None]:
from urllib.parse import urljoin

# The download links are read from the page element with this id.
downloadSection = michiganCovid19PageSoup.find(id='comp_115341')
if downloadSection is None:
    raise RuntimeError(
        "Element 'comp_115341' was not found on the page; "
        "the page layout may have changed."
    )

# href=True skips anchors that carry no link target.
for linkTag in downloadSection.find_all('a', href=True):
    # urljoin handles site-relative hrefs (no doubled slash) and absolute ones.
    url = urljoin('https://www.michigan.gov/', linkTag['href'])

    # Fetch before opening the output file so a failed request does not leave
    # an empty or error-page file behind.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # The file keeps the last path segment of the URL as its name.
    with open(externalPath + url.split('/')[-1], "wb") as outputFile:
        outputFile.write(response.content)

### Read the day-by-day Michigan COVID-19 data

In [None]:
# Workbook holding the day-by-day figures (judging by its file name: cases and
# deaths per county, by date of symptom onset or date of death).
dayByDayWorkbookPath = "../data/external/michiganCovid19Data/Cases_and_Deaths_by_County_and_by_Date_of_Symptom_Onset_or_by_Date_of_Death2020-11-02_706752_7.xlsx"
michiganDayByDayDf = pd.read_excel(dayByDayWorkbookPath)

# Other workbooks in the same folder, kept here for reference:
#df = pd.read_excel("../data/external/michiganCovid19Data/Cases_and_Deaths_by_County_2020-11-02_706751_7.xlsx")
#df = pd.read_excel("../data/external/michiganCovid19Data/Cases_by_Demographics_Statewide_2020-11-02_706753_7.xlsx")
#df = pd.read_excel("../data/external/michiganCovid19Data/Covid-19_Tests_by_County_2020-11-02_706754_7.xlsx")
#df = pd.read_excel("../data/external/michiganCovid19Data/Diagnostic_Tests_by_Result_and_County_2020-11-02_706755_7.xlsx")

In [None]:
# Preview the loaded frame; as the cell's last expression it is rendered by the notebook.
michiganDayByDayDf

### Tidy data
Let's check whether any data is missing. It looks like some of the date values are missing.

In [None]:
# Fraction of non-null entries in each column (1.0 means nothing is missing).
rowTotal = len(michiganDayByDayDf)
michiganDayByDayDf.count().div(rowTotal)

Since only 3 dates are missing and they don't seem impactful, let's filter out the missing date values.

In [None]:
# Keep only the rows that have a date. The result is assigned back so the
# filter actually takes effect for the cells that follow; .copy() makes the
# result an independent frame so later column assignments do not warn.
# Series.notna() is used because it works for any column dtype.
michiganDayByDayDf = michiganDayByDayDf[michiganDayByDayDf["Date"].notna()].copy()

# Show the filtered frame.
michiganDayByDayDf

Let's look at the datatypes. pandas inferred most of them correctly! Let's convert CASE_STATUS and COUNTY to the category dtype.

In [None]:
# Show the dtype of each column.
michiganDayByDayDf.dtypes

In [None]:
# Convert the two label columns to the pandas category dtype.
for categoricalColumn in ('CASE_STATUS', 'COUNTY'):
    michiganDayByDayDf[categoricalColumn] = michiganDayByDayDf[categoricalColumn].astype('category')