In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import urllib.request
import re as re
from bs4 import BeautifulSoup
import plotly.express as ps

In [9]:
df = pd.read_csv("https://coronavirus.ohio.gov/static/dashboards/COVIDSummaryData.csv", low_memory=False)
# The last row is summary
df_sum = df.iloc[[-1]]
# actual data is in the other columns
df_data = df.iloc[:-1].copy()

In [10]:
count_columns = ["Case Count", "Death Due to Illness Count", "Hospitalized Count"]
for c in count_columns:
    df_data[c] = df_data[c].astype(int)
df_data["Onset Date"] = pd.to_datetime( df_data["Onset Date"])


In [11]:
cases = df_data.groupby(["Onset Date", "County"])['Case Count'].sum().reset_index()
cases["State"] = "Ohio"
cases = cases.rename(columns={"Onset Date":"Date", "Case Count":"Cases"})
# cases = cases.set_index(["State", "Date", "County"])
cases

Unnamed: 0,Date,County,Cases,State
0,2020-01-02,Erie,1,Ohio
1,2020-01-02,Licking,1,Ohio
2,2020-01-02,Lucas,1,Ohio
3,2020-01-02,Mahoning,2,Ohio
4,2020-01-02,Summit,1,Ohio
...,...,...,...,...
18595,2020-11-30,Muskingum,1,Ohio
18596,2020-11-30,Pike,1,Ohio
18597,2020-11-30,Shelby,1,Ohio
18598,2020-11-30,Warren,1,Ohio


In [12]:
death_data = df_data[ df_data["Date Of Death"] != "Unknown"]
death_data = death_data[ death_data["Date Of Death"].notnull()]
death_data["Date Of Death"] = pd.to_datetime( death_data["Date Of Death"])
deaths = death_data.groupby(["Date Of Death", "County"])['Death Due to Illness Count'].sum().reset_index()
deaths["State"] = "Ohio"
deaths = deaths.rename( columns={"Date Of Death":"Date", "Death Due to Illness Count":"Deaths"})
# deaths = deaths.set_index(["State", "Date", "County"])
deaths

Unnamed: 0,Date,County,Deaths,State
0,2020-03-01,Summit,1,Ohio
1,2020-03-17,Franklin,2,Ohio
2,2020-03-18,Lucas,1,Ohio
3,2020-03-20,Cuyahoga,1,Ohio
4,2020-03-20,Erie,1,Ohio
...,...,...,...,...
4048,2020-11-29,Marion,1,Ohio
4049,2020-11-29,Summit,3,Ohio
4050,2020-11-29,Tuscarawas,0,Ohio
4051,2020-11-30,Columbiana,0,Ohio


In [13]:
total_data = pd.merge(cases, deaths, left_on=["State", "Date", "County"], right_on=["State", "Date", "County"])
total_data.to_csv("../data/processed/by_county/ohio.csv", index=False)
total_data

Unnamed: 0,Date,County,Cases,State,Deaths
0,2020-03-01,Summit,4,Ohio,1
1,2020-03-17,Franklin,49,Ohio,2
2,2020-03-18,Lucas,19,Ohio,1
3,2020-03-20,Cuyahoga,50,Ohio,1
4,2020-03-20,Erie,1,Ohio,1
...,...,...,...,...,...
3924,2020-11-29,Hancock,15,Ohio,0
3925,2020-11-29,Lorain,38,Ohio,0
3926,2020-11-29,Marion,34,Ohio,1
3927,2020-11-29,Summit,25,Ohio,3


In [14]:
# tests from https://covidtracking.com/data
df_tracker = pd.read_csv("https://covidtracking.com/data/download/all-states-history.csv")
df_tracker["date"] = pd.to_datetime(df_tracker["date"])
ohio = df_tracker[ df_tracker["state"] == "OH"].sort_values("date", ascending = True)

In [15]:
total_data2 = ohio[["date", "positive", "negative"]].copy()
total_data2 = total_data2.rename(columns={"date":"Date","positive":"Positive", "negative":"Negative"})
total_data2 = pd.merge( total_data2, total_data.groupby(["Date"])["Cases","Deaths"].sum(), left_on="Date", right_on="Date")
total_data2["State"] = "Ohio"
total_data2 = total_data2.fillna(0)
for c in ["Positive", "Negative", "Deaths"]:
    total_data2[c] = total_data2[c].astype(int)
total_data2.to_csv("../data/processed/by_state/ohio.csv", index=False)
total_data2

  total_data2 = pd.merge( total_data2, total_data.groupby(["Date"])["Cases","Deaths"].sum(), left_on="Date", right_on="Date")


Unnamed: 0,Date,Positive,Negative,Cases,Deaths,State
0,2020-03-17,67,140,49,2,Ohio
1,2020-03-18,88,140,19,1,Ohio
2,2020-03-20,169,140,76,5,Ohio
3,2020-03-21,247,140,11,1,Ohio
4,2020-03-22,351,140,51,4,Ohio
...,...,...,...,...,...,...
252,2020-11-25,382743,5474145,2155,15,Ohio
253,2020-11-26,382743,5474145,352,9,Ohio
254,2020-11-27,399808,5519733,1224,5,Ohio
255,2020-11-28,406703,5622006,817,4,Ohio
