In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import urllib.request
import re as re
from bs4 import BeautifulSoup
import plotly.express as ps

In [None]:
import json
from urllib.request import urlopen

## Download Two Data Sets 

### From official OHIO site `https://coronavirus.ohio.gov/wps/portal/gov/covid-19/dashboards`

In [None]:
df = pd.read_csv("https://coronavirus.ohio.gov/static/dashboards/COVIDSummaryData.csv", low_memory=False)
# The last row of the file is a summary row, not an individual record
df_sum = df.iloc[[-1]]
# The actual data is in all the other rows; copy so later column assignments
# do not write into a view of `df`
df_data = df.iloc[:-1].copy()

The data seems to be almost clean and tidy. The only missing data are in `Admission Date` and `Date Of Death` columns.

In [None]:
df_data.head(10)

In [None]:
df_data.shape

In [None]:
df_data.dtypes

In [None]:
print("Missing data:\n")
nRows = df_data.shape[0]
100*(nRows - df_data.count())/nRows

This probably means that about 94% of the people are alive and about 80% were not hospitalized.

Let us assign the appropriate types.

In [None]:
count_columns = ["Case Count", "Death Due to Illness Count", "Hospitalized Count"]
for c in count_columns:
    df_data[c] = df_data[c].astype(int)
df_data["Onset Date"] = pd.to_datetime( df_data["Onset Date"])

You can check that total sums of these columns agree with the corresponding columns in `df_sum` set, so we do not need `df_sum`

In [None]:
pd.DataFrame(index = count_columns, data = [[df_data[c].sum(), df_sum[c].iloc[0]] for c in count_columns], 
             columns= ["sum","df_sum"])

In [None]:
print(
    df_data.sort_values("Onset Date", ascending = False).iloc[10:20].to_latex()
)

Recovered cases are cases with a symptom onset date more than 21 days prior that are not deceased.

In [None]:
def get_nrecovered(day_, data = df_data,  delta = pd.Timedelta('21 day')):
    """Count the records whose onset was exactly `delta` before `day_` and that have no date of death.

    Parameters
    ----------
    day_ : timestamp-like
        Reference day; the onset date that is matched is ``day_ - delta``.
    data : DataFrame
        Frame with "Onset Date" and "Date Of Death" columns.
    delta : Timedelta
        Offset between onset and the reference day.

    Returns
    -------
    int
        Number of matching rows (rows are counted, "Case Count" is not summed).
    """
    onset_day = day_ - delta
    onset_matches = data["Onset Date"] == onset_day
    no_death_date = data["Date Of Death"].isna()
    return int((onset_matches & no_death_date).sum())

In [None]:
def create_cases(data = df_data):
    """Aggregate the count columns per onset date and add a "Recovered" column.

    The frame is grouped by "Onset Date" and every column listed in
    `count_columns` is summed. For each onset date in the result,
    "Recovered" is filled with `get_nrecovered` evaluated on the same `data`.

    Returns
    -------
    DataFrame
        Indexed by "Onset Date"; one row per distinct onset date.
    """
    daily_totals = data.groupby("Onset Date")[count_columns].sum()
    # Filled one date at a time so the column is only created when there is
    # at least one onset date.
    for onset_day in daily_totals.index:
        daily_totals.loc[onset_day, "Recovered"] = get_nrecovered(onset_day, data=data)
    return daily_totals

In [None]:
def plot_cases(df_cases, case=True, death=True, hosp=True, recovered=True, ax = None):
    """Draw the selected daily series of `df_cases` as line plots on one axes.

    Parameters
    ----------
    df_cases : DataFrame
        Frame exposing "Onset Date" plus the "Case Count",
        "Death Due to Illness Count", "Hospitalized Count" and "Recovered"
        columns (only the enabled ones are read).
    case, death, hosp, recovered : bool
        Switches selecting which series are drawn.
    ax : matplotlib Axes, optional
        Axes to draw on; the current axes are used when omitted.

    Returns
    -------
    matplotlib Axes
        The axes the lines were drawn on.
    """
    # Identity test: `== None` goes through the object's __eq__ and is not the
    # idiomatic way to detect a missing argument.
    if ax is None:
        ax = plt.gca()

    # (enabled?, column in df_cases, legend label), in drawing order
    series = (
        (case, "Case Count", "Cases"),
        (death, "Death Due to Illness Count", "Deaths"),
        (hosp, "Hospitalized Count", "Hospitalized"),
        (recovered, "Recovered", "Recovered"),
    )
    for enabled, column, label in series:
        if enabled:
            ax = sns.lineplot(data = df_cases, x="Onset Date", y=column, label = label, ax=ax)
    return ax

In [None]:
fig, ax = plt.subplots(1, 1, figsize = (15, 10))
plot_cases(create_cases())

In [None]:
fig, ax = plt.subplots( 1, 1, figsize = (15, 10))
for sex in df_data["Sex"].unique():
    df_cases = create_cases( df_data[ df_data["Sex"] == sex])
    ax = sns.lineplot(data = df_cases, x="Onset Date", y="Death Due to Illness Count", label = sex)
ax.set(title = df_data["Death Due to Illness Count"].sum())

In [None]:
fig, ax = plt.subplots( 1, 1, figsize = (15, 10))
plot_cases( create_cases(df_data), case=False, hosp=False, recovered=False)
plt.grid()

In [None]:
fig, ax = plt.subplots(1, 1, figsize = (15, 10))
plot_cases( create_cases(df_data.query("County == 'Wood'")))
plt.title("Wood County")
plt.show()

### Map

In [None]:
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    counties = json.load(response)

In [None]:
ohio_counties = [c for c in counties["features"] if c["properties"]["STATE"] == "39"]
ohio_counties_names = [c["properties"]["NAME"] for c in ohio_counties]
print( sorted( ohio_counties_names))
# sorted([c["properties"]["NAME"] for c in counties["features"] if c["properties"]["STATE"]=="39"])

In [None]:
import plotly.express as px

# px.choropleth_mapbox needs (1) a GeoJSON FeatureCollection and (2) a data
# frame with one column holding the feature ids and one holding the value to
# colour by. The original call passed a bare list of names and a bare list of
# features, so nothing could be matched or coloured.
ohio_geojson = {"type": "FeatureCollection", "features": ohio_counties}

# The feature `id` is the key plotly matches `locations` against by default.
# NOTE(review): assumes the "County" names in df_data are spelled like the
# GeoJSON "NAME" property; counties without a match show up uncoloured.
ohio_map_data = pd.DataFrame({
    "fips": [c["id"] for c in ohio_counties],
    "County": ohio_counties_names,
}).merge(
    df_data.groupby("County", as_index=False)["Case Count"].sum(),
    on="County", how="left",
)

fig = px.choropleth_mapbox(ohio_map_data, geojson=ohio_geojson,
                           locations="fips", color="Case Count", hover_name="County",
                           color_continuous_scale="Viridis",
                           mapbox_style="carto-positron",
                           # centre and zoom on Ohio instead of the whole US
                           zoom=6, center = {"lat": 40.29, "lon": -82.79},
                           opacity=0.5)
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

In [None]:
import plotly.express as px

# US-wide unemployment choropleth, used as a known-good example of the map API.
# `counties` (the county GeoJSON) is already in memory from the "Map" section
# above, so it is not downloaded a second time.
# The table gets its own name so it does not overwrite `df`, which holds the
# raw Ohio dashboard data.
df_unemp = pd.read_csv("https://raw.githubusercontent.com/plotly/datasets/master/fips-unemp-16.csv",
                   dtype={"fips": str})  # keep FIPS codes as strings so leading zeros survive

fig = px.choropleth_mapbox(df_unemp, geojson=counties, locations='fips', color='unemp',
                           color_continuous_scale="Viridis",
                           range_color=(0, 12),
                           mapbox_style="carto-positron",
                           zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
                           opacity=0.5,
                           labels={'unemp':'unemployment rate'}
                          )
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
fig.show()

### From COVID tracker https://covidtracking.com/data

In [None]:
# tests from https://covidtracking.com/data
df_tracker = pd.read_csv("https://covidtracking.com/data/download/all-states-history.csv")
df_tracker["date"] = pd.to_datetime(df_tracker["date"])
ohio = df_tracker[ df_tracker["state"] == "OH"].sort_values("date", ascending = True)

### Comparing Different Data Sets

In [None]:
df_cases = create_cases().cumsum()

In [None]:
fig, axes = plt.subplots(ncols=1, nrows = 4, figsize = (10,15))
fields = [ ["death", "Death Due to Illness Count"], 
          ["hospitalized", "Hospitalized Count"], 
          ["positive", "Case Count"],
          ["recovered", "Recovered"]
         ]
for ax, f in zip(axes, fields):
    sns.lineplot(data = ohio, x="date", y=f[0], label = "covid_tracking", ax=ax)
    sns.lineplot(data = df_cases, x="Onset Date", y=f[1], label = "OHIO dashboard", ax=ax)
    ax.grid()

Simple change of the dates for `Deaths`, `Hospitalizations`, `Cases`?

I do not understand the difference in the `Recovered` field. Too many cumsums?

Trying to obtain Covid Tracker distribution of hospitalization dates from OHIO dashboard

In [None]:
def extract_cases(date_field, count_field, data=None, timeDelta = pd.Timedelta("10d")):
    """Cumulative sum of `count_field` by (shifted) `date_field`.

    Rows whose `date_field` is missing or equal to the string "Unknown" are
    ignored. The remaining dates are parsed, shifted by `timeDelta`, and
    `count_field` is summed per date and accumulated in date order.

    Parameters
    ----------
    date_field : str
        Name of the column holding the dates.
    count_field : str
        Name of the column holding the counts to accumulate.
    data : DataFrame, optional
        Source frame; the notebook-level `df_data` is used when omitted. It is
        looked up at call time, so a reloaded `df_data` is picked up without
        re-running this definition. `data` itself is never modified.
    timeDelta : Timedelta
        Shift added to every date.

    Returns
    -------
    DataFrame
        Two columns, `date_field` (datetime) and `count_field` (running total).
    """
    if data is None:
        data = df_data

    # only cases with a known date
    known = data[date_field].notnull() & (data[date_field] != "Unknown")

    # Type conversion and shift are done on a fresh Series rather than written
    # back into a filtered slice, which triggered SettingWithCopyWarning and
    # could leave the column with object dtype.
    shifted_dates = pd.to_datetime(data.loc[known, date_field]) + timeDelta

    # extracting only the required fields
    running_total = data.loc[known, count_field].groupby(shifted_dates).sum().cumsum()
    return running_total.reset_index()

In [None]:
deaths_deathDate = extract_cases(date_field="Date Of Death", count_field="Death Due to Illness Count")
sns.lineplot(
    data = deaths_deathDate, x="Date Of Death", y="Death Due to Illness Count", label = "Ohio Dashboard"
)

sns.lineplot(data = ohio, x="date", y="death", label = "covid tracker")

In [None]:

sns.lineplot(
    data = extract_cases(date_field="Admission Date", count_field="Hospitalized Count", timeDelta=pd.Timedelta("0d")),
    x="Admission Date", y="Hospitalized Count", label = "Ohio Dashboard"
)

sns.lineplot(data = ohio, x="date", y="hospitalized", label = "covid tracker")

## Mortality

* Yearly statistics on mortality rates can be found at https://www.cdc.gov/nchs/pressroom/states/ohio/ohio.htm.
The page needs to be parsed to extract them.

* Mortality data is also available on the CDC web page https://www.cdc.gov/nchs/data_access/vitalstatsonline.htm#Mortality_Multiple
That file has a rather unusual format, though, so we will not use it.


In [None]:
baseUrl = "https://www.cdc.gov/nchs/pressroom/states/ohio/ohio.htm"
# Use the response as a context manager so the connection is closed as soon
# as the body has been read (the original left it open).
with urllib.request.urlopen(baseUrl) as response:
    page = response.read()
soup = BeautifulSoup(page, features="lxml")
# One "tab-pane" element per tab on the page; each is parsed by parse_pane below
panes = soup.find_all(class_ = "tab-pane")

In [None]:
# def flatten(L):
#     return [L2 for L1 in L for L2 in L1]
# def read_table_line(L):
#     return flatten([[L.find("a").string], [t.string for t in L.findAll("td")]])

In [None]:
def parse_pane(pane):
    """Extract the causes-of-death table of one page tab as a tidy DataFrame.

    The second "sos-table table" element of `pane` supplies one record per
    cause: its first header cell gives the state (first two characters) and
    the year (last four characters), every row after the first gives the cause
    (link text) and the number of deaths (first data cell, thousands
    separators removed). One extra record with cause "Drugs" is taken from the
    first row of the third table whose header cell mentions "Drug".

    Parameters
    ----------
    pane : bs4 element
        One "tab-pane" element of the page.

    Returns
    -------
    DataFrame
        Columns "State", "Year" (int), "Cause", "Deaths" (int).

    Raises
    ------
    IndexError
        If the pane has fewer than three tables or no "Drug" row is found.
    """
    def _count(row):
        # first data cell of the row, e.g. "28,008" -> 28008
        return int(row.find_all("td")[0].string.replace(",", ""))

    def _is_drug_row(row):
        # rows without a header cell, or whose header holds nested markup
        # (.string is None), are skipped instead of raising
        header = row.find("th")
        return header is not None and bool(header.string) and "Drug" in header.string

    tables = pane.find_all(class_ = "sos-table table")

    causes_table = tables[1]
    title = causes_table.find("th").string
    state, year = title[:2], int(title[-4:])

    # Collect plain records and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = [
        {"State": state, "Year": year, "Cause": row.find("a").string, "Deaths": _count(row)}
        for row in causes_table.find_all("tr")[1:]  # first row is the header
    ]

    # drugs
    drug_row = [row for row in tables[2].find_all("tr") if _is_drug_row(row)][0]
    records.append({"State": state, "Year": year, "Cause": "Drugs", "Deaths": _count(drug_row)})

    df_deaths = pd.DataFrame(records, columns = ["State","Year","Cause","Deaths"])
    return df_deaths.astype({"Year": int, "Deaths": int})


In [None]:
# Parse every tab, stack the resulting tables and renumber the rows 0..n-1
yearly_tables = [parse_pane(pane) for pane in panes]
df_deaths = pd.concat(yearly_tables).reset_index(drop=True)

In [None]:
df_deaths

In [None]:
# Export to Latex for the report
df_deaths_wide = df_deaths.replace("Chronic Lower Respiratory Diseases", "Chronic Lower Respiratory Disease").\
    pivot(index="Cause", columns="Year", values="Deaths")
df_deaths_wide["Mean"] = df_deaths_wide.mean(axis=1).astype(int)
df_deaths_wide = df_deaths_wide.sort_values("Mean", ascending = False)
print(df_deaths_wide.to_latex())

In [None]:
summary = df_deaths.groupby("Cause")["Deaths"].describe()[["mean","std"]].sort_values("mean", ascending = False).reset_index()
summary

In [None]:
covid_deaths = create_cases()["Death Due to Illness Count"].sum()
print("COVID deaths:", covid_deaths)

In [None]:
fig, ax = plt.subplots(1,1,figsize = (15,7))
ax = sns.barplot(data = summary, y="Cause", x="mean")
plt.xticks(rotation = 90)
ax.axvline(x=covid_deaths, linewidth = 2, color = 'red', linestyle = '--')
# plt.xticks(rotation=45)
plt.show()

Here is the other data set

In [None]:
# https://www.cdc.gov/nchs/nvss/vsrr/provisional-tables.htm
df = pd.read_csv("https://data.cdc.gov/api/views/hmz2-vwda/rows.csv?accessType=DOWNLOAD")

In [None]:
df.columns

In [None]:
df_cases = create_cases()

In [None]:
df_cases

In [None]:
dfd = df[ (df["State"] == "OHIO") & (df.Indicator == "Number of Deaths") & (df["Year"] == 2019)]
sns.scatterplot(data = dfd, x="Month", y="Data Value")
plt.xticks(rotation=90)
plt.ylim(0)
plt.show()

In [None]:
df.Indicator.unique()

In [None]:
df.columns

For some reason, the number of deaths in this data set is higher than in the previous one.

In [None]:
death_2017 = df_deaths[ df_deaths["Year"] == 2017]["Deaths"].sum()
death_2019 = df.query("State == 'OHIO' & Year == 2019& Indicator == 'Number of Deaths'")["Data Value"].sum()
print("2017: ", death_2017)
print("2019: ", death_2019)
print(" fact = %.2f" % (death_2019/death_2017))

The other dataset for deaths  

In [None]:
# https://data.cdc.gov/NCHS/Weekly-Counts-of-Deaths-by-State-and-Select-Causes/muzy-jte6/data
us_mortality_data = pd.read_csv("https://data.cdc.gov/api/views/muzy-jte6/rows.csv?accessType=DOWNLOAD")
us_mortality_data["Week Ending Date"] = pd.to_datetime( us_mortality_data["Week Ending Date"])

In [None]:
us_mortality_data["Jurisdiction of Occurrence"].unique()

In [None]:
# Weekly deaths by selected cause for one state, smoothed with a 2-week rolling mean
state = "Georgia"
fig, ax = plt.subplots(figsize = (20, 10))
data = us_mortality_data[ us_mortality_data["Jurisdiction of Occurrence"] == state]
# data = data[-20:-2]
# data = data[ data["Week Ending Date"] < np.datetime64("Oct 1,  2020")]
for c in [
    'Septicemia (A40-A41)', 'Malignant neoplasms (C00-C97)',
       'Diabetes mellitus (E10-E14)', 'Alzheimer disease (G30)',
       'Influenza and pneumonia (J09-J18)',
       'Chronic lower respiratory diseases (J40-J47)',
       'Other diseases of respiratory system (J00-J06,J30-J39,J67,J70-J98)',
       'Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)',
       'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99)',
       'Diseases of heart (I00-I09,I11,I13,I20-I51)',
       'Cerebrovascular diseases (I60-I69)',
       'COVID-19 (U071, Multiple Cause of Death)',
       'COVID-19 (U071, Underlying Cause of Death)']:
    # No `palette`: "blue" is not a palette name and a palette only applies
    # together with `hue`; each call gets the next colour of the cycle.
    # `ax=ax` ties every line to the figure created above.
    sns.lineplot(
        x=data["Week Ending Date"],
        y=data[c].rolling(window=2).mean(), label = c, ax=ax)
plt.title(state)
plt.xticks(rotation = 90)
plt.ylim(0)
plt.grid()
plt.show()

In [None]:
from scipy.signal import find_peaks

In [None]:
# `peaks` is only computed in the next cell; evaluating it here raised a
# NameError on a fresh kernel (Restart & Run All), so the lookup was removed.

In [None]:
# Smoothed weekly COVID-19 deaths (multiple cause) for Mississippi, with local maxima marked
death = us_mortality_data[ us_mortality_data["Jurisdiction of Occurrence"] == "Mississippi"][[\
                        "Week Ending Date", "COVID-19 (U071, Multiple Cause of Death)"]]
death = death.rename(columns={"COVID-19 (U071, Multiple Cause of Death)":"d"})
# Item assignment instead of attribute assignment: `death.d = ...` only works
# while the name does not collide with a DataFrame attribute.
death["d"] = death["d"].rolling(window = 2).mean()
# The rolling mean leaves NaN at the start of the series (and wherever a weekly
# count is missing). scipy warns that find_peaks may give unexpected results
# on NaN input, so those weeks are dropped first.
death = death.dropna(subset=["d"])

weeks = death["Week Ending Date"].to_numpy()
smoothed = death["d"].to_numpy()

sns.lineplot(x=death["Week Ending Date"], y=death["d"])
# positional indices of local maxima that are at least 5 samples apart
peaks = find_peaks(smoothed, distance=5)[0]
sns.scatterplot(x=weeks[peaks], y=smoothed[peaks])

In Ohio **Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified** cause of death is rising!

## Population

In [None]:
# from https://www.census.gov/data/datasets/time-series/demo/popest/2010s-state-total.html#par_textimage_1873399417
# from https://www.census.gov/data/datasets/time-series/demo/popest/2010s-state-total.html#par_textimage_1873399417
df_population = \
    pd.read_excel("https://www2.census.gov/programs-surveys/popest/tables/2010-2019/state/totals/nst-est2019-01.xlsx")

# Sheet rows 3..58 hold the territories, sheet row 2 holds the year labels and
# the yearly estimates start at column 3.
# (The original built a first, empty df_Population that was immediately
# overwritten; that dead assignment is gone.)
territory_rows = df_population.iloc[3:59]
# Names carry a leading "." in the sheet; regex=False removes a literal dot
# regardless of the pandas version's default for `regex`.
territory_names = territory_rows.iloc[:, 0].str.replace(".", "", regex=False).str.upper()
year_labels = df_population.iloc[2, 3:].astype(int)

df_Population = (
    territory_rows.iloc[:, 3:]
    .set_index(territory_names)
    .rename(columns=year_labels)  # maps the sheet's column labels to the years
)
df_Population.index.names = ["Territory"]

In [None]:
print("Population of Ohio did not change much during last years")
pop_Ohio = df_Population.loc["OHIO",:]
print("Ohio population: %d +- %d ( %.2f %%)" % 
      ( pop_Ohio.mean(), pop_Ohio.std(), 100*pop_Ohio.std()/pop_Ohio.mean())
     )


In [None]:
pop_Ohio

## Tests

It could be very instructive to check the number of cases normalized by a number of issued tests

In [None]:
sns.lineplot(data = ohio, x="date", y="totalTestResults", label = "totalTestResults")
sns.lineplot(data = ohio, x="date", y="positive", label = "positive")
sns.lineplot(data = ohio, x="date", y="negative", label = "negative")
# sns.lineplot(data = ohio, x="date", y="recovered", label = "recovered")
# sns.lineplot(data = ohio, x="date", y="recovered", label = "recovered
plt.xticks(rotation = 90)
plt.show()

In [None]:
ohio.columns

## Ratios, etc

In [None]:
df_cases = create_cases()

As we can see, a second wave is evident.

In [None]:
sns.lineplot( x=df_cases.index, y=df_cases["Death Due to Illness Count"])

In [None]:
sns.lineplot(x = df_cases.index, y=df_cases["Hospitalized Count"]/df_cases["Case Count"])

In [None]:
df_cases = create_cases().cumsum()

ohio_dr = pd.DataFrame()
ohio_dr["date"] = ohio["date"]
ohio_dr["ratio"] = ohio["death"]/ohio["positive"]
sns.lineplot(data = ohio_dr, x="date", y="ratio")
plt.ylabel("deaths/#positive")
plt.show()

In [None]:
sns.lineplot(x=ohio_dr["date"], y=ohio["death"])

In [None]:
nDeaths = df_data[df_data["Date Of Death"].notnull()]["Case Count"].sum()
print("%d people died probably of COVID" % nDeaths )
nCOVIDDeaths = df_data["Death Due to Illness Count"].sum()
print("In %d cases [%d %%] COVID was comfirmed " % (nCOVIDDeaths, 100*nCOVIDDeaths/nDeaths))
nDiedInHospital = np.count_nonzero( df_data["Date Of Death"].notnull() &  df_data["Admission Date"].notnull() )
print("%d people died in hispitals" % nDiedInHospital)

In [None]:
np.count_nonzero( df_data["Date Of Death"].notnull() &  df_data["Admission Date"].notnull() )

In [None]:
# Days between hospital admission and death, for records where both dates are known
both_present = df_data["Date Of Death"].notnull() &  df_data["Admission Date"].notnull()
both_known = (df_data["Admission Date"] != "Unknown") & (df_data["Date Of Death"] != "Unknown")

# .copy() so the column assignments below write into an independent frame
# instead of a slice of df_data (avoids SettingWithCopyWarning).
df_DiedInHospitals = df_data[both_present & both_known].copy()
df_DiedInHospitals["Admission Date"] = pd.to_datetime( df_DiedInHospitals["Admission Date"])
df_DiedInHospitals["Date Of Death"] = pd.to_datetime( df_DiedInHospitals["Date Of Death"])
df_DiedInHospitals["time_in_Hospital"] = \
    (df_DiedInHospitals["Date Of Death"] - df_DiedInHospitals["Admission Date"]).dt.days

sns.histplot(df_DiedInHospitals["time_in_Hospital"])

df_DiedInHospitals["time_in_Hospital"].describe()

# Get Ohio GeoJSON

In [None]:
ohioCountyGeoJSONURL = 'https://raw.githubusercontent.com/deldersveld/topojson/master/countries/us-states/OH-39-ohio-counties.json'

Citation: 

Eldersveld, D. (2020) TopoJSON Collection (Version 1.0) [Source Code]. https://github.com/deldersveld/topojson.

## Import geopandas for plotting

[Geopandas](https://geopandas.org/ "Geopandas")

In [None]:
import geopandas as gp

## Plot empty map to confirm data source fitness

In [None]:
geoDataFrame = gp.read_file(ohioCountyGeoJSONURL)
geoDataFrame.plot()

In [None]:
countyDataFrame = df_data.groupby(["County"])['Death Due to Illness Count'].sum()

## Understanding the GeoJSON data

In [None]:
geoDataFrame.head(5)

## Create a county organized Covid-19 DataFrame

In [None]:
df_data.columns

In [None]:
# Per-county totals of the numeric columns only. Without numeric_only=True,
# pandas 2.x also tries to sum the datetime and string columns, which raises
# for datetimes and concatenates strings.
countyDataFrame = df_data.groupby(["County"]).sum(numeric_only=True)

## Merge GeoJSON DataFrame with Covid-19 DataFrame

In [None]:
geoDataFrameMerged = geoDataFrame.merge(countyDataFrame, right_on="County", left_on="NAME")

In [None]:
geoDataFrameMerged.head()

## Modify the DataFrame for centering labels for ease of comprehension.

In [None]:
geoDataFrameMerged["center"] = geoDataFrameMerged["geometry"].centroid
ohioCountyNames = geoDataFrameMerged.copy()
ohioCountyNames.set_geometry("center", inplace=True)

## Plot Case County Data

In [None]:
ax = geoDataFrameMerged.plot(column="Case Count", legend=True, figsize=(15,10), cmap='YlOrRd')

for x, y, label in zip(ohioCountyNames.geometry.x, ohioCountyNames.geometry.y, ohioCountyNames["NAME"]):
    plt.text(x, y, label, fontsize = 10)

## Plot Deaths Due to Illness Count

In [None]:
ax = geoDataFrameMerged.plot(column="Death Due to Illness Count", legend=True, figsize=(15,10), cmap='YlOrRd')

for x, y, label in zip(ohioCountyNames.geometry.x, ohioCountyNames.geometry.y, ohioCountyNames["NAME"]):
    plt.text(x, y, label, fontsize = 10)

## Plot Hospitalized Count

In [None]:
ax = geoDataFrameMerged.plot(column="Hospitalized Count", legend=True, figsize=(12,8))
for x, y, label in zip(ohioCountyNames.geometry.x, ohioCountyNames.geometry.y, ohioCountyNames["NAME"]):
    plt.text(x, y, label, fontsize = 10)

## Movie

In [None]:
import bar_chart_race

In [None]:
death_ohio = us_mortality_data[ us_mortality_data["Jurisdiction of Occurrence"] == "Ohio"][["Week Ending Date", 
        'Septicemia (A40-A41)', 'Malignant neoplasms (C00-C97)',
       'Diabetes mellitus (E10-E14)', 'Alzheimer disease (G30)',
       'Influenza and pneumonia (J09-J18)',
       'Chronic lower respiratory diseases (J40-J47)',
       'Other diseases of respiratory system (J00-J06,J30-J39,J67,J70-J98)',
       'Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)',
       'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99)',
       'Diseases of heart (I00-I09,I11,I13,I20-I51)',
       'Cerebrovascular diseases (I60-I69)',
       'COVID-19 (U071, Multiple Cause of Death)',
       'COVID-19 (U071, Underlying Cause of Death)']].set_index("Week Ending Date")
death_ohio = death_ohio.iloc[60:98]
len(death_ohio)

In [None]:
# Animated ranking of the weekly death counts per cause in Ohio, written to an mp4 file.
# NOTE(review): the summary label says "Total deaths" but sums only the six
# largest causes of each period — confirm that this is intended.
bar_chart_race.bar_chart_race(death_ohio, filename = "Ohio_death_causes.mp4",
                              sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=10,
    interpolate_period=False,
    label_bars=True,
    bar_size=.95,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    period_fmt='%B %d, %Y',
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Total deaths: {v.nlargest(6).sum():,.0f}',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    perpendicular_bar_func='median',
    period_length=4*500,
    figsize=(5, 3),
    dpi=144,
    cmap='dark12',
    # The bars are causes of death in Ohio, not countries
    title='Weekly Deaths by Cause in Ohio',
    title_size='',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=False)

In [None]:
death_texas = us_mortality_data[ us_mortality_data["Jurisdiction of Occurrence"] == "Texas"][["Week Ending Date", 
        'Septicemia (A40-A41)', 'Malignant neoplasms (C00-C97)',
       'Diabetes mellitus (E10-E14)', 'Alzheimer disease (G30)',
       'Influenza and pneumonia (J09-J18)',
       'Chronic lower respiratory diseases (J40-J47)',
       'Other diseases of respiratory system (J00-J06,J30-J39,J67,J70-J98)',
       'Nephritis, nephrotic syndrome and nephrosis (N00-N07,N17-N19,N25-N27)',
       'Symptoms, signs and abnormal clinical and laboratory findings, not elsewhere classified (R00-R99)',
       'Diseases of heart (I00-I09,I11,I13,I20-I51)',
       'Cerebrovascular diseases (I60-I69)',
       'COVID-19 (U071, Multiple Cause of Death)',
       'COVID-19 (U071, Underlying Cause of Death)']].set_index("Week Ending Date")
death_texas = death_texas.iloc[60:]
len(death_texas)

In [None]:
# Animated ranking of the weekly death counts per cause in Texas, written to an mp4 file.
# NOTE(review): the summary label says "Total deaths" but sums only the six
# largest causes of each period — confirm that this is intended.
bar_chart_race.bar_chart_race(death_texas, filename = "Texas_death_causes.mp4",
                              sort='desc',
    n_bars=10,
    fixed_order=False,
    fixed_max=True,
    steps_per_period=10,
    interpolate_period=False,
    label_bars=True,
    bar_size=.95,
    period_label={'x': .99, 'y': .25, 'ha': 'right', 'va': 'center'},
    period_fmt='%B %d, %Y',
    period_summary_func=lambda v, r: {'x': .99, 'y': .18,
                                      's': f'Total deaths: {v.nlargest(6).sum():,.0f}',
                                      'ha': 'right', 'size': 8, 'family': 'Courier New'},
    perpendicular_bar_func='median',
    period_length=4*500,
    figsize=(5, 3),
    dpi=144,
    cmap='dark12',
    # The bars are causes of death in Texas, not countries
    title='Weekly Deaths by Cause in Texas',
    title_size='',
    bar_label_size=7,
    tick_label_size=7,
    shared_fontdict={'family' : 'Helvetica', 'color' : '.1'},
    scale='linear',
    writer=None,
    fig=None,
    bar_kwargs={'alpha': .7},
    filter_column_colors=False)