<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"></ul></div>

In [1]:
%matplotlib inline

In [2]:
import os

import numpy as np
import pandas as pd

In [3]:
# Number of active cases a country must reach; each country's curve is
# aligned to start on the first day it is at or above this value.
MIN_ACTIVE = 50
# Minimum number of days of data after reaching MIN_ACTIVE for a country
# to be included in the chart.
MIN_DAYS = 10

In [4]:
# Load the three raw time-series tables from CSV files in the working directory.
# NOTE(review): presumably downloaded from the CSSE COVID-19 repository cited
# in the chart footnote - confirm the snapshot date when re-running.
input_confirmed = pd.read_csv('time_series_19-covid-Confirmed.csv')
input_recovered = pd.read_csv('time_series_19-covid-Recovered.csv')
input_deaths = pd.read_csv('time_series_19-covid-Deaths.csv')

In [5]:
def preprocess_columns(df):
    """Return a copy of ``df`` without coordinates, indexed by location.

    The 'Lat' and 'Long' columns are dropped and the frame is indexed by
    ('Country/Region', 'Province/State'), leaving only the remaining
    columns as data. The input frame is not modified.
    """
    location_keys = ['Country/Region', 'Province/State']
    return df.drop(columns=['Lat', 'Long']).set_index(location_keys)

In [6]:
# Apply the same clean-up to all three raw tables.
confirmed, recovered, deaths = (
    preprocess_columns(raw_table)
    for raw_table in (input_confirmed, input_recovered, input_deaths)
)

In [7]:
# Active = confirmed minus recovered minus deaths. pandas aligns the three
# frames on their index and columns, so a location or column missing from any
# of them yields NaN in the result.
active_cases = confirmed - recovered - deaths

In [8]:
# Collapse provinces/states: one row per country, summed over the
# 'Country/Region' index level.
active_cases_countries = active_cases.groupby(level='Country/Region').sum()

In [9]:
# True wherever a country has at least MIN_ACTIVE active cases in a column.
# NOTE: the "*_over_100" names below are kept for the cells that use them;
# the threshold actually applied is MIN_ACTIVE.
reached_threshold = active_cases_countries >= MIN_ACTIVE

# idxmax on booleans gives the label of the first True column per row; for
# rows that are never True it returns the first column, so those rows are
# filtered out with the any() mask.
first_idx_of_max_bool_value = reached_threshold.idxmax(axis=1)
is_over_100 = reached_threshold.any(axis=1)
idx_over_100 = first_idx_of_max_bool_value[is_over_100]

In [10]:
# Build one row per selected country: [name, day-1 value, day-2 value, ...],
# where day 1 is the first column at or above MIN_ACTIVE and the tail is
# padded with NaN so that every row has the same length.
rows = []
days_no = active_cases_countries.shape[1]
for location, idx in idx_over_100.items():
    # float dtype so the series can sit alongside NaN padding
    values = active_cases_countries.loc[location][idx:].values.astype('float')
    # Czechia is always kept, even with fewer than MIN_DAYS of data.
    if len(values) < MIN_DAYS and location != 'Czechia':
        continue
    padded = np.full(days_no, np.nan)
    padded[:len(values)] = values
    rows.append([location, *padded.tolist()])

In [11]:
# Column 0 holds the country name; use it as the index so that the remaining
# columns (1, 2, ...) are the day numbers since the threshold was reached.
over_100 = pd.DataFrame(rows).set_index(0)

In [12]:
# Replace the default index label (0) with a readable one.
over_100.index.name = 'country'

In [13]:
# Preview the aligned table: one row per country, one column per day number.
over_100

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,43,44,45,46,47,48,49,50,51,52
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Bahrain,52.0,55.0,56.0,81.0,81.0,81.0,88.0,160.0,160.0,145.0,...,,,,,,,,,,
China,503.0,595.0,858.0,1325.0,1970.0,2737.0,5277.0,5834.0,7835.0,9375.0,...,27402.0,25230.0,23702.0,22159.0,20335.0,18933.0,17567.0,16116.0,14859.0,13569.0
Cruise Ship,61.0,61.0,64.0,135.0,135.0,175.0,175.0,218.0,285.0,355.0,...,,,,,,,,,,
Czechia,91.0,94.0,141.0,,,,,,,,...,,,,,,,,,,
France,86.0,116.0,176.0,188.0,272.0,362.0,635.0,929.0,1098.0,1181.0,...,,,,,,,,,,
Germany,63.0,114.0,143.0,180.0,246.0,466.0,653.0,781.0,1022.0,1156.0,...,,,,,,,,,,
Iran,79.0,71.0,170.0,281.0,427.0,749.0,1144.0,1968.0,2278.0,2667.0,...,,,,,,,,,,
Italy,59.0,150.0,221.0,311.0,438.0,593.0,821.0,1053.0,1577.0,1835.0,...,,,,,,,,,,
Japan,53.0,60.0,65.0,75.0,82.0,99.0,124.0,136.0,147.0,165.0,...,,,,,,,,,,
"Korea, South",87.0,186.0,415.0,578.0,807.0,945.0,1227.0,1731.0,2302.0,3107.0,...,,,,,,,,,,


In [14]:
import chart_studio.plotly as py

In [15]:
import plotly.graph_objects as go

In [16]:
# Positional index of the Czechia row, used below to move that row to the end.
# get_loc raises KeyError if the row is missing; get_indexer would return -1,
# which the later .iloc slices would silently treat as "last row".
# The index comes from a per-country groupby, so the label is unique and
# get_loc returns a plain integer.
czechia_index = over_100.index.get_loc('Czechia')

In [17]:
# Preview the rows that come after Czechia (positional slice).
over_100.iloc[czechia_index + 1:]

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,43,44,45,46,47,48,49,50,51,52
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
France,86.0,116.0,176.0,188.0,272.0,362.0,635.0,929.0,1098.0,1181.0,...,,,,,,,,,,
Germany,63.0,114.0,143.0,180.0,246.0,466.0,653.0,781.0,1022.0,1156.0,...,,,,,,,,,,
Iran,79.0,71.0,170.0,281.0,427.0,749.0,1144.0,1968.0,2278.0,2667.0,...,,,,,,,,,,
Italy,59.0,150.0,221.0,311.0,438.0,593.0,821.0,1053.0,1577.0,1835.0,...,,,,,,,,,,
Japan,53.0,60.0,65.0,75.0,82.0,99.0,124.0,136.0,147.0,165.0,...,,,,,,,,,,
"Korea, South",87.0,186.0,415.0,578.0,807.0,945.0,1227.0,1731.0,2302.0,3107.0,...,,,,,,,,,,
Kuwait,56.0,56.0,56.0,58.0,58.0,61.0,63.0,63.0,68.0,70.0,...,,,,,,,,,,
Norway,56.0,87.0,108.0,147.0,176.0,204.0,399.0,597.0,701.0,995.0,...,,,,,,,,,,
Singapore,50.0,54.0,57.0,53.0,52.0,50.0,50.0,48.0,48.0,38.0,...,,,,,,,,,,
Spain,82.0,118.0,162.0,218.0,254.0,393.0,460.0,626.0,1013.0,1628.0,...,,,,,,,,,,


In [18]:
# Full table again, for comparison with the partial slice displayed in the previous cell.
over_100

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,43,44,45,46,47,48,49,50,51,52
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Bahrain,52.0,55.0,56.0,81.0,81.0,81.0,88.0,160.0,160.0,145.0,...,,,,,,,,,,
China,503.0,595.0,858.0,1325.0,1970.0,2737.0,5277.0,5834.0,7835.0,9375.0,...,27402.0,25230.0,23702.0,22159.0,20335.0,18933.0,17567.0,16116.0,14859.0,13569.0
Cruise Ship,61.0,61.0,64.0,135.0,135.0,175.0,175.0,218.0,285.0,355.0,...,,,,,,,,,,
Czechia,91.0,94.0,141.0,,,,,,,,...,,,,,,,,,,
France,86.0,116.0,176.0,188.0,272.0,362.0,635.0,929.0,1098.0,1181.0,...,,,,,,,,,,
Germany,63.0,114.0,143.0,180.0,246.0,466.0,653.0,781.0,1022.0,1156.0,...,,,,,,,,,,
Iran,79.0,71.0,170.0,281.0,427.0,749.0,1144.0,1968.0,2278.0,2667.0,...,,,,,,,,,,
Italy,59.0,150.0,221.0,311.0,438.0,593.0,821.0,1053.0,1577.0,1835.0,...,,,,,,,,,,
Japan,53.0,60.0,65.0,75.0,82.0,99.0,124.0,136.0,147.0,165.0,...,,,,,,,,,,
"Korea, South",87.0,186.0,415.0,578.0,807.0,945.0,1227.0,1731.0,2302.0,3107.0,...,,,,,,,,,,


In [19]:
# Move the Czechia row to the end of the table; rows are later plotted in
# table order, so Czechia is added to the figure last.
rows_before_czechia = over_100.iloc[:czechia_index]
rows_after_czechia = over_100.iloc[czechia_index + 1:]
czechia_row = over_100.iloc[[czechia_index]]
shifted = pd.concat([rows_before_czechia, rows_after_czechia, czechia_row])

In [20]:
# Per-country line styling: Czechia gets a saturated red, thicker line; every
# other country gets a pale colour whose hue advances with its row position.
color_step = 300 / len(shifted)
colors = {}
sizes = {}
for i, name in enumerate(shifted.index):
    is_highlighted = name == 'Czechia'
    colors[name] = (
        'hsv(0,100,100)' if is_highlighted
        else 'hsv({},40,90)'.format(30 + color_step*i)
    )
    sizes[name] = 4 if is_highlighted else 2

In [39]:
# Reference curve: starts at MIN_ACTIVE and is multiplied by
# REFERENCE_INCREASE for each subsequent day column.
REFERENCE_INCREASE = 1.33
reference_line = [
    MIN_ACTIVE * REFERENCE_INCREASE**day
    for day in range(len(shifted.columns))
]

In [49]:
fig = go.Figure()

# One line per country, in table order, styled from the colors/sizes lookups.
for name, series in shifted.iterrows():
    fig.add_trace(go.Scatter(
        x=series.index,
        y=series.values,
        mode='lines',
        name=name,
        line=dict(color=colors[name], width=sizes[name]),
        connectgaps=True,
    ))

# Dashed reference curve. Its x values come from the table's columns directly
# rather than from the loop variable left over after the loop above, so this
# call does not depend on the loop having run at least once.
fig.add_trace(go.Scatter(
    x=shifted.columns,
    y=reference_line,
    mode='lines',
    name='33% increase',
    line=dict(color='black', width=4, dash='dash'),
    showlegend=False,
))

In [74]:
# Footnote texts shown below the chart (user-facing, in Czech).
small_text = """
Zdroj: https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_time_series
"""
# Both thresholds come from the configuration constants so the footnote stays
# in sync with the filter applied to the data (and with the chart title).
small_text2 = """
Vybrány země, kde je sledován výskyt COVID-19 alespoň {} dnů a měly alespoň {} aktivních výskytů.
""".format(MIN_DAYS, MIN_ACTIVE)

# Shared placement and styling for the grey footnotes: horizontally centred,
# positioned in paper coordinates, no arrow.
footnote_style = dict(
    xref='paper',
    yref='paper',
    x=0.5,
    xanchor='center',
    yanchor='top',
    font=dict(
        family='Arial',
        size=12,
        color='rgb(150,150,150)'
    ),
    showarrow=False
)

annotations = [
    dict(footnote_style, y=-0.13, text=small_text),
    dict(footnote_style, y=-0.18, text=small_text2),
    # Label for the dashed reference curve, in data coordinates. The y axis is
    # logarithmic, so y is given in log10 units (10**4.7 is roughly 50 000).
    dict(
        x=25,
        y=4.7,
        xref="x",
        yref="y",
        text="33% nárůst",
        showarrow=True,
        arrowhead=1,
        ax=0,
        ay=-40
    ),
]

fig.update_layout(
    yaxis_type="log",
    title="Aktivní počet případů, podle počtu dnů od {}-tého případu".format(MIN_ACTIVE),
    xaxis_title="Počet dnů od {}-tého případu".format(MIN_ACTIVE),
    annotations=annotations
)

In [75]:
from chart_studio.tools import set_credentials_file, set_config_file

In [76]:
# Chart Studio credentials are read from the environment so that the notebook
# runs from a fresh kernel and no secret is stored in the notebook itself.
username = os.environ.get('CHART_STUDIO_USERNAME')
api_key = os.environ.get('CHART_STUDIO_API_KEY')
if not username or not api_key:
    raise RuntimeError(
        'Set the CHART_STUDIO_USERNAME and CHART_STUDIO_API_KEY environment '
        'variables before publishing the chart.'
    )
set_credentials_file(username=username, api_key=api_key)

In [77]:
# Configure Chart Studio so that uploaded charts are public and world-readable.
set_config_file(world_readable=True, sharing='public')

In [78]:
# Upload the figure to Chart Studio under a fixed filename; the cell output
# is the value returned by py.plot.
py.plot(filename='covid-active-with-czechia', figure_or_data=fig)

'https://plot.ly/~victor.brada/97/'