In [None]:
import os
import pandas as pd
import numpy as np

from IPython import get_ipython


from sklearn.metrics import mean_absolute_error as mae

import matplotlib.pyplot as plt
get_ipython().run_line_magic('matplotlib', 'inline')

import seaborn as sns
sns.set()

import plotly
import plotly.express as px
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.io as pio
# Initialise plotly's notebook integration once. `init_notebook_mode` imported
# above from `plotly.offline` is the very same function as
# `plotly.offline.init_notebook_mode`, so a second call was redundant.
init_notebook_mode(connected=True)

In [None]:
# Read the snapshot CSV and convert its `date` column to pandas datetimes.
path = './data/time_series_20200323.csv'
df = pd.read_csv(path).assign(date=lambda frame: pd.to_datetime(frame['date']))
# Show the first ten rows as the cell output.
df.head(n=10)

In [None]:
# Keep only the rows whose `country` column equals 'Poland'.
df_poland = df.loc[df['country'].eq('Poland')]

In [None]:
# Quick plot of the `confirmed` column against `date` for Poland, written to a
# standalone HTML file. The output directory is created first so the cell also
# works when `plots/` does not exist yet.
os.makedirs('plots', exist_ok=True)
# Explicit `.html` suffix: plotly.offline.plot otherwise warns and appends it itself.
plot([{'x': df_poland.date, 'y': df_poland.confirmed}], filename='plots/pl_confirmed.html')


In [None]:
# Poland's `confirmed` column against `date`, drawn with both lines and markers.
fig = go.Figure(
    data=go.Scatter(
        x=df_poland.date,
        y=df_poland.confirmed,
        mode='lines+markers',
        name='lines+markers',
    )
)

# Make sure the output directory exists before plotly tries to write into it.
os.makedirs('plots', exist_ok=True)
# Explicit `.html` suffix avoids plotly's "didn't end with .html" warning.
# NOTE(review): this is the same output file as the earlier quick plot of the
# Poland series, so running both cells leaves only this version on disk.
plot(fig, filename='plots/pl_confirmed.html')

In [None]:
def linear_func(values, k=None, b=0):
    """Build baseline predictions with one entry per element of ``values``.

    Parameters
    ----------
    values : sized sequence of numbers
        Observed series. It fixes the length of the output and, when ``k`` is
        None, the constant level that is returned.
    k : float, optional
        Slope of the line. When None, the constant mean of ``values`` is
        returned instead of a line and ``b`` is ignored.
    b : float, default 0
        Intercept of the line, i.e. the prediction at position 0.

    Returns
    -------
    list
        ``[mean(values)] * len(values)`` when ``k`` is None, otherwise
        ``[i * k + b for i in range(len(values))]``.
    """
    n_points = len(values)
    if k is None:
        # The mean is only needed for the constant baseline, so it is computed
        # here rather than on every call (which also avoids numpy's
        # "mean of empty slice" warning when a line is requested for empty input).
        return [np.mean(values)] * n_points

    # Positions are 0-based, so the first prediction equals the intercept.
    return [position * k + b for position in range(n_points)]

In [None]:
# Report the MAE of two baselines against Poland's `confirmed` column:
# the constant-mean prediction (no slope given) and the line with k=0, b=0.
for label, line_kwargs in (('mean', {}), ('linear', {'k': 0, 'b': 0})):
    print(label, mae(df_poland.confirmed, linear_func(df_poland.confirmed, **line_kwargs)))

In [None]:
# Brute-force grid search for the slope `k` and intercept `b` of the straight
# line (see `linear_func`) with the lowest MAE against Poland's `confirmed` column.
#
# `np.linspace(start, stop, 1)` returns only `start`, so a grid needs more than
# one step per axis to search anything. The bounds below are derived from the
# data instead of being hard-coded.
GRID_STEPS = 51  # points per axis -> GRID_STEPS ** 2 evaluations of the MAE

confirmed = df_poland.confirmed
max_confirmed = float(confirmed.max())
# Upper slope bound: twice the average increase per step between the smallest
# and largest value. `max(..., 1)` guards against a single-row series.
max_slope = 2 * (max_confirmed - float(confirmed.min())) / max(len(confirmed) - 1, 1)

# Start from the line k=0, b=0 so the reported result is never worse than it.
best_k = 0
best_b = 0
best_mae = mae(confirmed, linear_func(confirmed, k=best_k, b=best_b))

for k in np.linspace(0, max_slope, GRID_STEPS):
    for b in np.linspace(-max_confirmed, max_confirmed, GRID_STEPS):
        actual_mae = mae(confirmed, linear_func(confirmed, k=k, b=b))
        if actual_mae < best_mae:
            best_mae = actual_mae
            best_k = k
            best_b = b

print("Best mae: {} k: {} b: {}".format(best_mae, best_k, best_b))

In [None]:
# Sum `confirmed` for every (country, date) pair and return the group keys as
# ordinary columns again.
df_countries = (
    df.groupby(['country', 'date'])['confirmed']
    .sum()
    .reset_index()
)

In [None]:
# Preview the first five aggregated rows as the cell output.
df_countries.head(n=5)

In [None]:
# Print every country followed by the element count of its sub-frame.
# Grouping by the scalar key 'country' (rather than the one-element list
# ['country']) makes the loop variable the plain country value; with a
# one-element list pandas 2.x yields 1-tuples such as ('Poland',) instead.
for country, country_group in df_countries.groupby('country'):
    print(country)
    # NOTE(review): DataFrame.size is rows x columns, not the number of rows;
    # switch to len(country_group) if a row count was intended.
    print(country_group.size)

In [None]:
# One lines+markers trace per country, all on a single dark-themed figure.
fig = go.Figure()

# Group by the scalar key so each group label is the plain country value.
# A one-element list key makes pandas 2.x yield 1-tuples, which would then be
# passed as the trace name instead of the country itself.
for country, country_group in df_countries.groupby('country'):
    fig.add_trace(
        go.Scatter(
            x=country_group.date,
            y=country_group.confirmed,
            mode='lines+markers',
            name=country,
        )
    )

# The title is Polish for "Time series".
fig.update_layout(template='plotly_dark', title='Szereg czasowy')

# Create the output directory if needed and use an explicit `.html` suffix so
# plotly does not have to warn and append it.
os.makedirs('plots', exist_ok=True)
plot(fig, filename='plots/global_confirmed.html')