In [1]:
%pylab inline
import io

import numpy as np
import pandas as pd
import requests
from scipy.signal import savgol_filter

Populating the interactive namespace from numpy and matplotlib


In [None]:
def make_patch_spines_invisible(ax):
  """Leave the axes frame enabled while hiding its background patch and all spines.

  Args:
    ax: axes-like object exposing ``set_frame_on``, a ``patch`` with
      ``set_visible``, and a ``spines`` mapping whose values have ``set_visible``.

  Returns:
    None; ``ax`` is modified in place.
  """
  shown = False
  ax.set_frame_on(True)
  ax.patch.set_visible(shown)
  for spine in ax.spines.values():
    spine.set_visible(shown)


def plot_field(groups, sel_regions, field, minval=1, ms = 4):
  """Plot one column over time for every selected group of a pandas GroupBy.

  Draws onto the current pylab figure using the ``plot``/``title``/... names
  that ``%pylab inline`` puts in the notebook namespace.

  Args:
    groups: GroupBy object; each member frame must have a datetime64 column
      named 'data' and a column named ``field``.
    sel_regions: container of group keys to draw. Keys that are not present in
      ``groups`` are silently ignored.
    field: name of the column to plot; also used as the figure title.
    minval: unused; kept only so existing calls keep working.
    ms: marker size forwarded to ``plot``.
  """
  for name in groups.groups:
    # Filter first: unselected groups need neither a lookup nor date labels.
    if name not in sel_regions:
      continue

    frame = groups.get_group(name)

    # 'YYYY-MM-DDThh:mm:ss...' -> 'MM-DD' tick labels (the year is dropped).
    x_data = ['-'.join(stamp.split('T')[0].split('-')[1:])
              for stamp in np.datetime_as_string(frame['data'].values)]
    data = frame[field]

    # Series.max() skips NaN; the builtin max() is order-dependent with NaN
    # and could hand NaN to '%d', which raises ValueError.
    peak = data.max()
    plot(x_data, data, 'o-', ms=ms, label='%s, max: %d' % (name, peak))

  title(field)
  grid()
  xlabel('Date')
  ylabel('#')
  xticks(rotation=90, ha="right")
  legend()


def plot_grad(x_data, raw_data, title_='', ms=4, win_size=None, poly_degree=None):
  """Plot the numerical derivative of a series together with a smoothed version.

  Draws onto the current pylab figure using the ``plot``/``title``/... names
  that ``%pylab inline`` puts in the notebook namespace.

  Args:
    x_data: x-axis values, one per sample of ``raw_data``.
    raw_data: 1-D sequence of values to differentiate with ``np.gradient``.
    title_: figure title.
    ms: marker size forwarded to ``plot``. Previously this name was read from
      the surrounding namespace, where nothing visible defines it.
    win_size: Savitzky-Golay window length; defaults to the notebook-level
      ``sliding_win_size``.
    poly_degree: Savitzky-Golay polynomial order; defaults to the
      notebook-level ``polynomial_fit_degree``.

  Returns:
    The unsmoothed gradient array (not the smoothed curve).
  """
  # Fall back to the notebook-wide smoothing settings unless overridden.
  if win_size is None:
    win_size = sliding_win_size
  if poly_degree is None:
    poly_degree = polynomial_fit_degree

  grad = np.gradient(raw_data)
  grad_hat = savgol_filter(grad, win_size, poly_degree)

  plot(x_data, grad_hat, 'o-', ms=ms, label='Derivative smoothed')
  plot(x_data, grad, 'o-', ms=ms, label='Derivative')
  legend()
  title(title_)
  grid()
  xlabel('Date')
  ylabel('cases per day')
  xticks(rotation=45)

  return grad


def get_df_from_json_url(url, timeout=30):
  """Download a JSON array of records and load it into a DataFrame.

  Args:
    url: address of the JSON document.
    timeout: seconds to wait for the server before giving up, so a stalled
      connection cannot hang the notebook.

  Returns:
    DataFrame built by ``pd.read_json(..., orient='records')``.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
  """
  response = requests.get(url, timeout=timeout)
  # Fail here on HTTP errors instead of feeding an error page to the parser.
  response.raise_for_status()

  # Drop a leading byte-order mark, which some feeds start with.
  payload = response.text.lstrip('\ufeff')

  # read_json expects a path or file-like object; passing the JSON text
  # directly is deprecated in recent pandas, hence the StringIO wrapper.
  return pd.read_json(io.StringIO(payload), orient='records')

In [None]:
# The three feeds loaded here come from the pcm-dpc/COVID-19 repository on GitHub.
# (The original note also cited https://github.com/CSSEGISandData/2019-nCoV;
# nothing in this cell reads from it.)
#
# The feeds are fetched through get_df_from_json_url rather than by handing the
# URL straight to pd.read_json: the JSON files now begin with a \ufeff byte
# order mark, which made pd.read_json(url) fail with
# "ValueError: Expected object or value".
_DPC_JSON_ROOT = 'https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-json/'

raw_it_province_df = get_df_from_json_url(_DPC_JSON_ROOT + 'dpc-covid19-ita-province.json')
raw_it_region_df = get_df_from_json_url(_DPC_JSON_ROOT + 'dpc-covid19-ita-regioni.json')
raw_it_df = get_df_from_json_url(_DPC_JSON_ROOT + 'dpc-covid19-ita-andamento-nazionale.json')

# Parse the 'data' column of every frame into datetimes, in place.
_DATA_FORMAT = '%Y-%m-%d %H:%M:%S'
for _frame in (raw_it_region_df, raw_it_province_df, raw_it_df):
  _frame['data'] = pd.to_datetime(_frame['data'], format=_DATA_FORMAT)

# One group per province name.
groups_province = raw_it_province_df.groupby('denominazione_provincia')

# Parameters for the savgol filter (curve smoothing): window length and
# polynomial degree.
sliding_win_size = 15
polynomial_fit_degree = 3