In [None]:
###
### LIBRARIES
###

import load_data

import os
import pandas as pd
import numpy as np
import datetime as dt

# matplotlib
%matplotlib inline
from matplotlib import pyplot as plt
import matplotlib.dates as mdates
from matplotlib import colors 

#seaborn
import seaborn as sns

# same as ggplot2 in R
from plotnine import *

# geographical plots
import geopandas as gpd
from shapely.geometry import Point, Polygon
import pysal.viz.mapclassify as mc

from scipy.optimize import curve_fit, fsolve


In [None]:
###
### FUNCTIONS
###

def logistic_model(x,a,b,c):
    """Evaluate the three-parameter logistic curve ``c / (1 + exp(-(x - b) / a))``.

    Parameters
    ----------
    x : scalar or array-like supporting NumPy arithmetic
        Point(s) at which the curve is evaluated.
    a : float
        Scale of the transition (divides the distance from ``b``).
    b : float
        Midpoint: the curve equals ``c / 2`` at ``x == b``.
    c : float
        Upper asymptote of the curve.

    Returns
    -------
    Same shape/type as the arithmetic on ``x`` produces.
    """
    exponent = -(x - b) / a
    denominator = 1 + np.exp(exponent)
    return c / denominator

def fit_log_model(data, region, p0=(2, 100, 20000)):
    """Fit ``logistic_model`` to the cumulative cases of a single region.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with the columns 'denominazione_regione', 'timestamp' and
        'totale_casi'. The frame passed in is the one that is fitted (the
        function no longer reads the notebook-level ``data_regions``).
    region : str
        Value of 'denominazione_regione' selecting the rows to fit.
    p0 : sequence of 3 floats, optional
        Initial guess for ``(a, b, c)`` handed to ``curve_fit``.

    Returns
    -------
    tuple
        The ``(popt, pcov)`` pair returned by ``scipy.optimize.curve_fit``.

    Raises
    ------
    ValueError
        If ``data`` has no rows for ``region``.
    """
    region_rows = data[data['denominazione_regione'] == region]
    if region_rows.empty:
        raise ValueError('No rows found for region {!r}'.format(region))

    x = region_rows['timestamp']
    y = region_rows['totale_casi']

    return curve_fit(logistic_model, x, y, p0=list(p0))

def plot_forecasting(df_regions, df_forecasting, region, FMT, start_date):
    """Scatter the observed cases of one region together with its logistic forecast.

    Parameters
    ----------
    df_regions : pandas.DataFrame
        Observations with the columns 'denominazione_regione', 'timestamp'
        (integer day offset) and 'totale_casi'.
    df_forecasting : pandas.DataFrame
        Fit results with the columns 'Region', 'a', 'b', 'c' and 'End_Date'.
        If several rows match ``region`` the first one is used.
    region : str
        Region to plot.
    FMT : str
        ``strptime`` format used to parse both ``start_date`` and 'End_Date'.
    start_date : str
        Date corresponding to timestamp 0.

    Raises
    ------
    ValueError
        If ``region`` is missing from either frame.
    """
    observed = df_regions.loc[df_regions['denominazione_regione'] == region,
                              ['timestamp', 'totale_casi']]
    fitted = df_forecasting[df_forecasting['Region'] == region]
    if observed.empty or fitted.empty:
        raise ValueError('No data available for region {!r}'.format(region))

    # Work with plain scalars rather than 1-element arrays.
    a = float(fitted['a'].iloc[0])
    b = float(fitted['b'].iloc[0])
    c = float(fitted['c'].iloc[0])
    end_date = fitted['End_Date'].iloc[0]

    # Forecast window: the day after the last observation up to (excluding)
    # the estimated end date, expressed as day offsets from start_date.
    max_time = int(observed['timestamp'].max())
    end_time = (dt.datetime.strptime(end_date, FMT) - dt.datetime.strptime(start_date, FMT)).days

    x_forecast = np.arange(max_time + 1, end_time)
    y_forecast = logistic_model(x_forecast, a, b, c)

    fig, ax = plt.subplots(figsize=(10, 8))
    ax.scatter(observed['timestamp'], observed['totale_casi'], label='Observed')
    ax.scatter(x_forecast, y_forecast, label='Forecast')
    ax.set_xlabel('Days since {}'.format(start_date))
    ax.set_ylabel('totale_casi')
    ax.set_title('Forecasting: {}, End Date Estimated: {}'.format(region, end_date))
    ax.legend()
    plt.show()
    

In [None]:
###
### DIRECTORIES
###

# The notebook is expected to run from a sub-folder of the project:
# Data/ and Results/ are siblings of the current working directory.
PROJECT_DIR = os.getcwd()
_PARENT_DIR = os.path.dirname(PROJECT_DIR)

DATA_DIR = os.path.join(_PARENT_DIR, 'Data')
# SHAPEFILES_DIR = os.path.join(os.path.dirname(PROJECT_DIR),'Shapefiles')
RESULTS_DIR = os.path.join(_PARENT_DIR, 'Results')

In [None]:
###
### LOADING DATA
###

### 1) Covid_Data: if nothing is specified, loads data using the GitHub repository url.
###                if a DATA_DIR is specified, loads data downloaded as csv
### NOTE(review): the behaviour above is taken from the original comment; load_data
###               is not visible here, so confirm against load_data.load_covid_data.
### The call returns three objects: province-level, region-level and national data.
data_provinces, data_regions, data_national = load_data.load_covid_data()



In [None]:
###
### MODELS
###

today = dt.datetime.now().strftime('%Y-%m-%d')

### 1) fit logistic model for each region

start_date = "2020-01-01T00:00:00"
FMT = '%Y-%m-%dT%H:%M:%S'

# 'timestamp' = whole days elapsed since start_date.
# The reference date is parsed once (not once per row) and taken from
# start_date so the two can never drift apart.
start_datetime = dt.datetime.strptime(start_date, FMT)
data_regions['timestamp'] = data_regions['data'].\
            map(lambda x: (dt.datetime.strptime(x, FMT) - start_datetime).days)

regions = np.unique(data_regions['denominazione_regione'])

# one result row per region
results = []
model = 'Logistic'

for region in regions:

    # fit = (optimal parameters, covariance matrix)
    fit = fit_log_model(data_regions, region)

    # standard errors of the estimated parameters: sqrt of the covariance diagonal
    errors = np.sqrt(np.diag(fit[1]))

    a = fit[0][0]
    b = fit[0][1]
    c = fit[0][2] # is the asymptote -> total max of cases!

    # estimate the end date: first day on which the curve reaches int(c).
    # fsolve returns a 1-element array; index it explicitly, because converting
    # an array with ndim > 0 to a scalar is deprecated in recent NumPy.
    sol = int(fsolve(lambda x : logistic_model(x,a,b,c) - int(c),b)[0])
    final_date = start_datetime + dt.timedelta(days=sol)
    final_date = final_date.strftime(FMT)
    aux = [region, model, a, b, c, errors[2],final_date]

    results.append(aux)

# create dataframe
df_logistic_model = pd.DataFrame(results, columns=['Region', 'Model', 'a', 'b', 'c','error to c (+/-)','End_Date'])

# make sure the output folder exists before writing the report
os.makedirs(RESULTS_DIR, exist_ok=True)
df_logistic_model.to_excel(os.path.join(RESULTS_DIR,'Logistic_Forecasting'+'_'+today + '.xlsx'))


In [None]:
### Estimated cases: sum of the per-region asymptotes and of their standard errors
'Total estimated cases in Italy: {} with possible error +/- {}'.format(
    *(round(np.sum(df_logistic_model[column])) for column in ('c', 'error to c (+/-)'))
)

In [None]:
###
### PLOT FORECASTING
###

# Show observed vs. forecast cases for a single region.
plot_forecasting(
    df_regions=data_regions,
    df_forecasting=df_logistic_model,
    region='Liguria',
    FMT=FMT,
    start_date=start_date,
)
