In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from sklearn.metrics import root_mean_squared_error as rmse
import time
import tkinter as tk
from tkinter import messagebox
import math
import matplotlib as mpl
from scipy.spatial import ConvexHull

# our functions
import predict_Beta_I
import choice_start_day
import plot_hyb

import warnings
warnings.filterwarnings(action='ignore')

# to account for updates when files change
%load_ext autoreload
%autoreload 2

In [2]:
# Seed table; its `set` column is used below to leave out rows labelled 'test'.
df_seeds = pd.read_csv('initial_data/seeds_set_parameters.csv')

# Display the seed numbers of every row whose `set` is not 'test'.
is_not_test = df_seeds['set'] != 'test'
df_seeds.loc[is_not_test, 'seed_number'].values

array([   0,    1,    2, ..., 1497, 1498, 1499])

In [40]:
# Load a single seed's trajectory for interactive inspection.
seed_dirs = 'initial_data/initial_data_ba_10000/'
seed_number = 0

# Only the first five columns are kept and renamed to the short names used below.
seed_df = pd.read_csv(f'{seed_dirs}seir_seed_{seed_number}.csv').iloc[:, :5].copy()
seed_df.columns = ['S', 'E', 'I', 'R', 'Beta']

# Rows where both E and I are zero; everything from the first such row onward
# is dropped from seed_df.
end_df = seed_df[(seed_df['E'] == 0) & (seed_df['I'] == 0)]
if len(end_df) > 0:
    first_end_row = end_df.index[0]
    seed_df = seed_df.iloc[:first_end_row].copy()


In [35]:
# Pick the start day for the seed loaded above using the
# 'roll_var_npeople' method of choice_start_day, then display it.
start_day = choice_start_day.choose_method(seed_df, 'roll_var_npeople')
start_day

np.int64(10)

In [27]:
def _dataset_label(seed_dirs):
    """Derive the results sub-folder name from a seed directory path.

    The last path component is expected to look like
    ``initial_data_<type>_<population>`` (the ``initial_data_`` prefix is
    optional). A trailing slash on ``seed_dirs`` is ignored.

    Returns ``<type>`` followed by a population suffix: nothing for 10000,
    ``_50k`` for 50000, ``_100k`` for 100000, and the raw population string
    for any other value.

    Raises:
        ValueError: if the component has no ``_`` separator or the
            population part is not an integer.
    """
    # Last path component, independent of whether a trailing '/' was given.
    tag = seed_dirs.rstrip('/').rsplit('/', 1)[-1]
    prefix = 'initial_data_'
    if tag.startswith(prefix):
        tag = tag[len(prefix):]
    ntype, npop = tag.rsplit('_', 1)
    suffix_by_population = {10000: '', 50000: '_50k', 100000: '_100k'}
    return ntype + suffix_by_population.get(int(npop), npop)


def apply_methods(seed_dirs='initial_data/initial_data_ba_10000/',
                 idx_s=0, idx_e=11, n_seeds=1201, show_fig_flag=False):
    """Run a range of Beta-prediction methods and save one results CSV per method.

    For every start-day type and every method in ``methods[idx_s:idx_e]``,
    ``plot_hyb.main_f`` is called on the non-test seeds from the global
    ``df_seeds``. Its five returned sequences are combined into a single
    table and written to
    ``results/<dataset label>/<start-day type>/<method label>_results.csv``.

    Args:
        seed_dirs: directory with the seed data and model files; its last
            component determines the dataset label (see ``_dataset_label``).
        idx_s: first index (inclusive) into the list of methods.
        idx_e: last index (exclusive) into the list of methods.
        n_seeds: maximum number of non-test seeds to use.
        show_fig_flag: forwarded unchanged to ``plot_hyb.main_f``.

    Raises:
        ValueError: if ``seed_dirs`` cannot be parsed into a dataset label.
            This is raised before any computation starts.
    """
    import os  # local import: only needed for the output directory handling

    # Resolve the output label first so that a malformed seed_dirs fails
    # immediately instead of after the (expensive) computation.
    dataset_label = _dataset_label(seed_dirs)

    seed_numbers = df_seeds[df_seeds['set'] != 'test']['seed_number'].values[:n_seeds]

    types_start_day = ['roll_var_npeople']#, 'roll_var', 'roll_var_seq']

    # Method names as understood by plot_hyb.main_f ...
    methods = ['last value','rolling mean last value',
               'expanding mean last value','biexponential decay', 

            'median beta', 'regression (day)',

            'median beta;\nshifted forecast',
            'regression (day);\nshifted forecast',
            'regression (day);\nincremental learning',
            'regression (day, SEIR, previous I)','lstm (day, E, previous I)'
              ]

    # ... and the file-name-safe label of each method, in the same order.
    new_labels = ['last_value', 'rolling_mean_last_value',
            'expanding_mean_last_value','biexponential_decay', 

            'median_beta', 'regression_day',

            'median_beta_shifted_forecast',
            'regression_day_shifted_forecast',
            'regression_day_incremental_learning',
            'regression_day_SEIR_previous_I','lstm_day_E_previous_I'
                 ]

    for type_start_day in types_start_day:

        for beta_pred,new_label in zip(methods[idx_s:idx_e], 
                                       new_labels[idx_s:idx_e]):

            # Pick the model/parameter file the method needs (if any).
            # The path strings are kept exactly as before, because they are
            # handed to plot_hyb.main_f as-is.
            if 'median' in beta_pred:
                model_path = f'{seed_dirs}/median_beta.csv'
            elif 'regression (day)' in beta_pred: 
                model_path = f'{seed_dirs}/regression_day_for_seir.joblib'
            elif '(day, SEIR, previous I)' in beta_pred:
                model_path = f'{seed_dirs}/regression_day_SEIR_for_seir.joblib'
            elif 'lstm' in beta_pred:
                model_path = f'{seed_dirs}/lstm_day_E_prev_I_for_seir.keras'    
            else:
                model_path=''
            print(model_path)
            all_rmse_I, all_rmse_Beta, all_peak, \
                execution_time, start_days = plot_hyb.main_f(I_prediction_method='seir', 
                                    stochastic=False, count_stoch_line=0, 
                                    beta_prediction_method=beta_pred, 
                                    type_start_day=type_start_day, 
                                    seed_numbers=seed_numbers, 
                                    show_fig_flag=show_fig_flag,
                                    seed_dirs=seed_dirs, 
                                    sigma=0.1, gamma=0.08, 
                                    ax=None, model_path=model_path)
            # creating a dataframe for peaks
            all_peak = pd.DataFrame(all_peak, columns=['actual_peak_I', 'predicted_peak_I', 
                                                       'actual_peak_day', 'predicted_peak_day'])
            # creating a dataframe for RMSE, prediction time and start day
            rmse_df = pd.DataFrame({
                'rmse_I': all_rmse_I,
                'rmse_Beta': all_rmse_Beta,
                'time_predict': execution_time,
                type_start_day: start_days})

            # merging dataframes side by side
            results = pd.concat([rmse_df, all_peak], axis=1)

            # Make sure the target folder exists so the computed results are
            # not lost to a missing-directory error at write time.
            out_dir = f'results/{dataset_label}/{type_start_day}'
            os.makedirs(out_dir, exist_ok=True)
            results.to_csv(f'{out_dir}/{new_label}_results.csv', 
                           index=False)

In [33]:
# Run only the method at index 4 ('median beta') on the r_100000 data set.
apply_methods(
    seed_dirs='initial_data/initial_data_r_100000/',
    idx_s=4,
    idx_e=5,
    n_seeds=1201,
    show_fig_flag=False,
)

initial_data/initial_data_r_100000//median_beta.csv
median beta initial_data/initial_data_r_100000//median_beta.csv
