# Cross validation: Facebook's Prophet
---

Facebook Prophet with New Year holiday.

This notebook conducts cross validation of the method using a rolling forecast origin method.


**The notebook outputs:**
* MASE, RMSE and sMAPE at 7 day intervals from 7 to 84 days and also a 365 day forecast.
* Coverage of the 80% and 95% prediction intervals at horizons between 7 and 84 days and also 365 days.

These are saved into the folder `results/model_selection/stage1/`

---

# Imports

In [1]:
import pandas as pd
import numpy as np

#error measures
from forecast_tools.metrics import (mean_absolute_scaled_error, 
                                    root_mean_squared_error,
                                    symmetric_mean_absolute_percentage_error)

#models
from fbprophet import Prophet

import warnings
warnings.filterwarnings('ignore')

In [2]:
#to select exceptionally busy days as covariates.
from amb_forecast.feature_engineering import (regular_busy_calender_days)

In [3]:
#custom ensemble class
from amb_forecast.ensemble import (Ensemble, UnweightedVote)

# Data Input

The constants `TOP_LEVEL`, `STAGE`, `REGION`,`TRUST` and `METHOD` are used to control data selection and the directory for outputting results.  

> Output file is `f'{TOP_LEVEL}/{STAGE}/{REGION}-{METHOD}_{metric}.csv'`, where `metric` is one of smape, rmse, mase, coverage_80 or coverage_95. Note: `REGION` is also used to select the correct column from the input dataframe.

In [4]:
#root directory for the result files
TOP_LEVEL = '../../../results/model_selection'
#sub-directory of TOP_LEVEL that the result files are written to
STAGE = 'temp'
#column of the input data to model; also part of the output file names
REGION = 'Trust'
#short label for the forecasting method; also part of the output file names
METHOD = 'fbp'

#name of the input CSV file
FILE_NAME = 'Daily_Responses_5_Years_2019_full.csv'

#split training and test data.
TEST_SPLIT_DATE = '2019-01-01'

#second subdivide: train and val
VAL_SPLIT_DATE = '2017-07-01'

#discard data after 2020 due to coronavirus
#this is the subject of a separate study.
DISCARD_DATE = '2020-01-01'

In [5]:
#relative path of the input CSV file (built from FILE_NAME)
path = f'../../../data/{FILE_NAME}'

In [6]:
def pre_process_daily_data(path, index_col, by_col, 
                           values, dayfirst=False):
    '''
    Load long format daily data and reshape it to wide format.

    The CSV is read with `index_col` as a parsed date index.  All column
    labels and the index name are lower-cased, then the table is pivoted
    so that each distinct entry of `by_col` becomes its own column holding
    the summed `values` for every date.

    Parameters:
    -------
    path - str or file-like - CSV source passed to pandas.read_csv
    index_col - str - name of the date column used as the index
    by_col - str - name of the column whose entries become columns
    values - str - name of the column holding the observations
    dayfirst - bool, optional (default=False) - passed to pandas.read_csv

    Returns:
    --------
    pandas.DataFrame - wide table, one row per date, index frequency 'D'
    '''
    raw = pd.read_csv(path,
                      index_col=index_col,
                      parse_dates=True,
                      dayfirst=dayfirst)

    #lower-case every label so the pivot below can use lower-case names
    raw.columns = [str.lower(label) for label in raw.columns]
    raw.index.name = str(raw.index.name).lower()

    wide = pd.pivot_table(raw,
                          values=values.lower(),
                          index=[index_col.lower()],
                          columns=[by_col.lower()],
                          aggfunc=np.sum)

    #tag the date index as daily
    wide.index.freq = 'D'
    return wide

In [7]:
#read the long format file and pivot to one column per 'ORA' entry
clean = pre_process_daily_data(path, 'Actual_dt', 'ORA', 'Actual_Value', 
                               dayfirst=False)
clean.head()

ora,BNSSG,Cornwall,Devon,Dorset,Gloucestershire,OOA,Somerset,Trust,Wiltshire
actual_dt,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2013-12-30,415.0,220.0,502.0,336.0,129.0,,183.0,2042.0,255.0
2013-12-31,420.0,236.0,468.0,302.0,128.0,,180.0,1996.0,260.0
2014-01-01,549.0,341.0,566.0,392.0,157.0,,213.0,2570.0,351.0
2014-01-02,450.0,218.0,499.0,301.0,115.0,,167.0,2013.0,258.0
2014-01-03,419.0,229.0,503.0,304.0,135.0,,195.0,2056.0,269.0


## Train Test Split

In [8]:
def ts_train_test_split(data, split_date):
    '''
    Divide a time series into the part before `split_date` and the
    part on or after it.
    
    Parameters:
    -------
    data - pd.DataFrame - time series data.  Index expected as datetimeindex
    split_date - the first date that belongs to the test set
    
    Returns:
    --------
    tuple (len=2) 
    0. pandas.DataFrame - training dataset (index < split_date)
    1. pandas.DataFrame - test dataset (index >= split_date)
    '''
    in_test = data.index >= split_date
    in_train = data.index < split_date
    return data.loc[in_train], data.loc[in_test]

In [9]:
#hold back everything from TEST_SPLIT_DATE onwards as the test set
train, test = ts_train_test_split(clean, split_date=TEST_SPLIT_DATE)

#exclude data from DISCARD_DATE (2020) onwards due to coronavirus.
test, discard = ts_train_test_split(test, split_date=DISCARD_DATE)

#remaining training data is split again into train and validation
train, val = ts_train_test_split(train, split_date=VAL_SPLIT_DATE)


In [10]:
#amount of training data
train.shape

(1279, 9)

In [11]:
#amount of validation data
val.shape

(549, 9)

# New Year's Day

In [12]:
#NOTE(review): presumably the calendar days that are regularly in the top 1%
#of demand in the training data - confirm against amb_forecast.feature_engineering
exceptional = regular_busy_calender_days(train[REGION], quantile=0.99)

In [13]:
#holiday table handed to Prophet (via FbProphetWrapper's `holidays` argument).
#20 year-start dates beginning at the first entry of `exceptional`, so the
#holiday is defined for the training years and for future years.
new_year = pd.DataFrame({
                         'holiday': 'new_year',
                         'ds': pd.date_range(start=exceptional[0], 
                                             periods=20, 
                                             freq='YS')
                        })

In [14]:
new_year.head()

Unnamed: 0,holiday,ds
0,new_year,2013-01-01
1,new_year,2014-01-01
2,new_year,2015-01-01
3,new_year,2016-01-01
4,new_year,2017-01-01


# Wrapper class for Prophet

Adapter/wrapper class that lets Prophet be used within the `Ensemble` class and work with cross validation.

In [15]:
class FbProphetWrapper(object):
    '''
    Facade for FBProphet object - so that it can be
    used within Ensemble with methods from other packages

    The wrapper converts a plain vector of observations into the
    two column ('ds', 'y') dataframe that is passed to `Prophet.fit`,
    and exposes `fit`, `predict`, `fittedvalues` and `resid`.

    Params:
    ------
    training_index: pd.DatetimeIndex
        dates of the training observations.  If more observations than
        dates are passed to `fit` the index is extended from its first date.

    holidays: pd.DataFrame, optional (default=None)
        passed to Prophet as `holidays`

    interval_width: float, optional (default=0.8)
        passed to Prophet as `interval_width`.  This fixes the width of the
        prediction intervals returned by `predict`.

    mcmc_samples: int, optional (default=0)
        passed to Prophet as `mcmc_samples`

    changepoint_prior_scale: float, optional (default=0.05)
        passed to Prophet as `changepoint_prior_scale`
    '''
    def __init__(self, training_index, holidays=None, interval_width=0.8,
                 mcmc_samples=0, changepoint_prior_scale=0.05):
        self._training_index = training_index
        self._holidays = holidays
        self._interval_width = interval_width
        self._mcmc_samples = mcmc_samples
        self._cp_prior_scale = changepoint_prior_scale

    @staticmethod
    def _as_vector(train):
        '''
        Return the observations as a 1D numpy array.  For 2D input
        the first column is used.
        '''
        values = np.asarray(train)
        if values.ndim > 1:
            values = values[:, 0]
        return values

    def _get_resids(self):
        '''
        Training observations minus the in-sample predictions.
        '''
        fitted = self._forecast['yhat'].iloc[:self._t]
        #work on raw values: subtracting a date indexed pandas series from the
        #range indexed 'yhat' would align on labels and produce only NaN
        return self._as_vector(self._train) - fitted

    def _get_preds(self):
        '''
        In-sample predictions for the training observations.
        '''
        #slice by the training length rather than [:-h] so that h = 0 works
        return self._forecast['yhat'].iloc[:self._t].to_numpy()

    def fit(self, train):
        '''
        Create a new Prophet model and fit it to the training data.

        Params:
        ------
        train: array-like
            vector of training observations
        '''
        self._model = Prophet(holidays=self._holidays, 
                              interval_width=self._interval_width,
                              mcmc_samples=self._mcmc_samples,
                              changepoint_prior_scale=self._cp_prior_scale,
                              daily_seasonality=False)
        
        self._model.fit(self._pre_process_training(train))
        self._t = len(train)
        self._train = train
        #populate the stored forecast so that fittedvalues and resid
        #are available immediately after fitting
        self.predict(len(train))

    def _pre_process_training(self, train):
        '''
        Build the ('ds', 'y') dataframe that is passed to Prophet.

        Params:
        ------
        train: array-like
            training observations (first column is used if 2D)

        Returns:
        -------
        pd.DataFrame
            columns 'ds' (dates) and 'y' (observations)
        '''
        y_train = self._as_vector(train)
        index = self._training_index
        n_obs = len(y_train)

        if n_obs > len(index):
            #extend the training index. Fall back to the inferred frequency
            #when the index carries no explicit freq
            freq = index.freq if index.freq is not None else index.inferred_freq
            index = pd.date_range(start=index[0], periods=n_obs, freq=freq)
            self._training_index = index
        elif n_obs < len(index):
            #fewer observations than dates: use the leading dates only
            index = index[:n_obs]

        return pd.DataFrame({'ds': index, 'y': y_train})

    def predict(self, h, return_conf_int=False, alpha=0.2):
        '''
        forecast h steps ahead.
        
        Params:
        ------
        h: int
            h-step forecast.  If an array, series or dataframe is passed
            its length is used.
        
        return_conf_int: bool, optional (default=False)
            also return the prediction intervals
        
        alpha: float, optional (default=0.2)
            Not used.  Accepted for compatibility with other models.  The
            interval width is fixed by `interval_width` at construction.
                       
        Returns:
        -------
        np.array
            If return_conf_int = False returns preds only
            
        np.array, np.array
            If return_conf_int = True returns tuple of preds, pred_ints
            where pred_ints has one (lower, upper) row per step
        '''
        if isinstance(h, (np.ndarray, pd.DataFrame, pd.Series)):
            h = len(h)
        
        self._h = h
        future = self._model.make_future_dataframe(periods=h)
        self._forecast = self._model.predict(future)

        #explicit start position: [-h:] would return every row when h = 0
        start = len(self._forecast) - h
        preds = self._forecast['yhat'].iloc[start:].to_numpy()

        if return_conf_int:
            bounds = self._forecast[['yhat_lower', 'yhat_upper']]
            return preds, bounds.iloc[start:].to_numpy()
        return preds

    fittedvalues = property(_get_preds)
    resid = property(_get_resids)

# Example of fitting the model.
1. FBProphet with new years day holiday.

The code below demonstrates how to fit the model.

In [17]:
#Prophet wrapper using the training dates and the New Year holiday table
model_1 = FbProphetWrapper(training_index=train.index, 
                           holidays=new_year)

In [18]:
#ensemble with a single member ('fbp') and the UnweightedVote meta learner
estimators = {'fbp': model_1}
ens = Ensemble(estimators, UnweightedVote())

In [19]:
#fit the ensemble to the training series of the chosen region
ens.fit(train[REGION])

In [20]:
#point forecasts for the 7 days after the end of the training data
H = 7
ens_preds = ens.predict(horizon=H)

In [21]:
#view predictions
ens_preds

array([2302.95867484, 2277.13777755, 2175.05797737, 2098.10556466,
       2095.54782887, 2117.64655788, 2169.00810598])

In [22]:
#point forecasts plus prediction intervals (one [lower, upper] row per day)
ens_preds, pi = ens.predict(horizon=H, return_conf_int=True)

In [23]:
ens_preds

array([2302.95867484, 2277.13777755, 2175.05797737, 2098.10556466,
       2095.54782887, 2117.64655788, 2169.00810598])

In [24]:
pi

array([[2205.28672538, 2406.02440461],
       [2173.04947732, 2378.19927565],
       [2072.19469448, 2275.5084746 ],
       [1988.88965749, 2203.49027235],
       [1985.19651326, 2199.09451664],
       [2013.87671931, 2217.93287629],
       [2069.86554857, 2262.46745335]])

# Cross validation functions

`time_series_cv` implements rolling forecast origin cross validation for time series.  
It does not calculate forecast error, but instead returns the predictions, pred intervals and actuals in an array that can be passed to any forecast error function. (this is for efficiency and allows additional metrics to be calculated if needed).

In [25]:
def time_series_cv(model, train, val, horizons, alpha=0.2, step=1):
    '''
    Time series cross validation across multiple horizons for a single model.

    Incrementally adds additional training data to the model and tests
    across a provided list of forecast horizons. Note that function tests a
    model only against complete validation sets.  E.g. if horizon = 15 and 
    len(val) = 12 then no testing is done.  In the case of multiple horizons
    e.g. [7, 14, 28] then the function will use the maximum forecast horizon
    to calculate the number of iterations i.e if len(val) = 365 and step = 1
    then no. iterations = len(val) - max(horizon) + 1 = 365 - 28 + 1 = 338.
    
    Parameters:
    --------
    model - forecasting model.  Must provide fit(train) and 
        predict(horizon=, return_conf_int=, alpha=) returning (preds, intervals)

    train - np.array or pd.Series - vector of training data

    val - np.array or pd.Series - vector of validation data

    horizons - list of ints, forecast horizon e.g. [7, 14, 28] days
    
    alpha - float, optional (default=0.2)
        1 - alpha prediction interval specification (passed to model.predict)

    step -- int, optional (default=1)
            step taken in cross validation 
            e.g. 1 in next cross validation training data includes next point 
            from the validation set.
            e.g. 7 in the next cross validation training data includes next 7 points
            
    Returns:
    -------
    list, list, list
        - cv_preds, cv_actuals, cv_intervals
        Each list has one entry per split.  Each entry is itself a list with
        one item per horizon: the first h predictions, the matching h
        validation observations and the first h prediction interval rows.
    '''
    #the model is asked for the longest horizon once per split;
    #shorter horizons are prefixes of that forecast
    max_h = max(horizons)
    
    #positional slicing that works for pandas objects and numpy arrays alike
    val_at = val.iloc if hasattr(val, 'iloc') else val
    
    #point forecasts, ground truth observations, prediction intervals
    cv_preds, cv_actuals, cv_pis = [], [], []

    print('split => ', end="")
    origins = range(0, len(val) - max_h + 1, step)
    for split, i in enumerate(origins, start=1):
        print(f'{split}, ', end="")
                
        #training data grows by the validation points before the origin
        train_cv = np.concatenate([train, val_at[:i]], axis=0)
        model.fit(train_cv)
        
        preds, pis = model.predict(horizon=max_h, 
                                   return_conf_int=True,
                                   alpha=alpha)
        
        #sub horizon results for this split
        cv_preds.append([preds[:h] for h in horizons])
        cv_actuals.append([val_at[i:i+h] for h in horizons])
        cv_pis.append([pis[:h] for h in horizons])
        
    print('done.\n')        
    return cv_preds, cv_actuals, cv_pis

## Custom functions for calculating CV scores for point predictions and coverage.

These functions have been written to work with the output of `time_series_cv`

In [26]:
def split_cv_error(cv_preds, cv_test, error_func):
    '''
    Forecast error for each horizon within a single cross validation split
    
    Params:
    -----
    cv_preds: sequence of np.array
        Predictions for the split, one array per forecast horizon
    
    cv_test: sequence of array-like
        actual ground truth observations, one per forecast horizon
        
    error_func: object
        function with signature (y_true, y_preds)
        
    Returns:
    -------
        np.ndarray
            one error per forecast horizon
    '''
    return np.array([error_func(cv_test[k], cv_preds[k])
                     for k in range(len(cv_preds))])


def forecast_errors_cv(cv_preds, cv_test, error_func):
    '''
    Forecast errors by cross validation split and forecast horizon
    
    Works directly on the nested lists returned by `time_series_cv`.
    The inputs are deliberately not converted with np.array: the items
    have a different length for each horizon and recent versions of
    numpy refuse to build an array from such ragged input.
    
    Params:
    ------
    cv_preds: list
        One entry per split.  Each entry holds one array of predictions
        per forecast horizon.
        
    cv_test: list
        One entry per split.  Each entry holds one vector of actual
        observations per forecast horizon.
        
    error_func: object
        function with signature (y_true, y_preds)
        
    Returns:
    -------
    np.ndarray
        errors with shape (n_splits, n_horizons)
    '''
    return np.array([split_cv_error(cv_preds[s], cv_test[s], error_func)
                     for s in range(len(cv_test))])

def split_coverage(cv_test, cv_intervals):
    '''
    Prediction interval coverage for each horizon within a single split
    
    Params:
    ------
    cv_test: sequence of array-like
        actual observations, one vector per forecast horizon
        
    cv_intervals: sequence of np.array
        prediction intervals, one array per forecast horizon with a
        (lower, upper) row for every forecast step
        
    Returns:
    -------
    np.ndarray
        proportion of observations strictly inside their interval,
        one value per forecast horizon
    '''
    coverages = []
    for k in range(len(cv_test)):
        observed = np.asarray(cv_test[k])
        bounds = np.asarray(cv_intervals[k]).T
        lower, upper = bounds[0], bounds[1]
        
        #observations equal to a bound are counted as not covered
        inside = (observed > lower) & (observed < upper)
        coverages.append(np.count_nonzero(inside) / len(observed))
        
    return np.array(coverages)
    
    
def prediction_int_coverage_cv(cv_test, cv_intervals):
    '''
    Prediction interval coverage by cross validation split and horizon
    
    Works directly on the nested lists returned by `time_series_cv`.
    The inputs are deliberately not converted with np.array: the items
    have a different length for each horizon and recent versions of
    numpy refuse to build an array from such ragged input.
    
    Params:
    ------
    cv_test: list
        One entry per split.  Each entry holds one vector of actual
        observations per forecast horizon.
        
    cv_intervals: list
        One entry per split.  Each entry holds one array of prediction
        intervals per forecast horizon.
        
    Returns:
    -------
    np.ndarray
        coverage with shape (n_splits, n_horizons)
    '''
    return np.array([split_coverage(cv_test[s], cv_intervals[s])
                     for s in range(len(cv_test))])

In [27]:
def split_cv_error_scaled(cv_preds, cv_test, y_train, period=7):
    '''
    Mean absolute scaled error for each horizon within a single split
    
    Params:
    ------
    cv_preds: sequence of np.array
        Predictions for the split, one array per forecast horizon
        
    cv_test: sequence of array-like
        actual observations, one vector per forecast horizon
        
    y_train: array-like
        training data passed to mean_absolute_scaled_error for scaling
        
    period: int, optional (default=7)
        passed to mean_absolute_scaled_error as `period`
        
    Returns:
    -------
    np.ndarray
        one scaled error per forecast horizon
    '''
    return np.array([mean_absolute_scaled_error(cv_test[k], cv_preds[k], 
                                                y_train, period=period)
                     for k in range(len(cv_preds))])


def forecast_errors_cv_scaled(cv_preds, cv_test, y_train, period=7):
    '''
    Mean absolute scaled error by cross validation split and horizon
    
    Works directly on the nested lists returned by `time_series_cv`.
    The inputs are deliberately not converted with np.array: the items
    have a different length for each horizon and recent versions of
    numpy refuse to build an array from such ragged input.
    
    Params:
    ------
    cv_preds: list
        One entry per split.  Each entry holds one array of predictions
        per forecast horizon.
        
    cv_test: list
        One entry per split.  Each entry holds one vector of actual
        observations per forecast horizon.
        
    y_train: array-like
        training data passed to mean_absolute_scaled_error for scaling
        
    period: int, optional (default=7)
        passed to mean_absolute_scaled_error as `period`
        
    Returns:
    -------
    np.ndarray
        scaled errors with shape (n_splits, n_horizons)
    '''
    return np.array([split_cv_error_scaled(cv_preds[s], cv_test[s], 
                                           y_train, period=period)
                     for s in range(len(cv_test))])

In [30]:
def get_ensemble(meta_learner=None, fb_interval=0.8):
    '''
    Create ensemble model containing a single Prophet wrapper.
    
    Params:
    ------
    meta_learner: object, optional (default=None)
        meta learner passed to Ensemble.  If None an UnweightedVote
        is created.
        
    fb_interval: float, optional (default=0.8)
        interval width passed to FbProphetWrapper
        
    Returns:
    -------
    Ensemble
    '''
    if meta_learner is None:
        meta_learner = UnweightedVote()
        
    #uses the notebook level `train` and `new_year` objects
    model_1 = FbProphetWrapper(training_index=train.index, 
                               holidays=new_year, 
                               interval_width=fb_interval)
    
    estimators = {'fbp': model_1}
    #pass on the requested meta learner (previously a new UnweightedVote 
    #was always created and the argument was ignored)
    return Ensemble(estimators, meta_learner)
    

# Run cross validation

This is run twice, once each for 80% and 95% prediction intervals.  The second run is required because Prophet's prediction interval width is fixed when the model is created.

In [31]:
#forecast horizons to evaluate
horizons = [7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 365]
#default fb_interval=0.8 i.e. 80% prediction intervals
model = get_ensemble()

#rolling forecast origin CV; the origin moves forward 7 points per split
results = time_series_cv(model, train[REGION], val[REGION], horizons, 
                         alpha=0.2, step=7)

split => 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, done.



# symmetric MAPE results

In [32]:
#unpack the cross validation output and score it with sMAPE
cv_preds, cv_test, cv_intervals = results
cv_errors = forecast_errors_cv(
    cv_preds, cv_test, symmetric_mean_absolute_percentage_error)
#one row per split, one column per horizon
df = pd.DataFrame(cv_errors, columns=horizons)
df.describe()

Unnamed: 0,7,14,21,28,35,42,49,56,63,70,77,84,365
count,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0
mean,2.992764,3.054805,3.077355,3.085396,3.098144,3.115493,3.138307,3.163085,3.191124,3.216202,3.247082,3.294733,4.231241
std,1.453381,1.252905,1.008357,0.817484,0.727439,0.678602,0.630983,0.580087,0.543042,0.526889,0.518761,0.513413,0.710843
min,0.823558,1.216192,1.233591,1.474079,1.760108,1.755597,1.80595,2.047982,2.154263,2.41784,2.4763,2.480856,2.972044
25%,1.805585,2.155122,2.346848,2.504084,2.706935,2.712459,2.782993,2.880657,2.84487,2.874326,2.838934,2.820658,3.810998
50%,2.634136,3.009083,2.94627,3.060609,3.132027,3.191006,3.149871,3.124042,3.176737,3.132216,3.094246,3.162486,4.274855
75%,3.803801,3.706259,3.562549,3.65716,3.651882,3.547876,3.451511,3.592388,3.719088,3.64597,3.650612,3.761396,4.652082
max,7.008097,5.879231,5.047494,4.396623,4.217112,4.225105,4.310605,4.214964,4.095931,4.12137,4.171102,4.192921,5.980493


In [33]:
#write the sMAPE results to file (path is echoed first)
metric = 'smape'
results_file = f'{TOP_LEVEL}/{STAGE}/{REGION}-{METHOD}_{metric}.csv'
print(results_file)
df.to_csv(results_file)

../../../results/model_selection/temp/Trust-fbp_smape.csv


# RMSE results

In [34]:
#score the same cross validation output with RMSE
cv_preds, cv_test, cv_intervals = results
cv_errors = forecast_errors_cv(
    cv_preds, cv_test, root_mean_squared_error)
#one row per split, one column per horizon
df = pd.DataFrame(cv_errors, columns=horizons)
df.describe()

Unnamed: 0,7,14,21,28,35,42,49,56,63,70,77,84,365
count,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0
mean,81.477287,86.065006,88.93257,90.760518,92.251442,93.563846,94.93357,96.35959,97.673905,98.77572,99.894313,101.424357,121.644895
std,45.993262,41.350955,36.300717,32.138012,28.970328,26.26138,23.533562,20.534614,17.567544,15.222167,12.902539,10.740993,16.664464
min,22.028363,40.772142,38.686507,45.753424,50.32689,49.693287,50.908165,57.92057,59.550988,66.307828,67.704705,67.918715,92.63586
25%,54.607258,61.252855,65.851423,69.485072,73.950025,74.086058,76.590836,79.519301,84.839158,88.273722,95.455559,98.022181,112.803903
50%,69.845118,79.166901,81.6635,87.585046,83.961145,82.138745,94.865665,105.240232,103.843003,107.512793,105.805463,104.099944,122.390253
75%,93.257754,96.850014,94.645084,106.146969,109.704715,113.304334,115.900079,115.098938,112.113194,110.231165,108.443285,108.361383,130.932972
max,258.759134,214.484114,179.703312,161.309311,147.268953,136.451304,127.909123,121.840427,117.505871,113.562275,115.017211,113.273749,164.62806


In [35]:
#write the RMSE results to file (path is echoed first)
metric = 'rmse'
results_file = f'{TOP_LEVEL}/{STAGE}/{REGION}-{METHOD}_{metric}.csv'
print(results_file)
df.to_csv(results_file)

../../../results/model_selection/temp/Trust-fbp_rmse.csv


# Mean Absolute Scaled Error (MASE)

Scaled by the in-sample one-step Seasonal Naive forecast error (period = 7 days).

In [36]:
#MASE is scaled using the original training series
cv_errors = forecast_errors_cv_scaled(cv_preds, cv_test, train[REGION])
#one row per split, one column per horizon
df = pd.DataFrame(cv_errors, columns=horizons)
df.describe()

Unnamed: 0,7,14,21,28,35,42,49,56,63,70,77,84,365
count,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0
mean,0.807967,0.824645,0.830861,0.833448,0.837451,0.842769,0.849555,0.856983,0.865372,0.873086,0.882331,0.896192,1.148302
std,0.423243,0.360452,0.288207,0.231103,0.201529,0.182511,0.164352,0.145062,0.130012,0.122297,0.116913,0.113685,0.196648
min,0.234997,0.344559,0.34823,0.416005,0.487381,0.483703,0.499805,0.562564,0.589535,0.661236,0.675048,0.67543,0.80965
25%,0.488794,0.590129,0.632499,0.674166,0.72587,0.726999,0.729204,0.766759,0.80545,0.809345,0.806517,0.80391,1.034837
50%,0.703273,0.786611,0.754741,0.789199,0.801155,0.848755,0.852255,0.851214,0.86042,0.846333,0.860574,0.892673,1.152226
75%,0.977523,0.973144,0.926313,1.009757,0.992046,0.989316,0.982963,0.974243,0.963264,0.945713,0.955735,0.988183,1.256168
max,2.172972,1.782926,1.439076,1.291793,1.167713,1.116409,1.137327,1.113348,1.080469,1.086529,1.104837,1.11275,1.655542


In [37]:
#write the MASE results to file (path is echoed first)
metric = 'mase'
results_file = f'{TOP_LEVEL}/{STAGE}/{REGION}-{METHOD}_{metric}.csv'
print(results_file)
df.to_csv(results_file)

../../../results/model_selection/temp/Trust-fbp_mase.csv


# 80% Prediction Interval Coverage

In [38]:
#coverage of the 80% prediction intervals from the first CV run
cv_coverage = prediction_int_coverage_cv(cv_test, cv_intervals)
#one row per split, one column per horizon
df = pd.DataFrame(cv_coverage, columns=horizons)
df.describe()

Unnamed: 0,7,14,21,28,35,42,49,56,63,70,77,84,365
count,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0
mean,0.777778,0.769841,0.763668,0.763228,0.765079,0.763668,0.764928,0.761905,0.760141,0.757672,0.752766,0.745591,0.669305
std,0.203321,0.170802,0.140693,0.117264,0.107002,0.102073,0.098424,0.095138,0.09259,0.091429,0.092078,0.091391,0.100698
min,0.285714,0.285714,0.380952,0.5,0.542857,0.547619,0.571429,0.589286,0.587302,0.6,0.597403,0.583333,0.430137
25%,0.642857,0.678571,0.666667,0.714286,0.714286,0.72619,0.72449,0.6875,0.666667,0.685714,0.668831,0.666667,0.60137
50%,0.857143,0.785714,0.761905,0.75,0.771429,0.785714,0.795918,0.785714,0.761905,0.785714,0.779221,0.77381,0.665753
75%,0.928571,0.892857,0.857143,0.857143,0.842857,0.809524,0.816327,0.821429,0.833333,0.828571,0.831169,0.815476,0.720548
max,1.0,1.0,0.952381,0.964286,0.914286,0.928571,0.938776,0.910714,0.920635,0.871429,0.883117,0.892857,0.841096


In [39]:
#write the 80% coverage results to file (path is echoed first)
metric = 'coverage_80'
results_file = f'{TOP_LEVEL}/{STAGE}/{REGION}-{METHOD}_{metric}.csv'
print(results_file)
df.to_csv(results_file)

../../../results/model_selection/temp/Trust-fbp_coverage_80.csv


# 95% Prediction Interval Coverage

Rerun analysis and obtain 95% Prediction intervals

In [40]:
#same forecast horizons as the 80% run
horizons = [7, 14, 21, 28, 35, 42, 49, 56, 63, 70, 77, 84, 365]
#a new ensemble is needed: the interval width is set when the wrapper is created
model = get_ensemble(fb_interval=0.95)

#rolling forecast origin CV; the origin moves forward 7 points per split
results = time_series_cv(model, train[REGION], val[REGION], horizons, 
                         alpha=0.05, step=7)

split => 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, done.



In [41]:
#coverage of the 95% prediction intervals from the second CV run
cv_preds, cv_test, cv_intervals = results
cv_coverage = prediction_int_coverage_cv(cv_test, cv_intervals)
#one row per split, one column per horizon
df = pd.DataFrame(cv_coverage, columns=horizons)
df.describe()

Unnamed: 0,7,14,21,28,35,42,49,56,63,70,77,84,365
count,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0,27.0
mean,0.915344,0.912698,0.91358,0.914021,0.913228,0.910935,0.908541,0.906746,0.904762,0.902116,0.900914,0.898589,0.88067
std,0.138727,0.114377,0.092485,0.078158,0.07153,0.067857,0.064131,0.059294,0.056206,0.054428,0.053078,0.049347,0.047873
min,0.428571,0.571429,0.714286,0.75,0.771429,0.761905,0.795918,0.803571,0.793651,0.785714,0.792208,0.797619,0.742466
25%,0.857143,0.892857,0.880952,0.839286,0.857143,0.880952,0.887755,0.857143,0.84127,0.857143,0.863636,0.857143,0.849315
50%,1.0,0.928571,0.952381,0.928571,0.942857,0.928571,0.897959,0.910714,0.920635,0.914286,0.922078,0.916667,0.882192
75%,1.0,1.0,1.0,0.964286,0.971429,0.952381,0.959184,0.955357,0.944444,0.928571,0.935065,0.940476,0.916438
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.982143,0.984127,0.971429,0.974026,0.97619,0.953425


In [42]:
#write the 95% coverage results to file (path is echoed first)
metric = 'coverage_95'
results_file = f'{TOP_LEVEL}/{STAGE}/{REGION}-{METHOD}_{metric}.csv'
print(results_file)
df.to_csv(results_file)

../../../results/model_selection/temp/Trust-fbp_coverage_95.csv


# End