In [1]:
import pandas as pd
import numpy as np
from causalimpact import CausalImpact

In [2]:
def custom_sort_key(s):
    """Return the second ``'_'``-separated field of ``s`` as an integer.

    For example ``'series_12'`` maps to ``12``, so using this function as a
    ``sorted`` key orders such labels numerically instead of alphabetically.

    Raises:
        IndexError: if ``s`` contains no underscore.
        ValueError: if the second field is not an integer literal.
    """
    return int(s.split('_')[1])

In [4]:
# Load one simulated dataset and display it to inspect the raw layout.
dataset_name = 'sim_10_60_l_he'
dataset_type = 'sim'
csv_path = "../datasets/text_data/sim/" + dataset_name + ".csv"
data_row = pd.read_csv(csv_path)
data_row

FileNotFoundError: [Errno 2] No such file or directory: '../datasets/text_data/sim/sim_10_60_l_he.csv'

In [65]:
def _load_causalimpact_frames(dataset_name, dataset_type, forecast_horizon):
    """Load one dataset and return ``(data_row, data_row_A, data_row_B)``.

    ``data_row`` has one column per series over the whole time span and a
    0-based integer index. ``data_row_A`` (test window) and ``data_row_B``
    (training window) have one row per series, in the same order as the
    columns of ``data_row``.
    """
    data_prefix = "../datasets/text_data/" + dataset_type + "/" + dataset_name

    if dataset_type == "sim":
        # Long-format files with 'time', 'series_id' and 'value' columns.
        test_long = pd.read_csv(data_prefix + "_test_actual.csv")
        # The training part is needed for the MASE scaling term.
        train_long = pd.read_csv(data_prefix + "_train.csv")
        test_wide = test_long.pivot(index='time', columns='series_id', values='value')
        train_wide = train_long.pivot(index='time', columns='series_id', values='value')
        data_row = pd.concat([train_wide, test_wide], ignore_index=True)
        return data_row, test_wide.T, train_wide.T

    # calls911: a single wide file. The first data row is skipped and the
    # first column is not a series (the original code addressed it as 'date'),
    # so it is excluded from the modelled data as well as from the splits.
    raw = pd.read_csv(data_prefix + '.csv')[1:]
    series_only = raw.iloc[:, 1:]
    split = len(raw.index) - forecast_horizon
    # Reset to a 0-based index so the integer pre/post periods and the
    # label-based slice of the predictions refer to the intended rows.
    data_row = series_only.reset_index(drop=True)
    return data_row, series_only.iloc[split:].T, series_only.iloc[:split].T


def causalimpact_eval(dataset_name,dataset_type,forecast_horizon):
    """Forecast every series with CausalImpact and report SMAPE and MASE.

    For each series a ``CausalImpact`` model is fitted on a frame whose first
    column is that series and whose remaining columns are all other series.
    The last ``forecast_horizon`` rows form the post-period, the rows before
    them the pre-period; the ``'preds'`` column of ``ci.inferences`` over the
    post-period is taken as the forecast.

    Files written (relative to the notebook's working directory):
      * ``../results/benchmarks/predicted/<dataset_name>_causalimpact.txt``
        - one row of predictions per series;
      * ``../results/benchmarks/errors/all_smape_errors_<dataset_name>_causalimpact.txt``
      * ``../results/benchmarks/errors/all_mase_errors_<dataset_name>_causalimpact.txt``
        - one error value per series;
      * ``../results/benchmarks/errors/mean_median_<dataset_name>_causalimpact.txt``
        - the mean SMAPE and mean MASE, one per line.

    The two means are also printed.

    Args:
        dataset_name: base name of the dataset files.
        dataset_type: ``"sim"`` or ``"calls911"``; selects directory and layout.
        forecast_horizon: number of trailing time steps to forecast; also used
            as the lag of the naive difference that scales the MASE.

    Raises:
        ValueError: if ``dataset_type`` is not supported or
            ``forecast_horizon`` is smaller than 1.
    """
    # Fail early: previously an unknown type only surfaced later as a
    # NameError on ``data_row``.
    if dataset_type not in ("sim", "calls911"):
        raise ValueError(
            "dataset_type must be 'sim' or 'calls911', got %r" % (dataset_type,))
    if forecast_horizon < 1:
        raise ValueError(
            "forecast_horizon must be at least 1, got %r" % (forecast_horizon,))

    data_row, data_row_A, data_row_B = _load_causalimpact_frames(
        dataset_name, dataset_type, forecast_horizon)

    errors_directory = '../results/benchmarks/errors/'
    run_tag = dataset_name + '_causalimpact'
    errors_file_full_name_mean_median = errors_directory + 'mean_median_' + run_tag + '.txt'
    SMAPE_file_full_name_all_errors = errors_directory + 'all_smape_errors_' + run_tag
    MASE_file_full_name_all_errors = errors_directory + 'all_mase_errors_' + run_tag
    output = '../results/benchmarks/predicted/' + run_tag + '.txt'

    # Pre/post periods are loop-invariant, so compute them once.
    n_obs = len(data_row.index)
    split = n_obs - forecast_horizon
    pre_period = [0, split - 1]
    post_period = [split, n_obs - 1]

    y_pred_list = []
    for target in data_row.columns:
        others = [col for col in data_row.columns if col != target]
        ci = CausalImpact(data_row.loc[:, [target] + others],
                          pre_period, post_period)
        # Label-based slice: both end points are included.
        y_pred_list.append(ci.inferences.loc[split:n_obs - 1, 'preds'])
    np.savetxt(output, pd.DataFrame(y_pred_list), delimiter = ',')

    # Metrics are computed from the saved file so they match what is on disk.
    y_pred_df = pd.read_csv(output, header=None)
    num_time_series = len(y_pred_df.index)
    actual = np.array(data_row_A)

    # SMAPE
    time_series_wise_SMAPE = 2 * np.abs(y_pred_df - actual) /\
        (np.abs(y_pred_df) + np.abs(actual))
    SMAPEPerSeries = np.mean(time_series_wise_SMAPE, axis=1)
    mean_SMAPE = np.mean(SMAPEPerSeries)
    mean_SMAPE_str = f"mean_SMAPE:{mean_SMAPE}"
    print(mean_SMAPE_str)
    np.savetxt(SMAPE_file_full_name_all_errors+'.txt', SMAPEPerSeries, delimiter=",", fmt='%f')

    # MASE: test-window absolute error scaled by the mean absolute
    # lag-``forecast_horizon`` difference of the training series.
    train = data_row_B.to_numpy()
    mase_vector = []
    for i in range(num_time_series):
        scale = np.mean(np.abs(train[i, forecast_horizon:]
                               - train[i, :-forecast_horizon]))
        abs_error = np.abs(actual[i] - np.array(y_pred_df.iloc[i]))
        mase_vector.append(np.mean(abs_error / scale))

    mean_MASE = np.mean(mase_vector)
    mean_MASE_str = f"mean_MASE:{mean_MASE}"
    print(mean_MASE_str)

    np.savetxt(MASE_file_full_name_all_errors+'.txt', mase_vector, delimiter=",", fmt='%f')

    # Write both summary lines in one pass, newline-separated. The previous
    # write-then-append sequence glued them together on a single line.
    with open(errors_file_full_name_mean_median, 'w') as f:
        f.write('\n'.join([mean_SMAPE_str, mean_MASE_str]))


In [None]:
# Evaluate CausalImpact on the calls911 benchmark with a 7-step horizon.
dataset_name, dataset_type, forecast_horizon = 'calls911_benchmarks', 'calls911', 7
causalimpact_eval(dataset_name, dataset_type, forecast_horizon)

In [79]:
# Evaluate all 24 simulated datasets: every combination of the four name
# fields in 'sim_<num>_<num>_<l|nl>_<he|ho>', 12-step horizon each.
dataset_name_test = [
    f'sim_{num_a}_{num_b}_{tag_a}_{tag_b}'
    for num_a in (10, 101, 500)
    for num_b in (60, 222)
    for tag_a in ('l', 'nl')
    for tag_b in ('he', 'ho')
]
dataset_type = 'sim'
forecast_horizon = 12
for i in dataset_name_test:
    print(i)
    causalimpact_eval(i, dataset_type, forecast_horizon)

sim_10_60_l_he




mean_SMAPE:0.394960885774756
mean_MASE:1.175297174100768
sim_10_60_l_ho




mean_SMAPE:0.2279659033647968
mean_MASE:0.7947804500718474
sim_10_60_nl_he




mean_SMAPE:0.5040668867341817
mean_MASE:1.4690547503438671
sim_10_60_nl_ho




mean_SMAPE:0.3014502065139876
mean_MASE:0.7951249448186914
sim_10_222_l_he




mean_SMAPE:0.2510744494136422
mean_MASE:0.9832764930994357
sim_10_222_l_ho




mean_SMAPE:0.33572100997882565
mean_MASE:1.050519450888118
sim_10_222_nl_he




mean_SMAPE:0.41152530904765844
mean_MASE:1.1359579904307453
sim_10_222_nl_ho




mean_SMAPE:0.3521531849772698
mean_MASE:1.0566978937615248
sim_101_60_l_he




mean_SMAPE:1.42083426154537
mean_MASE:11.39198059789036
sim_101_60_l_ho




mean_SMAPE:1.0920264957620271
mean_MASE:10.027192483111962
sim_101_60_nl_he




mean_SMAPE:0.4292697265345358
mean_MASE:1.237281191051999
sim_101_60_nl_ho




mean_SMAPE:0.4271679189224155
mean_MASE:1.1414424957899563
sim_101_222_l_he




mean_SMAPE:0.478900568452502
mean_MASE:2.114642379672656
sim_101_222_l_ho




mean_SMAPE:0.35103440877553554
mean_MASE:1.138504718502041
sim_101_222_nl_he




mean_SMAPE:0.537044023521524
mean_MASE:1.4301975227810644
sim_101_222_nl_ho




mean_SMAPE:0.5454482240419178
mean_MASE:1.4933717286837056
sim_500_60_l_he




mean_SMAPE:0.5770328005124841
mean_MASE:1.955531771308093
sim_500_60_l_ho




mean_SMAPE:0.3951819757776043
mean_MASE:1.2059439722830525
sim_500_60_nl_he




mean_SMAPE:0.9878341911581421
mean_MASE:3.568097636997206
sim_500_60_nl_ho




mean_SMAPE:0.4927354721971988
mean_MASE:1.2619126715155722
sim_500_222_l_he




mean_SMAPE:0.3124215086237141
mean_MASE:1.229476890245996
sim_500_222_l_ho




mean_SMAPE:0.4024892645159915
mean_MASE:1.546198835910658
sim_500_222_nl_he




mean_SMAPE:0.6330267473301043
mean_MASE:1.6178006130974416
sim_500_222_nl_ho




In [3]:
# The prediction rows must be re-sorted by series index before running the
# placebo test: sort first, then run the placebo test.
def custom_sort_key(s):
    """Return the second ``'_'``-separated field of ``s`` as an integer.

    For example ``'series_12'`` maps to ``12``, so using this function as a
    ``sorted`` key orders such labels numerically instead of alphabetically.

    Raises:
        IndexError: if ``s`` contains no underscore.
        ValueError: if the second field is not an integer literal.
    """
    return int(s.split('_')[1])

def transform_sim(dataset_name, dataset_type):
    """Re-save a dataset's CausalImpact predictions in numeric series order.

    Reads ``<dataset_name>_causalimpact.txt`` (one prediction row per series,
    no header) from ``../results/benchmarks/predicted/``, labels the rows with
    the series ids found in ``<dataset_name>_test_actual.csv``, sorts the rows
    with ``custom_sort_key`` (the integer after the first underscore of each
    id) and writes the result to ``<dataset_name>_T_causalimpact.txt`` in the
    same directory.

    Args:
        dataset_name: base name of the dataset and prediction files.
        dataset_type: sub-directory of ``../datasets/text_data/`` holding the
            test file.
    """
    y_true_df_A = pd.read_csv("../datasets/text_data/" + dataset_type +
            "/" + dataset_name + "_test_actual.csv")
    predicted_dir = '../results/benchmarks/predicted/'
    y_pred_df = pd.read_csv(predicted_dir + dataset_name + '_causalimpact.txt',
                            header=None)

    # causalimpact_eval writes one prediction row per column of this pivot,
    # in the pivot's column order. The raw long-format frame only carries a
    # positional index (one entry per observation), which neither matches the
    # number of prediction rows nor can be parsed by custom_sort_key.
    series_ids = y_true_df_A.pivot(index='time', columns='series_id',
                                   values='value').columns
    y_pred_df.index = series_ids
    y_pred_df = y_pred_df.loc[sorted(y_pred_df.index, key=custom_sort_key), :]

    np.savetxt(predicted_dir + dataset_name + '_T_causalimpact.txt',
               y_pred_df, delimiter = ',')


In [1]:
# Debugging aid: IPython magic that prints this session's input history.
%history

%history


In [6]:
# NOTE(review): transform_sim reads '<dataset_name>_test_actual.csv'; the
# recorded run of this cell failed because no such file exists for calls911.
transform_sim(dataset_name='calls911_benchmarks', dataset_type='calls911')

FileNotFoundError: [Errno 2] No such file or directory: '../datasets/text_data/calls911/calls911_benchmarks_test_actual.csv'

mean_SMAPE:0.3700299471410633


In [42]:
# MASE


mean_MASE:1.6336656457292509
