In [None]:
import numpy as np
import pandas as pd
from functools import reduce
import pickle

%matplotlib inline

## Params

In [None]:
# Columns used as group-by keys when applying the ensembling weights:
# each distinct combination of these columns gets its own weight vector.
ENS_LEVEL_KEYS = ['store_id', 'dept_id']

# Names of the forecasts to blend. Each name is used both as the prefix of
# the submission file to read and as the name of the forecast column.
LIST_ALGO = [
    'lgb_estim',
    'tf_estim',
    'Prophet_store_dpt_lgb_weights',
]

# Base name shared by the weights pickle and the ensemble submission files.
OUTPUT_NAME = 'lgb_tf_prophet_ensembling'

## Load useful data

In [None]:
# Read the raw evaluation sales file and keep only the identifier columns
# listed below; every other column of the file is dropped.
id_columns = ['id', 'item_id', 'dept_id', 'cat_id', 'store_id', 'state_id']
ids = pd.read_csv('data/raw/sales_train_evaluation.csv').loc[:, id_columns]

In [None]:
# Load the ensembling weights from the pickle file named after OUTPUT_NAME.
# The context manager closes the file handle even if unpickling raises,
# which the previous manual open()/close() pair did not guarantee.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load a weights file produced by a trusted step of this project.
with open('data/external/weights_' + OUTPUT_NAME + '.pkl', 'rb') as f:
    weights_dict = pickle.load(f)

## Load and melt forecast files

In [None]:
# Collect one long-format frame per algorithm. Each frame has the columns
# 'id', 'variable' (the original column name) and one value column named
# after the algorithm.
melted_df = []

for algo in LIST_ALGO:
    submission = pd.read_csv('data/submission/' + algo + '_evaluation.csv')
    # Keep only the rows whose id contains 'evaluation'.
    is_evaluation = submission['id'].str.contains('evaluation')
    melted_df.append(submission[is_evaluation].melt(id_vars='id', value_name=algo))

In [None]:
# Fold the per-algorithm frames into a single frame with one column per
# algorithm. pd.merge is called without `on`, so each join uses the columns
# the two frames have in common, with the default inner join.
forecasts_all_algos = reduce(pd.merge, melted_df)

# Attach the identifier columns from `ids`, again joining on shared columns.
# Note: `melted_df` is rebound here from a list of frames to a DataFrame.
melted_df = pd.merge(ids, forecasts_all_algos)

## Apply the pre-computed ensembling weights

In [None]:
# Blend the individual forecasts group by group: for every combination of
# ENS_LEVEL_KEYS, look up that group's weights in `weights_dict` and take the
# dot product of the algorithm columns with them.
# NOTE(review): assumes each weight vector is ordered like LIST_ALGO and that
# `weights_dict` is keyed by the group-by key — confirm against the notebook
# that produced the pickle.
list_ensemble = list()

for group_key, gp in melted_df.groupby(ENS_LEVEL_KEYS):
    w = weights_dict[group_key]
    # assign() returns a new frame, so no column is written into a groupby
    # slice (which raises SettingWithCopyWarning).
    list_ensemble.append(gp.assign(ensemble=gp[LIST_ALGO].values.dot(w)))

ensemble_eval = pd.concat(list_ensemble)

# Floor the blended forecast at zero. The result is assigned back explicitly:
# a chained `df[col].clip(..., inplace=True)` acts on a temporary object and
# leaves the frame untouched when pandas Copy-on-Write is active.
ensemble_eval['ensemble'] = ensemble_eval['ensemble'].clip(lower=0)
ensemble_eval.head()

## Format outputs

In [None]:
# Reshape from long to wide: one row per id, one column per value of
# 'variable', filled with the 'ensemble' values.
ensemble_wide = ensemble_eval.pivot_table(
    index=['id'],
    columns=['variable'],
    values=['ensemble'],
).reset_index()

# The pivot yields two-level column labels; flatten them by keeping the
# second level, falling back to the first level when the second is empty
# (which is the case for the 'id' column after reset_index).
ensemble_wide.columns = [
    outer if inner == '' else inner
    for outer, inner in ensemble_wide.columns.tolist()
]

# Keep 'id' followed by F1..F28 in that exact order.
forecast_columns = ['F%s' % day for day in range(1, 29)]
ensemble_eval = ensemble_wide[['id'] + forecast_columns]
ensemble_eval.head()

## Merge with validation & save evaluation outputs

In [None]:
# Read the validation-phase ensemble and stack the evaluation rows computed
# above underneath it (row-wise concatenation, validation rows first).
validation_path = 'data/submission/' + OUTPUT_NAME + '_validation.csv'
ensemble_val = pd.read_csv(validation_path)
ensemble_eval = pd.concat([ensemble_val, ensemble_eval])

In [None]:
# Write the combined frame to the evaluation submission file, without the
# DataFrame index.
evaluation_path = 'data/submission/' + OUTPUT_NAME + '_evaluation.csv'
ensemble_eval.to_csv(evaluation_path, index=False)