In [None]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression,ElasticNet
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb

In [None]:
def _add_calendar_features(frame):
    """Return a copy of ``frame`` with numeric holiday and calendar columns.

    Adds ``IsHoliday_oh`` (``IsHoliday`` cast to int) plus ``day``, ``week``,
    ``month`` and ``year`` derived from ``Date``, then drops the raw
    ``IsHoliday`` and ``Date`` columns. The input frame is not modified.
    """
    out = frame.copy()
    dates = pd.to_datetime(out['Date'])

    out['IsHoliday_oh'] = out['IsHoliday'].astype(int)
    out['day'] = dates.dt.day
    # `Series.dt.week` was deprecated in pandas 1.1 and removed in 2.0.
    # `isocalendar().week` yields the same ISO week number but as a nullable
    # UInt32 column, so cast it to a plain int for the estimator.
    out['week'] = dates.dt.isocalendar().week.astype(int)
    out['month'] = dates.dt.month
    out['year'] = dates.dt.year

    return out.drop(['IsHoliday', 'Date'], axis=1)


def mypredict(train,test,next_fold,t):
    """Fit a random forest on all data seen so far and predict ``test``.

    Parameters
    ----------
    train : pandas.DataFrame
        Training rows accumulated so far. Must contain ``Date``,
        ``IsHoliday`` and ``Weekly_Sales``; every other column is passed to
        the model as a feature.
    test : pandas.DataFrame
        Rows to predict. Must contain ``Date`` and ``IsHoliday``.
    next_fold : pandas.DataFrame or None
        Newly revealed rows to append to ``train`` before fitting. ``None``
        means there is nothing to append.
    t : int
        Fold number supplied by the caller. Currently unused; kept so the
        call signature stays stable.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        The training frame with ``next_fold`` appended, and a copy of
        ``test`` with an added ``Weekly_Pred`` column holding the
        predictions.
    """
    # Grow the training history with the fold revealed since the last call.
    if next_fold is not None:
        train = pd.concat([train, next_fold])

    #----------------------------------------------------------
    # Feature Engineering
    #----------------------------------------------------------
    y_train = train['Weekly_Sales']
    X_train = _add_calendar_features(train).drop(['Weekly_Sales'], axis=1)
    X_test = _add_calendar_features(test)

    #----------------------------------------------------------
    # Model Building
    #----------------------------------------------------------
    # Fixed seed so repeated runs give the same forest.
    rf_mod = RandomForestRegressor(n_estimators=150,
                                   random_state=125247)
    rf_mod.fit(X_train, y_train)

    # Attach predictions to an untouched copy of the caller's test frame.
    test_pred_ret = test.copy()
    test_pred_ret['Weekly_Pred'] = rf_mod.predict(X_test)

    return train, test_pred_ret
        

In [None]:
# Initial training history and the full set of rows to be predicted.
train = pd.read_csv('../data/train_ini.csv', parse_dates=['Date'])
test = pd.read_csv('../data/test.csv', parse_dates=['Date'])

n_folds = 10       # number of evaluation folds
next_fold = None   # nothing to append on the first mypredict() call
wae = []           # weighted absolute error, one entry per fold

# Time-series CV: predict, reveal the next fold, score against it, repeat.
for t in range(1, n_folds+1):

    # Fit on everything revealed so far (train is returned already extended).
    train, test_pred = mypredict(train, test, next_fold, t)

    # Reveal this fold's actuals; they are handed to mypredict() on the
    # next iteration so it can add them to the training data.
    fold_file = 'fold_{t}.csv'.format(t=t)
    next_fold = pd.read_csv("../data/fold/"+fold_file, parse_dates=['Date'])

    # Line up each actual with its prediction. The left join keeps every row
    # of the fold; IsHoliday_x is the fold's own holiday flag.
    scoring_df = next_fold.merge(test_pred, on=['Date', 'Store', 'Dept'], how='left')

    # Holiday weeks are weighted 5x; rows without a prediction score as 0.
    weights = scoring_df['IsHoliday_x'].map(lambda is_holiday: 5 if is_holiday else 1).to_numpy()
    actuals = scoring_df['Weekly_Sales'].to_numpy()
    preds = scoring_df['Weekly_Pred'].fillna(0).to_numpy()

    abs_err = np.abs(actuals - preds)
    fold_wae = (np.sum(weights * abs_err) / np.sum(weights)).item()
    wae.append(fold_wae)

    print ("WAE for Fold:{} is :{} ".format(t, fold_wae))

# Per-fold scores followed by their plain average.
print(wae)
print(sum(wae)/len(wae))

In [None]:
# Average WAE across all folds, shown as the cell's output.
np.asarray(wae).mean()