##### Import dependencies

In [1]:
import numpy as np
import pandas as pd
from numpy.fft import *
import shutil
import os
import os
import glob
from tensorflow.keras.models import load_model
from tqdm.notebook import tqdm, trange

# from RNN import RNN
from Transformer import Transformer
#from CNN import CNN

from utils import series_to_supervised

from utils import rmse, smape, coeff_determination

In [4]:
# Directory that receives one saved model per warehouse, named wh_<id>.hdf5
# (written by train_trans, read back by the model-loading loop).
MODEL_PATH = 'wh_wise_best_models/'
# Directory of per-warehouse training CSV files; listed and read by the training loop.
TRAIN_PATH = '../dataset/wh_wise_data/train/'
# Directory of per-warehouse test files; its listing drives the model-loading loop.
TEST_PATH = '../dataset/wh_wise_data/test/'

##### Preprocess DataFrame

In [5]:
def preprocess1(df):
    """Index a raw warehouse frame by date and strip bookkeeping columns.

    The ``date`` column is parsed day-first, becomes the index, and the
    ``Unnamed: 0`` and ``wh_id`` columns are removed.

    The input frame is left untouched and a new frame is returned, so the
    cell that calls this can be re-run on the same object without hitting
    ``KeyError: 'date'`` (which the previous in-place version raised on a
    second call).

    Parameters
    ----------
    df : pandas.DataFrame
        Raw frame; must contain a ``date`` column.

    Returns
    -------
    pandas.DataFrame
        New frame indexed by the parsed ``date`` column.

    Raises
    ------
    KeyError
        If ``df`` has no ``date`` column.
    """
    return (
        df.assign(date=pd.to_datetime(df['date'], dayfirst=True))
        .set_index('date')
        # errors='ignore': these are bookkeeping columns; a file that lacks
        # one of them (e.g. saved without the index) should still load.
        .drop(columns=['Unnamed: 0', 'wh_id'], errors='ignore')
    )

In [6]:
def preprocess2(daily_data):
    """Add calendar features derived from the frame's datetime index.

    Two columns are appended (or overwritten if already present):

    * ``day``    - day of the month.
    * ``season`` - ``month % 12 // 3 + 1``, i.e. 1 for Dec-Feb, 2 for Mar-May,
      3 for Jun-Aug and 4 for Sep-Nov.

    The earlier version also built a ``weekday`` column (in fact a
    Saturday/Sunday flag) and dropped it again before returning. That dead
    computation is removed; any pre-existing ``weekday`` column is still
    dropped, so the returned columns are the same as before.

    Parameters
    ----------
    daily_data : pandas.DataFrame
        Frame whose index can be converted to a ``DatetimeIndex``.
        It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``daily_data`` object, with the feature columns added.
    """
    dates = pd.DatetimeIndex(daily_data.index)
    daily_data['day'] = dates.day
    # Vectorized form of the per-month list comprehension; cast keeps the
    # int64 dtype that the list-of-Python-ints assignment produced.
    daily_data['season'] = ((dates.month % 12) // 3 + 1).astype('int64')
    daily_data.drop(columns='weekday', errors='ignore', inplace=True)
    return daily_data

##### Module to filter noise

In [7]:
def filter_signal(signal, threshold=3e3, preserve_length=False):
    """Low-pass filter a 1-D series by zeroing high-frequency FFT bins.

    The sample spacing is fixed at ``20e-3 / n`` (the whole series is treated
    as spanning 20 ms), so bin ``k`` sits at ``50 * k`` regardless of the
    series length. With the default ``threshold`` of 3e3 this keeps the
    DC term and the first 60 harmonics and discards the rest.

    Parameters
    ----------
    signal : array-like
        1-D numeric data (``pandas.Series``, ``numpy.ndarray`` or a sequence).
    threshold : float, default 3e3
        Bins whose frequency is strictly greater than this are set to zero.
    preserve_length : bool, default False
        ``False`` keeps the historical behaviour: ``irfft`` is called without
        an explicit length, so an even-length input returns the same number
        of samples but an odd-length input of ``n`` samples returns ``n - 1``
        samples whose amplitude is scaled by ``n / (n - 1)``.
        ``True`` passes ``n`` to ``irfft`` so the output always has exactly
        as many samples as the input, with correct amplitude.

    Returns
    -------
    numpy.ndarray
        The filtered signal.

    Raises
    ------
    ValueError
        If ``signal`` is empty.
    """
    samples = np.asarray(signal, dtype=float)
    n_samples = samples.size
    if n_samples == 0:
        raise ValueError("filter_signal() requires a non-empty signal")

    fourier = rfft(samples)
    frequencies = rfftfreq(n_samples, d=20e-3 / n_samples)
    fourier[frequencies > threshold] = 0
    # n=None is irfft's default: output length 2 * (len(fourier) - 1).
    return irfft(fourier, n=n_samples if preserve_length else None)

##### Train-Test Split

In [8]:
def tt_split(daily_data, look_back=7, n_out=7, train_size=0.8, target_column='sales'):
    """Split a daily frame into supervised train/test sets for sequence models.

    The framing and split are delegated to ``utils.series_to_supervised``
    (called with ``dropnan=True`` and ``scale_X=True``); this function then
    reshapes the flat lagged inputs into the 3-D layout
    ``[samples, timesteps, features]``.

    Parameters
    ----------
    daily_data : pandas.DataFrame
        Feature frame; every column counts as one feature.
    look_back : int, default 7
        Number of past steps per sample (passed as ``n_in`` and used as the
        timestep dimension of the reshape).
    n_out : int, default 7
        Forecast horizon passed to ``series_to_supervised``.
    train_size : float, default 0.8
        Training fraction passed to ``series_to_supervised``.
    target_column : str, default 'sales'
        Column to predict.

    Returns
    -------
    tuple
        ``(X_train, X_train_reshaped, X_test, X_test_reshaped,
        y_train, y_train_reshaped, y_test, y_test_reshaped)`` where the
        ``*_reshaped`` entries are NumPy arrays and the others are whatever
        ``series_to_supervised`` returned.
    """
    n_features = daily_data.shape[1]

    # Original author's note: walk-forward data split to avoid data leakage.
    # The fitted scaler (5th return value) is not needed here.
    X_train, y_train, X_test, y_test, _scaler = series_to_supervised(
        daily_data,
        train_size=train_size,
        n_in=look_back,
        n_out=n_out,
        target_column=target_column,
        dropnan=True,
        scale_X=True,
    )

    # reshape input to be 3D [samples, timesteps, features]
    # assumes each X row holds look_back * n_features values - TODO confirm against utils
    window_shape = (-1, look_back, n_features)
    X_train_reshaped = X_train.values.reshape(window_shape)
    X_test_reshaped = X_test.values.reshape(window_shape)

    y_train_reshaped = y_train.values
    y_test_reshaped = y_test.values
    return (
        X_train, X_train_reshaped,
        X_test, X_test_reshaped,
        y_train, y_train_reshaped,
        y_test, y_test_reshaped,
    )

##### Module to train the Transformer

In [9]:
def train_trans(id, X_train_reshaped, y_train_reshaped, X_test_reshaped, y_test_reshaped,
                checkpoint_dir='checkpoint'):
    """Train a Transformer for one warehouse and archive its checkpoint.

    A fresh ``Transformer`` is trained and evaluated, then the checkpoint
    file whose name sorts last in ``checkpoint_dir`` is copied to
    ``MODEL_PATH/wh_<id>.hdf5``.

    Parameters
    ----------
    id : int
        Warehouse identifier used in the archived file name. (The name
        shadows the builtin ``id``; kept for caller compatibility.)
    X_train_reshaped, y_train_reshaped :
        Training inputs/targets passed to ``Transformer.train``.
    X_test_reshaped, y_test_reshaped :
        Hold-out inputs/targets passed to ``Transformer.evaluate``.
    checkpoint_dir : str, default 'checkpoint'
        Directory in which to look for checkpoint files after training.

    Raises
    ------
    FileNotFoundError
        If ``checkpoint_dir`` contains no files after training.
    """
    tr = Transformer()
    tr.train(X_train_reshaped, y_train_reshaped)
    # evaluate() returns five values; they are unpacked (so a changed return
    # arity still fails loudly) but not used further here.
    _, _rmse, _mae, _smape, _r2 = tr.evaluate(X_test_reshaped, y_test_reshaped)

    checkpoints = os.listdir(checkpoint_dir)
    if not checkpoints:
        raise FileNotFoundError(
            f"no checkpoint files found in {checkpoint_dir!r} for warehouse {id}"
        )
    # Lexicographically last name, same pick as sorted(..., reverse=True)[0].
    # NOTE(review): if checkpoint_dir is not cleared between warehouses, this
    # can select a file left over from an earlier run - confirm how
    # Transformer.train names/cleans its checkpoints.
    best_model = max(checkpoints)

    # copy2 does not create missing directories.
    os.makedirs(MODEL_PATH, exist_ok=True)
    src = os.path.join(checkpoint_dir, best_model)
    target = os.path.join(MODEL_PATH, 'wh_'+str(id)+'.hdf5')
    shutil.copy2(src, target)


##### Iterating through warehouses

In [12]:
# os.listdir() returns entries in arbitrary order, so the list is sorted
# (lexicographically by file name) to make the start/end window below select
# the same warehouses on every run. Non-CSV entries (e.g. notebook checkpoint
# folders) are skipped because the training loop parses the id from a
# '<...>_<id>.csv' file name.
whlst = sorted(name for name in os.listdir(TRAIN_PATH) if name.endswith('.csv'))
# Half-open window [start, end) of warehouses to train in this session.
start = 55
end = 112

In [13]:
# First warehouse file of the current training window.
whlst[start]

'train_wh_1079.csv'

In [14]:
# Train and archive one Transformer per warehouse file in the selected window.
for wh in tqdm(whlst[start:end]):
    # File names look like 'train_wh_1079.csv'; the id is the last '_' field.
    wh_id = int(os.path.splitext(wh)[0].split('_')[-1])
    df = preprocess1(pd.read_csv(os.path.join(TRAIN_PATH, wh)))

    filtered_sales = filter_signal(df['sales'])
    # filter_signal returns one sample fewer for odd-length series and the
    # full length for even-length ones. Trim to whatever came back instead of
    # always dropping the last row (which raised a length-mismatch error for
    # even-length series), and copy so the assignment below does not write
    # into a slice of the original frame.
    df = df.iloc[:len(filtered_sales)].copy()
    df['sales'] = filtered_sales

    daily_data = preprocess2(df)
    X_train, X_train_reshaped, X_test, X_test_reshaped, y_train, y_train_reshaped, y_test, y_test_reshaped = tt_split(daily_data)
    train_trans(wh_id, X_train_reshaped, y_train_reshaped, X_test_reshaped, y_test_reshaped)
    
    

  0%|          | 0/57 [00:00<?, ?it/s]

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 7, 4)]       0           []                               
                                                                                                  
 layer_normalization (LayerNorm  (None, 7, 4)        8           ['input_1[0][0]']                
 alization)                                                                                       
                                                                                                  
 multi_head_attention (MultiHea  (None, 7, 4)        19460       ['layer_normalization[0][0]',    
 dAttention)                                                      'layer_normalization[0][0]']    
                                                                                              

KeyboardInterrupt: 

In [None]:
#******** train 1017 ********

In [None]:
# Load the archived model for every warehouse that has a file in TEST_PATH.
for wh in os.listdir(TEST_PATH):
    # Warehouse id = text after the last '_' with a 4-character extension removed.
    id = int(wh.split('_')[-1][:-4])
    # Same wh_<id>.hdf5 naming that train_trans uses when archiving a checkpoint.
    model_path = os.path.join(MODEL_PATH, f'wh_{id}.hdf5')
    # The project metric functions are supplied via custom_objects for deserialization.
    # NOTE(review): `model` is not used in the visible part of this cell - confirm
    # whether evaluation/prediction code is still to be added.
    model = load_model(model_path, custom_objects={'rmse':rmse, 'smape':smape, 'coeff_determination':coeff_determination})
    