##### THIS FILE RUNS THE PREDICTION MODEL FOR THE TOTAL NUMBER OF CUSTOMERS #####

In [None]:
import pandas as pd
import openpyxl
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import tensorflow as tf
import math
import os
from dateutil.relativedelta import relativedelta
from sklearn.cluster import KMeans

In [None]:
# Silence warning output so the notebook stays readable.
import warnings
# Ignore all warnings
warnings.filterwarnings("ignore")
# Also ignore FutureWarning explicitly (already covered by the blanket filter above)
warnings.filterwarnings("ignore", category=FutureWarning)
# IPython helpers for showing images and HTML in notebook output
from IPython.display import Image
from IPython.core.display import HTML 

# 1 | Prepare input

In [None]:
# Load the raw data from a machine-specific absolute path and drop the
# 'Unnamed: 0' column (presumably a saved index from an earlier to_csv -- confirm).
df = pd.read_csv(r'C:\Users\Admin\RBAC\OriDataNew.csv').drop(columns='Unnamed: 0')

In [None]:
# Parse the transaction timestamps, then tag every row with a "year-week" key.
# %U numbers weeks starting on Sunday; days before the first Sunday are week 00.
df['TransactionDate'] = pd.to_datetime(df['TransactionDate'])
# Vectorised .dt.strftime replaces a per-row Python lambda over the whole frame.
df['Year_week'] = df['TransactionDate'].dt.strftime("%Y-%U")

In [None]:
# Weekly customer counts: keep one row per (week, customer) pair, then count
# the CustomerID values that remain within each week.
num_cust_agg = (
    df.drop_duplicates(subset=['Year_week', 'CustomerID'])
    .groupby('Year_week', as_index=False)
    .agg(Num_Customer=pd.NamedAgg(column='CustomerID', aggfunc='count'))
)

In [None]:
# Exclude week '2021-39' from the series
# (presumably an incomplete first week -- confirm against the data).
keep_week = num_cust_agg['Year_week'].ne('2021-39')
num_cust_agg = num_cust_agg.loc[keep_week]
num_cust_agg

In [None]:
def window(input_list, window_size, drop_remainder=True):
    """Split a sequence into overlapping sliding windows with stride 1.

    Args:
        input_list: Sliceable sequence (e.g. a list) of observations.
        window_size: Number of items in each full window; must be >= 1.
        drop_remainder: When truthy (default), only full windows are
            returned. When falsy, the shorter tail slices that start too
            late to fill a whole window are appended as well.

    Returns:
        A list of slices of ``input_list``: all full windows in start-index
        order, followed (optionally) by the shorter tail slices.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    total = len(input_list)
    # A full window can start at any index that still leaves window_size items.
    n_full = max(total - window_size + 1, 0)
    res_list = [input_list[start:start + window_size] for start in range(n_full)]
    if not drop_remainder:
        # Remaining start positions only yield truncated tail slices.
        res_list.extend(input_list[start:] for start in range(n_full, total))
    return res_list

def label_extract(window_list):
    """Split equal-length windows into model inputs and next-step labels.

    Args:
        window_list: Sequence of equal-length windows (each window itself a
            sequence of numbers).

    Returns:
        A tuple ``(features, label)`` of 2-D NumPy arrays: ``features``
        holds every element of each window except the last, and ``label``
        holds the last element of each window as a column of width 1.

    Raises:
        ValueError: If the windows do not form a 2-D array, for example when
            ``window_list`` is empty or its windows differ in length.
    """
    window_array = np.array(window_list)
    if window_array.ndim != 2:
        raise ValueError(
            "window_list must be a non-empty sequence of equal-length windows"
        )
    # Column slicing replaces the per-row map/lambda and avoids shadowing the
    # builtin ``input``; ``-1:`` (not ``-1``) keeps the label axis.
    features = window_array[:, :-1]
    label = window_array[:, -1:]
    return features, label

In [None]:
# Sliding-window length, and the weekly customer counts as a plain list.
window_size = 4
input_list = list(num_cust_agg['Num_Customer'].values)

In [None]:
# Build the sliding windows, split them into inputs and labels, and show both.
window_list = window(input_list=input_list, window_size=window_size)
input, label = label_extract(window_list)
display(input, label)

In [None]:
# Ordered 80/20 split (no shuffling): the first 80% of the windows are used
# for training and the rest for validation.
n_split = math.floor(len(input) * 0.8)

train_x, valid_x = input[:n_split, :], input[n_split:, :]
train_y, valid_y = label[:n_split, :], label[n_split:, :]

# Print the four shapes on one line.
print(*(part.shape for part in (train_x, train_y, valid_x, valid_y)))

# 2 | Training

In [None]:
def train_RNN(input, label, window_size):
    """Build and compile the bidirectional-LSTM model and its checkpoint callback.

    Despite the name, nothing is fitted here; ``input`` and ``label`` are
    accepted but not used by the body.

    Args:
        input: Unused.
        label: Unused.
        window_size: Number of units in every LSTM layer.

    Returns:
        A tuple ``(model, cp_callback)``: the compiled ``tf.keras.Sequential``
        model and a ``ModelCheckpoint`` callback that saves weights only to
        ``predict_total_cust.ckpt`` in the current working directory.
    """
    keras = tf.keras
    layers = keras.layers

    stack = [
        # Append a trailing feature axis: (batch, steps) -> (batch, steps, 1).
        layers.Lambda(lambda x: tf.expand_dims(x, axis=-1), input_shape=[None]),
    ]
    # Three sequence-returning BiLSTM layers feed a final BiLSTM that is
    # created without return_sequences.
    stack.extend(
        layers.Bidirectional(layers.LSTM(window_size, return_sequences=True))
        for _ in range(3)
    )
    stack.append(layers.Bidirectional(layers.LSTM(window_size)))
    # Dense head ending in a single linear output.
    stack.extend([
        layers.Dense(64, activation='relu'),
        layers.Dense(32, activation='relu'),
        layers.Dense(1, activation='linear'),
    ])

    model = keras.Sequential(stack)
    model.compile(loss=keras.losses.Huber(), optimizer='Adam', metrics=["mae"])

    checkpoint_path = os.path.join(os.getcwd(), "predict_total_cust.ckpt")
    cp_callback = keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_weights_only=True,
        verbose=1,
    )

    return model, cp_callback


In [None]:
# Build the compiled model and the checkpoint callback (no training happens in this call).
model, cp_callback = train_RNN(input=input,label=label,window_size=window_size)

In [None]:
# ### THIS CODE TRAINS THE LSTM MODEL ON THE TRAINING DATA ###

# model.fit(
#     train_x,
#     train_y,
#     validation_data = (valid_x, valid_y),
#     epochs = 300,
#     callbacks = [cp_callback]
# )

In [None]:
def predict_time_series(list_of_val, predict_period, model, window_size):
    """Extend a series by repeatedly forecasting one step ahead.

    On every step the last ``window_size`` values are passed to
    ``model.predict`` as a batch of one; the first prediction is scaled by a
    random factor between 1.01 and 1.49, floored to an integer, and appended
    to the series so that it feeds the following step.

    Args:
        list_of_val: Seed values. This list is extended IN PLACE.
        predict_period: Number of forecast steps to append.
        model: Object whose ``predict`` accepts a 2-D array and returns a
            result indexable as ``result[0][0]``.
        window_size: Number of trailing values passed to the model per step.

    Returns:
        The same ``list_of_val`` object, now holding the seed values followed
        by ``predict_period`` forecast values.
    """
    for _ in range(predict_period):
        # Shape (1, len(series)): a batch holding the whole history so far.
        history = np.asarray(list_of_val)[np.newaxis, ...]
        recent = history[:, -window_size:]
        forecast = model.predict(recent)[0][0]
        # Multiplicative noise: always inflates the forecast by 1%..49%.
        noise_factor = (np.random.randint(1, 50) / 100) + 1
        list_of_val.append(math.floor(noise_factor * forecast))
    return list_of_val

In [None]:
# Restore weights from the same path the checkpoint callback writes to, instead of re-training.
model.load_weights(os.path.join(os.getcwd(), "predict_total_cust.ckpt"))

In [None]:
predict_period = 24
# The training windows hold (window_size - 1) inputs plus one label, so the
# model must be fed exactly that many trailing values on every forecast step.
# Passing window_size itself fed one extra value from the second step onwards.
n_lags = window_size - 1
y_pred = predict_time_series(
    list_of_val = list(num_cust_agg['Num_Customer'][-n_lags:].values),
    predict_period = predict_period,
    model = model,
    window_size = n_lags
)
# Strip the seed observations so that only the forecast values remain.
y_pred = y_pred[n_lags:]

# 3 | Predicting

In [None]:
# Zero-padded number labels (used below for week numbers)
def custom_range(start, stop, step=1):
    """Return ``range(start, stop, step)`` as strings zero-padded to at least two digits."""
    return [f"{num:02d}" for num in range(start, stop, step)]

# "year-week" labels for the forecast horizon: one label per predicted week,
# starting at week 27 of 2023.
forecast_years = ['2023']
first_week = 27

time_stamp_list = []
for year in forecast_years:
    for week in custom_range(first_week, first_week + predict_period):
        time_stamp_list.append(f"{year}-{week}")
time_stamp_list

In [None]:
# Sanity check: both lengths must match before they are combined into one DataFrame.
print(
    len(y_pred),
    len(time_stamp_list)
)

In [173]:
# Append the forecast weeks to the historical weekly counts.
res_predict = pd.DataFrame({'Year_week':time_stamp_list,'Num_Customer':y_pred})
final_output = pd.concat([num_cust_agg,res_predict])
# Convert each year-week key to a calendar month.
final_output_test = final_output.copy()
# Year_week is built with %U (Sunday-first weeks), so it has to be parsed with
# %U too. Parsing with %W placed the appended Sunday ('-0') at the END of a
# Monday-first week, which is one week late unless the year starts on a Monday.
final_output_test['Year_week'] = pd.to_datetime(final_output_test['Year_week'] + '-0', format='%Y-%U-%w')
final_output_test['Year_Month'] = final_output_test['Year_week'].dt.to_period('M')
final_output_test = final_output_test.drop(['Year_week'], axis=1)
final_output_test = final_output_test[['Year_Month','Num_Customer']]
# Written relative to the current working directory; the index is included.
final_output_test.to_csv(r'num_cust_predict.csv')

# 4 | Distributed for cluster

In [174]:
# Display the transaction DataFrame.
df

Unnamed: 0,BillID,Channel,OrderFrom,TransactionDate,SalesAmount,CustomerID,CustomerGender,VoucherStatus,Province,Year_Month
0,0,Take Away,CALL CENTER,2021-10-01 00:00:00+00:00,296891.0,1753863,Unknown,No,Ho Chi Minh City,2021-10
1,1,Take Away,STORE,2021-10-01 00:00:00+00:00,301782.0,1124050,Unknown,No,Hanoi,2021-10
2,2,Take Away,WEBSITE,2021-10-01 00:00:00+00:00,319792.0,1626827,Male,No,Hanoi,2021-10
3,3,Take Away,STORE,2021-10-01 00:00:00+00:00,424762.0,125643,Male,No,Hanoi,2021-10
4,4,Delivery,STORE,2021-10-01 00:00:00+00:00,280031.0,2117237,Unknown,No,Hanoi,2021-10
...,...,...,...,...,...,...,...,...,...,...
1397504,1397504,Delivery,WEBSITE,2023-07-01 00:00:00+00:00,87103.0,1401593,Unknown,No,Ho Chi Minh City,2023-07
1397505,1397505,Take Away,STORE,2023-07-01 00:00:00+00:00,278920.0,1322074,Male,No,Hanoi,2023-07
1397506,1397506,Take Away,WEBSITE,2023-07-01 00:00:00+00:00,364872.0,2038523,Unknown,No,Nothern Provinces,2023-07
1397507,1397507,Delivery,APP,2023-07-01 00:00:00+00:00,1349201.0,1443231,Male,No,Southern Provinces,2023-07
