In [10]:
import pandas as pd
import numpy as np
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from scipy.stats import zscore
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Embedding, Flatten, Layer, Input, LayerNormalization, BatchNormalization, Add, Activation, Permute, Multiply, Lambda
from tensorflow.keras.regularizers import l2
from tensorflow.keras import regularizers
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.initializers import HeUniform
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import LabelEncoder
import tensorflow.keras.backend as K
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.preprocessing  import MinMaxScaler
from keras.models  import load_model




In [11]:
# US holidays DataFrame
model_data = pd.read_excel(r"/content/Call_Volume_Data_2020_to_2025.xlsx")
holidays_df = pd.read_csv(r"/content/Holiday.csv")

# Ensure the holidays DataFrame has a 'DATE' column in datetime format
holidays_df['Date'] = pd.to_datetime(holidays_df['Date'],format='mixed')

# Merge the holidays DataFrame with model_data on the date column
model_data = pd.merge(model_data, holidays_df[['Date', 'Holiday']], left_on='REPORT_DT', right_on='Date', how='left')

# Drop the redundant 'DATE' column from the merge
model_data.drop(columns=['Date'], inplace=True)

# FILL missing holiday names with "No Holiday"
model_data['Holiday'] = model_data['Holiday'].fillna('No Holiday')

# Display the updated DataFrame
model_data.sort_values(by = 'REPORT_DT').reset_index()
model_data.set_index('REPORT_DT', inplace=True)
# model_data = model_data.drop(columns = 'index', axis = 1)

# Ensure the columns are numeric
model_data['DAY_OF_WEEK'] = model_data.index.dayofweek
model_data['DAY_OF_MONTH'] = pd.to_numeric(model_data['DAY_OF_MONTH'], errors='coerce')
model_data['MONTH'] = pd.to_numeric(model_data['MONTH'], errors='coerce')
model_data['QUARTER'] = pd.to_numeric(model_data['QUARTER'], errors='coerce')

# Encode DAY OF WEEK (1 to 7)
model_data['DAY_OF_WEEK_SIN'] = np.sin(2 * np.pi * model_data['DAY_OF_WEEK'] / 7)
model_data['DAY_OF_WEEK_COS'] = np.cos(2 * np.pi * model_data['DAY_OF_WEEK'] / 7)

# Encode DAY OF MONTH (1 to 31)
model_data['DAY_OF_MONTH_SIN'] = np.sin(2 * np.pi * model_data['DAY_OF_MONTH'] / 31)
model_data['DAY_OF_MONTH_COS'] = np.cos(2 * np.pi * model_data['DAY_OF_MONTH'] / 31)

# Encode MONTH (1 to 12)
model_data['MONTH_SIN'] = np.sin(2 * np.pi * model_data['MONTH'] / 12)
model_data['MONTH_COS'] = np.cos(2 * np.pi * model_data['MONTH'] / 12)

# Encode QUARTER (1 to 4)
model_data['QUARTER_SIN'] = np.sin(2 * np.pi * model_data['QUARTER'] / 4)
model_data['QUARTER_COS'] = np.cos(2 * np.pi * model_data['QUARTER'] / 4)


model_data['is_sunday'] = (model_data.index.dayofweek == 6).astype(int)
model_data['is_monday'] = (model_data.index.dayofweek == 0).astype(int)
model_data['is_weekend'] = (model_data.index.dayofweek >= 5).astype(int)





In [12]:
# Step 1: Calculate the average call volume for each holiday
holiday_encoding = model_data.groupby('Holiday')['TOTAL_OFFERED_CALL_VOLUME'].mean().to_dict()

# Step 2: Map the encoded values to the Holiday column
model_data['HOLIDAY_ENCODED'] = model_data['Holiday'].map(holiday_encoding)

# Step 3: Handle missing values (e.g., for "No Holiday")
default_value = model_data[model_data['Holiday']=='No Holiday']['TOTAL_OFFERED_CALL_VOLUME'].mean()
model_data['HOLIDAY_ENCODED'] = model_data['HOLIDAY_ENCODED'].fillna(default_value)

# Display the updated DataFrame
model_data_encoded = model_data.drop(columns=['DAY_OF_WEEK','DAY_OF_MONTH','MONTH','QUARTER','YEAR','Holiday'])

from sklearn.preprocessing import MinMaxScaler

# Define input features and target
feature_cols = ['DAY_OF_WEEK_SIN', 'DAY_OF_WEEK_COS',
                'DAY_OF_MONTH_SIN', 'DAY_OF_MONTH_COS', 'MONTH_SIN', 'MONTH_COS',
                'QUARTER_SIN', 'QUARTER_COS', 'HOLIDAY_ENCODED']
target_col = 'TOTAL_OFFERED_CALL_VOLUME'

model_data_encoded_scaled = model_data_encoded.copy()



In [13]:
model_data_encoded_scaled.head()

Unnamed: 0_level_0,TOTAL_OFFERED_CALL_VOLUME,DAY_OF_WEEK_SIN,DAY_OF_WEEK_COS,DAY_OF_MONTH_SIN,DAY_OF_MONTH_COS,MONTH_SIN,MONTH_COS,QUARTER_SIN,QUARTER_COS,is_sunday,is_monday,is_weekend,HOLIDAY_ENCODED
REPORT_DT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2020-07-01,32654,0.974928,-0.222521,0.201299,0.97953,-0.5,-0.866025,-1.0,-1.83697e-16,0,0,0,23215.583333
2020-07-02,24795,0.433884,-0.900969,0.394356,0.918958,-0.5,-0.866025,-1.0,-1.83697e-16,0,0,0,23215.583333
2020-07-03,9860,-0.433884,-0.900969,0.571268,0.820763,-0.5,-0.866025,-1.0,-1.83697e-16,0,0,0,23215.583333
2020-07-04,14390,-0.974928,-0.222521,0.724793,0.688967,-0.5,-0.866025,-1.0,-1.83697e-16,0,0,1,23215.583333
2020-07-05,30575,-0.781831,0.62349,0.848644,0.528964,-0.5,-0.866025,-1.0,-1.83697e-16,1,0,1,23215.583333


In [14]:
# Define sequence Length
SEQ_LEN = 180*2
HORIZON = 180

# Split the data into training and testing sets
train_size = int(len(model_data_encoded_scaled) - HORIZON*3)
train_data = model_data_encoded_scaled[:train_size].copy() # Use .copy() to avoid SettingWithCopyWarning
test_data = model_data_encoded_scaled[train_size:].copy()

# Initialize the scaler
scaler = MinMaxScaler()

# Fit the scaler on the training data and transform both training and testing data
train_data.loc[:, ['TOTAL_OFFERED_CALL_VOLUME', 'HOLIDAY_ENCODED']] = scaler.fit_transform(
    train_data[['TOTAL_OFFERED_CALL_VOLUME', 'HOLIDAY_ENCODED']]
)

test_data.loc[:, ['TOTAL_OFFERED_CALL_VOLUME', 'HOLIDAY_ENCODED']] = scaler.transform(
    test_data[['TOTAL_OFFERED_CALL_VOLUME', 'HOLIDAY_ENCODED']]
)


train_targets = train_data[target_col]
test_targets = test_data[target_col]

def create_sequences(data, targets, seq_length, horizon):
    X, y = [], []
    for i in range(len(data) - seq_length - horizon + 1):
        X.append(data[i:i + seq_length])
        y.append(targets[i + seq_length : i + seq_length + horizon]) # Collect the next 365 days
    return np.array(X), np.array(y)

X_train, y_train = create_sequences(train_data, train_targets, SEQ_LEN, HORIZON)
X_test, y_test = create_sequences(test_data, test_targets, SEQ_LEN, HORIZON)

# Define the input shape
lstm_input = Input(shape=(X_train.shape[1], X_train.shape[2]))



  train_data.loc[:, ['TOTAL_OFFERED_CALL_VOLUME', 'HOLIDAY_ENCODED']] = scaler.fit_transform(
 0.24821019 0.42758448 0.27903064 0.23467927 0.21388173 0.7702248
 0.16684565 0.6645547  0.32842927 0.68324026 0.0781429  0.04435137
 0.40224084 0.45751002 0.73052692 0.65764605 0.13165808 0.95640034
 0.14350659 0.40424542 0.07019616 0.29997136 0.79252577 0.97340349
 0.62918814 0.87736254 0.78543814 0.86060997 0.55000716 0.79170246
 0.20686569 0.19326317 0.952248   0.89647766 0.33780785 0.03268184
 0.0146764  0.66505584 0.38588202 0.03976947 0.40274198 0.62016753
 0.08329754 0.19201031 0.0799685  0.68084192 0.94587629 0.16476947
 0.49889032 0.65292096 0.39715779 0.48038373 0.98152921 0.80194015
 0.84840349 0.22773482 0.22730527 0.28114261 0.11522766 0.71928694
 0.99495275 0.51449742 0.67103379 0.10316438 0.13412801 0.73739977
 0.56418242 0.58952606 0.09539662 0.24659937 0.36290092 0.02806415
 0.94505298 0.07542239 0.20654353 0.43989834 0.99842497 0.50630011
 0.39740836 0.38588202 0.40764605 0.

In [15]:
# First LSTM layer with Batch Normalization
lstm1 = LSTM(256, activation="tanh", return_sequences=True, kernel_initializer=HeUniform(seed=100), kernel_regularizer=l2(0.0001))(lstm_input)
batch_norm1 = BatchNormalization()(lstm1) # Add Batch Normalization
drop1 = Dropout(0.2)(batch_norm1)

# Second LSTM Layer with Batch Normalization
lstm2 = LSTM(128, activation="tanh", return_sequences=False, kernel_initializer=HeUniform(seed=100), kernel_regularizer=l2(0.0001))(drop1)
batch_norm2 = BatchNormalization()(lstm2) # Add Batch Normalization
drop2 = Dropout(0.2)(batch_norm2)

# Dense Layers
dense1 = Dense(64, activation="relu")(drop2)
batch_norm3 = BatchNormalization()(dense1) # Add Batch Normalization
dense2 = Dense(180)(batch_norm3)

# Add residual connection
residual = Dense(180)(drop2) # Residual connection from the LSTM output
output = Add()([dense2, residual]) # Combine the residual and dense outputs

# Define the model
model = Model(inputs=lstm_input, outputs=output)

# Compile the model with Huber loss
model.compile(loss="huber", optimizer=Adam(learning_rate=0.001), metrics=["mae"])

# Display the model summary
model.summary()

early_stopping = EarlyStopping(monitor='val_loss', mode='min', patience=3, start_from_epoch=5, restore_best_weights=True)
model.fit(X_train, y_train, epochs=8, batch_size=32, validation_data=(X_test, y_test), verbose=1, shuffle=False, callbacks=[early_stopping])




Epoch 1/8
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 1s/step - loss: 0.9273 - mae: 1.0193 - val_loss: 0.4769 - val_mae: 0.4963
Epoch 2/8
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - loss: 0.7119 - mae: 0.7842 - val_loss: 0.4364 - val_mae: 0.4514
Epoch 3/8
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 1s/step - loss: 0.5842 - mae: 0.6388 - val_loss: 0.4032 - val_mae: 0.4082
Epoch 4/8
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 1s/step - loss: 0.4955 - mae: 0.5342 - val_loss: 0.3682 - val_mae: 0.3581
Epoch 5/8
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 1s/step - loss: 0.4322 - mae: 0.4580 - val_loss: 0.3433 - val_mae: 0.3246
Epoch 6/8
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 1s/step - loss: 0.3938 - mae: 0.4140 - val_loss: 0.3314 - val_mae: 0.3222
Epoch 7/8
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 1s/step - loss: 0.3668 - mae: 0

<keras.src.callbacks.history.History at 0x7ee50968c090>

In [16]:
model.save("Agentic_AI_LSTM_v1.h5")

