In [3]:
# Imports
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from google.colab import files
import sys
import os

file_path = 'syntheticReservationData.csv'

if not os.path.exists(file_path):
    print("Error: dataset not found")
    sys.exit(1)

df = pd.read_csv(file_path)

df['isWeekend'] = df['isWeekend'].astype(int)

# Features
rlFeatureCols = [
    'isWeekend',
    'timeOfDay',
    'occupancy',
    'numOfGuests',
    'customerVisitCount',
    'customerAvgSpend',
    # ACTION
    'assignedTableCapacity',
    'wastedSeats'
]

rlTargetCol = 'targetQValue'

X_rl = df[rlFeatureCols]
y_rl = df[rlTargetCol]

# Split
X_train_rl, X_test_rl, y_train_rl, y_test_rl = train_test_split(X_rl, y_rl, test_size=0.2, random_state=1)

# Scale
scaler_rl = StandardScaler()
X_train_rl_scaled = scaler_rl.fit_transform(X_train_rl)
X_test_rl_scaled = scaler_rl.transform(X_test_rl)

# Model Architecture
model = keras.Sequential([
    layers.Dense(64, activation='relu', input_shape=[len(rlFeatureCols)]),
    layers.Dropout(0.2),

    layers.Dense(128, activation='relu'),
    layers.Dropout(0.2),

    layers.Dense(64, activation='relu'),
    layers.Dropout(0.1),

    layers.Dense(1) # Output: The predicted Q-Value
])

# Compile
model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    metrics=['mae']
)

# Train
print("\nStarting RL Agent training...")

early_stopping_rl = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

history_rl = model.fit(
    X_train_rl_scaled, y_train_rl,
    epochs=100,
    validation_split=0.2,
    verbose=1,
    batch_size=32,
    callbacks=[early_stopping_rl]
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Starting RL Agent training...
Epoch 1/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step - loss: 2254.5598 - mae: 29.7306 - val_loss: 301.9787 - val_mae: 12.1563
Epoch 2/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4ms/step - loss: 473.9241 - mae: 15.0088 - val_loss: 273.4067 - val_mae: 11.3936
Epoch 3/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - loss: 428.1688 - mae: 14.0130 - val_loss: 274.5803 - val_mae: 11.3197
Epoch 4/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 3ms/step - loss: 409.1046 - mae: 13.6335 - val_loss: 287.5705 - val_mae: 11.5428
Epoch 5/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - loss: 392.4662 - mae: 13.3085 - val_loss: 242.2899 - val_mae: 10.7029
Epoch 6/100
[1m2000/2000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - loss: 378.3693 - mae: 13.0517 - val_loss: 290.2855 - val_mae: 11.3332


In [4]:

# Evaluate
print("Evaluating RL Agent...")
loss, mae = model.evaluate(X_test_rl_scaled, y_test_rl, verbose=0)
print(f"\nMean Absolute Error on Q-Value: {mae:.2f}")

print("\nVIP on Busy Saturday")
# [isWeekend, timeOfDay, occupancy, guests, visits, spend, table_cap, wasted]

# Scenario: Weekend, 7PM (19), Busy (0.9), 2 Guests, 10 Visits, $80 Spend
vipState = [1, 19, 0.9, 2, 10, 80]

# Action A: Reject (Table=0, Wasted=0)
inputReject = scaler_rl.transform([vipState + [0, 0]])

# Action B: Accept Perfectly (Table=2, Wasted=0)
inputAccept = scaler_rl.transform([vipState + [2, 0]])

# Action C: Accept Wastefully (Table=6, Wasted=4)
inputWasteful = scaler_rl.transform([vipState + [6, 4]])

predReject = model.predict(inputReject, verbose=0)[0][0]
predAccept = model.predict(inputAccept, verbose=0)[0][0]
predWasteful = model.predict(inputWasteful, verbose=0)[0][0]

print(f"Predicted Value of reject:   {predReject:.2f}")
print(f"Predicted Value of accept:   {predAccept:.2f}")
print(f"Predicted Value of wasteful: {predWasteful:.2f}")

if predAccept > predReject and predAccept > predWasteful:
    print("Success: Agent learned to prioritize the efficient acceptance!")
else:
    print("Fail: Agent logic is weird. Check data generation.")

# Save
modelName = 'decisionDQN.h5'
model.save(modelName)
print(f"\nModel saved as {modelName}")
files.download(modelName)

Evaluating RL Agent...

Mean Absolute Error on Q-Value: 10.54

VIP on Busy Saturday




Predicted Value of reject:   -0.24
Predicted Value of accept:   118.24
Predicted Value of wasteful: 45.39
Success: Agent learned to prioritize the efficient acceptance!

Model saved as decisionDQN.h5


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [5]:
import joblib

# Save the Scaler
joblib.dump(scaler_rl, 'rlScaler.pkl')
print("Scaler saved as rlScaler.pkl")
files.download('rlScaler.pkl')

Scaler saved as rlScaler.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>