In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler
from google.colab import files
import sys
import os

# Load the reservation dataset and select the model inputs and target.
file_path = 'syntheticReservationData.csv'

# Raise a real exception instead of calling sys.exit(): inside a notebook
# kernel sys.exit() only surfaces as a bare SystemExit, which does not say
# which path was looked up. The resolved path is included so a wrong working
# directory (or a missing upload) is obvious from the message.
if not os.path.exists(file_path):
    raise FileNotFoundError(
        f"Error: dataset not found at '{os.path.abspath(file_path)}'"
    )

df = pd.read_csv(file_path)

# Cast the weekend flag to int so every feature column is numeric
# (presumably stored as True/False in the CSV -- confirm against the data).
df['isWeekend'] = df['isWeekend'].astype(int)

# Columns fed to the network; their order defines the input vector layout.
featureCols = [
    'isWeekend',
    'timeOfDay',
    'occupancy',
    'numOfGuests',
    'customerVisitCount',
    'customerAvgSpend'
]

# Column the network is trained to predict.
targetCol = 'actualDuration'

X = df[featureCols]
y = df[targetCol]

# Hold out 20% of the rows as the test set and keep 80% for training.
# The validation subset is carved out of the training rows later, at fit time.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,  # fixed seed so the split is repeatable
)

# Standardise the features (zero mean, unit variance per column); this is
# used here in preference to min-max scaling. The statistics are learned from
# the training rows only and then re-applied unchanged to the test rows.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout masks
# and batch shuffling are repeatable between runs (the train/test split above
# is already seeded with the same value). Bit-exact results on GPU may still
# need additional determinism settings.
tf.keras.utils.set_random_seed(1)

# Small fully-connected regressor: 64 -> 32 -> 1.
model = keras.Sequential([
    # Explicit input layer. Passing `input_shape=` to the first Dense layer is
    # deprecated in Keras 3 and emits a UserWarning on construction.
    keras.Input(shape=(len(featureCols),)),

    # Hidden layer 1
    layers.Dense(64, activation='relu'),

    # Dropout as regularisation (the dataset is fully synthetic)
    layers.Dropout(0.2),

    # Hidden layer 2
    layers.Dense(32, activation='relu'),

    # Lighter dropout before the output
    layers.Dropout(0.1),

    # Output layer: one neuron with linear activation for the predicted minutes
    layers.Dense(1),
])


optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

model.compile(
    loss='mse',  # squared error penalises large misses more heavily
    optimizer=optimizer,
    metrics=['mae', 'mse'],  # MAE is reported because it reads directly in minutes
)

model.summary()

# Stop once the validation loss has failed to improve for 5 consecutive
# epochs, then roll the model back to the best-scoring weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

print("Starting training...")

# validation_split holds back 20% of the training rows for validation; Keras
# takes them from the end of the arrays before any per-epoch shuffling.
history = model.fit(
    X_train_scaled,
    y_train,
    validation_split=0.2,
    epochs=100,  # upper bound; early stopping normally ends training sooner
    batch_size=32,
    callbacks=[early_stopping],
    verbose=1,
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Starting training...
Epoch 1/100
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 7s 3ms/step - loss: 1329.6503 - mae: 26.7356 - mse: 1329.6503 - val_loss: 81.6672 - val_mae: 7.4871 - val_mse: 81.6672
Epoch 2/100
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - loss: 150.3094 - mae: 9.8330 - mse: 150.3094 - val_loss: 74.3655 - val_mae: 7.1958 - val_mse: 74.3655
Epoch 3/100
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 5s 2ms/step - loss: 141.9519 - mae: 9.5634 - mse: 141.9519 - val_loss: 74.0914 - val_mae: 7.1853 - val_mse: 74.0914
Epoch 4/100
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 6s 3ms/step - loss: 136.2895 - mae: 9.3747 - mse: 136.2895 - val_loss: 70.4610 - val_mae: 7.0441 - val_mse: 70.4610
Epoch 5/100
2000/2000 ━━━━━━━━━━━━━━━━━━━━ 5s 3ms/step - loss: 131.9754 - mae: 9.2298 - mse: 131.9754 - val_loss: 72.9392 - val_mae: 7.1433 - val_mse: 72.9392
Epoch 6/100

In [2]:

# Score the trained model on the held-out test set. evaluate() returns the
# loss followed by the compiled metrics in order: [loss, mae, mse].
loss, mae, mse = model.evaluate(X_test_scaled, y_test, verbose=0)
print(f"Mean Absolute Error on Test Data: {mae:.2f} minutes")

# Example prediction: compare the first few test rows with their true values.
# The sample size lives in one place, and the slices simply shrink if the
# test set has fewer rows (the old fixed range(5) loop would raise IndexError).
numSamples = 5
sampleInput = X_test_scaled[:numSamples]
# .iloc makes the positional slice explicit; y_test keeps the shuffled
# DataFrame index labels from the train/test split.
sampleTruth = y_test.iloc[:numSamples].values
predictions = model.predict(sampleInput).flatten()

print("\nSample Predictions vs Truth:")
for predicted, actual in zip(predictions, sampleTruth):
    print(f"Predicted: {predicted:.1f} mins, Actual: {actual} mins")

# NOTE(review): '.h5' is the legacy HDF5 format; Keras 3 recommends '.keras'.
# The file name is kept as-is because whatever loads this model presumably
# expects it -- confirm with the consumer before changing it.
modelName = 'durationPredictor.h5'
model.save(modelName)
print(f"Model saved as {modelName}")

# Download the file to your local machine (Colab-only helper)
files.download(modelName)

Mean Absolute Error on Test Data: 7.00 minutes
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step





Sample Predictions vs Truth:
Predicted: 58.8 mins, Actual: 61 mins
Predicted: 58.4 mins, Actual: 47 mins
Predicted: 68.3 mins, Actual: 66 mins
Predicted: 71.0 mins, Actual: 61 mins
Predicted: 63.9 mins, Actual: 71 mins
Model saved as durationPredictor.h5


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [3]:
import joblib

# Write the fitted scaler to disk and download it, mirroring how the model
# file is exported; the file name is defined once and reused below.
scalerName = 'durationScaler.pkl'
joblib.dump(scaler, scalerName)
print(f"Scaler saved as {scalerName}")
files.download(scalerName)

Scaler saved as durationScaler.pkl


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>