In [1]:
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import make_pipeline
from sklearn.compose import make_column_transformer
from tensorflow import keras
from tensorflow.keras import layers

# --------------------------
# 1️⃣ Load the data
# --------------------------
hotel = pd.read_csv('hotel.csv')

# Separate the target column from the feature columns; `hotel` itself is
# left unmodified.
X = hotel.drop(columns=['is_canceled'])
y = hotel['is_canceled'].copy()

# Replace month names with their calendar number (January -> 1 ... December -> 12).
# Any value that is not one of these twelve names becomes NaN.
month_names = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December',
]
month_to_number = {
    month: number for number, month in enumerate(month_names, start=1)
}
X['arrival_date_month'] = X['arrival_date_month'].map(month_to_number)

# --------------------------
# 2️⃣ Define feature sets
# --------------------------
# Columns handled by the numeric pipeline. Order matters: it determines the
# column order of the preprocessed matrix.
features_num = [
    "lead_time",
    "arrival_date_week_number",
    "arrival_date_day_of_month",
    "stays_in_weekend_nights",
    "stays_in_week_nights",
    "adults",
    "children",
    "babies",
    "is_repeated_guest",
    "previous_cancellations",
    "previous_bookings_not_canceled",
    "required_car_parking_spaces",
    "total_of_special_requests",
    "adr",
]

# Columns handled by the categorical (one-hot) pipeline.
features_cat = [
    "hotel",
    "arrival_date_month",
    "meal",
    "market_segment",
    "distribution_channel",
    "reserved_room_type",
    "deposit_type",
    "customer_type",
]

# --------------------------
# 3️⃣ Define preprocessing pipelines
# --------------------------
# Numeric columns: fill missing values with the constant imputer's default
# fill value (0 for numeric data), then standardize each column.
transformer_num = make_pipeline(
    SimpleImputer(strategy="constant"),
    StandardScaler()
)

# Categorical columns: missing values become the literal category "NA", then
# each column is one-hot encoded. handle_unknown='ignore' encodes categories
# not seen during fit as all zeros instead of raising.
transformer_cat = make_pipeline(
    SimpleImputer(strategy="constant", fill_value="NA"),
    OneHotEncoder(handle_unknown='ignore')
)

# sparse_threshold=0 makes the combined output always a dense ndarray.
# With the default threshold (0.3) the transformer returns either a dense
# array or a SciPy sparse matrix depending on the density of the data, and
# the result is fed straight into a dense Keras network.
preprocessor = make_column_transformer(
    (transformer_num, features_num),
    (transformer_cat, features_cat),
    sparse_threshold=0,
)

# --------------------------
# 4️⃣ Train/validation split (keep DataFrame here!)
# --------------------------
# 75% of the rows go to training; stratifying on y keeps the class ratio of
# the target the same in both parts. The fixed random_state makes the split
# repeatable.
split_parts = train_test_split(
    X,
    y,
    train_size=0.75,
    stratify=y,
    random_state=42,
)
X_train, X_valid, y_train, y_valid = split_parts

# --------------------------
# 5️⃣ Fit the preprocessor on the training DataFrame and transform it
# --------------------------
# Fitting uses the training rows only, so no statistics are learned from the
# validation rows.
X_train_processed = preprocessor.fit_transform(X_train)

# Persist the fitted preprocessor to disk
joblib.dump(preprocessor, "preprocessor.pkl")

# --------------------------
# 6️⃣ Transform the validation data with the already-fitted preprocessor
# --------------------------
X_valid_processed = preprocessor.transform(X_valid)

# --------------------------
# 7️⃣ Build and compile the model
# --------------------------
# Seed Python, NumPy and the Keras backend so weight initialisation, dropout
# masks and batch shuffling are repeatable (same seed as the data split).
# NOTE: some GPU kernels may still be non-deterministic.
keras.utils.set_random_seed(42)

# One input unit per column of the preprocessed feature matrix.
input_shape = [X_train_processed.shape[1]]

# Fully connected binary classifier: two hidden blocks of
# Dense -> BatchNormalization -> Dropout, and a single sigmoid output unit.
model = keras.Sequential([
    # keras.Input(shape=...) replaces InputLayer(input_shape=...); the
    # `input_shape` argument is deprecated in Keras 3.
    keras.Input(shape=tuple(input_shape)),
    layers.BatchNormalization(),
    layers.Dense(256, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(128, activation='relu'),
    layers.BatchNormalization(),
    layers.Dropout(0.3),
    layers.Dense(1, activation='sigmoid'),
])

# Binary cross-entropy matches the sigmoid output; accuracy is tracked as an
# additional metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# --------------------------
# 8️⃣ Train with early stopping
# --------------------------
# Stop once the monitored quantity (Keras' default, `val_loss`) has not
# improved by at least 0.001 for 5 consecutive epochs, then roll the model
# back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    min_delta=0.001,
    patience=5,
    restore_best_weights=True,
)

# 200 is only an upper bound on epochs; early stopping normally ends sooner.
fit_options = dict(
    batch_size=512,
    epochs=200,
    callbacks=[early_stopping],
    verbose=1,
)

history = model.fit(
    X_train_processed,
    y_train,
    validation_data=(X_valid_processed, y_valid),
    **fit_options,
)

# --------------------------
# 9️⃣ Save model and print evaluation
# --------------------------
# Write the model to disk in the `.keras` format.
model.save("hotel_model.keras")

# The model was compiled with one metric, so evaluate() yields
# [loss, accuracy] for the validation set.
validation_scores = model.evaluate(X_valid_processed, y_valid)
loss, accuracy = validation_scores

print(f"Validation accuracy: {accuracy:.3f}")
print(f"Validation loss: {loss:.3f}")


Epoch 1/200




175/175 ━━━━━━━━━━━━━━━━━━━━ 3s 7ms/step - accuracy: 0.7681 - loss: 0.4880 - val_accuracy: 0.7989 - val_loss: 0.4347
Epoch 2/200
175/175 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.8007 - loss: 0.4250 - val_accuracy: 0.8097 - val_loss: 0.4057
Epoch 3/200
175/175 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.8082 - loss: 0.4105 - val_accuracy: 0.8163 - val_loss: 0.3963
Epoch 4/200
175/175 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.8118 - loss: 0.4036 - val_accuracy: 0.8175 - val_loss: 0.3921
Epoch 5/200
175/175 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.8127 - loss: 0.3990 - val_accuracy: 0.8203 - val_loss: 0.3877
Epoch 6/200
175/175 ━━━━━━━━━━━━━━━━━━━━ 1s 6ms/step - accuracy: 0.8152 - loss: 0.3949 - val_accuracy: 0.8212 - val_loss: 0.3850
Epoch 7/200
[1m175/175[0m [32m━