In [None]:
# hotel_price_model.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# 1. Load data
df = pd.read_csv("hotel_bookings.csv")

# 2. Rename the target column ("adr" becomes "price")
df = df.rename(columns={"adr": "price"})

# 3. Drop columns that are not used as features
df = df.drop(columns=["company", "agent", "country"])

# Rows without a target value cannot be used to train or score a supervised
# model; drop them rather than inventing a label via mean imputation.
df = df.dropna(subset=["price"])

# 4. Encode the 'hotel' column as integer codes
label_encoder = LabelEncoder()
df["hotel"] = label_encoder.fit_transform(df["hotel"])

# 5. One-hot encode the remaining object-dtype columns.
# 'hotel' is already integer-coded here, so it is never part of this selection.
categorical_columns = df.select_dtypes(include="object").columns
df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# 6. Train/test split (done BEFORE imputation so that no statistic computed
# from the held-out rows can influence the training features)
X = df.drop(columns="price")
y = df["price"]
x_train, x_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=23
)
# Explicit copies: the column assignments below must not write through to
# (or warn about) the parent frame.
x_train = x_train.copy()
x_test = x_test.copy()

# 7. Impute missing values in the numeric feature columns.
# The column means are learned from the training split only and then
# re-used, unchanged, to fill the test split.
numeric_columns = X.select_dtypes(include=[np.number]).columns
imputer = SimpleImputer(strategy="mean")
x_train[numeric_columns] = imputer.fit_transform(x_train[numeric_columns])
x_test[numeric_columns] = imputer.transform(x_test[numeric_columns])

# 8. Linear Regression
lm = LinearRegression()
lm.fit(x_train, y_train)

# 9. HistGradientBoostingRegressor
# Seeded with the same value as the split so repeated runs give the same fit.
model = HistGradientBoostingRegressor(random_state=23)
model.fit(x_train, y_train)

# 10. Evaluation
def evaluate_model(name, model, x_train, y_train, x_test, y_test):
    """Print a short performance report for a fitted model.

    The report contains, in order: a header with ``name``, the value of
    ``model.score`` on the training data and on the test data (both labelled
    R²), the mean absolute error and the root mean squared error of the
    test-set predictions, and a 50-character dashed separator.

    Args:
        name: Label printed in the report header.
        model: Fitted estimator exposing ``predict`` and ``score``.
        x_train: Training features passed to ``model.score``.
        y_train: Training targets passed to ``model.score``.
        x_test: Test features used for prediction and scoring.
        y_test: Test targets the predictions are compared against.

    Returns:
        None. The report is written to standard output.
    """
    # Predict once; the same predictions feed both error metrics below.
    predictions = model.predict(x_test)
    print(f"🔹 {name}")

    train_r2 = model.score(x_train, y_train)
    print(f"Train R²: {train_r2:.3f}")

    test_r2 = model.score(x_test, y_test)
    print(f"Test R²: {test_r2:.3f}")

    mae = mean_absolute_error(y_test, predictions)
    print(f"MAE: {mae:.2f}")

    # RMSE is the square root of the mean squared error.
    rmse = np.sqrt(mean_squared_error(y_test, predictions))
    print(f"RMSE: {rmse:.2f}")

    separator = "-" * 50
    print(separator)

# Report both fitted models on the same train/test split, in a fixed order.
for label, estimator in (
    ("Linear Regression", lm),
    ("HistGradientBoosting", model),
):
    evaluate_model(label, estimator, x_train, y_train, x_test, y_test)
