In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns
import plotly.express as px
import plotly.subplots as sp
from plotly.subplots import make_subplots as ms
import plotly.graph_objects as go
colors = sns.color_palette("Blues")
import random
import scipy.stats as stats
import statsmodels.api as sma
import statsmodels.formula.api as sfa
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder, MinMaxScaler, StandardScaler,\
PowerTransformer
from sklearn.linear_model import LinearRegression, LogisticRegression,ElasticNet, Ridge, Lasso
from sklearn.feature_selection import SequentialFeatureSelector, RFE, f_oneway, chi2
from sklearn.metrics import r2_score, roc_auc_score, roc_curve, mean_squared_error, f1_score, accuracy_score,\
classification_report, confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score
from statsmodels.stats.outliers_influence import variance_inflation_factor
import warnings 
warnings.filterwarnings('ignore')
from sklearn.model_selection import KFold, GridSearchCV, cross_val_score, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier, StackingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from xgboost import XGBClassifier
from PIL import Image
from imblearn.over_sampling import SMOTE
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

# Notebook-wide display settings: show every column and row, and print floats
# with six decimal places.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.float_format', '{:.6f}'.format)

# Default size (in inches) for every matplotlib figure created below.
plt.rcParams.update({'figure.figsize': [12, 7]})

# Marker so the cell output confirms the setup finished.
print("DONE")

DONE


In [2]:
# Read the reservations CSV once; `hotel` stays as the untouched original and
# all later feature engineering is done on the `df_hotel_dataset` copy.
hotel = pd.read_csv('hotel reservations.csv')
df_hotel_dataset = hotel.copy(deep=True)

# Show the first record as a quick sanity check of the columns.
df_hotel_dataset.head(n=1)

Unnamed: 0,Booking_ID,no_of_adults,no_of_children,no_of_weekend_nights,no_of_week_nights,type_of_meal_plan,required_car_parking_space,room_type_reserved,lead_time,arrival_year,arrival_month,arrival_date,market_segment_type,repeated_guest,no_of_previous_cancellations,no_of_previous_bookings_not_canceled,avg_price_per_room,no_of_special_requests,booking_status
0,INN00001,2,0,1,2,Meal Plan 1,0,Room_Type 1,224,2017,10,2,Offline,0,0,0,65.0,0,Not_Canceled


**Building a pipeline for the final model**

In [3]:
# Derive a Weekday/Weekend flag from the arrival date, then drop the raw
# identifier and date-part columns.
import datetime

# Build 'year/month/day' strings from the three arrival columns and parse them.
# errors='coerce' turns combinations that are not real calendar dates into NaT.
arrival_parts = df_hotel_dataset[['arrival_year', 'arrival_month', 'arrival_date']].astype(str)
arrival_text = (arrival_parts['arrival_year'] + '/' + arrival_parts['arrival_month']
                + '/' + arrival_parts['arrival_date'])
arrival_dates = pd.to_datetime(arrival_text, errors='coerce')

# dayofweek: Monday = 0 ... Sunday = 6, so values >= 5 are Saturday/Sunday.
# NOTE(review): rows whose date could not be parsed (NaT) fail the comparison
# and are therefore labelled 'Weekday' -- confirm this is the intended handling.
df_hotel_dataset['Day'] = np.where(arrival_dates.dt.dayofweek >= 5, 'Weekend', 'Weekday')

# The booking id and raw date parts are not used as model features.
df_hotel_dataset = df_hotel_dataset.drop(
    columns=['Booking_ID', 'arrival_year', 'arrival_month', 'arrival_date']
)
df_hotel_dataset.head(1)

Unnamed: 0,no_of_adults,no_of_children,no_of_weekend_nights,no_of_week_nights,type_of_meal_plan,required_car_parking_space,room_type_reserved,lead_time,market_segment_type,repeated_guest,no_of_previous_cancellations,no_of_previous_bookings_not_canceled,avg_price_per_room,no_of_special_requests,booking_status,Day
0,2,0,1,2,Meal Plan 1,0,Room_Type 1,224,Offline,0,0,0,65.0,0,Not_Canceled,Weekday


In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from category_encoders import WOEEncoder
from xgboost import XGBClassifier
from sklearn.preprocessing import OrdinalEncoder, PowerTransformer



# Split the engineered frame into predictors and target.
X = df_hotel_dataset.drop('booking_status', axis = 1)

# Encode the target explicitly instead of replacing in place on a Series taken
# from the frame: the frame is left untouched (so the cell can be re-run), the
# labels are guaranteed to be integers, and an unexpected status value fails
# loudly here rather than inside the model.
status_codes = {'Not_Canceled': 0, 'Canceled': 1}
y = df_hotel_dataset['booking_status'].map(status_codes)
if y.isna().any():
    raise ValueError(
        "booking_status contains values other than 'Not_Canceled' / 'Canceled'"
    )
y = y.astype(int)


# transformation: numeric columns passed through PowerTransformer
num_cols = ['lead_time','avg_price_per_room','no_of_special_requests','no_of_previous_cancellations',
            'no_of_previous_bookings_not_canceled','no_of_children', 'no_of_weekend_nights','no_of_week_nights']

# encoding: each categorical column gets an ordinal encoder with a fixed,
# explicitly listed category order
oe_meal_plan = OrdinalEncoder(categories = [['Not Selected','Meal Plan 1','Meal Plan 2', 'Meal Plan 3']])
oe_room_type = OrdinalEncoder(categories = [['Room_Type 1', 'Room_Type 2', 'Room_Type 3', 'Room_Type 4',
                                             'Room_Type 5', 'Room_Type 6', 'Room_Type 7']])
oe_market_segment = OrdinalEncoder(categories = [['Corporate', 'Offline', 'Complementary', 'Aviation','Online']])


# Column-wise preprocessing; columns not named here are passed through unchanged.
preprocess = ColumnTransformer(transformers = [('Power',PowerTransformer(),num_cols),
                                               ('Encoding', oe_meal_plan, ['type_of_meal_plan']),
                                               ('Encoding1', oe_room_type, ['room_type_reserved']),
                                               ('Encoding2', oe_market_segment, ['market_segment_type']),
                                               ('woe', WOEEncoder(), ['Day'])
], remainder= 'passthrough'
)

# Initalising Pipeline: preprocessing and classifier are fitted together, so the
# pickled object accepts raw feature rows at prediction time.
pipeline = Pipeline([('preprocessing',preprocess), ('modelXGB', XGBClassifier()) ])

# train test split (70/30, fixed seed for reproducibility)
xtrain, xtest, ytrain, ytest = train_test_split(X, y, test_size = 0.3, random_state = 127)

pipeline.fit(xtrain, ytrain)

# Evaluate on the held-out 30%.
pred_test = pipeline.predict(xtest)

print(classification_report(ytest, pred_test))

              precision    recall  f1-score   support

           0       0.89      0.93      0.91      7259
           1       0.84      0.77      0.80      3624

    accuracy                           0.87     10883
   macro avg       0.87      0.85      0.86     10883
weighted avg       0.87      0.87      0.87     10883



**Freezing the model**

In [5]:
import pickle

# Persist the fitted pipeline (preprocessing + classifier) for the Streamlit app.
# The context manager closes the file even if pickle.dump raises.
with open('model_xgb.pickle', "wb") as model:
    pickle.dump(pipeline, model)

### Deployment

In [6]:
%%writefile app.py

import numpy as np
import seaborn as sns
import streamlit as st
from IPython.display import display, HTML
import pandas as pd 
import matplotlib.pyplot as plt 
import pickle
import base64
from category_encoders import WOEEncoder
from xgboost import XGBClassifier
from sklearn.preprocessing import OrdinalEncoder, PowerTransformer

# Text shown for each class label produced by the model (0 / 1).
STATUS_MESSAGES = {0: "Booking will be honoured", 1: "Booking will be cancelled"}


def set_background(image_path, mime_subtype):
    """Embed the image at `image_path` as the full-page background of the app.

    The file is base64-encoded into a CSS data URL; `mime_subtype` is the part
    after ``image/`` and must match the actual file format (e.g. "jpeg").
    """
    with open(image_path, "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read()).decode()
    st.markdown(
        f"""
<style>
.stApp {{
    background-image: url(data:image/{mime_subtype};base64,{encoded_string});
    background-size: cover
}}
</style>
""",
        unsafe_allow_html=True
    )


def load_model(path):
    """Load the pickled prediction pipeline from `path`.

    NOTE: unpickling executes arbitrary code, so only load a model file
    produced by the training notebook itself.
    """
    with open(path, "rb") as model_file:
        return pickle.load(model_file)


def build_input_frame(adults, children, weekend_nights, week_nights, meal_plan, car_parking,
                      room_type, lead_time, segment_type, repeated_guest,
                      previous_cancellations, not_cancelled, avg_price, special_request, day):
    """Return a one-row DataFrame with the column names the pipeline was trained on."""
    data = {'no_of_adults': adults,
            'no_of_children': children,
            'no_of_weekend_nights': weekend_nights,
            'no_of_week_nights': week_nights,
            'type_of_meal_plan': meal_plan,
            'required_car_parking_space': car_parking,
            'room_type_reserved': room_type,
            'lead_time': lead_time,
            'market_segment_type': segment_type,
            'repeated_guest': repeated_guest,
            'no_of_previous_cancellations': previous_cancellations,
            'no_of_previous_bookings_not_canceled': not_cancelled,
            'avg_price_per_room': avg_price,
            'no_of_special_requests': special_request,
            'Day': day}
    return pd.DataFrame([data])


def status_message(label):
    """Map a predicted class label (0 or 1) to the text shown to the user."""
    return STATUS_MESSAGES[int(label)]


def main():
    """Render the form, collect the booking details and show the prediction."""
    # The background file is a JPEG, so it is embedded as image/jpeg.
    set_background("HotelImage.jpg", "jpeg")

    st.title("Hotel Booking Cancellation Prediction")
    st.markdown("Will this customer honour the booking? ")

    # step 1: load the frozen pipeline
    clf = load_model('model_xgb.pickle')

    # step 2: collect user input.
    # min/max/step are passed by keyword: the third positional argument of
    # st.number_input / st.slider is `value` (the default), not `step`.
    adults = st.number_input('No. of Adults', min_value=0, max_value=4, step=1)
    children = st.number_input('No. of children', min_value=0, max_value=10, step=1)
    wnd = st.slider('No. of weekend nights', 0, 6)
    wn = st.slider('No. of week nights', 0, 17)
    tmp = st.selectbox('Type of Meal Plan', ('Meal Plan 1', 'Meal Plan 2', 'Meal Plan 3', 'Not Selected'))
    car_parking = st.number_input("Parking required or not", min_value=0, max_value=1, step=1)
    room_type = st.selectbox("Type of room type reserved ", ('Room_Type 1', 'Room_Type 2', 'Room_Type 3', 'Room_Type 4',
                           'Room_Type 5', 'Room_Type 6', 'Room_Type 7'))
    lead_time = st.number_input("Lead Time", min_value=0, max_value=443, step=1)
    segment_type = st.selectbox("Mode of Booking ", ('Online','Aviation','Offline','Corporate','Complementary'))
    repeated_guest = st.selectbox("Repeat visit 0 --> NO , 1 --> Yes", (0, 1))
    previous_cancellations = st.slider("No of previous cancellations", min_value=0, max_value=13, step=1)
    not_cancelled = st.slider("No of successful visits", min_value=0, max_value=58, step=1)
    avg_price = st.slider("Price per room", min_value=0, max_value=540, step=10)
    special_request = st.slider("Special requests if any", min_value=0, max_value=5, step=1)
    day = st.selectbox('Weekday or Weekend', ('Weekend', 'Weekday'))

    # step3 : converting user input to model input
    input_data = build_input_frame(adults, children, wnd, wn, tmp, car_parking, room_type,
                                   lead_time, segment_type, repeated_guest,
                                   previous_cancellations, not_cancelled, avg_price,
                                   special_request, day)

    # Only run the model when the user asks for a result; predict returns a
    # one-element array, so read its single label explicitly.
    if st.button("Check your Status"):
        prediction = clf.predict(input_data)
        st.subheader(status_message(prediction[0]))


# `streamlit run app.py` executes this file as the main script, so the guard
# runs the app there while keeping the helpers importable without side effects.
if __name__ == "__main__":
    main()

Overwriting app.py
