In [17]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [29]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import load_model

Reading data

In [19]:
data = pd.read_csv('/content/gdrive/MyDrive/Colab Notebooks/Content/hotel.csv')
df = data.copy()
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119390 entries, 0 to 119389
Data columns (total 32 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   hotel                           119390 non-null  object 
 1   is_canceled                     119390 non-null  int64  
 2   lead_time                       119390 non-null  int64  
 3   arrival_date_year               119390 non-null  int64  
 4   arrival_date_month              119390 non-null  object 
 5   arrival_date_week_number        119390 non-null  int64  
 6   arrival_date_day_of_month       119390 non-null  int64  
 7   stays_in_weekend_nights         119390 non-null  int64  
 8   stays_in_week_nights            119390 non-null  int64  
 9   adults                          119390 non-null  int64  
 10  children                        119386 non-null  float64
 11  babies                          119390 non-null  int64  
 12  meal            

Processing data

In [38]:
for column in df.columns:
    if(df[column].dtype == 'object'):
        df[column].fillna(df[column].mode()[0], inplace = True)
    else:
        df[column].fillna(round(df[column].mean()), inplace = True)

label_encoder = LabelEncoder()
for column in df.select_dtypes(include = ['object']).columns:
    df[column] = label_encoder.fit_transform(df[column])

x = df.drop(columns = ['is_canceled'])
y = df['is_canceled']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 42)

scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

x_train = np.array(x_train)
x_test = np.array(x_test)
y_train = np.array(y_train)
y_test = np.array(y_test)

Building deep learing model

In [39]:
model = Sequential()

model.add(Dense(64, input_dim = x_train.shape[1], activation = 'relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation = 'relu'))
model.add(Dense(1, activation = 'sigmoid'))

model.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

model.fit(x_train, y_train, epochs = 50, batch_size = 32, validation_data = (x_test, y_test))


Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2985/2985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 2ms/step - accuracy: 0.9494 - loss: 0.1555 - val_accuracy: 1.0000 - val_loss: 3.7431e-04
Epoch 2/50
[1m2985/2985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9990 - loss: 0.0046 - val_accuracy: 1.0000 - val_loss: 9.0491e-05
Epoch 3/50
[1m2985/2985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 2ms/step - accuracy: 0.9996 - loss: 0.0015 - val_accuracy: 1.0000 - val_loss: 4.3370e-06
Epoch 4/50
[1m2985/2985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 2ms/step - accuracy: 0.9999 - loss: 8.6728e-04 - val_accuracy: 1.0000 - val_loss: 1.3831e-06
Epoch 5/50
[1m2985/2985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9999 - loss: 4.3526e-04 - val_accuracy: 1.0000 - val_loss: 1.3035e-06
Epoch 6/50
[1m2985/2985[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 2ms/step - accuracy: 1.0000 - loss: 1.8738e-04 - val_accuracy: 1.0000 - val_loss: 4

<keras.src.callbacks.history.History at 0x7e9661570e80>

Review model

In [43]:
loss, accuracy = model.evaluate(x_test, y_test)
print(f"Loss: {loss}\nAccuracy: {accuracy}")

[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 1.0000 - loss: 1.8735e-06
Loss: 1.867301534730359e-06
Accuracy: 1.0


Save and using model

In [44]:
predictions = model.predict(x_test)
predictions = (predictions > 0.5).astype(int)

model.save('hotel_cancellation_model.h5')

loaded_model = load_model('hotel_cancellation_model.h5')

loaded_predictions = loaded_model.predict(x_test)
loaded_predictions = (loaded_predictions > 0.5).astype(int)

accuracy = accuracy_score(y_test, loaded_predictions)
print(f'Accuracy: {accuracy}')

[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step




[1m747/747[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
Accuracy: 1.0
