In [None]:
import pandas as pd
import numpy as np
import keras
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Seed NumPy's global random generator so NumPy-based randomness is repeatable.
# NOTE(review): Keras weight initialisation and dropout may draw from a separate
# backend RNG that this call does not control - confirm whether it needs seeding too.
np.random.seed(2)

In [None]:
# Load the bookings table; the path is relative to the notebook's working directory.
df = pd.read_csv('data/hotel_booking_updated.csv')

In [None]:
# Preview the first rows to check that the CSV parsed as expected.
df.head()

In [None]:
# Handling categorical features: each listed column is cast to str and replaced
# in place by integer codes. A single encoder is refitted per column, so after
# the loop `le` only remembers the classes of the last column processed.
le = preprocessing.LabelEncoder()

categorical_columns = [
    'hotel',
    'arrival_date_month',
    'meal',
    'country',
    'market_segment',
    'distribution_channel',
    'reserved_room_type',
    'assigned_room_type',
    'deposit_type',
    'customer_type',
    'arrival_date',
]

for column in categorical_columns:
    df[column] = le.fit_transform(df[column].astype('str'))


In [None]:
# Feature matrix: every column except the target variable "is_canceled".
X = df.drop(columns='is_canceled')

In [None]:
# Target vector: the "is_canceled" column that the model learns to predict.
y = df["is_canceled"]

In [None]:
#We use the train_test_split function to create the appropriate train and test data for our features
# ("X_train" and "X_test" respectively) and target data ("y_train" and "y_test").
#We are specifying our test data to be 30% of the total data (test_size=0.3). We are also providing a seed (random_state=123) to be able to reproduce this split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=123)

In [None]:
# Sanity-check the training split: (rows, features). The feature count has to
# match the input width of the network defined further down.
X_train.shape

In [None]:
# Sanity-check the test split: it must have the same number of columns as X_train.
X_test.shape

In [None]:
# Convert all four splits from pandas objects to plain NumPy arrays in one step.
X_train, X_test, y_train, y_test = (
    np.array(split) for split in (X_train, X_test, y_train, y_test)
)

In [None]:
#Standardisation
# The scaler is created unfitted here; its per-feature statistics are learned in the next cell.

scaler = StandardScaler()

In [None]:
#We fit the scaler on the training data only and, in the same call, transform that data,
#storing the standardised result in a variable named "train_scaled"
train_scaled = scaler.fit_transform(X_train)

In [None]:
#We then transform our test data with the same fitted scaler (transform only, no refit),
#so the test set is scaled with the statistics learned from the training data
test_scaled = scaler.transform(X_test)

## Deep neural network

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout

In [None]:
# Fully connected binary classifier.
# The input width is taken from the training matrix instead of being hard-coded
# to 32, so the network keeps matching the data if columns are added or removed.
n_features = X_train.shape[1]

model = Sequential([
    Dense(units=16, input_dim=n_features, activation='relu'),
    Dense(units=24, activation='relu'),
    Dropout(0.5),  # regularisation: drops half of the previous layer's outputs while training
    Dense(20, activation='relu'),
    Dense(24, activation='relu'),
    Dense(1, activation='sigmoid'),  # single output in (0, 1) for the binary target
])

In [None]:
# Show the layer-by-layer architecture of the network built above.
model.summary()

## Training

In [None]:
# Binary cross-entropy pairs with the single sigmoid output unit; accuracy is
# tracked as an additional metric next to the loss.
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])


In [None]:
model.fit(X_train,y_train,batch_size=15,epochs=5)

In [None]:
score = model.evaluate(X_test, y_test)

In [None]:
print(score)

In [None]:
import matplotlib.pyplot as plt
import itertools

from sklearn import svm, datasets
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()

In [None]:
y_pred = model.predict(X_test)
y_test = pd.DataFrame(y_test)

In [None]:
# The sigmoid output is a probability; rounding turns it into hard 0/1 labels
# before it is compared with the true labels.
cnf_matrix = confusion_matrix(y_test, y_pred.round())

In [None]:
# Raw counts of the confusion matrix computed above.
print(cnf_matrix)

In [None]:
# Draw the unnormalised matrix as a heat map with the two class labels 0 and 1 on the axes.
plot_confusion_matrix(cnf_matrix, classes=[0,1])