In [0]:
import io
import tensorflow as tf
%tensorflow_version 2.x
import functools
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, roc_curve, confusion_matrix, auc

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
from google.colab import files
uploaded = files.upload()

Saving hotel_bookings.csv to hotel_bookings.csv


In [0]:
hotel_data = pd.read_csv(io.BytesIO(uploaded['hotel_bookings.csv']))

In [0]:
df_test = pd.read_csv(io.BytesIO(uploaded['hotel_bookings.csv']))

float_cols = [c for c in df_test if df_test[c].dtype == 'float32']
float64_cols = {c: np.float64 for c in float_cols}

hotel_data = pd.read_csv(io.BytesIO(uploaded['hotel_bookings.csv']), engine = 'c', dtype = float64_cols)

In [0]:
final_data = hotel_data.copy()

In [0]:
le = preprocessing.LabelEncoder()

# Dropping the 'company' column
final_data = final_data.drop(['company'], axis = 1)

# Remove the null values in the country columns

indices = final_data.loc[pd.isna(final_data['country']), :].index  
final_data = final_data.drop(final_data.index[indices])  

final_data['country'] = le.fit_transform(final_data['country']) 

final_data = pd.get_dummies(data = final_data, columns = ['hotel','meal', 'market_segment'
                                                            , 'distribution_channel', 'reserved_room_type', 'deposit_type'
                                                            , 'assigned_room_type', 'customer_type', 'reservation_status'])

final_data = final_data.drop(columns = [ 'reservation_status_Canceled', 'reservation_status_Check-Out'
                                          , 'reservation_status_No-Show', 'arrival_date_day_of_month'
                                          , 'stays_in_weekend_nights', 'children', 'arrival_date_week_number'
                                          , 'arrival_date_week_number', 'arrival_date_month', 'agent'
                                          , 'reservation_status_date'], axis = 1)

label = final_data['is_canceled']


final_data = final_data.drop(['is_canceled'], axis =1)

In [0]:
x_train, x_test, y_train, y_test = train_test_split(final_data, label, test_size = 0.20, random_state = 42)

## Setting up the model parameters

In [0]:
num_classes = 2 # total number of outputs (in this case, cancelled or not cancelled, hence 2)
num_features = 63 # data features (you can check for the input size by using the shape function, in this case, 63)

# Training parameters.
learning_rate = 0.00001
training_steps = 63000
batch_size = 128
display_step = 315

# Network parameters.
n_hidden_1 = 250 # 1st layer number of neurons.
n_hidden_2 = 250 # 2nd layer number of neurons.
n_hidden_3 = 250 # 3rd layer number of neurons.
n_hidden_4 = 250 # 4th layer number of neurons.

In [0]:
# Using the tf.data API to shuffle and batch the data
train_data = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_data = train_data.repeat().shuffle(5000).batch(batch_size).prefetch(1)

In [0]:
# Next up, we will be defining the layers of weights and bias for the hidden layers

# For now, I will create a random value generator to initialize the weights
random_normal = tf.initializers.RandomNormal()

weights = {
    'h1': tf.Variable(random_normal([num_features, n_hidden_1])),
    'h2': tf.Variable(random_normal([n_hidden_1, n_hidden_2])),
    'h3': tf.Variable(random_normal([n_hidden_2, n_hidden_3])),
    'h4': tf.Variable(random_normal([n_hidden_3, n_hidden_4])),
    'out': tf.Variable(random_normal([n_hidden_4, num_classes])),
}
biases = {
    'b1': tf.Variable(random_normal([n_hidden_1])),
    'b2': tf.Variable(random_normal([n_hidden_2])),
    'b3': tf.Variable(random_normal([n_hidden_3])),
    'b4': tf.Variable(random_normal([n_hidden_4])),
    'out': tf.Variable(random_normal([num_classes])),
}

In [0]:
# Next will be creating the model

def neural_net(x):
    
    # This will be to convert the input into float32 input as some inputs are still in int format
    # Without this line, the hidden layers will return an error requesting for a float input
    x = tf.cast(x, dtype = tf.float32)

    # 1st hidden fully connected layer with 250 neurons.
    layer_1 = tf.add(tf.matmul(x, weights['h1']), biases['b1'])
    # Apply relu to layer_1 output for non-linearity.
    layer_1 = tf.nn.relu(layer_1)
    
    # 2nd hidden fully connected layer with 250 neurons.
    layer_2 = tf.add(tf.matmul(layer_1, weights['h2']), biases['b2'])
    # Apply relu to layer_2 output for non-linearity.
    layer_2 = tf.nn.relu(layer_2)

    # 3rd hidden fully connected layer with 250 neurons.
    layer_3 = tf.add(tf.matmul(layer_2, weights['h3']), biases['b3'])
    # Apply relu to layer_2 output for non-linearity.
    layer_3 = tf.nn.relu(layer_3)

    # 4th hidden fully connected layer with 250 neurons.
    layer_4 = tf.add(tf.matmul(layer_3, weights['h4']), biases['b4'])
    # Apply relu to layer_2 output for non-linearity.
    layer_4 = tf.nn.relu(layer_4)
    
    # 5th hutput fully connected layer with a neuron for each class.
    out_layer = tf.matmul(layer_4, weights['out']) + biases['out']
    # Apply softmax to normalize the logits to a probability distribution.
    return tf.nn.softmax(out_layer)

In [0]:
# Defining the cross-Entropy loss function

def cross_entropy(y_pred, y_true):
    # Encode label to a one hot vector.
    y_true = tf.one_hot(y_true, depth = num_classes)
    # Clip prediction values to avoid log(0) error.
    y_pred = tf.clip_by_value(y_pred, 1e-9, 1.)
    # Compute cross-entropy.
    return tf.reduce_mean(-tf.reduce_sum(y_true * tf.math.log(y_pred)))

# Defining the accuracy metric.
def accuracy(y_pred, y_true):
    # Predicted class is the index of highest score in prediction vector (i.e. argmax).
    correct_prediction = tf.equal(tf.argmax(y_pred, 1), tf.cast(y_true, tf.int64))
    return tf.reduce_mean(tf.cast(correct_prediction, tf.float32), axis = -1)

# XXXXXX optimizer.
optimizer = tf.optimizers.Adam(learning_rate)

In [0]:
# Lastly, we will define the optimization process. 
def run_optimization(x, y):
    # The computation will be wrapped inside a GradientTape for automatic differentiation
    with tf.GradientTape() as g:
        pred = neural_net(x)
        loss = cross_entropy(pred, y)
        
    # Combining the weights and values as a list so as to update the gradients
    trainable_variables = list(weights.values()) + list(biases.values())

    # The loss and combinedlist from above  will form the gradient
    gradients = g.gradient(loss, trainable_variables)
    
    # Finally, apply the gradient to the optimizer
    optimizer.apply_gradients(zip(gradients, trainable_variables))

In [76]:
# Finally! We can run our algorithm
# This is where we will create a for loop to run according to the batch size, training steps and other parameters we defined earlier 

for step, (batch_x, batch_y) in enumerate(train_data.take(training_steps), 1):
    # After every defined batch size, the next step is to update the gradient accordingly
    run_optimization(batch_x, batch_y)

    # To print and update the user along the way
    if step % display_step == 0:
        pred = neural_net(batch_x)
        loss = cross_entropy(pred, batch_y)
        acc = accuracy(pred, batch_y)
        print('step: %i, loss: %f, accuracy: %f' % (step, loss, acc))

step: 315, loss: 68.515259, accuracy: 0.726562
step: 630, loss: 75.311493, accuracy: 0.687500
step: 945, loss: 71.258560, accuracy: 0.726562
step: 1260, loss: 77.212036, accuracy: 0.687500
step: 1575, loss: 77.024399, accuracy: 0.703125
step: 1890, loss: 65.100639, accuracy: 0.757812
step: 2205, loss: 73.953903, accuracy: 0.710938
step: 2520, loss: 65.477707, accuracy: 0.781250
step: 2835, loss: 78.550522, accuracy: 0.656250
step: 3150, loss: 64.385620, accuracy: 0.812500
step: 3465, loss: 62.413155, accuracy: 0.773438
step: 3780, loss: 65.786057, accuracy: 0.742188
step: 4095, loss: 64.598839, accuracy: 0.734375
step: 4410, loss: 66.624352, accuracy: 0.750000
step: 4725, loss: 72.085754, accuracy: 0.695312
step: 5040, loss: 70.418182, accuracy: 0.750000
step: 5355, loss: 58.702438, accuracy: 0.804688
step: 5670, loss: 71.550842, accuracy: 0.734375
step: 5985, loss: 61.305546, accuracy: 0.757812
step: 6300, loss: 62.890717, accuracy: 0.757812
step: 6615, loss: 71.498375, accuracy: 0.69

In [77]:
# Test model on validation set.
pred = neural_net(x_test)
print('Test Accuracy: %f' % accuracy(pred, y_test))

Test Accuracy: 0.824986
