In [1]:
import json
import numpy as np
import pandas as pd
import tensorflow as tf



In [2]:
# Feature tables: the training split and the validation split
# (the validation frame is kept under the name `test`).
train_csv = "../../../data/feature/cb_train.csv"
validation_csv = "../../../data/feature/cb_validation.csv"
train = pd.read_csv(train_csv)
test = pd.read_csv(validation_csv)
# Last expression: display the column index of the training frame.
train.columns

Index(['Gender', 'Age', 'Ethnicity', 'Educational_Level', 'Income',
       'Country_region', 'Hotel_Type', 'Meal_Type', 'Visted_Previously',
       'Previous_Cancellations', 'Deposit_type', 'Booking_channel',
       'Required_Car_Parking', 'Use_Promotion', 'Room_Rate',
       'month_Expected_checkin', 'dayofweek_Expected_checkin', 'stay',
       'booking_to_checkin', 'total_participants', 'Reservation_Status'],
      dtype='object')

In [3]:
# define the neural networks
from tensorflow.keras.layers import Input, Embedding, Dense, Reshape, Concatenate, Dropout, BatchNormalization
from tensorflow.keras import Model

def combined_network(cat_vars, categories_dict, cont_vars, layers,
                     n_classes=3, dropout_rate=0.7):
    """Build an (uncompiled) entity-embedding classifier.

    Every categorical variable gets its own scalar input followed by an
    embedding; all continuous variables share one input that is passed
    through batch normalisation. The resulting tensors are concatenated and
    fed through a stack of fully-connected ReLU layers ending in a softmax.

    Parameters
    ----------
    cat_vars : sequence of str
        Names of the categorical variables; one model input is created per
        name, in this order.
    categories_dict : dict
        Maps each name in ``cat_vars`` to the collection of its categories;
        only its length is used here (and by ``get_emb_sz``).
    cont_vars : sequence
        Continuous variables; only ``len(cont_vars)`` is used, as the width
        of the single continuous input.
    layers : sequence of int
        Widths of the hidden ``Dense`` layers, in order.
    n_classes : int, optional
        Width of the softmax output layer (default 3, the previous
        hard-coded value).
    dropout_rate : float, optional
        Rate of the ``Dropout`` placed before every hidden layer except the
        first (default 0.7, the previous hard-coded value).

    Returns
    -------
    tensorflow.keras.Model
        Model whose inputs are the categorical inputs (in ``cat_vars``
        order) followed by the continuous input.
    """
    inputs = []
    features = []

    # One scalar input + embedding per categorical variable.
    for cat_var in cat_vars:
        cat_input = Input(shape=(1,))
        emb_sz = get_emb_sz(cat_var, categories_dict)
        # One index more than the number of listed categories
        # (presumably room for an extra/unknown code or 1-based codes --
        # TODO confirm against the encoding map).
        vocab = len(categories_dict[cat_var]) + 1
        # `input_length` is deprecated in Keras 3 and not needed: the
        # sequence length is already fixed by the (1,) input shape.
        embedded = Embedding(vocab, emb_sz)(cat_input)
        # (batch, 1, emb_sz) -> (batch, emb_sz)
        embedded = Reshape(target_shape=(emb_sz,))(embedded)
        inputs.append(cat_input)
        features.append(embedded)

    # All continuous variables enter through one batch-normalised input.
    cont_input = Input(shape=(len(cont_vars),))
    inputs.append(cont_input)
    features.append(BatchNormalization()(cont_input))

    # Concatenate may reject a single-element list (no categorical
    # variables), so only merge when there is something to merge.
    x = features[0] if len(features) == 1 else Concatenate()(features)

    # Hidden layers: every layer after the first is preceded by
    # batch normalisation and dropout.
    for depth, units in enumerate(layers):
        if depth > 0:
            x = BatchNormalization()(x)
            x = Dropout(dropout_rate)(x)
        x = Dense(units, activation="relu")(x)

    output = Dense(n_classes, activation="softmax")(x)
    return Model(inputs, output)

In [4]:
# Read the JSON encoding map from disk into `params`.
enc_map_path = '../../../data/feature/enc_map.json'
with open(enc_map_path, 'r') as enc_file:
    params = json.load(enc_file)


In [5]:
def get_emb_sz(cat_col, categories_dict):
    """Return the embedding width to use for one categorical column.

    The width is ``round(1.6 * n ** 0.56)``, where ``n`` is the number of
    entries in ``categories_dict[cat_col]``, capped at 600 and returned as
    an ``int``.
    """
    cardinality = len(categories_dict[cat_col])
    suggested = round(1.6 * cardinality ** 0.56)
    # Cap the width at 600.
    capped = suggested if suggested < 600 else 600
    return int(capped)

In [6]:
# For every key of the encoding map, keep the values of its mapping as a list.
categories = {
    column: list(mapping.values())
    for column, mapping in params.items()
}

In [7]:
# Columns fed to the network through embeddings (one model input each).
cat_vars = [
    'Gender',
    'Ethnicity',
    'Educational_Level',
    'Income',
    'Country_region',
    'Hotel_Type',
    'Meal_Type',
    'Visted_Previously',
    'Previous_Cancellations',
    'Deposit_type',
    'Booking_channel',
    'Required_Car_Parking',
    'Use_Promotion',
]
# Columns fed to the network together as one numeric block.
cont_vars = [
    'Age',
    'Room_Rate',
    'month_Expected_checkin',
    'dayofweek_Expected_checkin',
    'stay',
    'booking_to_checkin',
    'total_participants',
]

In [8]:
# Widths of the hidden Dense layers, in order.
layers = [200, 350, 200, 100]

# Build the embedding network and compile it for integer class labels.
model = combined_network(cat_vars, categories, cont_vars, layers)
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=opt,
    loss='sparse_categorical_crossentropy',
    metrics=["accuracy"],
)

In [11]:
# Print the layer-by-layer summary (output shapes, parameter counts, connections) of `model`.
model.summary()

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 1)]          0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 1)]          0                                            
_______________________________________________________________________________________

In [9]:
# Training inputs in the order the model expects them: one 1-D array per
# categorical column (in `cat_vars` order), then one 2-D array holding all
# continuous columns.
input_list = [train[col].values for col in cat_vars]
input_list.append(train.loc[:, cont_vars].values)
# Same layout for the validation frame.
test_list = [test[col].values for col in cat_vars]
test_list.append(test.loc[:, cont_vars].values)

In [10]:
# Labels are read from the last column of `train` and shifted down by one
# before being passed to the sparse categorical loss
# (presumably 1-based class codes -> 0-based -- TODO confirm).
train_labels = train.iloc[:, -1] - 1
model.fit(input_list, train_labels, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x23ec9848ac0>

In [11]:
# Run the trained model on the validation inputs; each row holds the softmax scores of one sample.
y_pred = model.predict(test_list)


In [12]:
def func(x):
    """Return the index of the largest entry of ``x`` (the first one on ties)."""
    best_index = np.argmax(x)
    return best_index
# Predicted class index per row. A vectorised arg-max over axis 1 gives the
# same result as calling `func` on every row, without a Python-level call
# per row, and it also handles a prediction array with zero rows.
y_preds = np.argmax(y_pred, axis=1)


In [13]:
from sklearn.metrics import f1_score

In [15]:
# Macro-averaged F1 on the validation frame. The true labels come from its
# last column; adding 1 to the predictions undoes the shift applied to the
# training labels.
macro_f1 = f1_score(test.iloc[:, -1], y_preds + 1, average='macro')
print(macro_f1)

0.2462338456832607
