In [1]:
import os
import sys
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Conv2D, Flatten, Conv1D, Input, Dropout, MaxPooling1D
from keras import optimizers
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

In [2]:
# Load the pickled sample DataFrame. The context manager guarantees the file
# is closed even if unpickling raises.
# NOTE(review): pickle.load can execute arbitrary code while deserialising;
# only load files from a trusted source.
with open("../data/sample_data/hotel_reservation_only_bottlenecks_sample.pkl", "rb") as filehandler:
    dat = pickle.load(filehandler)
print(type(dat))
print(dat.shape)

<class 'pandas.core.frame.DataFrame'>
(3999996, 7)


In [3]:
# Number of rows per mini-batch when the training data is sliced into batches.
batch_size = 4

In [4]:
# Work on a copy so the loaded frame `dat` is left untouched.
df = dat.copy()
# The "label" column is the target; every remaining column is a feature.
y = df["label"]
X = df.drop(columns=["label"])

In [5]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Renumber every piece 0..n-1 so that label-based and positional indexing
# agree in the cells below. drop=True discards the old index directly instead
# of materialising it as an "index" column and dropping it afterwards (which
# raises if the frame already has a column called "index").
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)


In [6]:
X_count = X_train.shape[0]
batch_count = X_count // batch_size  # number of complete mini-batches available

# Only a handful of mini-batches are materialised here (at most 10). Capping
# by batch_count avoids appending empty batches when the training set is small.
preview_batch_count = min(batch_count, 10)

X_train_batch_list = []
y_train_batch_list = []

# NOTE(review): despite the names, nothing is shuffled here -- `shuffled_range`
# is the identity order 0..X_count-1. Swap in a permutation if shuffling is
# actually intended.
shuffled_range = range(X_count)
shuffled_X = X_train.iloc[shuffled_range, :]
# Vectorised positional lookup instead of one Series access per row. This
# matches the label-based y_train[i] because y_train is indexed 0..n-1.
shuffled_Y = list(y_train.to_numpy()[np.asarray(shuffled_range, dtype=np.intp)])

for i in range(preview_batch_count):
    start = i * batch_size
    stop = start + batch_size

    y_train_batch = shuffled_Y[start:stop]
    X_train_batch = shuffled_X[start:stop]

    y_train_batch_list.append(y_train_batch)
    X_train_batch_list.append(X_train_batch)


In [7]:
# Use the first mini-batch as a small worked example of the model input shape.
train_data_pd = X_train_batch_list[0]
train_label = y_train_batch_list[0]
train_data = train_data_pd.to_numpy()

# Rows are samples; each feature column is treated as one step of a
# single-channel sequence.
sample_size, time_steps = train_data.shape
input_dimension = 1

# Append the channel axis: (samples, steps) -> (samples, steps, 1).
train_data_reshaped = train_data.reshape((sample_size, time_steps, input_dimension))
first_sample = train_data_reshaped[0]
print("After reshape train data set shape:\n", train_data_reshaped.shape)
print("1 Sample shape:\n",first_sample.shape)
print("An example sample:\n", first_sample)


After reshape train data set shape:
 (4, 6, 1)
1 Sample shape:
 (6, 1)
An example sample:
 [[0.87874428]
 [0.02903283]
 [0.01747466]
 [0.02536331]
 [0.02607459]
 [0.02331032]]


In [8]:
def build_conv1D_model(n_timesteps=None, n_features=None, n_classes=6):
    """Build and compile the stacked Conv1D classifier.

    The network is an Input layer, eight Conv1D layers (64 filters each, ReLU;
    the first two use kernel_size=2, the remaining six kernel_size=1), a
    Flatten layer and a softmax Dense output. It is compiled with the Adam
    optimizer, sparse categorical cross-entropy loss and an accuracy metric.

    Args:
        n_timesteps: Length of each input sequence. Defaults to
            ``train_data_reshaped.shape[1]`` (the notebook-level example batch).
        n_features: Number of channels per step. Defaults to
            ``train_data_reshaped.shape[2]``.
        n_classes: Number of output classes, i.e. units in the softmax layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Fall back to the notebook-level example batch only when the caller does
    # not provide the input shape explicitly.
    if n_timesteps is None:
        n_timesteps = train_data_reshaped.shape[1]
    if n_features is None:
        n_features = train_data_reshaped.shape[2]
    print("Shape of the data input is: ", n_timesteps, n_features)

    model = Sequential()
    model.add(Input(shape=(n_timesteps, n_features)))
    # One Conv1D layer per entry; the value is that layer's kernel size.
    for kernel_size in (2, 2, 1, 1, 1, 1, 1, 1):
        model.add(Conv1D(filters=64, kernel_size=kernel_size, activation='relu'))
    model.add(Flatten())
    model.add(Dense(n_classes, activation='softmax'))

    optimizer = "adam"

    # The sparse loss expects integer class ids as labels, not one-hot vectors.
    model.compile(loss='sparse_categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

# Instantiate the network and print its layer-by-layer summary.
model_conv1D = build_conv1D_model()
model_conv1D.summary()

Shape of the data input is:  6 1
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 5, 64)             192       
                                                                 
 conv1d_1 (Conv1D)           (None, 4, 64)             8256      
                                                                 
 conv1d_2 (Conv1D)           (None, 4, 64)             4160      
                                                                 
 conv1d_3 (Conv1D)           (None, 4, 64)             4160      
                                                                 
 conv1d_4 (Conv1D)           (None, 4, 64)             4160      
                                                                 
 conv1d_5 (Conv1D)           (None, 4, 64)             4160      
                                                                 
 conv1d_6 (Conv1D)     

In [9]:
# to_numpy() already returns an ndarray, so no extra np.asarray wrapper is needed.
X_train_arr = X_train.to_numpy()
y_train_arr = y_train.to_numpy()
# Add the trailing channel axis explicitly, (n, steps) -> (n, steps, 1), so the
# array matches the model's declared input shape instead of relying on Keras
# to expand the rank implicitly. Use the shared batch_size constant rather
# than repeating the literal.
history = model_conv1D.fit(X_train_arr[..., np.newaxis], y_train_arr, epochs=10,
                            verbose=1, batch_size=batch_size)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# evaluate() returns [loss, accuracy] because the model was compiled with a
# single 'accuracy' metric. Note this is measured on the training split.
acc = model_conv1D.evaluate(x=X_train, y=y_train)
train_loss, train_accuracy = acc
print("Loss:", train_loss, " Accuracy:", train_accuracy)

Loss: 0.031602367758750916  Accuracy: 0.9891618490219116


In [11]:
# Softmax scores per class for every test row; the predicted class is the
# index with the highest score.
pred = model_conv1D.predict(X_test)
pred_y = np.argmax(pred, axis=-1)
cm = confusion_matrix(y_true=y_test, y_pred=pred_y)

In [12]:
# Confusion matrix for the test split: rows are true labels, columns are predicted labels.
cm

array([[132237,    246,    185,    161,    203,    262],
       [   368, 131569,    250,    230,    296,    342],
       [   319,    384, 131707,    142,    227,    323],
       [   370,    337,    304, 132123,    296,    261],
       [   313,    319,    212,    267, 131809,    360],
       [   410,    457,    333,    277,    337, 131764]], dtype=int64)

In [13]:
# Fraction of test rows whose predicted class matches the true label.
print(accuracy_score(y_test, pred_y))

0.98901125
