## Treinamento do modelo

### Importação de bibliotecas e do dataframe tratado

In [1]:
import pandas as pd
import numpy as np
import sagemaker
import boto3
from sagemaker import Session

# Load the treated ("tratado") hotel reservations table from its relative path.
df = pd.read_csv(filepath_or_buffer="dataframe/Hotel Reservations tratado.csv")

In [2]:
# Display the column index. The first column is later used as the label and
# all remaining columns as features.
df.columns

Index(['label_avg_price_per_room', 'no_of_adults', 'no_of_children',
       'no_of_weekend_nights', 'no_of_week_nights',
       'required_car_parking_space', 'no_of_special_requests',
       'room_type_reserved_Room_Type 1', 'room_type_reserved_Room_Type 2',
       'room_type_reserved_Room_Type 3', 'room_type_reserved_Room_Type 4',
       'room_type_reserved_Room_Type 5', 'room_type_reserved_Room_Type 6',
       'room_type_reserved_Room_Type 7', 'arrival_year_2017',
       'arrival_year_2018', 'arrival_month_1', 'arrival_month_2',
       'arrival_month_3', 'arrival_month_4', 'arrival_month_5',
       'arrival_month_6', 'arrival_month_7', 'arrival_month_8',
       'arrival_month_9', 'arrival_month_10', 'arrival_month_11',
       'arrival_month_12'],
      dtype='object')

In [3]:
# Display the (rows, columns) dimensions of the loaded frame.
df.shape

(36275, 28)

### Separação dos dados de treino e teste

In [4]:
from sklearn.model_selection import train_test_split

# Column 0 holds the target; every remaining column is a feature.
y = df.iloc[:, 0]
X = df.iloc[:, 1:]

In [5]:
# Hold out 30% of the rows for testing. A fixed random_state makes the split,
# and therefore every metric computed from it, reproducible across kernel
# restarts; without it each run evaluates on a different test set.
X_treino, X_teste, y_treino, y_teste = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Show the resulting shapes as the cell output.
X_treino.shape, X_teste.shape, y_treino.shape, y_teste.shape

((25392, 27), (10883, 27), (25392,), (10883,))

### Definições do modelo sequencial

In [6]:
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from tensorflow.keras.layers import Dropout, Dense

# Feed-forward classifier: a 54-unit input layer sized to the feature count,
# three 27-unit hidden layers (dropout of 0.2 after the first two), and a
# 3-unit softmax output.
model = keras.Sequential([
    Dense(54, input_shape=(X.shape[1],), activation='relu'),
    Dense(27, activation='relu'),
    Dropout(0.2),
    Dense(27, activation='relu'),
    Dropout(0.2),
    Dense(27, activation='relu'),
    Dense(3, activation='softmax'),
])

In [7]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss,
# and accuracy tracked as the reported metric.
model.compile(
    loss=keras.losses.SparseCategoricalCrossentropy(),
    optimizer='adam',
    metrics=['accuracy'],
)

### Treinamento do modelo

In [8]:
# Train for 50 epochs in mini-batches of 256 and keep the returned history
# object. Note that the held-out test split doubles as the validation set here.
sequential = model.fit(
    x=X_treino,
    y=y_treino,
    batch_size=256,
    epochs=50,
    validation_data=(X_teste, y_teste),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


### Tratamento da saída e previsões

In [9]:
# First row of the test features as a 1-D array. Indexing the underlying array
# directly avoids round-tripping the entire test matrix through a Python list
# just to read one row; .copy() keeps `teste` independent of X_teste, as the
# previous list-based construction did.
teste = X_teste.values[0].copy()
# Display its shape (one entry per feature).
teste.shape

(27,)

In [10]:
# Run the trained model on the test features. The displayed result has one row
# per test sample, holding the three softmax class scores.
predicoes = model.predict(X_teste)
predicoes



array([[0.06175908, 0.5174547 , 0.42078623],
       [0.18035686, 0.7196771 , 0.09996614],
       [0.6123231 , 0.30878082, 0.07889605],
       ...,
       [0.18090671, 0.66676354, 0.15232982],
       [0.4932276 , 0.4579425 , 0.04882984],
       [0.07603126, 0.36060712, 0.5633616 ]], dtype=float32)

In [11]:
# Convert both objects to plain Python lists. This rebinds `predicoes` and
# `y_teste`, so every later cell sees lists (positional indexing such as
# y_teste[0]) rather than the original array / Series.
predicoes = list(predicoes)
y_teste = list(y_teste)

In [12]:
# Collapse each row of class scores to the index of its highest score.
# np.argmax does this in one vectorised pass instead of a per-row Python loop
# that scanned every row twice; on ties it returns the first maximum, matching
# list.index(max(...)). .tolist() keeps the result a plain list of ints.
predicoes = np.argmax(np.asarray(predicoes), axis=1).tolist()

In [35]:
# Show one worked example: the first test row's features and its expected class.
# NOTE(review): the "+ 1" presumably shifts the 0-based label to a 1-based
# class number for display; confirm against the consumer of this output.
Xt = X_teste.values.tolist()
print(f'Entrada: {Xt[0]}\nSaída esperada: {y_teste[0] + 1}')

Entrada: [1, 0, 0, 2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
Saída esperada: 2


### Testes de acurácia do modelo

In [15]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, confusion_matrix, classification_report, accuracy_score

# Mean absolute / squared error between expected and predicted class indices.
# NOTE(review): these treat the class labels as ordinal numbers; confirm that
# the distance between classes is meaningful for this label.
mae = mean_absolute_error(y_true=y_teste, y_pred=predicoes)
mse = mean_squared_error(y_true=y_teste, y_pred=predicoes)
print('MAE = ', mae, '\nMSE = ', mse)

MAE =  0.3889552513093816 
MSE =  0.45695120830653313


In [16]:
# Fraction of test samples whose predicted class equals the expected class.
accuracy_score(y_true=y_teste, y_pred=predicoes)

0.6450427271891942

In [17]:
# Build the 3x3 confusion matrix of expected vs. predicted classes and show it.
mtx = confusion_matrix(y_true=y_teste, y_pred=predicoes)
mtx

array([[2177,  922,  142],
       [ 924, 2517,  658],
       [ 228,  989, 2326]], dtype=int64)

In [18]:
# Per-class precision, recall, F1 and support as a text table.
relatorio = classification_report(y_true=y_teste, y_pred=predicoes)
print(relatorio)

              precision    recall  f1-score   support

           0       0.65      0.67      0.66      3241
           1       0.57      0.61      0.59      4099
           2       0.74      0.66      0.70      3543

    accuracy                           0.65     10883
   macro avg       0.66      0.65      0.65     10883
weighted avg       0.65      0.65      0.65     10883



In [19]:
# Persist the trained model to the working directory; the S3 upload cell reads
# this same file name.
# NOTE(review): the .h5 suffix presumably selects the HDF5 format — confirm it
# is the format the serving side expects.
model.save('model.h5')

In [32]:
# Upload the saved model file to the project bucket under the "modelos/" prefix.
# The file is opened in binary mode and closed automatically by the context
# manager once the transfer call returns.
s3 = boto3.client('s3')
with open('model.h5', 'rb') as arquivo_modelo:
    s3.upload_fileobj(
        Fileobj=arquivo_modelo,
        Bucket='modelo-treinado-grupo4',
        Key='modelos/model.h5',
    )