# Prevendo Cancelamento de Reservas em Hotel

## Instalando e Importando Pacotes

In [3]:
!pip install -q pandas_profiling

You should consider upgrading via the '/home/lucas/anaconda3/bin/python -m pip install --upgrade pip' command.[0m


In [4]:
!pip install --upgrade pip

Defaulting to user installation because normal site-packages is not writeable
Collecting pip
  Downloading pip-20.3.3-py2.py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 11.4 MB/s eta 0:00:01
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 20.3
    Uninstalling pip-20.3:
      Successfully uninstalled pip-20.3
Successfully installed pip-20.3.3


In [7]:
# Imports

# Manipulacao e Visualizacao de Dados
import numpy as np 
import pandas as pd 
import matplotlib as m 
import matplotlib.pyplot as plt 
import seaborn as sns 
import pandas_profiling 
from pandas_profiling import ProfileReport

# Pre-Processing and Evaluation
import sklearn 
from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import MinMaxScaler 
from sklearn.metrics import accuracy_score

# Deep Learning
import tensorflow 
from keras.models import Sequential 
from keras.layers import Dense, Input, Dropout 
from keras.losses import categorical_crossentropy 
from keras.utils import to_categorical 
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.optimizers import Nadam

# Graphics
from matplotlib.pylab import rcParams
rcParams['axes.labelsize'] = 14
rcParams['xtick.labelsize'] = 12
rcParams['ytick.labelsize'] = 12
rcParams['text.color'] = 'k'
rcParams['figure.max_open_warning'] = 30
rcParams['figure.figsize'] = 10,8
m.style.use('ggplot')
%matplotlib inline

In [8]:
# Versions of the packages used in this Jupyter notebook
%reload_ext watermark
%watermark -a "Data Science Academy" --iversions

numpy            1.18.1
pandas_profiling 2.9.0
matplotlib       3.3.3
sklearn          0.23.1
autopep8         1.4.4
json             2.0.9
pandas           1.1.5
seaborn          0.11.0
tensorflow       2.3.0
Data Science Academy


In [9]:
# List every local device TensorFlow reports
from tensorflow.python.client import device_lib
local_devices = device_lib.list_local_devices()
print(local_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 6452783212051349594
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 6685507443031723522
physical_device_desc: "device: XLA_CPU device"
]


In [11]:
import tensorflow as tf

# Query GPUs through the stable tf.config API instead of its `experimental`
# alias; the returned list of physical devices is the same.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Número Disponível de GPUs: ", len(gpu_devices))

Número Disponível de GPUs:  0


In [12]:
# List the physical GPU devices visible to TensorFlow
tf.config.list_physical_devices('GPU')

[]

## Carregando os Dados

In [13]:
# Load the hotel bookings CSV (path relative to the notebook) into a DataFrame
dataset = pd.read_csv("dados/hotel_bookings.csv")

In [14]:
# (rows, columns) of the loaded DataFrame
dataset.shape

(119390, 32)

In [16]:
# Preview the first five rows
dataset.head(n=5)

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,children,babies,meal,country,market_segment,distribution_channel,is_repeated_guest,previous_cancellations,previous_bookings_not_canceled,reserved_room_type,assigned_room_type,booking_changes,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,0.0,0,BB,PRT,Direct,Direct,0,0,0,C,C,3,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,0.0,0,BB,PRT,Direct,Direct,0,0,0,C,C,4,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,0.0,0,BB,GBR,Direct,Direct,0,0,0,A,C,0,No Deposit,,,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,0.0,0,BB,GBR,Corporate,Corporate,0,0,0,A,A,0,No Deposit,304.0,,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,0.0,0,BB,GBR,Online TA,TA/TO,0,0,0,A,A,0,No Deposit,240.0,,0,Transient,98.0,0,1,Check-Out,2015-07-03


## Análise Exploratória

In [None]:
# Build a pandas-profiling report from the full dataset; ending the cell with
# `prof` displays the report inline.
prof = ProfileReport(dataset)
prof

HBox(children=(HTML(value='Summarize dataset'), FloatProgress(value=0.0, max=46.0), HTML(value='')))




HBox(children=(HTML(value='Generate report structure'), FloatProgress(value=0.0, max=1.0), HTML(value='')))

In [None]:
# Write the profiling report to an HTML file next to the notebook
prof.to_file(output_file = 'relatorio.html')

In [19]:
# Number of columns (second entry of the DataFrame's shape)
print(dataset.shape[1])

32


In [20]:
# Data type of each column
dataset.dtypes

hotel                              object
is_canceled                         int64
lead_time                           int64
arrival_date_year                   int64
arrival_date_month                 object
arrival_date_week_number            int64
arrival_date_day_of_month           int64
stays_in_weekend_nights             int64
stays_in_week_nights                int64
adults                              int64
children                          float64
babies                              int64
meal                               object
country                            object
market_segment                     object
distribution_channel               object
is_repeated_guest                   int64
previous_cancellations              int64
previous_bookings_not_canceled      int64
reserved_room_type                 object
assigned_room_type                 object
booking_changes                     int64
deposit_type                       object
agent                             

In [27]:
# Print the name of every column that holds string values.
# The check looks at the first NON-NULL value of each column, by position:
#  - a missing value in the first row must not make a text column look numeric;
#  - positional access (.iloc) does not depend on the index containing label 0.
for column, series in dataset.items():
    non_null = series.dropna()
    if not non_null.empty and isinstance(non_null.iloc[0], str):
        print(column)

hotel
arrival_date_month
meal
country
market_segment
distribution_channel
reserved_room_type
assigned_room_type
deposit_type
customer_type
reservation_status
reservation_status_date


In [28]:
# Count the missing values in each column
dataset.isna().sum()

hotel                                  0
is_canceled                            0
lead_time                              0
arrival_date_year                      0
arrival_date_month                     0
arrival_date_week_number               0
arrival_date_day_of_month              0
stays_in_weekend_nights                0
stays_in_week_nights                   0
adults                                 0
children                               4
babies                                 0
meal                                   0
country                              488
market_segment                         0
distribution_channel                   0
is_repeated_guest                      0
previous_cancellations                 0
previous_bookings_not_canceled         0
reserved_room_type                     0
assigned_room_type                     0
booking_changes                        0
deposit_type                           0
agent                              16340
company         

## Data Cleaning and Transformation

In [29]:
# Remove the 'company' column from the dataset
dataset = dataset.drop(columns=['company'])

In [None]:
# Remove the 'country' column from the dataset
dataset = dataset.drop(columns=['country'])