<a href="https://colab.research.google.com/github/G2454/UTFPR-IA-25.2/blob/main/Projeto_Final_Hotel_Reservcations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Base de dados: Hotel Reservations

- https://www.kaggle.com/datasets/ahsan81/hotel-reservations-classification-dataset
- Classe: booking_status (Canceled, Not_Canceled)

In [1]:
import numpy as np
import pandas as pd

# Modelo machine learning
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder

# Validação Cruzada
from sklearn.model_selection import (
    KFold,
    LeaveOneOut,
    StratifiedKFold,
    cross_validate
)

# Métricas
from sklearn.metrics import (recall_score,
                             accuracy_score,
                             precision_score,
                             f1_score)
from sklearn.metrics import classification_report

In [6]:
def carregaBaseDados(nome):
  """Load a CSV into a DataFrame.

  ``nome`` is handed unchanged to ``pd.read_csv``; the parsed table is returned.
  """
  table = pd.read_csv(nome)
  return table



def getNumericColumns(dataframe):
  """Return the labels of the ``int64`` / ``float64`` columns as a list.

  Column order follows the order of ``dataframe``.
  """
  numeric_dtypes = ['int64', 'float64']
  numeric_part = dataframe.select_dtypes(include=numeric_dtypes)
  return numeric_part.columns.tolist()

In [7]:
# Pré-processamento

def preProcessamento(dataframe, rem_cols, transform_column, normalization_cols):
  """Clean, encode and standardize ``dataframe`` in place.

  Args:
    dataframe: table to pre-process; it is MUTATED (nothing is returned).
    rem_cols: labels of the columns to drop.
    transform_column: labels of the categorical columns to replace with
      integer codes produced by ``LabelEncoder``.
    normalization_cols: labels of the columns to standardize with
      ``StandardScaler``. May be empty, in which case scaling is skipped.

  Returns:
    None. Callers read the result from ``dataframe`` itself.
  """
  # Drop irrelevant columns. Kept in place because callers rely on the
  # mutation of the frame they passed in.
  dataframe.drop(columns=rem_cols, inplace=True)

  # Replace each categorical column with integer codes. A fresh encoder per
  # column makes it explicit that no fitted state is shared between columns.
  for col in transform_column:
    dataframe[col] = LabelEncoder().fit_transform(dataframe[col])

  # Standardize the requested columns. StandardScaler rejects a selection with
  # zero columns, so an empty request is treated as "nothing to scale".
  if len(normalization_cols) > 0:
    scaler = StandardScaler()
    dataframe[normalization_cols] = scaler.fit_transform(dataframe[normalization_cols])

# Split the table into attributes (X) and class column (y)
def separaClasse(dataframe, classe):
  """Return ``(X, y)``: every column except ``classe``, and the ``classe`` column.

  ``dataframe`` itself is left unchanged.
  """
  features = dataframe.drop(columns=[classe])
  target = dataframe[classe]
  return features, target

# Split the data into train and test sets (70%/30% by default)
# Hold-out approach
def separaTreinoTeste(X, y, test_size=0.3, random_state=None, stratify=None):
  """Hold-out split of ``X`` and ``y`` via ``train_test_split``.

  Args:
    X: attribute table.
    y: class values aligned with ``X``.
    test_size: forwarded to ``train_test_split``; the default 0.3 keeps the
      original 70/30 split.
    random_state: seed forwarded to ``train_test_split``. ``None`` (default)
      leaves the split unseeded, exactly as before; pass an int for a
      reproducible split.
    stratify: forwarded to ``train_test_split``. ``None`` (default) keeps the
      original behaviour; pass ``y`` to preserve class proportions.

  Returns:
    Whatever ``train_test_split`` returns for two inputs, which callers unpack
    as ``X_train, X_test, y_train, y_test``.
  """
  return train_test_split(
      X,
      y,
      test_size=test_size,
      random_state=random_state,
      stratify=stratify,
  )

# K-fold cross-validation
def KFCross(model, X, y, n_splits=10, scoring='accuracy', random_state=None):
  """Score an estimator with shuffled K-fold cross-validation.

  Args:
    model: the estimator to evaluate. Accepted forms:
      * an estimator instance (preferred);
      * an estimator class, which is instantiated with no arguments;
      * a string of Python source such as ``'DecisionTreeClassifier()'``,
        evaluated with ``eval`` (kept for existing callers).
    X: attribute table.
    y: class values aligned with ``X``.
    n_splits: number of folds; default 10 as in the original.
    scoring: metric forwarded to ``cross_validate``; default ``'accuracy'``.
    random_state: seed for the fold shuffle. ``None`` (default) leaves it
      unseeded, as before.

  Returns:
    The dict returned by ``cross_validate`` (callers read ``'test_score'``).
  """
  if isinstance(model, str):
    # SECURITY: eval executes arbitrary code. Only pass trusted, hard-coded
    # strings here; prefer passing an estimator object instead.
    estimator = eval(model)
  elif isinstance(model, type):
    estimator = model()
  else:
    estimator = model

  folds = KFold(n_splits=n_splits, shuffle=True, random_state=random_state)
  return cross_validate(estimator, X, y, scoring=scoring, cv=folds)

# Build and fit the predictive model
def geraModelo(modelo, X,y):
  """Fit an estimator on ``X`` / ``y`` and return it.

  Args:
    modelo: the estimator to fit. Accepted forms:
      * an object exposing ``fit(X, y)`` (preferred) — it is fitted and the
        same object is returned;
      * a class, which is instantiated with no arguments and then fitted;
      * a string of Python source such as ``'DecisionTreeClassifier()'``,
        evaluated with ``eval`` (kept for existing callers).
    X: training attributes.
    y: training class values.

  Returns:
    The fitted estimator object.
  """
  if isinstance(modelo, str):
    # SECURITY: eval executes arbitrary code. Only pass trusted, hard-coded
    # strings here; prefer passing an estimator object instead.
    modelo = eval(modelo)
  elif isinstance(modelo, type):
    modelo = modelo()
  modelo.fit(X, y)
  return modelo

### Metrics
def metricaReport(y_test, y_pred):
  """Print the classification report comparing ``y_test`` with ``y_pred``."""
  report_text = classification_report(y_test, y_pred)
  print(report_text)

### Running the experiment on the data
# Seed NumPy's global RNG. The hold-out split, the decision tree and the K-fold
# shuffle below are all created without an explicit random_state, so they draw
# from this generator; seeding it makes Restart & Run All reproduce the numbers.
SEED = 42
np.random.seed(SEED)

# Build the dataframe (path is the Colab upload location)
DATA_PATH = '/content/data.csv'
df = carregaBaseDados(DATA_PATH)

# Select the numeric columns (taken before encoding, so the class column,
# still text at this point, is not among the columns to be standardized)
numeric = getNumericColumns(df)

# Apply pre-processing (mutates df in place)
# List the categorical attributes in the order in which they appear in the
# figure (top to bottom)
# NOTE(review): the scaler is fitted on the whole table before the hold-out
# split, so test rows influence the scaling statistics. This should not matter
# for a decision tree, but move scaling after the split for scale-sensitive models.
preProcessamento(
    df,
    ['Booking_ID'],
    ['type_of_meal_plan', 'room_type_reserved', 'market_segment_type', 'booking_status'],
    numeric,
)

# Split attributes and class
X,y = separaClasse(df, 'booking_status')

# Build the train and test sets
X_train, X_test, y_train, y_test = separaTreinoTeste(X, y)

# Fit on the training set and evaluate on the held-out test set
modelo  = geraModelo('DecisionTreeClassifier()', X_train, y_train)
score = modelo.score(X_test, y_test)
y_pred = modelo.predict(X_test)
print(score)

# Evaluate the model with more metrics
metricaReport(y_test, y_pred)

# Cross-validation check
# K-fold cross-validation approach, run on the full (X, y)
cv = KFCross('DecisionTreeClassifier()', X,y)
print(f"{cv['test_score']}\nMedia: {np.mean(cv['test_score'])}")

0.8671322245704309
              precision    recall  f1-score   support

           0       0.79      0.80      0.80      3518
           1       0.90      0.90      0.90      7365

    accuracy                           0.87     10883
   macro avg       0.85      0.85      0.85     10883
weighted avg       0.87      0.87      0.87     10883

[0.8693495  0.87789416 0.87541345 0.86990077 0.87458655 0.8808933
 0.86958919 0.86821064 0.87124345 0.87344913]
Media: 0.8730530150418474
