In [None]:
!pip install sagemaker boto3 pandas scikit-learn xgboost

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

In [None]:
# Read the reservations table from the CSV file in the current working directory.
dataset = pd.read_csv(filepath_or_buffer='Hotel Reservations.csv')

In [None]:
def categorize_price(price, low_max=85, high_min=115):
    """Map a price to one of three ordinal price tiers.

    Parameters
    ----------
    price : number
        The price to classify.
    low_max : number, default 85
        Inclusive upper bound of tier 1.
    high_min : number, default 115
        Inclusive lower bound of tier 3.

    Returns
    -------
    int
        1 if ``price <= low_max``; 2 if ``low_max < price < high_min``;
        3 otherwise.

    Notes
    -----
    The boundaries are intentionally kept as in the original rule: the lower
    threshold is inclusive for tier 1, the upper threshold is inclusive for
    tier 3. A NaN price fails both comparisons and therefore lands in tier 3.
    """
    if price <= low_max:
        return 1
    if price < high_min:
        return 2
    return 3

# Derive the tier label from each row's price, store it as a new column, and
# then remove the raw price column the label was computed from.
price_tiers = dataset['avg_price_per_room'].apply(categorize_price)
dataset['label_avg_price_per_room'] = price_tiers
dataset = dataset.drop('avg_price_per_room', axis=1)

In [None]:
# Every object-dtype column is replaced by integer codes; the fitted encoder
# for each column is retained under that column's name.
categorical_columns = dataset.select_dtypes(include=['object']).columns

label_encoders = {name: LabelEncoder() for name in categorical_columns}
for name, encoder in label_encoders.items():
    dataset[name] = encoder.fit_transform(dataset[name])

In [None]:
# Separate the target column from the predictors.
y = dataset['label_avg_price_per_room']
X = dataset.drop('label_avg_price_per_room', axis=1)

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply that same fitted
# transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Wrap the scaled feature matrices and their labels in XGBoost DMatrix objects.
dtrain = xgb.DMatrix(data=X_train_scaled, label=y_train)
dtest = xgb.DMatrix(data=X_test_scaled, label=y_test)

# Booster settings for multi-class classification scored with mlogloss.
# NOTE(review): num_class is 4 although the label column appears to hold only
# the values 1, 2 and 3 -- presumably so the 1-based labels fit inside the
# [0, num_class) range XGBoost requires, leaving class 0 unused. Confirm.
params = dict(
    objective='multi:softmax',
    num_class=4,
    max_depth=6,
    eta=0.3,
    eval_metric='mlogloss',
)

# Train for 100 boosting rounds, passing the held-out matrix as the evaluation
# set named 'test'.
bst = xgb.train(
    params=params,
    dtrain=dtrain,
    num_boost_round=100,
    evals=[(dtest, 'test')],
)

In [None]:
# Predict on the held-out matrix and report the fraction of rows whose
# prediction matches the true label.
preds = bst.predict(dtest)

accuracy = accuracy_score(y_true=y_test, y_pred=preds)
print(f'Acurácia: {accuracy:.2f}')