In [None]:
!pip install boto3 pandas scikit-learn 

In [None]:
import pandas as pd
import numpy as np
from sklearn.decomposition import TruncatedSVD
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt

In [None]:
# Load the reservations table; the path is relative to the notebook's working directory.
df = pd.read_csv('Hotel Reservations.csv')

In [None]:
def classify_price(avg_price):
    """Map an average room price to a price-band label.

    Bands:
        1 -- price <= 85
        2 -- 85 < price < 115
        3 -- everything else (price >= 115)

    Values that fail both comparisons (e.g. NaN) also fall through to 3.

    Args:
        avg_price: Numeric average price per room.

    Returns:
        int: 1, 2 or 3.
    """
    low_cutoff, high_cutoff = 85, 115

    # The lower edge is inclusive, the upper edge is exclusive.
    if avg_price <= low_cutoff:
        return 1
    if avg_price < high_cutoff:
        return 2
    return 3

In [None]:
# Derive the price-band label from the raw price, then remove the raw price so
# it is not part of the feature matrix built from `df` later on.
# The guard keeps this cell re-runnable: once the raw column has been dropped,
# executing the cell again is a no-op instead of raising KeyError.
if 'avg_price_per_room' in df.columns:
    df = df.assign(
        label_avg_price_per_room=df['avg_price_per_room'].apply(classify_price)
    ).drop(columns=['avg_price_per_room'])

In [None]:
# Column names grouped by dtype; the target label is kept out of the numeric
# group so it is never scaled or fed to the model as a feature.
categorical_columns = df.select_dtypes(include='object').columns
numeric_columns = (
    df.drop(columns='label_avg_price_per_room')
    .select_dtypes(include='number')
    .columns
)

In [None]:
# Target first, then every remaining column as the feature matrix.
labels = df['label_avg_price_per_room']
features = df.drop(columns=labels.name)

In [None]:
# Per-dtype preprocessing: standardize the numeric columns and one-hot encode
# the object-typed ones.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_columns),
        ('cat', OneHotEncoder(), categorical_columns)
    ])

# Reduce the encoded matrix to 2 components. TruncatedSVD's default solver is
# randomized, so it is seeded (same seed as the KMeans step) to make the
# projection -- and everything computed from it -- reproducible across runs.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('reduce_dim', TruncatedSVD(n_components=2, random_state=42))
])

In [None]:
# Fit the preprocessing + SVD pipeline on all rows and return their 2-component projection.
features_reduced = pipeline.fit_transform(features)

In [None]:
# Fit a seeded 3-cluster KMeans on the reduced features, then read back the
# cluster id assigned to every row.
kmeans = KMeans(random_state=42, n_clusters=3).fit(features_reduced)
predictions = kmeans.predict(features_reduced)

In [None]:
# KMeans cluster ids are arbitrary (0, 1, 2) and do not share the 1/2/3 coding
# of the price labels, so comparing them directly is meaningless. Map every
# cluster to the price label that is most frequent among its members first.
# Note: this is a majority vote, so two clusters may map to the same label.
cluster_to_label = pd.crosstab(predictions, np.asarray(labels)).idxmax(axis=1)
aligned_predictions = pd.Series(predictions).map(cluster_to_label).to_numpy()

print("Confusion Matrix:")
print(confusion_matrix(labels, aligned_predictions))

In [None]:
# Raw KMeans cluster ids (0, 1, 2) are arbitrary and never equal label 3, so
# scoring them against the 1/2/3 price labels directly gives a meaningless
# number. Translate each cluster to its majority price label before scoring.
# Note: this is a majority vote, so two clusters may map to the same label.
cluster_to_label = pd.crosstab(predictions, np.asarray(labels)).idxmax(axis=1)
aligned_predictions = pd.Series(predictions).map(cluster_to_label).to_numpy()

print("Accuracy Score:")
print(accuracy_score(labels, aligned_predictions))

In [None]:
# Scatter of the two reduced components, one point per row, coloured by the
# assigned cluster. Drawn through the Axes object instead of pyplot shortcuts.
axes = plt.gca()
axes.scatter(
    features_reduced[:, 0],
    features_reduced[:, 1],
    c=predictions,
    cmap='viridis',
    marker='o',
    edgecolor='k',
    s=50,
)
axes.set_title('Clusters após TruncatedSVD')
axes.set_xlabel('Componente Principal 1')
axes.set_ylabel('Componente Principal 2')
plt.show()