In [None]:
!pip install tensorflow_recommenders

In [None]:
import tensorflow as tf
import tensorflow_recommenders as tfrs
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from google.colab import drive

# ML

## Modelo de recomendación de Machine Learning utilizando TensorFlow Recommenders

MONTAJE DE ARCHIVOS DESDE GOOGLE DRIVE:

In [None]:
# Mount Google Drive at /content/drive (uses the google.colab `drive` helper,
# so this cell only works inside Colab).
drive.mount('/content/drive')

In [None]:
# Read the parquet file stored under the mounted Drive folder into
# `user_reviews_dataset`; `user_reviews_content` keeps the path that was used.
user_reviews_content = '/content/drive/MyDrive/Colab Notebooks/HotelWiseML/Hoteles.NLP.01.NLTK.parquet'
user_reviews_dataset = pd.read_parquet(user_reviews_content)

CARGA DE ARCHIVOS LOCALES (alternativa al montaje de Google Drive):

In [None]:
# Load the dataset from the working directory when the file is available.
# On "Run All" this cell executes after the Google Drive cell; if the local
# file is missing but a frame was already loaded, keep that frame instead of
# failing (and instead of leaving `user_reviews_content` pointing at a path
# that was never read).
local_reviews_content = 'Hoteles.NLP.01.NLTK.parquet'
try:
    local_reviews_dataset = pd.read_parquet(local_reviews_content)
except FileNotFoundError:
    if 'user_reviews_dataset' not in globals():
        # Nothing was loaded by an earlier cell either: surface the error.
        raise
    print(f"No se encontró '{local_reviews_content}' en local; se conserva el dataset ya cargado.")
else:
    user_reviews_content = local_reviews_content
    user_reviews_dataset = local_reviews_dataset

## Funciones

In [None]:
# --- Data preprocessing ------------------------------------------------------
# Distinct values of the 'name' and 'city' columns, in order of first appearance.
unique_hotels = user_reviews_dataset['name'].unique()
unique_cities = user_reviews_dataset['city'].unique()

# Lookup tables from each distinct value to its position in the arrays above.
hotel_to_index = dict(zip(unique_hotels, range(len(unique_hotels))))
city_to_index = dict(zip(unique_cities, range(len(unique_cities))))

# Attach the integer positions to the DataFrame as new columns.
# Note: this adds columns to `user_reviews_dataset` in place.
user_reviews_dataset['hotel_index'] = user_reviews_dataset['name'].map(hotel_to_index)
user_reviews_dataset['city_index'] = user_reviews_dataset['city'].map(city_to_index)

# 80/20 train/test split with a fixed seed.
train_data, test_data = train_test_split(
    user_reviews_dataset,
    test_size=0.2,
    random_state=42,
)

# --- Embedding configuration -------------------------------------------------
embedding_dimension = 32


def _new_embedding(vocabulary_size):
    """Return an Embedding layer with `vocabulary_size` rows of width `embedding_dimension`."""
    return tf.keras.layers.Embedding(input_dim=vocabulary_size, output_dim=embedding_dimension)


# Module-level embedding layers for hotels, cities and the three feature columns.
# NOTE(review): nothing in the visible part of the notebook reads these layers
# (HotelModel builds its own) - confirm whether later cells need them.
hotel_embedding = _new_embedding(len(unique_hotels))
city_embedding = _new_embedding(len(unique_cities))
security_embedding = _new_embedding(user_reviews_dataset['security'].nunique(dropna=False))
rating_embedding = _new_embedding(user_reviews_dataset['avg_rating'].nunique(dropna=False))
sentiment_embedding = _new_embedding(user_reviews_dataset['sentiment_analysis'].nunique(dropna=False))

# Definir modelo de recomendación
class HotelModel(tfrs.Model):
    """Retrieval model that sums feature embeddings into a query and matches it
    against hotel embeddings.

    The query embedding is the element-wise sum of the hotel, city, security,
    rating and sentiment embeddings; the candidate embedding is the hotel
    embedding. The loss is computed by `tfrs.tasks.Retrieval`.

    Args:
        unique_hotels: Sized collection of distinct hotels; its length sets the
            number of rows of the hotel embedding table.
        unique_cities: Sized collection of distinct cities; its length sets the
            number of rows of the city embedding table.
        embedding_dimension: Width of every embedding vector (default 32, the
            value that was previously hard-coded).

    Note:
        The sizes of the security/rating/sentiment tables are read from the
        module-level `user_reviews_dataset`, so that DataFrame must exist
        before the model is instantiated.
    """

    def __init__(self, unique_hotels, unique_cities, embedding_dimension=32):
        super().__init__()

        def make_embedding(vocabulary_size):
            # All tables share the same width so they can be summed in compute_loss.
            return tf.keras.layers.Embedding(input_dim=vocabulary_size, output_dim=embedding_dimension)

        # Embedding tables for hotels, cities and the three feature columns.
        self.hotel_embeddings = make_embedding(len(unique_hotels))
        self.city_embeddings = make_embedding(len(unique_cities))
        self.security_embeddings = make_embedding(len(user_reviews_dataset['security'].unique()))
        self.rating_embeddings = make_embedding(len(user_reviews_dataset['avg_rating'].unique()))
        self.sentiment_embeddings = make_embedding(len(user_reviews_dataset['sentiment_analysis'].unique()))

        # Retrieval task: turns (query, candidate) embedding pairs into a loss.
        self.task = tfrs.tasks.Retrieval()

    def compute_loss(self, features, training=False):
        """Compute the retrieval loss for one batch.

        Args:
            features: Mapping of column name to batched tensor. Reads the keys
                'hotel_index', 'city_index', 'security', 'avg_rating' and
                'sentiment_analysis'.
            training: Unused; kept for the `tfrs.Model` interface.

        Returns:
            The loss returned by the retrieval task (metrics are not computed).
        """
        # Embedding layers need integer indices, so use the index columns
        # rather than the raw 'name' / 'city' columns.
        hotel_embeddings = self.hotel_embeddings(features['hotel_index'])
        city_embeddings = self.city_embeddings(features['city_index'])

        # NOTE(review): these three columns are used as embedding indices as-is.
        # This assumes their values are integers in [0, number of distinct values)
        # - TODO confirm; otherwise map them to contiguous indices first, as is
        # done for hotels and cities.
        security_embeddings = self.security_embeddings(features['security'])
        rating_embeddings = self.rating_embeddings(features['avg_rating'])
        sentiment_embeddings = self.sentiment_embeddings(features['sentiment_analysis'])

        # Query = sum of all feature embeddings.
        # NOTE(review): the query includes the embedding of the very hotel used
        # as the candidate below, so the target leaks into the query - confirm
        # this is intended.
        feature_embeddings = (
            hotel_embeddings
            + city_embeddings
            + security_embeddings
            + rating_embeddings
            + sentiment_embeddings
        )

        # Candidate = hotel embedding; same table and same indices as above,
        # so the lookup result is reused instead of being computed twice.
        candidate_embeddings = hotel_embeddings

        return self.task(feature_embeddings, candidate_embeddings, compute_metrics=False)

# --- Training configuration --------------------------------------------------
BATCH_SIZE = 32
EPOCHS = 10
LEARNING_RATE = 0.1
SHUFFLE_BUFFER = 10_000
SEED = 42  # same seed as the train/test split

# Seed TensorFlow before the model is created so that embedding initialisation
# and shuffling are reproducible across "Restart & Run All".
tf.random.set_seed(SEED)

# Build TensorFlow datasets from the train/test DataFrames.
# NOTE(review): every DataFrame column is converted to a tensor here; if the
# frame holds columns TensorFlow cannot convert, select the model's feature
# columns first.
train_dataset = tf.data.Dataset.from_tensor_slices(dict(train_data))
test_dataset = tf.data.Dataset.from_tensor_slices(dict(test_data))

# Create, compile and train the model.
model = HotelModel(unique_hotels, unique_cities)
model.compile(optimizer=tf.keras.optimizers.Adagrad(LEARNING_RATE))
model.fit(
    # Reshuffle every epoch so each batch holds a different mix of rows.
    train_dataset.shuffle(SHUFFLE_BUFFER, seed=SEED, reshuffle_each_iteration=True).batch(BATCH_SIZE),
    # Report the loss on the held-out split, which was previously built but unused.
    validation_data=test_dataset.batch(BATCH_SIZE),
    epochs=EPOCHS,
)

# --- Recommendation for a given city -----------------------------------------
TOP_K = 10  # maximum number of hotels to recommend

# City to recommend hotels for, e.g. 'Miami Beach'.
city_name = 'Miami Beach'

# Look up the city's index, failing with a clear message for unknown cities.
if city_name not in city_to_index:
    raise KeyError(f"Ciudad desconocida: {city_name!r}")
city_index = city_to_index[city_name]

# Candidates are the hotels that appear with the requested city in the data,
# so hotels from other cities are never recommended.
city_hotel_indices = (
    user_reviews_dataset.loc[user_reviews_dataset['city'] == city_name, 'hotel_index']
    .dropna()
    .unique()
    .astype('int64')
)

# Query: the learned embedding of the requested city.
# NOTE(review): during training the query is the sum of several feature
# embeddings; only the city is known here, so the city embedding alone is used
# as an approximation.
query_embeddings = model.city_embeddings(tf.constant([city_index]))[0]

# Score every candidate hotel by the dot product with the query and keep the
# best TOP_K (or fewer when the city has fewer hotels).
candidate_embeddings = model.hotel_embeddings(tf.constant(city_hotel_indices))
scores = tf.linalg.matvec(candidate_embeddings, query_embeddings)
top_recommendations = tf.math.top_k(scores, k=min(TOP_K, len(city_hotel_indices)))

# Translate positions within the candidate list back to global hotel indices.
top_hotel_indices = city_hotel_indices[top_recommendations.indices.numpy()]

# Resolve the hotel names of the recommendations.
recommended_hotels = [unique_hotels[index] for index in top_hotel_indices]

# Print the recommended hotel names.
print("Los mejores hoteles recomendados para", city_name, "son:")
for hotel in recommended_hotels:
    print(hotel)
