<a href="https://colab.research.google.com/github/Javier19-cmd/Laboratorio6-7-DeepLearning/blob/main/Lab67_DL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Sistema de recomendación en base al contenido

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
import os
import tensorflow as tf

# Configurando la TPU en Google Colab
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

with strategy.scope():
    # 1. Cargando de datos y preprocesamiento (se está usando el 10% de los datos)
    data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Books.csv')
    data = data.sample(frac=0.1, random_state=1).reset_index(drop=True)  # Usando el 10% de los datos

    # 2. Creación de perfiles de contenido
    data['Content'] = data['Book-Title'] + ' ' + data['Book-Author'] + ' ' + data['Year-Of-Publication'].astype(str) + ' ' + data['Publisher']
    data['Content'].fillna('ValorPredeterminado', inplace=True)

    print(data["Book-Title"])

    # 3. Vectorizando el texto (TF-IDF)
    tfidf_vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf_vectorizer.fit_transform(data['Content'])

    # 4. Calculando la similitud del texto (similitud coseno)
    cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

    # 5. Generación de recomendaciones
    def get_recommendations(book_title, cosine_sim=cosine_sim):
        if book_title in data['Book-Title'].values:
            idx = data[data['Book-Title'] == book_title].index[0]  # Obtiene el índice del primer libro coincidente en el nuevo conjunto de datos
            if idx < len(cosine_sim):  # Asegúrate de que el índice esté dentro del rango de la matriz de similitud del coseno
                sim_scores = list(enumerate(cosine_sim[idx]))
                sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
                sim_scores = sim_scores[1:11]  # Recomienda los 10 libros más similares (excluyendo el libro de entrada)
                book_indices = [i[0] for i in sim_scores]
                return data['Book-Title'].iloc[book_indices]
            else:
                return pd.Series([])  # Devuelve una Serie vacía si el índice está fuera del rango de la matriz de similitud del coseno
        else:
            return pd.Series([])  # Devuelve una Serie vacía si el libro no se encuentra en el conjunto de datos

  data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Books.csv')


0                                       LOST IN THE MUSEUM
1                                   Skinned Alive: Stories
2         Silly Mid-off: How to Survive the Cricket Season
3                        El Pianista del Gueto de Varsovia
4        Merry Wives of Windsor (Arden Shakespeare Seco...
                               ...                        
27131    Valley of Horses (Thorndike Large Print Basic ...
27132    An Elizabethan Progress: The Queen's Journey i...
27133    Millionaire'S Pregnant Bride (Texas Cattleman'...
27134                                             Rosewood
27135     Data &amp; Computer Communications (6th Edition)
Name: Book-Title, Length: 27136, dtype: object


In [None]:
libro = "Skinned Alive: Stories"

if libro in data['Book-Title'].values:
    recommendations = get_recommendations(libro)
    print("La recomendación es:")
    print(recommendations)
else:
  print(f"'{libro}' no se encuentra en el conjunto de datos.")

La recomendación es:
4788                      Fanny: A Fiction (White, Edmund)
17816                                     Forgetting Elena
5008                                  Sketches from Memory
6696                    The Enchanted Land (Romance Alive)
26573      To the Wedding: A Novel (Vintage International)
19116                                          Another You
470                        Our Paris: Sketches from Memory
8533          River Dogs: Stories (Vintage Contemporaries)
18308    The Safety of Objects: Stories (Vintage Contem...
10586                      Nightrose (Romance Alive Audio)
Name: Book-Title, dtype: object


# Sistema de recomendación en base al filtro colaborativo

NNRM

In [33]:
import pandas as pd
import numpy as np
import tensorflow as tf
from keras.layers import Input, Embedding, Flatten, Concatenate, Dense
from keras.models import Model
from sklearn.preprocessing import LabelEncoder

# Configurar la TPU en Google Colab
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.experimental.TPUStrategy(resolver)

with strategy.scope():
    # Carga los datos
    users_data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Users.csv')
    books_data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Books.csv')
    ratings_data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Ratings.csv')

    users_data = users_data.sample(frac=0.1, random_state=1).reset_index(drop=True)
    books_data = books_data.sample(frac=0.1, random_state=1).reset_index(drop=True)
    ratings_data = ratings_data.sample(frac=0.1, random_state=1).reset_index(drop=True)

    ratings_data['ISBN'] = ratings_data['ISBN'].str.rstrip('X')

    label_encoder = LabelEncoder()
    ratings_data['ISBN'] = label_encoder.fit_transform(ratings_data['ISBN'])

    # Definir las incrustaciones de usuario y libro
    num_users = len(users_data) + 1
    num_books = len(books_data) + 1
    embedding_dim = 200

    user_input = Input(shape=[1], name="User-Input")
    user_embedding = Embedding(num_users, embedding_dim, name="User-Embedding")(user_input)
    user_vec = Flatten(name="Flatten-Users")(user_embedding)

    book_input = Input(shape=[1], name="Book-Input")
    book_embedding = Embedding(num_books, embedding_dim, name="Book-Embedding")(book_input)
    book_vec = Flatten(name="Flatten-Books")(book_embedding)

    # Concatenar las incrustaciones de usuario y libro
    concat = Concatenate()([user_vec, book_vec])

    # Capas densas para la predicción de calificación
    dense1 = Dense(128, activation='relu')(concat)
    dense2 = Dense(64, activation='relu')(dense1)
    output = Dense(1, activation='linear')(dense2)

    model = Model(inputs=[user_input, book_input], outputs=output)
    model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_squared_error'])

    history = model.fit([ratings_data['User-ID'], ratings_data['ISBN']], ratings_data['Book-Rating'], epochs=30, verbose=1)

# Hacer predicciones
user_id = 123  # ID de usuario deseado
book_id = 456  # ID de libro deseado
predicted_rating = model.predict([np.array([user_id]), np.array([book_id])])

print(f'Predicción de calificación para el usuario {user_id} y el libro {book_id}: {predicted_rating[0][0]}')

  books_data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Books.csv')


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
Predicción de calificación para el usuario 123 y el libro 456: 1.015446662902832


In [None]:
# import tensorflow as tf
# import pandas as pd
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import LabelEncoder
# import os

# # Carga los datos
# users_data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Users.csv')
# books_data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Books.csv')
# ratings_data = pd.read_csv('/content/drive/MyDrive/Lab67DL/Ratings.csv')

# ratings_data['ISBN'] = ratings_data['ISBN'].str.rstrip('X')

# label_encoder = LabelEncoder()
# ratings_data['ISBN'] = label_encoder.fit_transform(ratings_data['ISBN'])

# # Divide los datos en conjuntos de entrenamiento y prueba
# X_train, X_test, y_train, y_test = train_test_split(ratings_data[["User-ID", "ISBN"]], ratings_data['Book-Rating'], test_size=0.2, random_state=42)

# print(X_train.shape)
# print(y_train.shape)

# # Convierte el modelo para usar TPU
# resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])
# tf.config.experimental_connect_to_cluster(resolver)
# tf.tpu.experimental.initialize_tpu_system(resolver)
# strategy = tf.distribute.experimental.TPUStrategy(resolver)

# with strategy.scope():
#     model = tf.keras.Sequential([
#         tf.keras.layers.Embedding(input_dim=len(ratings_data['ISBN'].unique()) + 1, output_dim=64),
#         tf.keras.layers.Dense(256, activation='relu'),  # Capa oculta con 256 neuronas
#         tf.keras.layers.Dense(128, activation='relu'),  # Capa oculta con 128 neuronas
#         tf.keras.layers.Dense(64, activation='relu'),   # Otra capa oculta con 64 neuronas
#         tf.keras.layers.Dense(32, activation='relu'),   # Capa oculta con 32 neuronas
#         tf.keras.layers.Dense(16, activation='relu'),   # Capa oculta con 16 neuronas
#         tf.keras.layers.Dense(8, activation='relu'),    # Capa oculta con 8 neuronas
#         tf.keras.layers.Dense(1)
#     ])



#     # Compila el modelo
#     model.compile(loss='mean_squared_error', optimizer='adam')

# print(model.summary())

# with strategy.scope():

#   # Entrena el modelo
#   model.fit(X_train, y_train, epochs=50, batch_size=256, validation_split=0.2)