In [2]:
import ast
import boto3
import faiss
import firebase_admin
import json
import math
import numpy as np
import os
import pandas as pd
import requests
import time
import torch
import torch.nn.functional as F

from botocore.exceptions import ClientError
from decimal import Decimal
from dotenv import load_dotenv
from firebase_admin import credentials, firestore
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModel
#from torch.nn.functional import cosine_similarity

In [3]:
# uploading the environment variables and get the API key
load_dotenv()
HUGGINGFACE_API_KEY = os.getenv("HUGGINGFACE_API_KEY")
AWS_ACCESS_KEY = os.getenv("AWS_ACCESS_KEY")
AWS_SECRET_KEY = os.getenv("AWS_SECRET_KEY")

In [8]:
# Genres database, local by now. Then we have to get them linked to firebase, or wathever
df_genres = pd.read_csv(r'generos.csv', sep=',')

all_embeddings_from_filtered_data = pd.read_csv(r'../../../all_content_embeddings.csv')
filtered_data = pd.read_csv(r'../../../Test_clean.csv')


In [5]:

# Conversión optimizada para el dict de los embeddings que habiamos guardado en csv
def fast_convert(emb):
    if isinstance(emb, str): 
        return np.array(json.loads(emb), dtype=np.float32)  # Usa float32 para ahorrar memoria
    return emb

all_embeddings_dict = {
    id_: fast_convert(emb)
    for id_, emb in zip(
        all_embeddings_from_filtered_data['ID'], 
        all_embeddings_from_filtered_data['Embedding']
    )
}


### Content Data from FireBase (remains missing the conextion to firebase collection)

In [6]:
# Start Firebase if it's not done (I KEEP THIS FOR THE MOMENT THE DICT OF EMBEDDIGNS WOULD BE A COLLECTION OF)
if not firebase_admin._apps:
    cred_path = r'../../../bubbo-dfba0-firebase-adminsdk-fbsvc-79dc4511e7.json'  
    cred = credentials.Certificate(cred_path)
    firebase_admin.initialize_app(cred)

In [None]:
'''# Esta celda por el momento tampoco sirve si se obtienen los datos local

# Firestore conexion and db collection name
db = firestore.client()
collection_ref = db.collection('Data_Clean') # Look for the new collection
# to get it all
docs = collection_ref.stream()
# documents to dictionaries
data = [{**doc.to_dict(), 'id': doc.id} for doc in docs]
df = pd.DataFrame(data)

'''

AttributeError: '_UnaryStreamMultiCallable' object has no attribute '_retry'

In [9]:
# Cuando corro esta celda, "reseteo" a filtered_data
#filtered_data = df # Comentada porque ahora la saco de local ya que los embeddings ya estan 
filtered_data = filtered_data.replace("",pd.NA)
filtered_data = filtered_data.dropna()                                                                
filtered_data = filtered_data.drop_duplicates()
filtered_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 401689 entries, 0 to 403139
Data columns (total 10 columns):
 #   Column        Non-Null Count   Dtype 
---  ------        --------------   ----- 
 0   Unnamed: 0    401689 non-null  int64 
 1   ID            401689 non-null  object
 2   Genre         401689 non-null  object
 3   CleanTitle    401689 non-null  object
 4   Synopsis      401689 non-null  object
 5   Directors     401689 non-null  object
 6   Cast          401689 non-null  object
 7   Type          401689 non-null  object
 8   PlatformName  401689 non-null  object
 9   Score         401689 non-null  int64 
dtypes: int64(2), object(8)
memory usage: 33.7+ MB


In [None]:
filtered_data[filtered_data['ID']=='671']                                                   

Unnamed: 0.1,Unnamed: 0,ID,Genre,CleanTitle,Synopsis,Directors,Cast,Type,PlatformName,Score
2602,3111,671,Adventure,Harry Potter 1: en de Steen der Wijzen,Dit is de betoverende verfilming gebaseerd op ...,Chris Columbus,Daniel Radcliffe; Rupert Grint; Emma Watson; R...,Movie,Rakuten TV,6
30262,35868,671,Children,Harry Potter and the Philosopher's Stone,Based on the wildly popular J.K. Rowling's boo...,Chris Columbus,Emma Watson; Daniel Radcliffe; Rupert Grint,Movie,Amazon Prime Video,6


### User Preferences from DynamoDB

In [11]:
# To get the info from DynamoDB, user preferences
CONFIG = {
    'aws': {
        'access_key': AWS_ACCESS_KEY,
        'secret_key': AWS_SECRET_KEY,
        'region': 'eu-west-3',
        'table': 'User-7kkcm5dn2rb77hst5nh7gbdisa-staging'
    },
    'columns': ['userId', 'favoriteMoviesIds', 'favoriteGenresIds', 'favoriteSeriesIds'],
}

# conexion
session = boto3.Session(
    aws_access_key_id=CONFIG['aws']['access_key'],
    aws_secret_access_key=CONFIG['aws']['secret_key'],
    region_name=CONFIG['aws']['region']
)

table = session.resource('dynamodb').Table(CONFIG['aws']['table'])

# Values to String
def _process_value(value):
    if isinstance(value, Decimal):
        return str(int(value))
    return str(value)

# Retrive info from DynamoDB and gets a DataFrame
def fetch_preferences():
    try:
        items = []
        start_key = None

        while True:
            # scan with defined 'columns'  in previous 'CONFIG'
            scan_params = {
                'ProjectionExpression': ', '.join(CONFIG['columns'])
            }
            if start_key:
                scan_params['ExclusiveStartKey'] = start_key

            response = table.scan(**scan_params)
            items.extend(response.get('Items', []))

            # check for next pages
            start_key = response.get('LastEvaluatedKey')
            if not start_key:
                break

        # data extracted processing
        processed_data = [{
            'userId': _process_value(item.get('userId', '')),
            'favoriteMoviesIds': ';'.join(map(_process_value, item.get('favoriteMoviesIds', []))),     ###################### DIRECTOR MAS CAST HAY QUE TRAER DESPUES CUANDO COMPLETO EL DF LUEGO DE FILTERED_DATA
            'favoriteGenresIds': ';'.join(map(_process_value, item.get('favoriteGenresIds', []))),
            'favoriteSeriesIds': ';'.join(map(_process_value, item.get('favoriteSeriesIds', [])))
        } for item in items]

        df = pd.DataFrame(processed_data)
        return df

    except ClientError as e:
        print(f"Error al conectar con DynamoDB: {e}")
        return pd.DataFrame()

# calling function to get the df
user_pref = fetch_preferences()


In [12]:
# limpio el dataframe dejando solo users con genero, movie_favs y tvshow_favs
user_pref = user_pref[user_pref['userId'].str.len()==36]
user_pref = user_pref.replace("",pd.NA)
user_pref = user_pref.dropna()             
user_pref.reset_index(inplace=True,drop=True)
print(f'Duplicates: {user_pref.duplicated().sum()}')
user_pref.info()

Duplicates: 0
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9284 entries, 0 to 9283
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   userId             9284 non-null   object
 1   favoriteMoviesIds  9284 non-null   object
 2   favoriteGenresIds  9284 non-null   object
 3   favoriteSeriesIds  9284 non-null   object
dtypes: object(4)
memory usage: 290.3+ KB


In [13]:
user_pref[user_pref['userId']== 'a189607e-3041-70a5-3daf-01cd2118baff'].head()

Unnamed: 0,userId,favoriteMoviesIds,favoriteGenresIds,favoriteSeriesIds
101,a189607e-3041-70a5-3daf-01cd2118baff,671;672;673;135397;354912,10749;27;9648;18;10770;10766;10767;53;878;1076...,66732;93405;18165;119051;65334


In [14]:
# Ahora tengo que traer tambien las Cast y Director

# convertir los valores en listas para expandirlos con explode
user_pref['favoriteGenresIds'] = user_pref['favoriteGenresIds'].apply(lambda x: x.split(';'))
user_pref['favoriteMoviesIds'] = user_pref['favoriteMoviesIds'].apply(lambda x: x.split(';'))
user_pref['favoriteSeriesIds'] = user_pref['favoriteSeriesIds'].apply(lambda x: x.split(';'))

# expandir preferencias de favoriteMoviesIds, favoriteGenresIds, y favoriteSeriesIds por userId
user_fav_genres = user_pref[['userId','favoriteGenresIds']].explode('favoriteGenresIds')
user_fav_movies = user_pref[['userId','favoriteMoviesIds']].explode('favoriteMoviesIds')
user_fav_series = user_pref[['userId','favoriteSeriesIds']].explode('favoriteSeriesIds')


# merge para traerme los CleanTitle, Synopsis, 'Genre'
user_fav_genres['favoriteGenresIds'] = user_fav_genres['favoriteGenresIds'].astype(int)
user_fav_genres = user_fav_genres.merge(df_genres[['genero_id','genero_name']], left_on='favoriteGenresIds', right_on='genero_id') 


filtered_data = filtered_data.dropna(subset=['ID'])                                                                                    # <- Nuevo agregado# <- Nuevo agregado# <- Nuevo agregado
filtered_data['ID'] = filtered_data['ID'].astype(str).str.strip()                                                                                 # <- Nuevo agregado# <- Nuevo agregado# <- Nuevo agregado
user_fav_movies['favoriteMoviesIds'] = user_fav_movies['favoriteMoviesIds'].astype(str).str.strip()
user_fav_series['favoriteSeriesIds'] = user_fav_series['favoriteSeriesIds'].astype(str).str.strip()

user_fav_movies = user_fav_movies.merge(filtered_data[['ID','CleanTitle','Synopsis', 'Cast', 'Directors']], left_on='favoriteMoviesIds', right_on='ID', how='left')  ###### en esta y lasig fila agregué synopsis
user_fav_series = user_fav_series.merge(filtered_data[['ID','CleanTitle','Synopsis', 'Cast', 'Directors']], left_on='favoriteSeriesIds', right_on='ID', how='left')

user_fav_genres = user_fav_genres.drop(columns='genero_id')
user_fav_genres.rename(columns={'genero_name':'Genres'}, inplace=True)
user_fav_movies = user_fav_movies.drop(columns='ID')
user_fav_movies.rename(columns={'CleanTitle':'Movies_Titles', 'Synopsis':'Movies_Synopsis', 'Cast':'Movies_Cast', 'Directors':'Movies_Directors'}, inplace=True)
user_fav_series = user_fav_series.drop(columns='ID')
user_fav_series.rename(columns={'CleanTitle':'Series_Titles', 'Synopsis':'Series_Synopsis', 'Cast':'Series_Cast', 'Directors':'Series_Directors'}, inplace=True)

# reAGRUPO por userId para que me queden las listas CleanTitle, Synopsis, 'Genre' por user segun sus favoriteMoviesIds, favoriteGenresIds, y favoriteSeriesIds por userId
user_fav_genres = user_fav_genres.groupby('userId')[['favoriteGenresIds','Genres']].agg(list).reset_index()
user_fav_movies = user_fav_movies.groupby('userId')[['favoriteMoviesIds','Movies_Titles', 'Movies_Synopsis', 'Movies_Cast', 'Movies_Directors']].agg(list).reset_index()
user_fav_series = user_fav_series.groupby('userId')[['favoriteSeriesIds','Series_Titles', 'Series_Synopsis', 'Series_Cast', 'Series_Directors']].agg(list).reset_index()


In [15]:
user_pref[user_pref['userId']== 'a189607e-3041-70a5-3daf-01cd2118baff'].head()

Unnamed: 0,userId,favoriteMoviesIds,favoriteGenresIds,favoriteSeriesIds
101,a189607e-3041-70a5-3daf-01cd2118baff,"[671, 672, 673, 135397, 354912]","[10749, 27, 9648, 18, 10770, 10766, 10767, 53,...","[66732, 93405, 18165, 119051, 65334]"


In [16]:
#termino de acomodar 'user_pref' para dar paso a los embeddings
user_pref = user_pref.merge(user_fav_genres, left_on='userId', right_on='userId').drop(columns=['favoriteGenresIds_y'])
user_pref.rename(columns={'favoriteGenresIds_x':'favoriteGenresIds'},inplace=True)
user_pref = user_pref.merge(user_fav_movies, left_on='userId', right_on='userId').drop(columns=['favoriteMoviesIds_y'])
user_pref.rename(columns={'favoriteMoviesIds_x':'favoriteMoviesIds'},inplace=True)
user_pref = user_pref.merge(user_fav_series, left_on='userId', right_on='userId').drop(columns=['favoriteSeriesIds_y'])
user_pref.rename(columns={'favoriteSeriesIds_x':'favoriteSeriesIds'},inplace=True)
user_pref = user_pref.reindex(['userId', 'favoriteGenresIds', 'Genres', 'favoriteMoviesIds', 'Movies_Titles','Movies_Synopsis', 'Movies_Cast', 'Movies_Directors', 'favoriteSeriesIds', 'Series_Titles', 'Series_Synopsis', 'Series_Cast', 'Series_Directors'], axis=1)

In [17]:
user_pref[user_pref['userId']== 'c1d9a05e-50c1-7041-a60e-9a56c092e612'].head()

Unnamed: 0,userId,favoriteGenresIds,Genres,favoriteMoviesIds,Movies_Titles,Movies_Synopsis,Movies_Cast,Movies_Directors,favoriteSeriesIds,Series_Titles,Series_Synopsis,Series_Cast,Series_Directors
3,c1d9a05e-50c1-7041-a60e-9a56c092e612,"[28, 10759, 12, 16, 10762, 80, 35, 99, 10764, ...","[Acción, Action & Adventure, Aventura, Animaci...","[27205, 157336, 155, 19995, 293660]","[Perfect Match, Interstellar, Interstellar, Th...",[Watch the latest Perfect Match (2018) Full on...,"[Feng Zhi Mo; Xu Ding, Matthew McConaughey; Je...","[Ba Chen Xu, Christopher Nolan, Christopher No...","[1399, 71446, 66732, 1402, 93405]","[Game of Thrones, Game of Thrones, nan, Santo ...",[Il y a de l'orage dans l'air au royaume des S...,[Emilia Clarke; Peter Dinklage; Kit Harington;...,"[David Nutter; Alan Taylor; Alex Graves, David..."


### Sentences for vectorize from 'filtered_data' / AHORA DESDE FIREBASE

In [18]:
# Making the sentences to embed 
filtered_data['sentences_to_embed'] = (
    filtered_data.CleanTitle.fillna('') +
    filtered_data.Synopsis.fillna('') +
    filtered_data.Genre.fillna('').apply(
        lambda x: ', '.join(ast.literal_eval(x)) if x.startswith('[') and x.endswith(']') else x ) +
    filtered_data.Cast.fillna('') +
    filtered_data.Directors.fillna('')
)

ids_from_filtered_data = filtered_data['ID'].tolist()  # Guardamos los IDs
sentences_from_filtered_data = filtered_data['sentences_to_embed'].dropna().astype(str).tolist()



In [19]:
# Since the model on Hugging Face processes only requests that can be completed within 60 seconds, we need to divide the sentences into batches.
def split_into_batches(sentences, batch_size):
    return [sentences[i:i + batch_size] for i in range(0, len(sentences), batch_size)]

# After trying with different values, we've reach the maximum batch size to get response succesfully
batches = split_into_batches(sentences_from_filtered_data, 75)

In [20]:
# Check key availability
if HUGGINGFACE_API_KEY is None:
    print("Error: No se encontró la clave de API de Hugging Face.")
else:
    print("Clave de API cargada correctamente.")

# Model URL
API_URL = "https://api-inference.huggingface.co/pipeline/feature-extraction/sentence-transformers/all-MiniLM-L6-v2"

# API header and key
headers = {"Authorization": f"Bearer {HUGGINGFACE_API_KEY}"}  

# Function to get embeddings from Hugging Face API
def get_embeddings_from_api(sentences):
    url = API_URL
    payload = {"inputs": sentences}
    
    response = requests.post(url, headers=headers, json=payload, timeout=10)
    
    if response.status_code == 200:
        return response.json()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None
    

# all_embeddings_from_filtered_data = []
# for batch in batches:
#     print(f"Processing batch with {len(batch)} sentences...")
#     time.sleep(7)
#     embeddings = get_embeddings_from_api(batch)
#     if embeddings:
#         all_embeddings_from_filtered_data.extend(embeddings)

# # Asociamos cada embedding con su respectivo ID
# all_embeddings_dict = {id_: emb for id_, emb in zip(ids_from_filtered_data, all_embeddings_from_filtered_data)}

# print("Embeddings processed successfully:")
# print(list(all_embeddings_dict.items())[:2])  # Muestra los primeros pares ID - embedding


Clave de API cargada correctamente.


### Sentences for vectorize from 'user_preferences' / AHORA DESDE DYNAMODB

In [21]:
######################################################################################################### DESDE ACA 13/02
# Sentences we want to be embedded from user_preferences MOVIES
user_pref['movies_sentences_to_embed'] = (user_pref.Movies_Titles.fillna('') +
                                   user_pref.Movies_Synopsis.fillna('')+
                                   user_pref.Genres.fillna('') +
                                   user_pref.Movies_Cast.fillna('') +
                                   user_pref.Movies_Directors.fillna(''))

# Sentences we want to be embedded from user_preferences SERIES
user_pref['series_sentences_to_embed'] = (user_pref.Series_Titles.fillna('') + 
                                   user_pref.Series_Synopsis.fillna('') +
                                   user_pref.Genres.fillna('') +
                                   user_pref.Series_Cast.fillna('') +
                                   user_pref.Series_Directors.fillna(''))

############################################################################################################## NUEVO BBBB
# Guardar userId junto con la oración a vectorizar
movies_sentences_from_user_pref = user_pref[['userId', 'movies_sentences_to_embed']].dropna().astype(str)
series_sentences_from_user_pref = user_pref[['userId', 'series_sentences_to_embed']].dropna().astype(str)
############################################################################################################## NUEVO BBBB CIERRO

# We split the sentences in batches as we did previously with filtered_data
movies_batches_user_pref = split_into_batches(movies_sentences_from_user_pref, 75)
series_batches_user_pref = split_into_batches(series_sentences_from_user_pref, 75)


In [22]:
movies_embeddings_dict = {}  # Diccionario para almacenar {userId: embedding}
num_batches = len(movies_batches_user_pref)  # Total de batches

for i, batch in enumerate(movies_batches_user_pref, start=1):
    time.sleep(1)
    print(f"Processing movie batch {i}/{num_batches} with {len(batch)} sentences...")
    
    batch_user_ids = batch['userId'].tolist()
    batch_sentences = batch['movies_sentences_to_embed'].tolist()
    
    embeddings = get_embeddings_from_api(batch_sentences)
    if embeddings:
        movies_embeddings_dict.update({uid: emb for uid, emb in zip(batch_user_ids, embeddings)})

print("Movies Embeddings processed successfully.")

Processing movie batch 1/124 with 75 sentences...
Processing movie batch 2/124 with 75 sentences...
Processing movie batch 3/124 with 75 sentences...
Processing movie batch 4/124 with 75 sentences...
Processing movie batch 5/124 with 75 sentences...
Processing movie batch 6/124 with 75 sentences...
Processing movie batch 7/124 with 75 sentences...
Processing movie batch 8/124 with 75 sentences...
Processing movie batch 9/124 with 75 sentences...
Processing movie batch 10/124 with 75 sentences...
Processing movie batch 11/124 with 75 sentences...
Processing movie batch 12/124 with 75 sentences...
Processing movie batch 13/124 with 75 sentences...
Processing movie batch 14/124 with 75 sentences...
Processing movie batch 15/124 with 75 sentences...
Processing movie batch 16/124 with 75 sentences...
Processing movie batch 17/124 with 75 sentences...
Processing movie batch 18/124 with 75 sentences...
Processing movie batch 19/124 with 75 sentences...
Processing movie batch 20/124 with 75 se

In [24]:
series_embeddings_dict = {}  # Diccionario para almacenar {userId: embedding}
num_batches_series = len(series_batches_user_pref)  # Total de batches

for i, batch in enumerate(series_batches_user_pref, start=1):
    time.sleep(1)
    print(f"Processing series batch {i}/{num_batches_series} with {len(batch)} sentences...")
    
    batch_user_ids = batch['userId'].tolist()
    batch_sentences = batch['series_sentences_to_embed'].tolist()
    
    embeddings = get_embeddings_from_api(batch_sentences)
    if embeddings:
        series_embeddings_dict.update({uid: emb for uid, emb in zip(batch_user_ids, embeddings)})

print("Series Embeddings processed successfully.")

Processing series batch 1/124 with 75 sentences...
Processing series batch 2/124 with 75 sentences...
Processing series batch 3/124 with 75 sentences...
Processing series batch 4/124 with 75 sentences...
Processing series batch 5/124 with 75 sentences...
Processing series batch 6/124 with 75 sentences...
Processing series batch 7/124 with 75 sentences...
Processing series batch 8/124 with 75 sentences...
Processing series batch 9/124 with 75 sentences...
Processing series batch 10/124 with 75 sentences...
Processing series batch 11/124 with 75 sentences...
Processing series batch 12/124 with 75 sentences...
Processing series batch 13/124 with 75 sentences...
Processing series batch 14/124 with 75 sentences...
Processing series batch 15/124 with 75 sentences...
Processing series batch 16/124 with 75 sentences...
Processing series batch 17/124 with 75 sentences...
Processing series batch 18/124 with 75 sentences...
Processing series batch 19/124 with 75 sentences...
Processing series bat

### Movies Similarities 

In [25]:
# All this cell content is about movies
# We have to convert the embeddings list to numpy arrays in order to calculate cosine similarities wiht sklearn
movies_embeddings_from_user_pref_array = np.array(list(movies_embeddings_dict.values()))
all_embeddings_from_filtered_data_array = np.array(list(all_embeddings_dict.values()))


user_for_example = 2

# Taking first user as example to calculate the cosine_similarity
user_id_example = list(movies_embeddings_dict.keys())[user_for_example]
movies_user_embedding_example = np.array(movies_embeddings_dict[user_id_example]).reshape(1, -1)  # Asegurar forma correcta para cosine_similarity


# To calculate similarity between the user example embeding and the whole content from filtered data
movies_content_similarities = cosine_similarity(movies_user_embedding_example, all_embeddings_from_filtered_data_array).flatten()

# Sort indexes by similarity
movies_most_similar_indexes = movies_content_similarities.argsort()[::-1]

# Top-10
movies_topten_most_similar_indexes = movies_most_similar_indexes[:10]

In [27]:
# To display the movies most similar indexes and their similarity scores
print("Movies Most similar indexes:", movies_most_similar_indexes[:10])
print("Movies Highest similarities:", movies_content_similarities[movies_most_similar_indexes[:10]])

Movies Most similar indexes: [132126 117993  88286  20613  21343  96796 126503  63667  17944  21200]
Movies Highest similarities: [0.71462399 0.69705894 0.69705894 0.68328462 0.68118748 0.67463338
 0.67463338 0.66778694 0.66632306 0.6490797 ]


### Tv Shows (Series) Similarities

In [None]:
# All this cell content is about series 
# We have to convert the embeddings list to numpy arrays in order to calculate cosine similarities wiht sklearn
movies_user_embedding_example = np.array(movies_embeddings_dict[user_id_example]).reshape(1, -1)
'''Estas lineas comentadas ya las hicen en movies, las dejo para verlas nada mas.
all_embeddings_from_filtered_data_array = np.array(all_embeddings_from_filtered_data) esta linea ya la hice en movies

user_for_example = 1'''

# Taking first user as example to calculate the cosine_similarity
series_user_embedding_example = np.array(series_embeddings_dict[user_id_example]).reshape(1, -1)  # forma correcta para cosine_similarity

# To calculate similarity between the user example embeding and the whole content from filtered data
series_content_similarities = cosine_similarity(series_user_embedding_example, all_embeddings_from_filtered_data_array).flatten()

# Sort indexes by similarity
series_most_similar_indexes = series_content_similarities.argsort()[::-1]

# Top-10
series_topten_most_similar_indexes = series_most_similar_indexes[:10]

In [None]:
# To display the series most similar indexes and their similarity scores
print("Series Most similar indexes:", series_most_similar_indexes[:10])
print("Series Highest similarities:", series_content_similarities[series_most_similar_indexes[:10]])

# Recomendations

In [None]:
# search most_similar_indexes, and preferences, and get recommendations
user_id = user_id_example  
movies_preferred = user_pref[user_pref['userId']==user_id]['Movies_Titles']
series_preferred = user_pref[user_pref['userId']==user_id]['Series_Titles']

In [None]:
series_preferred

In [None]:
user_pref[user_pref['userId']== 'e119804e-3011-700f-a5e3-e111aec24ac8'].head()

In [None]:
# search most_similar_indexes, and preferences, and get recommendations
user_id = user_id_example  
movies_preferred = user_pref[user_pref['userId']==user_id]['Movies_Titles']
series_preferred = user_pref[user_pref['userId']==user_id]['Series_Titles']

# Displaying Preferences & Recommendations
print(f'''User {user_id} Preferences:
      ''')
print(f'''Movies preference:''')
for movie in movies_preferred.iloc[0]:# .split(';'):
    print(f'      {movie.strip()}')

print(f'''TV Shows preference:''')
for series in series_preferred.iloc[0]:# .split(';'):
    print(f'''      {series.strip()}
          ''')

print(f'''Recomendations for user: 
      {user_id}
      ''')

################################################################################################################# NUEVO AAAA
# Convertimos los índices más similares en IDs reales
movies_recommended_ids = [filtered_data.iloc[i]['ID'] for i in movies_topten_most_similar_indexes]
series_recommended_ids = [filtered_data.iloc[i]['ID'] for i in series_topten_most_similar_indexes]
################################################################################################################# CIERRO NUEVO AAAA

# Ahora buscamos los títulos usando los IDs reales
movies_recomendations_user = filtered_data[filtered_data['ID'].isin(movies_recommended_ids)]['CleanTitle']
series_recomendations_user = filtered_data[filtered_data['ID'].isin(series_recommended_ids)]['CleanTitle']


print('Movies Recommendations:')
for recommendation in movies_recomendations_user:
    print(f'      {recommendation}')

print('Tv Shows Recommendations:')
for recommendation in series_recomendations_user:
    print(f'      {recommendation}')