In [16]:
# Tratamiento de datos
# -----------------------------------------------------------------------
import pandas as pd
pd.set_option("display.max_columns", None)
import numpy as np

# Otros objetivos
# -----------------------------------------------------------------------
import sys

# Trabajar con bases de datos y python
# -----------------------------------------------------------------------
import psycopg2
from psycopg2 import OperationalError, errorcodes, errors
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Ruta relativa desde notebooks/1-Preprocesing/ a src/
src_path = "../src/"
sys.path.append(src_path)
import support_preprocesing as sp 
import support_sistemas_recomendacion as ssr 
%load_ext autoreload
%autoreload 2

# Configuración warnings
# -----------------------------------------------------------------------
import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
try:
    conexionpeliculas=psycopg2.connect(
    database="film",
    user="postgres",
    password="admin",
    host="localhost",
    port="5432")
except OperationalError as e:
    if e.pgcode ==errorcodes.INVALID_PASSWORD:
        print("La constraseña es incorrecta")
    elif e.pgcode ==errorcodes.CONNECTION_EXCEPTION:
        print("Error de conexión")

# Query para obtener datos relevantes
query = """
SELECT p.id_pelicula, p.titulo, p.genero, dp.cali_imdb,dp.argum_imbd
FROM peliculas p
JOIN detalles_pelicula dp ON p.id_pelicula = dp.id_pelicula
LEFT JOIN actores a ON p.id_pelicula = a.id_pelicula
"""
peliculas = pd.read_sql_query(query, conexionpeliculas)


conexionpeliculas.close()

In [28]:
peliculas

Unnamed: 0,id_pelicula,titulo,genero,cali_imdb,argum_imbd
0,tt0110660,Nian nian you jin ri,Comedy,6.8,A married man and a married woman have once-a-...
1,tt0110363,Little Big League,Comedy,6.1,Desconocido
2,tt0110397,"Lovers, Lovers",Comedy,5.2,A sexy comedy about two young couples and thei...
3,tt0111045,Rám csaj még nem volt ilyen hatással,Comedy,7.2,"Miklos, a magazine photographer, has a hard ti..."
4,tt0111653,Wagons East!,Comedy,4.8,Desconocido
...,...,...,...,...,...
11225,tt14142640,Bannerghatta,Mystery,5.2,"Storyline One night Ashiq, an acting driver co..."
11226,tt18688348,Maksym Osa,Mystery,6.0,Ukraine - 1636. Someone has attacked a battali...
11227,tt27241513,Khitrovka. The Sign of Four,Mystery,5.0,"Moscow, 1902. The famous director Konstantin S..."
11228,tt27262238,Magna Lacerta,Mystery,,Two students produce a documentary about the d...


In [30]:
# Transformar el texto usando TF-IDF
vectorizer = TfidfVectorizer(stop_words='english')
matriz_tfidf = vectorizer.fit_transform(peliculas['argum_imbd'])

# Calcular similitud coseno
similitud_coseno = cosine_similarity(matriz_tfidf, matriz_tfidf)

In [31]:
# Función de recomendación basada en similitud
def recomendar_pelicula(titulo, num_recomendaciones=5):
    if titulo not in peliculas['titulo'].values:
        return "La película no existe en la base de datos."
    
    indice_pelicula = peliculas[peliculas['titulo'] == titulo].index[0]
    puntajes_similares = list(enumerate(similitud_coseno[indice_pelicula]))
    puntajes_similares = sorted(puntajes_similares, key=lambda x: x[1], reverse=True)
    puntajes_similares = puntajes_similares[1:num_recomendaciones + 1]
    
    recomendaciones = [peliculas.iloc[i[0]]['titulo'] for i in puntajes_similares]
    return recomendaciones

# Ejemplo de recomendación
titulo_pelicula ="Goodfellas"
print(f"Películas recomendadas basadas en '{titulo_pelicula}':")
print(recomendar_pelicula(titulo_pelicula))


Películas recomendadas basadas en 'Goodfellas':
['Wagons East!', 'Nine Months', 'The Perez Family', 'La lengua asesina', 'Faisons un rêve']
