In [1]:
# Mount Google Drive at /content/drive (Colab-only helper module).
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd
import joblib
from sklearn.neighbors import NearestNeighbors
import numpy as np
import sqlite3
import os
import logging

# Logging configuration: write INFO-and-above records to a log file.
LOG_PATH = "/content/drive/MyDrive/bd_movies_recommendation/notebooks/script_log.log"
logging.basicConfig(
    filename=LOG_PATH,
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    # basicConfig() does nothing when the root logger already has handlers
    # (e.g. this cell is re-run, or the notebook runtime pre-installed some),
    # which would silently leave the log file unused. force=True (Python 3.8+)
    # replaces any existing root handlers with this file handler.
    force=True,
)

# File locations used by this script.
# Output: CSV file the generated recommendations are written to.
RECOMMENDATION_PATH = "/content/drive/MyDrive/bd_movies_recommendation/notebooks/recomendaciones.csv"
# Inputs: column list and model persisted as .pkl files.
COLUMNS_PATH = "/content/drive/MyDrive/bd_movies_recommendation/models/model_columns.pkl"
MODEL_PATH = "/content/drive/MyDrive/bd_movies_recommendation/models/best_knn_model.pkl"
# Input: SQLite database file.
DATA_PATH = "/content/drive/MyDrive/bd_movies_recommendation/data/db_movies.sqlite"

def load_model_and_columns():
    """Load the persisted model and its column list.

    Returns:
        A ``(model, columns)`` tuple with the objects that ``joblib.load``
        reads from ``MODEL_PATH`` and ``COLUMNS_PATH``, in that order.

    Raises:
        Exception: Any error raised while loading is logged and re-raised.
    """
    try:
        logging.info("Cargando modelo y columnas...")
        knn_model = joblib.load(MODEL_PATH)
        feature_columns = joblib.load(COLUMNS_PATH)
        column_count = len(feature_columns)
        logging.info(f"Modelo y columnas cargados correctamente. Número de columnas: {column_count}")
    except Exception as err:
        logging.error(f"Error al cargar el modelo o las columnas: {str(err)}")
        raise
    else:
        return knn_model, feature_columns

def load_data(db_path):
    """Load the ratings and movies tables from a SQLite database.

    Args:
        db_path: Path of the SQLite database file to read.

    Returns:
        A ``(df_ratings, df_movies)`` tuple of DataFrames. ``df_ratings`` has
        the columns ``userId``, ``movieId`` and ``rating``; ``df_movies`` has
        the columns ``movieId`` and ``title``.

    Raises:
        Exception: Any error raised while connecting or querying is logged
            and re-raised unchanged.
    """
    try:
        logging.info("Cargando datos desde la base de datos...")
        conn = sqlite3.connect(db_path)
        try:
            df_ratings = pd.read_sql_query("SELECT userId, movieId, rating FROM ratings", conn)
            df_movies = pd.read_sql_query("SELECT movieId, title FROM movies", conn)
        finally:
            # Close the connection even when a query fails, so a failed run
            # does not leak the database handle.
            conn.close()
        logging.info(f"Datos cargados correctamente. Ratings: {df_ratings.shape}, Movies: {df_movies.shape}")
        return df_ratings, df_movies
    except Exception as e:
        logging.error(f"Error al cargar datos: {str(e)}")
        raise

def generate_recommendations(model, columns, df_ratings, df_movies, n_neighbors=5):
    """Generate recommendations for every user present in the ratings.

    Pivots the ratings into a user x movie matrix (missing ratings become 0),
    reindexes its columns to ``columns``, calls ``model.kneighbors`` once per
    user row and maps the returned indices to movie IDs and titles.

    Args:
        model: Object exposing ``kneighbors(X, n_neighbors=...)`` that returns
            a ``(distances, indices)`` pair.
        columns: Column labels the matrix is reindexed to before querying the
            model. Labels absent from the ratings are added as all-zero
            columns and reported with a warning.
        df_ratings: DataFrame with ``userId``, ``movieId`` and ``rating``
            columns.
        df_movies: DataFrame with ``movieId`` and ``title`` columns.
        n_neighbors: Number of neighbors requested from the model for each
            user. Defaults to 5.

    Returns:
        DataFrame with the columns ``userId``, ``movieId`` and ``title``, one
        row per recommendation. Recommendations whose movie ID is not found
        in ``df_movies`` are skipped and logged as a warning.

    Raises:
        Exception: Any error is logged and re-raised unchanged (for example
            the ``ValueError`` raised by ``DataFrame.pivot`` when a
            ``(userId, movieId)`` pair appears more than once).
    """
    try:
        logging.info("Generando recomendaciones...")
        user_movie_matrix = df_ratings.pivot(index='userId', columns='movieId', values='rating').fillna(0)

        # Align the matrix columns with the expected column list.
        missing_cols = [col for col in columns if col not in user_movie_matrix.columns]
        if missing_cols:
            logging.warning(f"Faltan columnas en los datos: {missing_cols}")

        user_movie_matrix = user_movie_matrix.reindex(columns=columns, fill_value=0)

        logging.info(f"Shape de la matriz ajustada: {user_movie_matrix.shape}")

        # Build the movieId -> title lookup once instead of scanning df_movies
        # for every recommendation. The first title per ID wins, matching a
        # "first matching row" lookup.
        unique_movies = df_movies.drop_duplicates(subset='movieId', keep='first')
        title_by_movie_id = dict(
            zip(unique_movies['movieId'].tolist(), unique_movies['title'].tolist())
        )

        movie_ids = user_movie_matrix.columns
        recommendations = []

        # Convert the matrix once; rows come out in the same order as the index.
        for user_id, ratings_row in zip(user_movie_matrix.index, user_movie_matrix.to_numpy()):
            _, indices = model.kneighbors(ratings_row.reshape(1, -1), n_neighbors=n_neighbors)

            # NOTE(review): the indices returned by kneighbors are used here as
            # column positions (movie IDs). If the model reports positions of
            # the rows it was fitted on instead, this mapping is wrong --
            # TODO confirm against the training code.
            for movie_id in movie_ids[indices[0]].tolist():
                if movie_id not in title_by_movie_id:
                    logging.warning(f"Movie ID {movie_id} no encontrado en df_movies.")
                    continue
                recommendations.append((user_id, movie_id, title_by_movie_id[movie_id]))

        recommendations_df = pd.DataFrame(recommendations, columns=['userId', 'movieId', 'title'])
        logging.info("Recomendaciones generadas correctamente.")
        return recommendations_df

    except Exception as e:
        logging.error(f"Error al generar recomendaciones: {str(e)}")
        raise

def save_recommendations(recommendations_df):
    """Write the recommendations to ``RECOMMENDATION_PATH`` as a CSV file.

    Args:
        recommendations_df: DataFrame to persist; its index is not written.

    Raises:
        Exception: Any error raised while writing is logged and re-raised.
    """
    try:
        destination = RECOMMENDATION_PATH
        logging.info(f"Guardando recomendaciones en {destination}...")
        recommendations_df.to_csv(destination, index=False)
        logging.info("Recomendaciones guardadas correctamente.")
    except Exception as err:
        failure_message = f"Error al guardar recomendaciones: {str(err)}"
        logging.error(failure_message)
        raise

def main():
    """Run the whole pipeline: load inputs, build recommendations, save them.

    Any exception raised by a step is logged here and not propagated to the
    caller.
    """
    try:
        logging.info("Inicio del proceso de generación de recomendaciones.")
        knn_model, feature_columns = load_model_and_columns()
        ratings, movies = load_data(DATA_PATH)
        save_recommendations(
            generate_recommendations(knn_model, feature_columns, ratings, movies)
        )
        logging.info("Proceso completado exitosamente.")
    except Exception as err:
        failure_message = f"Error durante el proceso principal: {str(err)}"
        logging.error(failure_message)


# Run the pipeline only when this file is executed as the main module.
if __name__ == "__main__":
    main()
