Algoritmo de recomendación

In [1]:
# Comienzo importando las librerías
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

In [2]:
# Load the two CSV inputs: the user/movie scores and the platform catalogue.
df_sco = pd.read_csv(filepath_or_buffer='./Datasets/score_movies.csv')

df_plat = pd.read_csv(filepath_or_buffer='./Datasets/plataformas.csv')


In [3]:
# Keep only the two catalogue columns the rest of the notebook uses.
df_plat = df_plat.loc[:, ['movieId', 'title']]

In [4]:
# Replace the index with a fresh default integer index, discarding the old one.
df_plat = df_plat.reset_index(drop=True)

In [5]:
# Preliminary steps

N_filas = 100000  # Only the first N_filas rating rows are used for training/testing

# The declared scale must cover every rating present in the data. The held-out
# predictions contain a true rating of 0.5, so the lower bound is 0.5 rather
# than 1; Surprise clips its estimates to this interval.
reader = Reader(rating_scale=(0.5, 5))

data = Dataset.load_from_df(df_sco[['userId', 'movieId', 'score']].iloc[:N_filas], reader)

# Hold out 25% of the ratings for evaluation. The seed is fixed so the split
# (and therefore the reported RMSE) is the same on every run.
trainset, testset = train_test_split(data, test_size=.25, random_state=42)

In [6]:
# Train the SVD model

# SVD initialises its parameters randomly; a fixed seed keeps the fitted model,
# its predictions and the evaluation metrics reproducible across runs.
modelo = SVD(random_state=42)

modelo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1ff0011cf50>

In [7]:
# Score every held-out (user, movie) pair with the fitted model.

predictions = modelo.test(testset, verbose=False)

In [8]:
# Inspect the second entry of the test-set predictions as a sanity check.
predictions[1]

Prediction(uid=465, iid='hs1852', r_ui=0.5, est=2.6432308106248352, details={'was_impossible': False})

In [9]:
# Ask the model for the estimated score of one hand-picked (user, movie) pair.

modelo.predict(uid=154, iid='as680')

Prediction(uid=154, iid='as680', r_ui=None, est=3.668777310550158, details={'was_impossible': False})

In [10]:
# Pick one user and list, with titles, the movies they rated highly.
usuario = 35487
rating = 4   # keep only the movies this user scored 4 or higher
df_user = (
    df_sco.loc[(df_sco['userId'] == usuario) & (df_sco['score'] >= rating)]
    .reset_index(drop=True)
    # Left join so a rated movie stays listed even if the catalogue lacks it.
    .merge(df_plat[['movieId', 'title']], on='movieId', how='left')
)
df_user

Unnamed: 0,userId,score,timestamp,movieId,title
0,35487,4.0,1996-04-07,ns7682,otherlife
1,35487,4.0,1996-04-07,ns3147,scooby-doo!: mystery incorporated
2,35487,5.0,1996-04-07,ds1017,life is ruff
3,35487,5.0,1996-04-07,ns900,illegal woman
4,35487,4.0,1996-04-07,as7715,raymond & miguel
5,35487,5.0,1996-04-07,ns2340,prem ratan dhan payo
6,35487,4.0,1996-04-07,as5262,nishabdham
7,35487,5.0,1996-04-07,ns2851,the last bomb of the second world war
8,35487,5.0,1996-04-07,ns1169,dota: dragon's blood
9,35487,4.0,1996-04-07,as8428,rifftrax: contamination


In [11]:
# Candidate pool for the recommendation: the first 4499 catalogue rows.
# NOTE(review): 4499 is hard-coded and its rationale is not visible here;
# confirm whether the whole catalogue should be scored instead.
recomendaciones_usuario = df_plat.head(4499).copy()
print(recomendaciones_usuario.shape)
recomendaciones_usuario.head()

(4499, 2)


Unnamed: 0,movieId,title
0,as1,the grand seduction
1,as2,take care good night
2,as3,secrets of deception
3,as4,pink: staying true
4,as5,monster maker


In [12]:
# Collect every rating row of the chosen user, i.e. the movies already seen.

usuario_vistas = df_sco.loc[df_sco['userId'].eq(usuario)]
print(usuario_vistas.shape)
usuario_vistas.head()

(26, 4)


Unnamed: 0,userId,score,timestamp,movieId
3412075,35487,3.0,1996-04-07,ns8098
3412076,35487,4.0,1996-04-07,ns7682
3412077,35487,4.0,1996-04-07,ns3147
3412078,35487,5.0,1996-04-07,ds1017
3412079,35487,5.0,1996-04-07,ns900


In [13]:
# Remove from the candidate pool the movies the user has already rated.

recomendaciones_usuario = recomendaciones_usuario.loc[
    ~recomendaciones_usuario['movieId'].isin(usuario_vistas['movieId'])
]

recomendaciones_usuario.shape

(4497, 2)

In [14]:
# Score every remaining candidate with the model's estimated rating.
# `.assign` builds a new frame instead of writing a column into a filtered
# slice, which avoids pandas' SettingWithCopyWarning and keeps the cell
# safe to re-run.
# NOTE(review): the model is fitted on the first N_filas rating rows only; this
# user's rows appear at index ~3.4M, so the user is presumably unseen by the
# model and these estimates may not be personalised -- confirm.

recomendaciones_usuario = recomendaciones_usuario.assign(
    Estimate_Score=recomendaciones_usuario['movieId'].apply(lambda x: modelo.predict(usuario, x).est)
)

In [15]:
# Rank the candidates from highest to lowest estimated score and show the top 10.
recomendaciones_usuario = recomendaciones_usuario.sort_values(by='Estimate_Score', ascending=False)
print(recomendaciones_usuario.iloc[:10])

     movieId                                      title  Estimate_Score
2108  as2109                           infinity chamber        3.930399
2005  as2006  jonas kaufmann - a global star in private        3.921282
4055  as4056                     motives 2: retribution        3.907603
1691  as1692                            meet your match        3.894208
2904  as2905                                   carriers        3.880700
3981  as3982                          we are still here        3.878394
393    as394                  the wild horse redemption        3.867312
2647  as2648                            disco godfather        3.864574
3187  as3188                                     ax men        3.861677
4083  as4084            animal songs by little baby bum        3.856302


### Predicción para un usuario y una película

In [16]:
def movie_recommendation(userId, movieId, umbral=3.5, model=None):
    """Decide whether a movie should be recommended to a user.

    Args:
        userId: Raw user identifier passed to the model.
        movieId: Raw movie identifier; it is converted to ``str`` before
            being passed to the model.
        umbral: Minimum estimated score (inclusive) for the movie to be
            labelled as recommended. Defaults to 3.5.
        model: Object exposing ``predict(uid, iid)`` whose result has an
            ``est`` attribute. When omitted, the notebook-level ``modelo``
            is used.

    Returns:
        A ``(label, estimate)`` tuple where ``label`` is ``"Recomendada"``
        or ``"No recomendada"`` and ``estimate`` is the predicted score.
    """
    # Fall back to the globally trained model when none is injected.
    algoritmo = modelo if model is None else model

    # Estimated rating the user would give to the movie.
    prediction = algoritmo.predict(userId, str(movieId))

    etiqueta = "Recomendada" if prediction.est >= umbral else "No recomendada"
    return etiqueta, prediction.est

In [17]:
# Try the function on one (user, movie) pair.
movie_recommendation(userId=59, movieId='ns65')

('No recomendada', 2.8444224864724568)

In [18]:
# Evaluate the model: RMSE over the held-out predictions (printed and returned).
from surprise import accuracy

accuracy.rmse(predictions, verbose=True)

RMSE: 0.9683


0.9682768769629191

In [19]:
# Hyper-parameter sweep: 3-fold cross-validated RMSE for several latent-factor counts.
from surprise.model_selection import cross_validate
import numpy as np

factores = [1,2,4,8,16,32,64,128]
rmse_test_means = []  # mean test RMSE per entry of `factores`, in the same order

for factor in factores:
    print(factor)
    model = SVD(n_factors=factor)
    cv = cross_validate(algo=model, data=data, measures=['RMSE'], cv=3, verbose=True)
    # Average the per-fold test RMSE into a single figure for this setting.
    rmse_test_means.append(np.mean(cv['test_rmse']))

1
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9459  0.9521  0.9503  0.9495  0.0026  
Fit time          0.29    0.27    0.28    0.28    0.01    
Test time         0.13    0.19    0.19    0.17    0.03    
2
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9530  0.9452  0.9513  0.9498  0.0033  
Fit time          0.29    0.27    0.28    0.28    0.01    
Test time         0.20    0.21    0.13    0.18    0.04    
4
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9446  0.9502  0.9546  0.9498  0.0041  
Fit time          0.28    0.30    0.29    0.29    0.01    
Test time         0.13    0.13    0.13    0.13    0.00    
8
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9564  0.9

## Creo una interfaz para el modelo

In [20]:
# Helper that looks up a movie's display title.
def titulo(movieId, catalogo=None):
    """Return the title-cased title of the movie identified by ``movieId``.

    Args:
        movieId: Identifier compared against the ``movieId`` column.
        catalogo: DataFrame with ``movieId`` and ``title`` columns. When
            omitted, the notebook-level ``df_plat`` is used.

    Returns:
        The ``title`` of the first matching row, converted with ``str.title()``.

    Raises:
        IndexError: If no row matches ``movieId``. The exception type is the
            one the bare ``.iloc[0]`` lookup raised, now with a message that
            names the missing id.
    """
    tabla = df_plat if catalogo is None else catalogo
    coincidencias = tabla.loc[tabla['movieId'] == movieId, 'title']
    if coincidencias.empty:
        raise IndexError(f"No existe ninguna película con movieId {movieId!r}")
    return coincidencias.iloc[0].title()

In [21]:
# Try the title lookup on one movie id.
titulo(movieId="ds568")

'A Tale Of Two Critters'

In [22]:
# Combine the recommendation verdict with the title lookup.
def title_recommendation(userId, movieId):
    """Return the recommendation verdict, estimated score and movie title.

    Calls ``movie_recommendation(userId, movieId)`` first and then
    ``titulo(movieId)``; the result is the tuple ``(verdict, score, title)``.
    """
    veredicto, estimacion = movie_recommendation(userId, movieId)
    return veredicto, estimacion, titulo(movieId)

In [23]:
# Try the combined function on one (user, movie) pair.
title_recommendation(userId=548, movieId="ds568")

('No recomendada', 3.204704745501258, 'A Tale Of Two Critters')

In [24]:
import gradio as gr

# Page title of the demo; kept as a plain string for the whole cell.
title = "PelisFrankRecomendation.SA"

with gr.Blocks(title=title) as demo:
    # `gr.Number` is the current component API; the `gr.inputs` namespace is
    # deprecated and is not used by the other components in this layout.
    nro_usuario = gr.Number(label='tu numero de usario')
    peli = gr.Textbox(label='numero de pelicula')
    recomendacion = gr.Button('matchea con tu pelicula')
    # Named distinctly so it no longer overwrites the `title` string above.
    titulo_match = gr.Textbox(label='tu match se llama')
    output = gr.Textbox(label= 'le gustas o no')
    score = gr.Textbox(label='calidad del match')
    # title_recommendation returns (verdict, score, title), mapped in that order.
    recomendacion.click(fn=title_recommendation, inputs=[nro_usuario, peli], outputs=[output, score, titulo_match])
# NOTE(review): share=True publishes a temporary public URL for this app;
# confirm that exposing it outside the local machine is intended.
demo.launch(share = True)



Running on local URL:  http://127.0.0.1:7860
Running on public URL: https://71f2422a230edfba31.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


