In [1]:
# Importamos las librerias pertinentes

import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split

In [2]:
# Load the two CSV inputs: the movie catalogue and the user ratings.

platforms = pd.read_csv(filepath_or_buffer='../Código/plataformas.csv')
scores = pd.read_csv(filepath_or_buffer='../Código/score_movies.csv')

In [4]:
# Preview the first rows of the ratings table.
scores.head()

Unnamed: 0,userId,score,movieId,date
0,1,1.0,as680,2015-03-09
1,1,4.5,ns2186,2015-03-09
2,1,5.0,hs2381,2015-03-09
3,1,5.0,ns3663,2015-03-09
4,1,5.0,as9500,2015-03-09


Comencemos con nuestro sistema de ML

In [6]:
# Keep only the identifier and title columns. The re-indexed frame is assigned
# back to `platforms` so the fresh 0..n-1 index is actually stored, then the
# frame is shown as the cell output.
platforms = platforms[['movieId','title']].reset_index(drop=True)
platforms

Unnamed: 0,movieId,title
0,as1,the grand seduction
1,as2,take care good night
2,as3,secrets of deception
3,as4,pink: staying true
4,as5,monster maker
...,...,...
22993,ns8803,zodiac
22994,ns8804,zombie dumb
22995,ns8805,zombieland
22996,ns8806,zoom


In [7]:
# Preliminary setup

N_filas = 100000  # Cap the dataset at this many rows

# NOTE(review): ratings shown elsewhere in this notebook include half-star
# values (e.g. 2.5, 4.5). If the data also contains 0.5, the scale should be
# (0.5, 5) -- confirm against scores['score'].min().
reader = Reader(rating_scale=(1,5))

# Surprise expects the columns in (user, item, rating) order.
data = Dataset.load_from_df(scores[['userId','movieId', 'score']].iloc[:N_filas], reader)

# Hold out 20% of the ratings for evaluation. A fixed random_state makes the
# split (and therefore the reported RMSE) reproducible after a kernel restart.
trainset, testset = train_test_split(data, test_size=0.2, random_state=42)



In [8]:
# Instantiate and train the model.
# random_state pins the random initialisation used by SVD so the fitted model,
# and every estimate derived from it, is reproducible across runs.

model = SVD(random_state=42)

model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x293f4ec9130>

In [9]:
# Score every held-out rating with the fitted model.

predictions = model.test(testset, verbose=False)

In [10]:
# Inspect a single entry of the test-set predictions.
predictions[1]

Prediction(uid=404, iid='ns5494', r_ui=3.0, est=3.2466366092637102, details={'was_impossible': False})

In [11]:
# Spot-check: estimated rating for one arbitrary (user, movie) pair.

model.predict(154,'as1')

Prediction(uid=154, iid='as1', r_ui=None, est=3.5467410634018255, details={'was_impossible': False})

In [12]:
# Pick one user and list the movies they rated highly, with titles attached.

usuario = 778
rating = 4  # Minimum score for a movie to count as liked (score >= rating)

user = (
    scores.loc[scores['userId'].eq(usuario) & scores['score'].ge(rating)]
    .reset_index(drop=True)
    .merge(platforms[['movieId', 'title']], on='movieId', how='left')
)
user

Unnamed: 0,userId,score,movieId,date,title
0,778,4.5,as4640,2009-01-11,chef
1,778,4.0,as9023,2009-01-10,couple of mirrors
2,778,4.0,as1066,2009-01-11,secrets of the dead: the lost gardens of babylon
3,778,4.0,hs193,2009-01-10,the goldbergs
4,778,4.5,ns8036,2017-05-01,skyline
...,...,...,...,...,...
355,778,4.0,as7679,2017-04-30,stories we tell
356,778,4.5,ns4185,2017-04-30,trigger warning with killer mike
357,778,4.0,ns8616,2017-04-15,traitor
358,778,4.0,ds131,2017-04-30,fantastic four


In [13]:
# Candidate pool: an independent copy of the first 4499 catalogue rows.
# NOTE(review): 4499 is a hard-coded cut-off; the rows previewed below all carry
# 'as' ids, so other platforms may be excluded from the candidates -- confirm.
recomendaciones_usuario = platforms.head(4499).copy()
print(recomendaciones_usuario.shape)
recomendaciones_usuario.head(5)

(4499, 2)


Unnamed: 0,movieId,title
0,as1,the grand seduction
1,as2,take care good night
2,as3,secrets of deception
3,as4,pink: staying true
4,as5,monster maker


In [14]:
# Collect every rating given by this user, i.e. the movies already seen.

usuario_vistas = scores.loc[scores['userId'].eq(usuario)]
print(usuario_vistas.shape)
usuario_vistas.head(5)

(718, 4)


Unnamed: 0,userId,score,movieId,date
75525,778,3.0,ns1536,2009-01-10
75526,778,2.5,ns290,2009-01-11
75527,778,4.5,as4640,2009-01-11
75528,778,2.0,as8431,2009-01-11
75529,778,4.0,as9023,2009-01-10


In [15]:
# Remove from the candidate pool the movies the user has already rated.

recomendaciones_usuario = recomendaciones_usuario.loc[
    lambda candidatas: ~candidatas['movieId'].isin(usuario_vistas['movieId'])
]

recomendaciones_usuario.shape

(4373, 2)

In [16]:

# Estimate, for every remaining candidate, the rating this user would give it.

recomendaciones_usuario['Estimate_Score'] = [
    model.predict(usuario, movie_id).est
    for movie_id in recomendaciones_usuario['movieId']
]

In [17]:
# Rank the candidates from highest to lowest estimated score and show the top 10.
recomendaciones_usuario = recomendaciones_usuario.sort_values(by='Estimate_Score', ascending=False)
print(recomendaciones_usuario.iloc[:10])

     movieId                                              title  \
4354  as4355                                yuvarathnaa (hindi)   
2737  as2738                                d.l. hughley: reset   
1853  as1854                                 lincoln@gettysburg   
1737  as1738  marilyn hotchkiss' ballroom dancing & charm sc...   
407    as408        the unauthorized beverly hills, 90210 story   
1126  as1127                                       rubbadubbers   
1607  as1608  morphle - fairytale adventures & more cartoons...   
4493  as4494                                do you wanna dance?   
2626  as2627                                     donovan's reef   
2403  as2404                   galapagos: realm of giant sharks   

      Estimate_Score  
4354        4.162316  
2737        4.088522  
1853        4.058036  
1737        4.048667  
407         4.043884  
1126        4.027233  
1607        4.024221  
4493        4.023342  
2626        4.022095  
2403        4.016226  


In [18]:
def movie_recommendation(userId, movieId, threshold=3.5):
    """Decide whether a movie should be recommended to a user.

    Asks the notebook-level ``model`` for the rating it estimates the user
    would give the movie and compares that estimate against ``threshold``.

    Args:
        userId: Raw user id, passed to ``model.predict`` unchanged.
        movieId: Raw movie id; converted with ``str()`` before the prediction.
        threshold: Minimum estimated rating required to recommend the movie.
            Defaults to 3.5.

    Returns:
        tuple: ``("Recomendada", est)`` when ``est >= threshold``, otherwise
        ``("No recomendada", est)``, where ``est`` is the estimated rating.
    """
    estimate = model.predict(userId, str(movieId)).est
    verdict = "Recomendada" if estimate >= threshold else "No recomendada"
    return verdict, estimate

In [19]:
# Try the function on one (user, movie) pair.
movie_recommendation(59, 'ns65')

('No recomendada', 3.217838460835491)

In [20]:
# Evaluate the model: RMSE over the held-out predictions.
from surprise import accuracy

accuracy.rmse(predictions, verbose=True)

RMSE: 0.9629


0.9629127265890075

In [21]:
# Hyper-parameter search: 3-fold cross-validated RMSE for several numbers of latent factors.
from surprise.model_selection import cross_validate
import numpy as np

rmse_test_means = []
factores = [1,2,4,8,16,32,64,128]

for factor in factores:
    print(factor)
    # Use a dedicated name for each candidate. Rebinding `model` here would
    # replace the fitted model that movie_recommendation() reads from the
    # notebook namespace, silently changing every later recommendation.
    candidato = SVD(n_factors=factor)
    cv = cross_validate(candidato, data, measures=['RMSE'], cv = 3, verbose=True)
    # Keep the mean test RMSE across folds for this number of factors.
    rmse_test_means.append(np.mean(cv['test_rmse']))

1
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9536  0.9452  0.9526  0.9505  0.0037  
Fit time          1.43    0.77    0.69    0.96    0.33    
Test time         2.77    1.15    0.85    1.59    0.84    
2
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9433  0.9557  0.9475  0.9488  0.0051  
Fit time          0.78    2.72    2.68    2.06    0.91    
Test time         0.55    0.80    0.57    0.64    0.11    
4
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9475  0.9524  0.9490  0.9496  0.0021  
Fit time          1.87    0.77    1.11    1.25    0.46    
Test time         1.81    2.73    2.97    2.50    0.50    
8
Evaluating RMSE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9535  0.9

CREO UNA INTERFAZ PARA EL MODELO

In [22]:
# Helper that looks up a movie title in the catalogue
def titulo(movieId):
    """Return the title-cased title of a movie from the ``platforms`` catalogue.

    Args:
        movieId: Movie id to look up; converted with ``str()`` before the
            comparison, matching what ``movie_recommendation`` does.

    Returns:
        str: Title of the first catalogue row with that id, passed through
        ``str.title()``.

    Raises:
        IndexError: If no catalogue row has the requested id.
    """
    coincidencias = platforms.loc[platforms['movieId'] == str(movieId), 'title']
    if coincidencias.empty:
        # Same exception type as the bare positional lookup would raise, but
        # with a message that names the offending id.
        raise IndexError(f"movieId {movieId!r} not found in the platforms catalogue")
    return coincidencias.iloc[0].title()

In [23]:
# Try the title lookup on one movie id.
titulo("ds568")

'A Tale Of Two Critters'

In [24]:
# Combine both helpers into a single call
def title_recommendation(userId, movieId):
    """Return the recommendation verdict, the estimated score and the movie title.

    Calls ``movie_recommendation`` first and ``titulo`` second, and packs
    their results into one 3-tuple ``(verdict, estimated_score, title)``.
    """
    veredicto, estimacion = movie_recommendation(userId, movieId)
    return veredicto, estimacion, titulo(movieId)

In [25]:
# Try the combined helper on one (user, movie) pair.
title_recommendation(548,"ds568")

('No recomendada', 3.1785684076638416, 'A Tale Of Two Critters')

In [27]:
import gradio as gr

# Page title of the app. Kept under its own name so the string is not
# overwritten by the `title` output component created inside the layout.
app_title = "Movies Recommendation"

with gr.Blocks(title=app_title) as demo:
    # Inputs: a numeric user id and a free-text movie id (e.g. "ds568").
    # gr.Number replaces the legacy gr.inputs.Number namespace, which is
    # deprecated and no longer available in recent Gradio releases.
    nro_usuario = gr.Number(label='tu numero de usario')
    peli = gr.Textbox(label='numero de pelicula')
    recomendacion = gr.Button('Descubrí tu pelicula o no')
    # Outputs: filled from the (verdict, estimated score, title) tuple
    # returned by title_recommendation, in the order listed in `outputs`.
    title = gr.Textbox(label='tu pelicula recomendad se llama')
    output = gr.Textbox(label= 'Recomendada o no')
    score = gr.Textbox(label='calidad de la recomendación')
    recomendacion.click(fn = title_recommendation, inputs=[nro_usuario,peli], outputs=[output, score,title])
# share=True additionally requests a public share link for the local server.
demo.launch(share = True)



Running on local URL:  http://127.0.0.1:7861

Could not create share link. Please check your internet connection or our status page: https://status.gradio.app


