In [1]:
import requests
import pandas as pd
import numpy as np
import seaborn as sns
from bs4 import BeautifulSoup

try:
    import surprise
except ModuleNotFoundError:
    !pip install scikit-surprise
    import surprise
try:
    import scipy as sp
except ModuleNotFoundError:
    !pip install scipy
    import scipy as sp

from sklearn.metrics.pairwise import cosine_similarity



In [50]:
class Procesos:
    def __init__(self):
        self.cargaDocumentos()
    def cargaDocumentos(self):
        self.df_links = pd.read_csv('csv/links.csv')
        self.df_links = self.df_links.dropna()
        self.df_movies = pd.read_csv('csv/movies.csv')
        self.df_movies = self.df_movies.dropna()
        self.df_ratings = pd.read_csv('csv/ratings.csv')
        self.df_ratings = self.df_ratings.dropna()
        self.df_tags = pd.read_csv('csv/tags.csv')
        self.df_tags = self.df_tags.dropna()
        self.df_movies_ratings = self.df_ratings.merge(self.df_movies)[['userId','movieId','title', 'rating','genres']]
        self.ratings_table = self.df_movies_ratings.pivot_table(index='userId', columns='title', values='rating')
        #para cambiar los NAN por 0:
        self.ratings_table.fillna(0, inplace=True)
    def recomedacionPorValoracionOtrosUsuarios(self, title, n_similares):
        sparse_rating = sp.sparse.csr_matrix(self.ratings_table)
        similitud_usuarios = cosine_similarity(sparse_rating)
        #se hace con la transpuesta de la matriz creada anteriormente
        similitud_movies = cosine_similarity(sparse_rating.T)
        df_similitud_usuarios = pd.DataFrame(similitud_usuarios, index=self.ratings_table.index, columns=self.ratings_table.index)
        df_similitud_movies = pd.DataFrame(similitud_movies, index=self.ratings_table.columns, columns=self.ratings_table.columns)
        #le sumamos uno a n_similares porque la primera siempre es la propia pelicula y nos la saltamos
        n_similares+=1
        contador = 1
        print('Peliculas similares a ' + title + ':')
        print()
        for movie in df_similitud_movies[title].sort_values(ascending=False).index[1:n_similares]:
            print(str(contador) + ' - ' + str(movie))
            contador+=1
    def recomedacionPorGenero(self, title, n_similares):
        genres = list(set([genre for genres in self.df_movies["genres"].str.split("|") for genre in genres]))
        genre_matrix = []
        for index, row in self.df_movies.iterrows():
            genre_list = row["genres"].split("|")
            genre_vector = [1 if genre in genre_list else 0 for genre in genres]
            genre_matrix.append(genre_vector)
        genre_matrix = pd.DataFrame(genre_matrix, columns=genres)
        contador = 1
        selected_movie = self.df_movies[self.df_movies["title"] == title]
        selected_movie_index = selected_movie.index[0]
        #sacamos las similitudes de los generos
        similarities = cosine_similarity(genre_matrix[selected_movie_index:selected_movie_index+1], genre_matrix).flatten()
        #las metemos en una tupla y las ordenamos de mayor a menor 
        movie_list = [(index, similarity) for index, similarity in enumerate(similarities)]
        movie_list.sort(key=lambda x: x[1], reverse=True)

        print('Peliculas similares a ' + title + ':')
        print()
        #la bandera nos sirve para saltarnos la propia peli que buscamos
        #siempre esta a false y si nos encontramos la peli que estamos buscando la activamos a True
        #si esta en True al finalizar el bucle significa que ha saltado el titulo que buscabamos para no repetirse a si mismo 
        #y por lo tanto hay que añadir uno mas para llegar al numero deseado por el usuario
        bandera=False
        for movie in movie_list[0:n_similares]:
            if(title != self.df_movies.iloc[movie[0]]["title"]):
                print(str(contador)+' - ' +self.df_movies.iloc[movie[0]]["title"])
                contador+=1
            else:
                bandera=True
        if(bandera):
            #print('bandera')
            mov=movie_list[n_similares][0]
            print(str(contador)+' - ' +self.df_movies.iloc[mov]["title"])
    def predecirRatingDeUserPorSusGenero(self, nombrePelicula, user_id):
        yaVotado = prueba.df_movies_ratings[(prueba.df_movies_ratings['title']==nombrePelicula) & (prueba.df_movies_ratings['userId']==user_id)]["rating"].unique()
        if(len(yaVotado)!=0):
            prediction = yaVotado[0]
            print()
            print("La prediccion para " + nombrePelicula+" es: " + str(prediction))
            #return prediction
        else:
            # obtener géneros de la película a predecir
            movie_genres = prueba.df_movies_ratings[prueba.df_movies_ratings['title']==nombrePelicula]["genres"].unique()
            generosPeli = movie_genres[0].split("|")
            # filtrar valoraciones del usuario para peliculas con generos en comun
            user_ratings_ID = prueba.df_movies_ratings[prueba.df_movies_ratings['userId'] == user_id]
            user_ratings = user_ratings_ID.loc[user_ratings_ID['genres'].str.split('|').apply(lambda x: any(i in x for i in generosPeli))]
            # calcular la media de valoraciones del usuario para las peliculas con generos en comun
            if user_ratings.empty:
                print("La lista es empty")
                #return None
            else:
                #prediction = user_ratings_ID['rating'].mean()
                prediction = format(user_ratings_ID['rating'].mean(), '.3f')
                print()
                print("La prediccion para " + nombrePelicula + " es: " + str(prediction))
                #return prediction
            

p = Procesos()
p.recomedacionPorValoracionOtrosUsuarios("Star Wars: Episode IV - A New Hope (1977)",10)
p.recomedacionPorGenero("Star Wars: Episode IV - A New Hope (1977)",10)
p.predecirRatingDeUserPorSusGenero("Star Wars: Episode IV - A New Hope (1977)",1)

Peliculas similares a Star Wars: Episode IV - A New Hope (1977):

1 - Star Wars: Episode V - The Empire Strikes Back (1980)
2 - Star Wars: Episode VI - Return of the Jedi (1983)
3 - Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
4 - Matrix, The (1999)
5 - Indiana Jones and the Last Crusade (1989)
6 - Back to the Future (1985)
7 - Star Wars: Episode I - The Phantom Menace (1999)
8 - Terminator, The (1984)
9 - Godfather, The (1972)
10 - Saving Private Ryan (1998)
Peliculas similares a Star Wars: Episode IV - A New Hope (1977):

1 - Waterworld (1995)
2 - Stargate (1994)
3 - Demolition Man (1993)
4 - Star Wars: Episode V - The Empire Strikes Back (1980)
5 - Star Wars: Episode VI - Return of the Jedi (1983)
6 - Star Trek III: The Search for Spock (1984)
7 - Lost in Space (1998)
8 - Rocketeer, The (1991)
9 - Tron (1982)
10 - Six-String Samurai (1998)

La prediccion para Star Wars: Episode IV - A New Hope (1977) es: 5.0


In [3]:
prueba = Procesos()
prueba.df_movies_ratings

Unnamed: 0,userId,movieId,title,rating,genres
0,1,1,Toy Story (1995),4.0,Adventure|Animation|Children|Comedy|Fantasy
1,5,1,Toy Story (1995),4.0,Adventure|Animation|Children|Comedy|Fantasy
2,7,1,Toy Story (1995),4.5,Adventure|Animation|Children|Comedy|Fantasy
3,15,1,Toy Story (1995),2.5,Adventure|Animation|Children|Comedy|Fantasy
4,17,1,Toy Story (1995),4.5,Adventure|Animation|Children|Comedy|Fantasy
...,...,...,...,...,...
100831,610,160341,Bloodmoon (1997),2.5,Action|Thriller
100832,610,160527,Sympathy for the Underdog (1971),4.5,Action|Crime|Drama
100833,610,160836,Hazard (2005),3.0,Action|Drama|Thriller
100834,610,163937,Blair Witch (2016),3.5,Horror|Thriller


In [4]:
user_ratings = prueba.df_movies_ratings[(prueba.df_movies_ratings['userId'] == 1) & (prueba.df_movies_ratings['title'] == "Heat (1995)")]
user_ratings

Unnamed: 0,userId,movieId,title,rating,genres
267,1,6,Heat (1995),4.0,Action|Crime|Thriller


In [5]:
prueba.df_movies_ratings[prueba.df_movies_ratings['userId']==1]
prueba.df_movies_ratings

Unnamed: 0,userId,movieId,title,rating,genres
0,1,1,Toy Story (1995),4.0,Adventure|Animation|Children|Comedy|Fantasy
1,5,1,Toy Story (1995),4.0,Adventure|Animation|Children|Comedy|Fantasy
2,7,1,Toy Story (1995),4.5,Adventure|Animation|Children|Comedy|Fantasy
3,15,1,Toy Story (1995),2.5,Adventure|Animation|Children|Comedy|Fantasy
4,17,1,Toy Story (1995),4.5,Adventure|Animation|Children|Comedy|Fantasy
...,...,...,...,...,...
100831,610,160341,Bloodmoon (1997),2.5,Action|Thriller
100832,610,160527,Sympathy for the Underdog (1971),4.5,Action|Crime|Drama
100833,610,160836,Hazard (2005),3.0,Action|Drama|Thriller
100834,610,163937,Blair Witch (2016),3.5,Horror|Thriller


In [6]:
prueba.df_movies_ratings[prueba.df_movies_ratings["title"]=="Toy Story (1995)"]

Unnamed: 0,userId,movieId,title,rating,genres
0,1,1,Toy Story (1995),4.0,Adventure|Animation|Children|Comedy|Fantasy
1,5,1,Toy Story (1995),4.0,Adventure|Animation|Children|Comedy|Fantasy
2,7,1,Toy Story (1995),4.5,Adventure|Animation|Children|Comedy|Fantasy
3,15,1,Toy Story (1995),2.5,Adventure|Animation|Children|Comedy|Fantasy
4,17,1,Toy Story (1995),4.5,Adventure|Animation|Children|Comedy|Fantasy
...,...,...,...,...,...
210,606,1,Toy Story (1995),2.5,Adventure|Animation|Children|Comedy|Fantasy
211,607,1,Toy Story (1995),4.0,Adventure|Animation|Children|Comedy|Fantasy
212,608,1,Toy Story (1995),2.5,Adventure|Animation|Children|Comedy|Fantasy
213,609,1,Toy Story (1995),3.0,Adventure|Animation|Children|Comedy|Fantasy


In [43]:

        
print(predecirRatingDeUserPorSusGenero("aa",1))

IndexError: index 0 is out of bounds for axis 0 with size 0