In [2]:
import pandas as pd
import numpy as np
import pickle
import copy
from surprise import Reader, Dataset
from surprise import SVD, NormalPredictor, BaselineOnly, KNNBasic, NMF
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
import scipy as sp 

In [3]:
# Importing data
movies_df = pd.read_csv('movies.csv',sep = ',')
ratings_df = pd.read_csv('ratings.csv')
ratings_df.drop(['timestamp'], axis=1,inplace=True)

In [13]:
movie_1 = movies_df[14930:15200]
movie_2 = movies_df[25055:25255]
movie_3 = movies_df[21100:21200]

In [45]:
n_movies=pd.concat([movie_1,movie_2,movie_3],axis=0,ignore_index=True)

In [46]:
n_ratings = pd.merge(ratings_df,new_movies['movieId'], on='movieId')

In [4]:
# We make use of an SVD model trained on a subset of the MovieLens 10k dataset.
model=pickle.load(open('SVD.pkl', 'rb'))

In [5]:
model

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2190d78cb50>

In [59]:
def prediction_item(movie_id):
    """Map a given favourite movie to users within the MovieLens dataset with the same preference.

    Parameters
    ----------
    movie_id : int
        A MovieLens Movie ID.
    ratings_df : pandas DataFrame
        DataFrame containing the ratings data.
    model : surprise.Prediction algorithm
        Trained prediction algorithm.

    Returns
    -------
    list
        List of Prediction objects for all users on the given movie.

    """
    # Load the data into a Surprise Dataset
    reader = Reader(rating_scale=(0, 5))
    load_df = Dataset.load_from_df(new_ratings, reader)
    full_trainset = load_df.build_full_trainset()

    # Make predictions for all users on the given movie
    predictions = []
    for uid in full_trainset.all_users():
        predictions.append(model.predict(iid=movie_id, uid=uid, verbose=False))
    return predictions


In [66]:
def pred_movies(movie_list):
    """Maps the given favourite movies selected within the app to corresponding
    users within the MovieLens dataset.

    Parameters
    ----------
    movie_list : list
        Three favourite movies selected by the app user.

    Returns
    -------
    list
        User-ID's of users with similar high ratings for each movie.

    """
    # Store the id of users
    id_store=[]
    # For each movie selected by a user of the app,
    # predict a corresponding user within the dataset with the highest rating
    for i in movie_list:
        predictions = prediction_item(movie_id = i)
        predictions.sort(key=lambda x: x.est, reverse=True)
        # Take the top 10 user id's from each movie with highest rankings
        for pred in predictions[:10]:
            id_store.append(pred.uid)
    # Return a list of user id's
    return id_store


In [61]:
movie_list=['Boob, The (1926)','That Awkward Moment (2014)','Interstellar (2014)']

In [73]:
def collab_model(movie_list, N=10):
    """Performs Collaborative filtering based upon a list of movies supplied
       by the app user.

    Parameters
    ----------
    movie_list : list (str)
        Favorite movies chosen by the app user.
    N : int
        Number of top recommendations to return to the user.

    Returns
    -------
    list (str)
        Titles of the top-N movie recommendations to the user.
    """

    #movie_ids = pred_movies(movie_list,ratings_df,model)
    #df_init_users = pd.merge(ratings_df, movies_df[['movieId', 'title']], on='movieId', how='inner')
    #df_init_users = df_init_users[df_init_users['userId'].isin(movie_ids)]
    indices = pd.Series(new_movies['title'])
    movie_ids = pred_movies(movie_list)
    df_init_users = new_ratings[new_ratings['userId']==movie_ids[0]]
    for i in movie_ids :
        df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
        
    
    cosine_sim = cosine_similarity(np.array(df_init_users), np.array(df_init_users))

    idx_1 = indices[indices == movie_list[0]].index[0]
    idx_2 = indices[indices == movie_list[1]].index[0]
    idx_3 = indices[indices == movie_list[2]].index[0]
    
    
   
    idx_1 = new_movies[new_movies['title'] == movie_list[0]].index[0]
    rank_1 = cosine_sim[idx_1]
    
    idx_2 = new_movies[new_movies['title'] == movie_list[1]].index[0]
    rank_2 = cosine_sim[idx_2]
    
    idx_3 = new_movies[new_movies['title'] == movie_list[2]].index[0]
    rank_3 = cosine_sim[idx_3]


    score_series_1 = pd.Series(rank_1).sort_values(ascending=False)
    score_series_2 = pd.Series(rank_2).sort_values(ascending=False)
    score_series_3 = pd.Series(rank_3).sort_values(ascending=False)
    # Appending the names of movies
    listings = score_series_1.append(score_series_2).append(score_series_3).sort_values(ascending = False)
    recommended_movies = []
    # Choose top 50
    top_50_indexes = list(listings.iloc[1:50].index)
    # Removing chosen movies
    top_indexes = np.setdiff1d(top_50_indexes,[idx_1,idx_2,idx_3])
    for i in top_indexes[:N]: 
        recommended_movies.append(list(movies_df['title'])[i])
    return recommended_movies


In [74]:
movie_list=['Boob, The (1926)','That Awkward Moment (2014)','Interstellar (2014)']

In [75]:
collab_model(movie_list, N=10)

  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_users=df_init_users.append(new_ratings[new_ratings['userId']==i])
  df_init_us

IndexError: index 566 is out of bounds for axis 0 with size 19