In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

from sklearn.metrics.pairwise import linear_kernel
from sklearn.metrics import pairwise_distances
from sklearn.model_selection import train_test_split, LeaveOneOut
from surprise.model_selection import train_test_split as tts
from scipy.spatial.distance import cosine, correlation
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from scipy import sparse
import pickle


In [2]:
# List the data directory so the available input files are visible in the cell output.
os.listdir('../Data')

['hitrates.csv',
 'ml-latest-small',
 'movies_processed.csv',
 'movie_diversity.csv',
 'popularity_ratings.csv',
 'popularity_table.csv',
 'ratings_processed.csv',
 'user_diversity.csv']

In [3]:
# File names of the four pickled recommender models kept under ./Modeling/models.
pickles = ['SVD_one_to_five.sav', 'SVD_negone_to_one.sav',
           'SVD++_one_to_five.sav', 'SVD++_negone_to_one.sav']

# Load the first pickled model. The context manager closes the handle even if
# unpickling fails.
# NOTE: pickle.load can execute arbitrary code; only load model files produced by
# this project.
with open('./Modeling/models/SVD_one_to_five.sav', 'rb') as file:
    SVDone_to_five = pickle.load(file)

In [4]:
# Load the remaining pickled models. Each handle is closed by its context manager
# even if unpickling fails.
# NOTE: pickle.load can execute arbitrary code; only load model files produced by
# this project.
with open('./Modeling/models/SVD_negone_to_one.sav', 'rb') as file:
    SVD_negone_to_one = pickle.load(file)

with open('./Modeling/models/SVD++_one_to_five.sav', 'rb') as file:
    SVDpp_one_to_five = pickle.load(file)

with open('./Modeling/models/SVD++_negone_to_one.sav', 'rb') as file:
    SVDpp_negone_to_one = pickle.load(file)

# Models in the order rec_sys cycles through them.
algos = [SVDone_to_five, SVD_negone_to_one, SVDpp_one_to_five, SVDpp_negone_to_one]

In [5]:
# Deep-learning models: load two saved Keras models from ./Modeling/models.
# Location of the first saved model.
path = os.path.join('./Modeling/models', "adaOhOneModel")

# Load it back; can also be done in TensorFlow Serving.
adaModel = tf.keras.models.load_model(path)

# Second saved model; `path` is reused and ends up pointing at this one.
path = os.path.join('./Modeling/models', "adamOhOhOneModel")
adamModel = tf.keras.models.load_model(path)



In [6]:
# Load the popularity table and popularity ratings; the first CSV column becomes the index.
popularityTable = pd.read_csv('../Data/popularity_table.csv', index_col=[0])
popRatings = pd.read_csv('../Data/popularity_ratings.csv', index_col=[0])

In [16]:
# Load the processed movie and rating tables; the first CSV column becomes the index.
# NOTE(review): the directory listing shown earlier in this notebook contains
# 'ratings_processed.csv' but not 'ratings_processed1.csv' — confirm this file exists.
movies = pd.read_csv('../Data/movies_processed.csv', index_col=[0])
ratings = pd.read_csv('../Data/ratings_processed1.csv', index_col=[0])

In [17]:
# Keep only movies whose movieId appears in the ratings table.
movies = movies[movies.movieId.isin(ratings.movieId)]

In [18]:
# Movies whose `year` is 2018.
recents = movies[movies.year == 2018]

In [19]:
# Movie-to-movie similarity matrix, relabelled so rows and columns are movieIds.
# NOTE(review): labels are assigned by position, so this assumes the CSV rows/columns
# are in the same order (and number) as `movies.movieId` — TODO confirm.
movie_based_similarity = pd.read_csv('../Data/movie_cosine_similarity.csv', index_col=[0])
movie_based_similarity.index = movies.movieId
movie_based_similarity.columns = movies.movieId
# User-to-user similarity matrix, relabelled with userIds in order of first appearance
# in `ratings`.
# NOTE(review): same positional-alignment assumption as above — TODO confirm.
user_based_similarity = pd.read_csv('../Data/user_cosine_similarity.csv', index_col=[0])
user_based_similarity.index = ratings.userId.unique()
user_based_similarity.columns = ratings.userId.unique()

In [20]:
# Similarity table read with its first CSV column as the index; cos_sim_preds looks
# rows up in it by movieId.
similarities = pd.read_csv('../Data/cos_sim_id.csv', index_col=[0])

In [21]:
def cos_sim_preds(usrId, limit:int=None, similarities=similarities):
    """Rank entries of `similarities` against the user's most recently rated movie.

    Parameters
    ----------
    usrId : user id passed to `last_movie` to find the anchor movie.
    limit : optional number of leading entries to keep; None keeps them all.
    similarities : table whose rows are looked up by movieId.

    Returns
    -------
    pandas.Series of the anchor movie's row, sorted from highest to lowest value.
    """
    anchor_id = last_movie(usrId).movieId[0]
    ranked = similarities.loc[anchor_id].sort_values(ascending=False)
    return ranked if limit is None else ranked[:limit]

def rec_movie(movie_id, moviedf=movies, movie_based_similarity=movie_based_similarity):
    """Return the rows of `moviedf` for the movies most similar to `movie_id`.

    Parameters
    ----------
    movie_id : column label to look up in `movie_based_similarity`.
    moviedf : movie table with a 'movieId' column.
    movie_based_similarity : square similarity table labelled by movieId.

    Returns
    -------
    pandas.DataFrame with the columns of `moviedf`, ordered from most to least
    similar, with a fresh 0..n-1 index. Column dtypes of `moviedf` are preserved.
    """
    # Top 11 by similarity; presumably the movie itself ranks first, leaving 10
    # genuine neighbours — TODO confirm.
    top_ids = movie_based_similarity[movie_id].sort_values(ascending=False).index[:11]

    # Collect the matches and concatenate once, instead of growing a frame row by row
    # with DataFrame.append (quadratic, and removed in pandas 2.0).
    matches = [moviedf[moviedf['movieId'] == mov] for mov in top_ids]
    matches = [match for match in matches if not match.empty]
    if not matches:
        return pd.DataFrame(columns=moviedf.columns)
    return pd.concat(matches, ignore_index=True)

def rec_user(user_id, ratingdf=ratings, user_based_similarity=user_based_similarity):
    """Return the ratings made by the users most similar to `user_id`.

    Parameters
    ----------
    user_id : column label to look up in `user_based_similarity`.
    ratingdf : rating table with a 'userId' column.
    user_based_similarity : square similarity table labelled by userId.

    Returns
    -------
    pandas.DataFrame with the columns of `ratingdf`: each similar user's ratings in
    turn, most similar user first, with a fresh 0..n-1 index. Column dtypes of
    `ratingdf` are preserved.
    """
    # Top 101 by similarity; presumably the user themself ranks first, leaving 100
    # genuine neighbours — TODO confirm.
    top_users = user_based_similarity[user_id].sort_values(ascending=False).index[:101]

    # Collect the matches and concatenate once, instead of growing a frame row by row
    # with DataFrame.append (quadratic, and removed in pandas 2.0).
    matches = [ratingdf[ratingdf['userId'] == u] for u in top_users]
    matches = [match for match in matches if not match.empty]
    if not matches:
        return pd.DataFrame(columns=ratingdf.columns)
    return pd.concat(matches, ignore_index=True)

def movieCF_preds(usrId):
    """Recommend movies similar to the one `usrId` rated most recently.

    Looks up the user's latest movie with `last_movie` and returns what `rec_movie`
    produces for that movieId.
    """
    latest = last_movie(usrId)
    return rec_movie(latest.movieId[0])

def algo_preds(algo, usrId, limit:int=None, movies=movies):
    """Score every movie for a user with `algo` and return them best-first.

    Parameters
    ----------
    algo : model exposing `predict(user_id, item_id)`; the result must have 'iid',
        'est', 'r_ui' and 'details' fields.
    usrId : user id handed to `algo.predict`.
    limit : optional number of top-scored rows to keep before titles are attached.
    movies : movie table with 'movieId' and 'title' columns.

    Returns
    -------
    pandas.DataFrame of predictions joined with movie titles, without the 'r_ui' and
    'details' columns, plus a 'rank' column (1 = highest estimate).
    """
    estimates = [algo.predict(usrId, movie_id) for movie_id in movies.movieId.unique()]
    ranked = pd.DataFrame(estimates).sort_values('est', ascending=False)

    if limit is not None:
        ranked = ranked[:limit]

    titled = ranked.merge(movies[['movieId', 'title']], left_on='iid', right_on='movieId')
    titled = titled.drop(['r_ui', 'details'], axis=1)
    titled['rank'] = titled.est.rank(ascending=False)

    return titled

def dl_preds(algo, usrId, movie_df=movies):
    """Ask a deep-learning model for titles for one user and attach movieIds.

    Parameters
    ----------
    algo : callable taking a list with the user id as a string and returning
        `(scores, titles)`, where `titles` has `.numpy()` and its first row holds
        UTF-8 encoded byte strings.
    usrId : user id; converted to `str` before the call.
    movie_df : movie table with 'movieId' and 'title' columns.

    Returns
    -------
    pandas.DataFrame with 'title' and 'movieId' for every returned title that also
    appears in `movie_df`.
    """
    _scores, titles = algo([str(usrId)])  # the scores are not used
    first_row = titles.numpy()[0]
    decoded = [raw.decode('utf-8') for raw in first_row]

    title_frame = pd.DataFrame(decoded, columns=['title'])
    return title_frame.merge(movie_df[['movieId', 'title']], on='title')

In [22]:
def last_movie(usrId, movies=movies, ratings=ratings):
    """Return the user's rating with the largest 'timestamp', joined with its title.

    The result is a one-row DataFrame built from that rating row and merged with the
    'movieId'/'title' columns of `movies`. Raises IndexError if the user has no ratings.
    """
    history = ratings[ratings.userId == usrId].sort_values('timestamp')
    newest = history.iloc[-1].to_frame().T
    return newest.merge(movies[['movieId', 'title']], on='movieId')

def user_rated(usrId, movies=movies, ratings=ratings):
    """Return the movies a user has rated as a 'movieId' column indexed by title."""
    own_ratings = ratings[ratings.userId == usrId]
    titled = own_ratings.merge(movies[['movieId', 'title']], on='movieId')
    return titled[['title', 'movieId']].set_index('title')

def add_movies(usrId, movId, rating, ratingsdf=ratings):
    """Build a one-row ratings frame for a new rating.

    Parameters
    ----------
    usrId, movId, rating : values for the new rating.
    ratingsdf : existing ratings table; supplies the column names and the mean of
        its 'rating_new' column.

    Returns
    -------
    One-row pandas.DataFrame with the columns of `ratingsdf`. Raises ValueError if
    `ratingsdf` does not have exactly five columns.
    """
    # Values are matched to columns by position.
    # NOTE(review): assumes the columns are ordered user, movie, rating, timestamp,
    # rating_new — TODO confirm. The timestamp is the current local time, and
    # rating_new is filled with the existing mean rather than derived from `rating`.
    row = [usrId, movId, rating, dt.datetime.now(), ratingsdf.rating_new.mean()]

    # Build the frame from one row so each column gets its own dtype. Building one
    # column and transposing makes every column `object`, which then turns the
    # ratings columns into `object` as soon as this row is appended to them.
    return pd.DataFrame([row], columns=ratingsdf.columns)

def search_movieId(moviedf=movies.dropna()):
    """Prompt for a phrase and return the movies whose title contains it.

    The match is case-insensitive and literal, so characters such as '(' or '+' in
    the phrase are ordinary text rather than regular-expression syntax. `moviedf`
    is not modified and titles are returned exactly as stored.

    Parameters
    ----------
    moviedf : movie table with 'title', 'year' and 'movieId' columns.

    Returns
    -------
    pandas.DataFrame with columns ['title', 'year', 'movieId']; it is empty when
    nothing matches or when the user types 'exit'.
    """
    columns = ['title', 'year', 'movieId']
    string = input("Enter a search phrase for title.\n")

    if string == 'exit':
        # Return an empty result instead of falling through to an unbound variable.
        return moviedf.iloc[0:0][columns]

    hits = moviedf.title.str.contains(string, case=False, regex=False)
    return moviedf.loc[hits, columns]

def find_user(idno, ratings=ratings):
    """Report whether a user has enough ratings to receive recommendations.

    Prints a status message and returns 1 when `ratings` holds at least three rows
    for `idno`, otherwise 0.
    """
    n_rated = len(ratings[ratings.userId == idno])
    if n_rated < 3:
        print('Need minimum 3 movies to start')
        return 0
    print("Starting RecSys")
    return 1


def choose_append(idno):
    """Interactively search for a movie, collect a rating and build its ratings row.

    Repeats the search / choose / rate dialogue until a valid movieId and rating are
    entered.

    Parameters
    ----------
    idno : id of the user the rating belongs to.

    Returns
    -------
    The one-row DataFrame produced by `add_movies`, or None if the user types 'exit'
    at the movieId or rating prompt.
    """
    bad_rating = 'Must input float in [.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0]'

    while True:
        options = search_movieId()
        display(options)

        choice = input("If we found the movie, type its movieId, else type 'choose again'.\n")
        if choice == 'exit':
            return None
        # Only a failed conversion means "not an integer"; any other error is a real
        # bug and must not be hidden behind this message.
        try:
            choice = int(choice)
        except ValueError:
            print('Choice must be an integer. Please try again.')
            continue

        rating = input('Adding to your ratings. Please rate now, .5 to 5.0\n')
        if rating == 'exit':
            return None
        try:
            rating = float(rating)
        except ValueError:
            print(bad_rating)
            continue
        # Enforce the advertised scale: half-star steps from 0.5 to 5.0.
        if not (0.5 <= rating <= 5.0 and (rating * 2).is_integer()):
            print(bad_rating)
            continue

        updated_ratings = add_movies(idno, choice, rating)
        display(updated_ratings)
        return updated_ratings

In [34]:
def rec_sys(ratingsdf=ratings, algolist=algos):
    """Run the interactive recommend-and-rate session.

    The session identifies the user, collects ratings until the user has at least
    three, then repeatedly shows a slate of recommendations and records one new
    rating per round. Typing 'exit' at any prompt handled here ends the session.

    Parameters
    ----------
    ratingsdf : ratings table to start from; it is not modified.
    algolist : models accepted by `algo_preds`; one is used per round, in rotation.

    Returns
    -------
    pandas.DataFrame: `ratingsdf` plus every rating entered during the session.
    """
    rated = ratingsdf
    exit_phrase = 'Thank you for trying!\n'
    n_recs = 20        # size of each recommendation slate
    min_ratings = 3    # ratings needed before recommending (same threshold as find_user)

    def ask_number(prompt, cast):
        """Prompt until `cast` accepts the reply; return None if the user types 'exit'."""
        while True:
            reply = input(prompt)
            if reply == 'exit':
                return None
            try:
                return cast(reply)
            except ValueError:
                # A typo must not crash the session and lose the ratings so far.
                print(f'Could not read {reply!r} as a number. Please try again.')

    def ID_setup(ratdf=rated):
        """Ask who the user is; return an integer user id, or None on 'exit'."""
        value = input("Hello! Are you a 'known' user or 'new' user?:\n")
        if value == 'exit':
            print(exit_phrase)
            return None

        if value == 'known':
            idno = ask_number("Good to see you! Let's keep recommending! Please enter your ID.\n", int)
            if idno is None:
                print(exit_phrase)
                return None
            print(f'You entered {idno}. We will use this until exit. If not you, please exit.')
            return idno

        value = input("Would you like to set a 'custom' id # or 'no' custom id?\n")
        if value == 'exit':
            print(exit_phrase)
            return None
        if value == 'custom':
            # Convert to int so the id compares equal to the integer userId column.
            idno = ask_number('Enter custom id as an integer greater than 650', int)
            if idno is None:
                print(exit_phrase)
                return None
        else:
            idno = ratdf.userId.max() + 1
        print(f'Your ID is: {idno}')
        print("Let's start recommending!")
        return idno

    def serve_recs(idno, counter=None, algolist=algolist, ratedf=rated, dlmodel=adaModel, movies=movies):
        """Build one slate: SVD-family picks, deep-learning picks, then recent movies."""
        # Wrap the round counter so it always indexes a model that exists.
        i = 0 if counter is None else counter % len(algolist)
        seen = ratedf[ratedf.userId == idno].movieId

        SVDpreds = algo_preds(algolist[i], idno, movies=movies)[['title', 'movieId']]
        SVDpreds = SVDpreds[~SVDpreds.movieId.isin(seen)]

        # Leave at least one slot for a recent movie.
        dlpreds = dl_preds(dlmodel, idno, movie_df=movies)
        dlpreds = dlpreds[~dlpreds.movieId.isin(seen)].iloc[:n_recs - 1]

        # Truncate the SVD list first, then drop only the deep-learning picks that
        # repeat it. Filtering against the full SVD list (which covers every unrated
        # movie) would remove all deep-learning picks.
        SVDpreds = SVDpreds.iloc[:n_recs - 1 - len(dlpreds)]
        dlpreds = dlpreds[~dlpreds.movieId.isin(SVDpreds.movieId)]

        parts = [part for part in (SVDpreds, dlpreds) if not part.empty]
        output = pd.concat(parts).drop_duplicates() if parts else SVDpreds

        # Fill the remaining slots with random movies from the latest year, skipping
        # anything already rated or already on the slate.
        n_missing = n_recs - len(output)
        if n_missing > 0:
            recent = movies[movies.year == movies.year.max()][['title', 'movieId']]
            recent = recent[~recent.movieId.isin(seen) & ~recent.movieId.isin(output.movieId)]
            output = pd.concat([output, recent.sample(min(n_missing, len(recent)))])

        return output

    print("Welcome to this recommendation system. Type 'exit' to leave at any time.\n")
    idno = ID_setup(rated)
    if idno is None:
        return rated

    # Check the ratings passed in, so ratings from an earlier session count.
    switch = find_user(idno, rated)

    while switch == 0:
        update = choose_append(idno)
        if update is None:
            return rated
        # NOTE: movieIds are not validated and duplicate ratings are not merged.
        rated = pd.concat([rated, update])
        if len(rated[rated.userId == idno]) >= min_ratings:
            switch = 1
            print('Thank you for rating. Preparing to recommend!')

    counter = 0
    while True:
        recs = serve_recs(idno, counter, ratedf=rated)
        display(recs.sample(frac=1))  # shuffle so the source of each pick is not obvious
        print('Above are your 20 ratings. Choose a movie to rate to proceed')

        newRated = ask_number('Input movieId you wish to rate. Type "exit" to leave.\n', int)
        if newRated is None:
            print('Thank you for trying!')
            return rated

        rat = ask_number('Input rating from .5 to 5.0. Type "exit" to leave.\n', float)
        if rat is None:
            print('Thank you for trying!')
            return rated

        rated = pd.concat([rated, add_movies(idno, newRated, rat)])
        counter = (counter + 1) % len(algolist)





In [36]:
# Start an interactive session; the returned frame holds the ratings plus any added.
new_ratings = rec_sys() #Have to rate 3 movies as new user or else it crashes, which is fine.

Welcome to this recommendation system. Type 'exit' to leave at any time.

Hello! Are you a 'known' user or 'new' user?:
known
Good to see you! Let's keep recommending! Please enter your ID.
80
You entered 80. We will use this until exit. If not you, please exit.
Starting RecSys


Unnamed: 0,title,movieId
10,To Kill a Mockingbird (1962),1207
9031,Ready Player One,140956
12,Forrest Gump (1994),356
13,It's a Wonderful Life (1946),953
17,"Three Billboards Outside Ebbing, Missouri (2017)",177593
3,"Outlaw Josey Wales, The (1976)",3508
18,Apocalypse Now (1979),1208
9,Trainspotting (1996),778
1,Lawrence of Arabia (1962),1204
14,Battlestar Galactica (2003),27611


Above are your 20 ratings. Choose a movie to rate to proceed
Input movieId you wish to rate. Type "exit" to leave.
exit
Thank you for trying!


In [None]:
# Show the last rows of the returned ratings, where newly added ratings would appear.
new_ratings.tail()

In [None]:
# Run another session that starts from the ratings returned by the previous one.
new_ratings = rec_sys(new_ratings)