In [3]:
import numpy as np

In [6]:
import pandas as pd

datapath="../../../10_week/data/ml-latest-small/"#

df_movies=pd.read_csv(datapath+"movies.csv")
df_ratings=pd.read_csv(datapath+"ratings.csv")
df_mov_rat=pd.merge(df_ratings,df_movies, on="movieId", how="inner")

df_mov_rat=df_mov_rat[["userId",  "rating", "title"]]
rating_mat=pd.pivot_table(data=df_mov_rat,index="userId", columns="title", values="rating")
number_of_films_rated=(rating_mat.shape[1] - rating_mat.isna().sum(axis=1))

rating_mat30=rating_mat[number_of_films_rated>30]
ratings=((rating_mat30.T).fillna(rating_mat30.T.mean())).T



all_movies=list(ratings.columns)

# load the model

import pickle

with open('nmf_model_ratings1.pkl',mode='rb') as file:
    nmf_model=pickle.load(file)

In [8]:
nmf_model.feature_names_in_

array(["'71 (2014)", "'Hellboy': The Seeds of Creation (2004)",
       "'Round Midnight (1986)", ..., 'xXx: State of the Union (2005)',
       '¡Three Amigos! (1986)',
       'À nous la liberté (Freedom for Us) (1931)'], dtype=object)

In [9]:
all_movies=np.array(all_movies)

In [10]:
def convert_filmnames(query, all_movies):
    newquery={}
    for filmname in query.keys():
        true_false_list=[filmname in a for a in all_movies]
        filmexist = True in true_false_list
        if not filmexist:
            print("film",filmname, "has to be removed, since it is not in the metrics")
        else:
            index=np.where(np.array(true_false_list))[0][0]
            new_filmname=all_movies[index]
            newquery[new_filmname] = query[filmname]
    return newquery

In [11]:
def recommend_nmf(query, model, k=10):
    """
    Filters and recommends the top k movies for any given input query based on a trained NMF model.
    Returns a list of k movie ids.
    """
    
    query=convert_filmnames(query, model.feature_names_in_)
    
    
    recommendations = []
    # 1. candidate generation
    # construct new_user-item dataframe given the query
    new_user_dataframe =  pd.DataFrame(data=query,
            columns=model.feature_names_in_,
            index = ['new_user'])
    new_user_dataframe_imputed = new_user_dataframe.fillna(float(new_user_dataframe.T.mean()))
    
    P_new_user_matrix = model.transform(new_user_dataframe_imputed)
    #doing the R
    Q_matrix=model.components_
    R_hat_new_user_matrix = np.dot(P_new_user_matrix, Q_matrix)
    R_hat_new_user = pd.DataFrame(data=R_hat_new_user_matrix,
                         columns=model.feature_names_in_,
                         index = ['new_user'])
    # 3. ranking
    R_hat_new_user.transpose().sort_values(by=['new_user'], ascending=False)
    # filter out movies allready seen by the user
    R_hat_new_user.transpose().loc[list(query.keys()),:] = 0
    # return the top-k highest rated movie ids or titles
    recommendations= R_hat_new_user.transpose().sort_values(by=['new_user'],ascending=False).head(k)
    
    return recommendations

In [12]:
def get_random_query(n, all_movies_X, seed=123):
    query={}
    rng = np.random.default_rng(123)
    for i in range(n):
        rng.shuffle(all_movies_X)
        movie=all_movies_X[0]
        all_movies_X=all_movies_X[1:]
        rand_rating=rng.integers(1,6)    
        query[movie]=rand_rating
        
    return query

In [13]:
 get_random_query(1, list(nmf_model.feature_names_in_), seed=123)

{'Denise Calls Up (1995)': 5}

In [14]:
query_action={"Matrix": 5, "Star Wars": 5, "Independence Day": 5 , "Gladiator":5,  "Batman Begins": 5, "Avatar": 5, "Kill Bill": 5, "Iron Man": 4, "Jurassic Park": 5, "Titanic": 1, "Amelie": 2, "Pretty Woman": 1 }

In [15]:
recommend_nmf(query_action, nmf_model, k=10)

Unnamed: 0,new_user
"Silence of the Lambs, The (1991)",4.459075
Star Wars: Episode IV - A New Hope (1977),4.345834
Seven (a.k.a. Se7en) (1995),4.343012
"Beautiful Mind, A (2001)",4.279605
Star Wars: Episode V - The Empire Strikes Back (1980),4.274368
Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981),4.265091
Life Is Beautiful (La Vita è bella) (1997),4.24808
Titanic (1997),4.237547
"Lord of the Rings: The Fellowship of the Ring, The (2001)",4.232179
"Mask, The (1994)",4.227738


In [16]:
query_romance={"Matrix": 1, "Star Wars": 1, "Independence Day": 2 , "Gladiator":1,  "Batman Begins": 2, "Avatar": 1, "Kill Bill": 1, "Iron Man": 2, "Jurassic Park": 1, "Titanic": 5, "Amelie": 5, "Pretty Woman": 5 }

In [17]:
recommend_nmf(query_romance, nmf_model, k=10)

Unnamed: 0,new_user
Titanic (1997),2.467602
Seven (a.k.a. Se7en) (1995),2.439546
Pulp Fiction (1994),2.434945
"Silence of the Lambs, The (1991)",2.431087
Spirited Away (Sen to Chihiro no kamikakushi) (2001),2.425435
Memento (2000),2.409878
Requiem for a Dream (2000),2.409423
"Lord of the Rings: The Fellowship of the Ring, The (2001)",2.403493
Life Is Beautiful (La Vita è bella) (1997),2.403125
Amadeus (1984),2.398961
