## Predicting ratings

In [1]:
# importing required modules and libraries
import numpy as np
import pandas as pd
import matrix_factorization_utilities

In [2]:
# load the two CSV inputs as pandas tables
# ratings: read as-is, keeping the default integer row index
ratings = pd.read_csv("movie_ratings_data_set.csv")
# movies: keyed by movie_id so a single movie can be fetched with movies.loc[movie_id]
movies = pd.read_csv("movies.csv", index_col='movie_id')

In [3]:
# build the user x movie rating matrix from the ratings dataframe:
# one row per user_id, one column per movie_id, cells averaged when a
# (user, movie) pair appears more than once
# the string alias 'mean' is used instead of np.mean: it selects the same
# aggregation but avoids the FutureWarning newer pandas emits for NumPy callables
ratings_matrix = pd.pivot_table(ratings, index='user_id', columns='movie_id', aggfunc='mean')

In [4]:
# finding latent features of users and movies through matrix factorization
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values exposes the same underlying NumPy array on both old and new pandas
user_ratings, movies_ratings = matrix_factorization_utilities.low_rank_matrix_factorization(
    ratings_matrix.values,
    num_features=15,            # number of latent features learned per user / per movie
    regularization_amount=0.1,
)

  


         Current function value: 32.504372
         Iterations: 3000
         Function evaluations: 4545
         Gradient evaluations: 4545


In [5]:
# predicted ratings = (user features) x (movie features), as a matrix product
predicted_ratings = user_ratings @ movies_ratings

In [6]:
# label the predicted matrix with the same row/column labels as the
# rating matrix, then write it out as a CSV file
predicted_ratings_dataframe = pd.DataFrame(
    predicted_ratings,
    index=ratings_matrix.index,
    columns=ratings_matrix.columns,
)
predicted_ratings_dataframe.to_csv("predicted_ratings.csv")

## Finding similar movies based on ratings

In [7]:
# swap rows and columns of the movie feature matrix so each movie can be
# addressed as a row - makes it easier to work with
# NOTE: this rebinds movies_ratings to its own transpose, so running the cell
# a second time flips it back; run it exactly once after the factorization cell
movies_ratings = movies_ratings.T

In [8]:
# choose the movie whose neighbours we want to find
movie_id = 10
# look up that movie's row (title, genre, ...) in the movies table
movie_info = movies.loc[movie_id]
print("The model is trying to find a movie similar to:")
print("Movie title: {}".format(movie_info['title']))
print("Genre: {}".format(movie_info['genre']))

The model is trying to find a movie similar to:
Movie title: Surrounded by Zombies 1
Genre: horror, zombie fiction


In [9]:
# pull out the feature row that belongs to the selected movie
# NOTE(review): using movie_id - 1 as the row position presumes movie ids are
# 1..N with no gaps, in the same order as the matrix rows - confirm against the data
selected_movie_features = movies_ratings[movie_id - 1]
print("The featrues for this movie based on model are:", selected_movie_features, sep="\n")

The featrues for this movie based on model are:
[ 0.67956171 -0.58031541 -0.66472264  0.7127944  -1.01531212 -1.84731306
 -0.65114689  0.08536871 -0.05329978 -0.3305498  -0.07375997 -0.35525923
 -0.34703138 -1.03420697  0.96291131]


In [14]:
# how similar movies are found
# 1-2. element-wise absolute gap between every movie's features and the
#      selected movie's features
diff_selected_vs_movies = np.abs(movies_ratings - selected_movie_features)
# 3. one score per movie: the gaps summed across that movie's features
diff_score = diff_selected_vs_movies.sum(axis=1)
# 4. store the score as a new column on the movies dataframe
#    (assigned by position, so it presumes the matrix rows follow the movies row order)
movies['difference_score'] = diff_score
# 5. smallest score first = most similar first
sorted_movies_diff_score = movies.sort_values('difference_score')
# 6. show rows 1..3 of the ranking; row 0 is skipped because the selected
#    movie has a zero gap to itself
print("The top 3 movies that are similar to selected movie are:")
print(sorted_movies_diff_score[['title','difference_score']].iloc[1:4])

The top 3 movies that are similar to selected movie are:
                         title  difference_score
movie_id                                        
5         The Big City Judge 2          2.772366
9                  Biker Gangs          2.894406
2         The Big City Judge 1          3.810022


## Recommending movies to a user based on the movies they have rated

In [19]:
# obtaining user_id from the user
print("Please enter user_id to get movies recomedations - user_id must be between 1 to 100\n")
selected_user_id = int(input())
# reject ids the model has no row for; without this check an id of 0 or a
# negative id would silently index predicted_ratings from the end and show
# another user's recommendations
if selected_user_id not in ratings_matrix.index:
    raise ValueError("Unknown user_id: {}".format(selected_user_id))
# display movies he/she already rated with their ratings
rated_by_user_id = ratings[ratings['user_id'] == selected_user_id].join(movies, on='movie_id')
print("\nThis user has already rated:\n")
print(rated_by_user_id[['title', 'genre','value']])
# repeat the ranking idea used for similar movies, this time on predicted ratings
input("\nPress enter to proceed with movies recommendations....\n") # pause before displaying recommendations
print("\nThe movies we recommend to this user are:\n")
# predicted_ratings rows follow ratings_matrix.index, so locate the user's row
# through that index instead of assuming ids are contiguous and start at 1
user_row = ratings_matrix.index.get_loc(selected_user_id)
user_id_ratings = predicted_ratings[user_row]
# store this user's predicted rating of every movie as a new column
# (assigned by position - presumes movies rows follow the matrix column order)
movies['rating'] = user_id_ratings
# ids of the movies the user has already rated
movie_id_user_rated = rated_by_user_id['movie_id']
# exclude the movies the user already rated
movies_recommended = movies[~movies.index.isin(movie_id_user_rated)]
# highest predicted rating first
movies_recommended = movies_recommended.sort_values(by=['rating'], ascending = False)
print(movies_recommended[['title','genre','rating']].head(5))

Please enter user_id to get movies recomedations - user_id must be between 1 to 100

3

This user has already rated:

                   title                 genre  value
13         The Sheriff 1  crime drama, western      4
14     Attack on Earth 2        sci-fi, action      3
15         The Sheriff 3  crime drama, western      5
16         The Sheriff 2  crime drama, western      5
17  The Big City Judge 1           legal drama      4
18      Trapped in Space       sci-fi, mystery      3

Press enter to proceed with movies recommendations....
2

The movies we recommend to this user are:

                          title                     genre    rating
movie_id                                                           
34        The Serious Detective           detective drama  4.596754
5          The Big City Judge 2               legal drama  4.528607
9                   Biker Gangs       crime drama, action  4.491327
28                The Sheriff 4      crime drama, western  4.4