<a href="https://colab.research.google.com/github/TheMathAI/Movie-Recommender-System-ML/blob/main/Final_Version.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# import library
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
# Install the Subversion client; it is used below to fetch a single folder of the repo.
!apt install subversion
# Check out only the clean_Data folder (the read_csv calls later expect it under /content/clean_Data).
# NOTE(review): this goes through GitHub's Subversion bridge, which GitHub has reportedly retired —
# confirm the checkout still succeeds, otherwise fetch the folder with git instead.
!svn checkout https://github.com/TheMathAI/ML_Project/trunk/clean_Data

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following package was automatically installed and is no longer required:
  libnvidia-common-460
Use 'apt autoremove' to remove it.
The following additional packages will be installed:
  libapr1 libaprutil1 libserf-1-1 libsvn1
Suggested packages:
  db5.3-util libapache2-mod-svn subversion-tools
The following NEW packages will be installed:
  libapr1 libaprutil1 libserf-1-1 libsvn1 subversion
0 upgraded, 5 newly installed, 0 to remove and 5 not upgraded.
Need to get 2,235 kB of archives.
After this operation, 9,916 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/main amd64 libapr1 amd64 1.6.3-2 [90.9 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/main amd64 libaprutil1 amd64 1.6.1-2 [84.4 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libserf-1-1 amd64 1.3.9-6 [44.4 kB]
Get:4 http://archive.ubuntu.com/ubuntu bionic-updates/univer

In [2]:
# Load the three cleaned tables (movies, users, ratings) from the checked-out folder.
df_movies, df_users, df_ratings = map(
    pd.read_csv,
    (
        "/content/clean_Data/df_movies.csv",
        "/content/clean_Data/df_users.csv",
        "/content/clean_Data/df_ratings.csv",
    ),
)

In [3]:
# Reshape the long ratings table into a user-by-movie matrix: one row per userID,
# one column per movieID, with 0 standing in for every movie a user has not rated.
df_ratings_pivot = (
    df_ratings
    .pivot(index="userID", columns="movieID", values="rating")
    .fillna(0)
)

In [4]:
# Work on the raw NumPy values of the user-by-movie table.
df_ratings_array = df_ratings_pivot.to_numpy()
# Per-user average over the whole row (unrated movies count as 0 after the fill).
mean_users_ratings = df_ratings_array.mean(axis=1)
# Centre every user's row on zero by subtracting that user's own average.
df_demean = df_ratings_array - mean_users_ratings[:, np.newaxis]

In [5]:
# Truncated singular value decomposition of the demeaned user-by-movie matrix,
# keeping k = 48 singular values: U has one row per user, s is the 1-D array of
# singular values, and Vh has one column per movie.
U, s, Vh = svds(df_demean, k = 48)

In [6]:
# Expand the 1-D vector of singular values into a square diagonal matrix so it
# can be used in the U @ s @ Vh reconstruction product.
# The ndim guard keeps this cell idempotent: calling np.diag on an already 2-D
# `s` would extract its diagonal again and silently undo this step on a re-run.
s = np.diag(s) if np.ndim(s) == 1 else s

In [7]:
# Rebuild the low-rank rating estimates (U x diag(s) x Vh) and add each user's
# mean back so the values return to the original rating scale.
predicted_ratings = U @ s @ Vh + mean_users_ratings[:, np.newaxis]
# Wrap the estimates in a frame whose columns are the movie IDs of the pivot table.
df_predictions = pd.DataFrame(data=predicted_ratings, columns=df_ratings_pivot.columns)


In [8]:
def recommend_movies(df_predictions, userID, movies_df, original_ratings_df, num_recommendations=5):
    """Return a user's rated movies and the best-predicted movies they have not rated.

    Parameters
    ----------
    df_predictions : pandas.DataFrame
        Predicted ratings with one row per user and one column per movieID.
        Rows are addressed by position: row ``userID - 1`` is read for ``userID``.
    userID : int
        1-based user identifier; also matched against ``original_ratings_df.userID``.
    movies_df : pandas.DataFrame
        Movie metadata containing a ``movieID`` column.
    original_ratings_df : pandas.DataFrame
        Observed ratings with ``userID``, ``movieID`` and ``rating`` columns.
    num_recommendations : int, default 5
        Maximum number of recommendations to return.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``user_full``: the user's ratings joined with the movie metadata, sorted
        by rating in descending order.
        ``recommendations``: rows of ``movies_df`` the user has not rated, ordered
        by predicted rating (highest first) and truncated to
        ``num_recommendations``; the prediction column itself is not returned.

    Raises
    ------
    IndexError
        If ``userID`` does not correspond to a row of ``df_predictions``.
    """
    # Predictions are stored positionally, so user N lives in row N - 1.
    user_row_number = userID - 1
    if user_row_number < 0:
        # A negative position would silently wrap around to the last rows.
        raise IndexError(f'userID must be >= 1, got {userID}')

    # Name the series and its index explicitly so the merge below does not
    # depend on how the index/columns of df_predictions happen to be labelled.
    user_predictions = (
        df_predictions.iloc[user_row_number]
        .rename_axis('movieID')
        .rename('Predictions')
        .reset_index()
    )

    # Get the user's data and merge in the movie information.
    user_data = original_ratings_df[original_ratings_df.userID == userID]
    user_full = (
        user_data
        .merge(movies_df, how='left', on='movieID')
        .sort_values(['rating'], ascending=False)
    )

    print(f'User {userID} has rated {user_full.shape[0]} movies.')
    print(f'Recommending the highest {num_recommendations} predicted ratings movies not already rated.')

    # Recommend the highest predicted rating movies that the user hasn't rated yet.
    # Use the movies_df argument (not a notebook-level global) as the candidate pool.
    unseen_movies = movies_df[~movies_df['movieID'].isin(user_full['movieID'])]
    recommendations = (
        unseen_movies
        .merge(user_predictions, how='left', on='movieID')
        # Movies without a predicted rating get NaN and therefore sort last.
        .sort_values('Predictions', ascending=False)
        .iloc[:num_recommendations]
        .drop(columns='Predictions')
    )

    return user_full, recommendations

# Top-10 recommendations for user 847, plus that user's existing ratings.
user_ratings, user_predictions = recommend_movies(
    df_predictions,
    userID=847,
    movies_df=df_movies,
    original_ratings_df=df_ratings,
    num_recommendations=10,
)

User 847 has rated 62 movies.
Recommending the highest 10 predicted ratings movies not already rated.


In [9]:
# Display the recommendations returned for user 847 by the recommend_movies call above.
user_predictions

Unnamed: 0,movieID,title,Action,Adventure,Animation,Children's,Comedy,Crime,Documentary,Drama,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
1106,1136,Monty Python and the Holy Grail (1974),0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1262,1307,When Harry Met Sally... (1989),0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0
2686,2804,"Christmas Story, A (1983)",0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0
2673,2791,Airplane! (1980),0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3235,3361,Bull Durham (1988),0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
0,1,Toy Story (1995),0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1230,1270,Back to the Future (1985),0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0
1567,1641,"Full Monty, The (1997)",0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2485,2599,Election (1999),0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1237,1278,Young Frankenstein (1974),0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,0
