In [1]:
# Import numpy 
import numpy as np

# Import pandas
import pandas as pd

In [6]:
columns = ['user_id', 'item_id', 'rating', 'timestamp']

df = pd.read_csv('ml-100k/u.data', sep='\t', names=columns)
columns = ['item_id', 'movie title', 'release date', 'video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure',
          'Animation', 'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
          'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

movies = pd.read_csv('ml-100k/u.item', sep='|', names=columns, encoding='latin-1')

name_of_movies = movies[['item_id', 'movie title']]
cm = pd.merge(df, movies, on='item_id')
cm = cm[['user_id','movie title', 'rating']]
cm.head()

Unnamed: 0,user_id,movie title,rating
0,196,Kolya (1996),3
1,63,Kolya (1996),3
2,226,Kolya (1996),5
3,154,Kolya (1996),3
4,306,Kolya (1996),5


In [8]:
my_ratings = pd.read_csv('myratings.csv')
print(my_ratings)

    User ID                                           Movie  Ratings
0       123                     Star Wars A New Hope (1977)      4.5
1       123                               Braveheart (1995)      4.0
2       123                            The Lion King (2019)      3.8
3       123                                  Titanic (1997)      3.9
4       123                               The Matrix (1999)      4.4
5       123                                   Wall-E (2008)      4.5
6       123                                 Deadpool (2016)      4.3
7       123                           The Terminator (1984)      3.4
8       123                    The Devil's Violinist (2013)      2.0
9       123                  The Devil's Wears Prada (2006)      1.9
10      123                   Spider-Man: Homecoming (2017)      4.6
11      123                 The Last of the Mohicans (1992)      3.6
12      123                              The Patriot (2000)      3.8
13      123  Harry Potter and the 

In [9]:
cm = pd.concat([cm, my_ratings], axis=0)

# rename the columns to userID, itemID, title, year, movie and rating
cm.columns = ['userID', 'itemID', 'rating', 'title', 'year', 'movie']

# use the transform method group by userID and count to keep the movies with more than 25 reviews
cm['reviews'] = cm.groupby(['itemID'])['rating'].transform('count')

cm = cm[cm.reviews>25][['userID', 'itemID', 'rating']]

In [10]:
# Using the python package: surprise
# Importing SVD, Reader, and Dataset
from surprise import SVD, Reader, Dataset

# Utilizing a model selection from surprise
# Importing cross_validate
from surprise.model_selection import cross_validate

In [11]:
# A reader is still needed but only the rating_scale param is required.
reader = Reader(rating_scale=(1, 5))

data = Dataset.load_from_df(cm, reader)

In [13]:
# get the list of the movie ids
unique_ids = cm['itemID'].unique()

# get the list of the ids that the userid 1001 has rated
iids1001 = cm.loc[cm['userID']==1001, 'itemID']

# remove the rated movies for the recommendations
movies_to_predict = np.setdiff1d(unique_ids,iids1001)

SVD (Singular Value Decomposition)

In [14]:
# Algorithm: SVD
algo = SVD()

# Fitting the data for the SVD
algo.fit(data.build_full_trainset())

my_recs = []

for iid in movies_to_predict:
    my_recs.append((iid, algo.predict(uid=1001,iid=iid).est))
    
pd.DataFrame(my_recs, columns=['iid', 'predictions']).sort_values('predictions', ascending=False).head(10)

Unnamed: 0,iid,predictions
171,"Close Shave, A (1995)",4.545516
657,Schindler's List (1993),4.528626
679,"Shawshank Redemption, The (1994)",4.514694
845,"Wrong Trousers, The (1993)",4.513155
143,Casablanca (1942),4.493591
821,Wallace & Gromit: The Best of Aardman Animatio...,4.486597
621,Rear Window (1954),4.417316
1,12 Angry Men (1957),4.404858
809,"Usual Suspects, The (1995)",4.391677
763,"Third Man, The (1949)",4.380057
