In [1]:
import numpy as np
import pandas as pd

# Import User Rating

In [2]:
df = pd.read_csv("data/collaborative_filtering.csv")
df.head()

Unnamed: 0,userId,movie,rating
0,1,One Flew Over the Cuckoo's Nest (1975),5
1,1,James and the Giant Peach (1996),3
2,1,My Fair Lady (1964),3
3,1,Erin Brockovich (2000),4
4,1,"Bug's Life, A (1998)",5


In [3]:
df.userId.nunique() # Cek banyaknya userID

6040

# Training

In [4]:
from surprise import Reader, Dataset, SVD
from surprise.model_selection import cross_validate

In [7]:
data = Dataset.load_from_df(df=df, reader=Reader())
trainset = data.build_full_trainset()

In [8]:
model = SVD()
model.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x226a87c1208>

In [9]:
model.predict(1, "My Fair Lady (1964)")

Prediction(uid=1, iid='My Fair Lady (1964)', r_ui=None, est=4.130565014795006, details={'was_impossible': False})

# Predicted Rating for a Film Not Watched

In [10]:
user_id = 1

## All Movies

In [11]:
all_movies = df.movie.unique()
len(all_movies) # Untuk mengecek ada berapa jumlah film

3706

## Movie Ratings by User 1

In [13]:
watched = df[df.userId == user_id].movie
watched.head()

0    One Flew Over the Cuckoo's Nest (1975)
1          James and the Giant Peach (1996)
2                       My Fair Lady (1964)
3                    Erin Brockovich (2000)
4                      Bug's Life, A (1998)
Name: movie, dtype: object

## Movie not yet Rated by User 1

In [14]:
not_watched = [movie for movie in all_movies if movie not in watched]
not_watched

["One Flew Over the Cuckoo's Nest (1975)",
 'James and the Giant Peach (1996)',
 'My Fair Lady (1964)',
 'Erin Brockovich (2000)',
 "Bug's Life, A (1998)",
 'Princess Bride, The (1987)',
 'Ben-Hur (1959)',
 'Christmas Story, A (1983)',
 'Snow White and the Seven Dwarfs (1937)',
 'Wizard of Oz, The (1939)',
 'Beauty and the Beast (1991)',
 'Gigi (1958)',
 'Miracle on 34th Street (1947)',
 "Ferris Bueller's Day Off (1986)",
 'Sound of Music, The (1965)',
 'Airplane! (1980)',
 'Tarzan (1999)',
 'Bambi (1942)',
 'Awakenings (1990)',
 'Big (1988)',
 'Pleasantville (1998)',
 'Wallace & Gromit: The Best of Aardman Animation (1996)',
 'Back to the Future (1985)',
 "Schindler's List (1993)",
 'Meet Joe Black (1998)',
 'Pocahontas (1995)',
 'E.T. the Extra-Terrestrial (1982)',
 'Titanic (1997)',
 'Ponette (1996)',
 'Close Shave, A (1995)',
 'Antz (1998)',
 'Girl, Interrupted (1999)',
 'Hercules (1997)',
 'Aladdin (1992)',
 'Mulan (1998)',
 'Hunchback of Notre Dame, The (1996)',
 'Last Days of Di

## Prediction for a Movies not yet rated

In [22]:
score_flt = [model.predict(user_id, movie).est for movie in not_watched] # Dalam bentuk float
score_flt

[4.687868410426526,
 3.4675226694002435,
 4.130565014795006,
 4.001890828742495,
 4.38525031524848,
 4.220676768341943,
 4.674427418348092,
 4.821455009549385,
 4.274016339176491,
 4.532729458603807,
 4.396107508369912,
 3.857930664348562,
 4.180841296657657,
 4.108309763604247,
 4.575254462160095,
 4.357505863540965,
 3.9014651974600665,
 4.504571099440368,
 4.141277217991474,
 4.105957670619834,
 3.5672587525488018,
 4.002085302529788,
 4.435393151970625,
 5,
 3.3699053800231584,
 4.045616795646602,
 4.601651885662026,
 4.051710685965245,
 4.157551428035983,
 4.223365039965746,
 3.5892059599612978,
 3.709959216775112,
 3.684662267737146,
 4.061089333288576,
 3.989170118823787,
 3.6850543410496206,
 3.8575678240435987,
 4.388044913193573,
 4.219779304735531,
 4.298021705581862,
 4.4258869234330005,
 4.354245904451898,
 4.199394164617494,
 3.7525153961456503,
 4.586060056550961,
 4.3370134303041485,
 4.66540756050605,
 4.715087216795421,
 4.611650482859303,
 4.101153960795377,
 4.36021

In [19]:
score_int = [int(model.predict(user_id, movie).est) for movie in not_watched] # Bulatkan menjadi Integer
score_int

[4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 3,
 4,
 4,
 5,
 3,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 3,
 4,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 3,
 4,
 3,
 4,
 3,
 4,
 4,
 3,
 3,
 4,
 3,
 3,
 4,
 4,
 3,
 2,
 3,
 3,
 4,
 4,
 4,
 3,
 3,
 4,
 4,
 3,
 4,
 4,
 3,
 4,
 3,
 3,
 3,
 3,
 4,
 3,
 3,
 3,
 4,
 4,
 3,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 4,
 3,
 3,
 4,
 4,
 4,
 3,
 4,
 3,
 3,
 3,
 3,
 3,
 4,
 3,
 4,
 4,
 3,
 3,
 4,
 4,
 4,
 2,
 2,
 3,
 2,
 4,
 4,
 4,
 3,
 2,
 4,
 4,
 3,
 4,
 4,
 2,
 3,
 3,
 4,
 3,
 4,
 3,
 4,
 4,
 4,
 3,
 3,
 4,
 3,
 4,
 4,
 3,
 2,
 4,
 3,
 3,
 3,
 3,
 4,
 4,
 3,
 4,
 4,
 4,
 3,
 4,
 4,
 3,
 3,
 3,
 3,
 2,
 2,
 3,
 3,
 3,
 3,
 4,
 3,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 2,
 4,
 4,
 3,
 4,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 3,
 3,
 4,
 3,
 4,
 4,
 4,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 3,
 3,
 3,
 4,
 3,
 3,
 4,
 3,
 3,
 4,
 3,
 4,
 3,
 3,
 1,
 4,
 3,
 4,
 4,
 4,
 3,
 3,
 4,
