In [None]:
!pip install turicreate



In [None]:
# link of turicreate library https://github.com/apple/turicreate
import turicreate as tc # Apple open source library Turi Create that simplifies the development of custom machine learning models.
import pandas as pd

Download our data (OPTIONAL: skip this cell if `movies.csv` and `ratings.csv` are already in the working directory, e.g. when running locally)

In [None]:
import os

import gdown


def drive_download_url(share_url):
    """Convert a Google Drive share link into a direct-download URL.

    Expects the ``https://drive.google.com/file/d/<file id>/view?...`` form
    used below, where the file id is the second-to-last path segment.
    """
    file_id = share_url.split('/')[-2]
    return 'https://drive.google.com/uc?id=' + file_id


movies_url_raw = 'https://drive.google.com/file/d/1ToNy3YV2djIjh0TGSjnOzagoIvvOu-M2/view?usp=sharing'
movies_url = drive_download_url(movies_url_raw)

ratings_raw_url = 'https://drive.google.com/file/d/1I4T-HUkFammZxMtizTMHSCctF4BFq3ug/view?usp=sharing'
ratings_url = drive_download_url(ratings_raw_url)

# (direct-download URL, local file name) for every dataset file.
for url, filename in [(movies_url, 'movies.csv'), (ratings_url, 'ratings.csv')]:
    # Skip files that already exist so re-running the notebook does not download them again.
    if not os.path.exists(filename):
        gdown.download(url, filename, quiet=False)

Downloading...
From: https://drive.google.com/uc?id=1ToNy3YV2djIjh0TGSjnOzagoIvvOu-M2
To: /content/movies.csv
100%|██████████| 494k/494k [00:00<00:00, 3.94MB/s]
Downloading...
From: https://drive.google.com/uc?id=1I4T-HUkFammZxMtizTMHSCctF4BFq3ug
To: /content/ratings.csv
2.48MB [00:00, 14.5MB/s]


'ratings.csv'

## Data Preprocessing

In [None]:
# Read the movie catalogue from disk and preview its first rows.
df_movie = pd.read_csv(filepath_or_buffer='movies.csv')
df_movie.head(n=5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [None]:
# Read the user ratings from disk and preview the first rows.
df_ratings = pd.read_csv(filepath_or_buffer='ratings.csv')
df_ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [None]:
# Add each movie's title to the ratings; with no explicit key, pandas joins on
# the columns the two frames have in common.
df_ratings = pd.merge(df_ratings, df_movie[['movieId', 'title']], how='inner')
df_ratings.head(n=5)

Unnamed: 0,userId,movieId,rating,timestamp,title
0,1,1,4.0,964982703,Toy Story (1995)
1,5,1,4.0,847434962,Toy Story (1995)
2,7,1,4.5,1106635946,Toy Story (1995)
3,15,1,2.5,1510577970,Toy Story (1995)
4,17,1,4.5,1305696483,Toy Story (1995)


In [None]:
# Turi Create works on SFrames, so convert the pandas DataFrame first.
train_data = tc.SFrame(data=df_ratings)

In [None]:
# Open the SFrame's built-in view for quick visualisation and data exploration.
train_data.show()

## Train/Test Split

In [None]:
# Split per user, holding out about 20% of the ratings for testing (roughly an 80/20 split).
# The seed is passed explicitly so the split is reproducible on every run.
train, test = tc.recommender.util.random_split_by_user(
    train_data,
    user_id='userId',
    item_id='movieId',
    item_test_proportion=0.2,
    random_seed=0,
)

In [None]:
# Number of rating rows in the training and test sets.
train.num_rows(), test.num_rows()

(80673, 20163)

# Item-Item Similarity Recommender

In [None]:
# Fit the item-item similarity recommender on the training ratings using cosine similarity.
item_similarity_model = tc.item_similarity_recommender.create(
    train,
    user_id='userId',
    item_id='movieId',
    target='rating',
    similarity_type='cosine',
)

In [None]:
# Top-10 recommendations for users 1, 2 and 3, printed in full (3 users x 10 items = 30 rows).
item_similarity_recomm = item_similarity_model.recommend(k=10, users=[1, 2, 3])
item_similarity_recomm.print_rows(num_rows=30)

+--------+---------+---------------------+------+
| userId | movieId |        score        | rank |
+--------+---------+---------------------+------+
|   1    |   2918  |  0.4987977915688565 |  1   |
|   1    |   1291  | 0.43049490608667074 |  2   |
|   1    |   2028  | 0.41527415106171056 |  3   |
|   1    |   1200  |  0.3773884224264245 |  4   |
|   1    |   2762  |  0.3201700618392543 |  5   |
|   1    |   1968  | 0.31893454664631893 |  6   |
|   1    |   1527  | 0.31769391110068873 |  7   |
|   1    |   593   | 0.31206613057538085 |  8   |
|   1    |   589   |  0.3069003149082786 |  9   |
|   1    |   1198  | 0.30325944204079475 |  10  |
|   2    |  91529  |   1.00496429475871  |  1   |
|   2    |   2959  |  0.8507485769011758 |  2   |
|   2    |  44191  |  0.7872977121309801 |  3   |
|   2    |   7438  |  0.7787007852034136 |  4   |
|   2    |   7153  |  0.7660644514994188 |  5   |
|   2    |  109374 |  0.7513571923429315 |  6   |
|   2    |  33794  |  0.7301526015455072 |  7   |


In [None]:
# Evaluate the item-item similarity recommender on the held-out test set.
item_similarity_model_performance = item_similarity_model.evaluate(dataset=test)


Precision and recall summary statistics by cutoff
+--------+---------------------+----------------------+
| cutoff |    mean_precision   |     mean_recall      |
+--------+---------------------+----------------------+
|   1    |  0.3655737704918034 | 0.024065794247031827 |
|   2    |  0.3237704918032787 | 0.039910412662857206 |
|   3    | 0.30109289617486334 | 0.05306716833191344  |
|   4    | 0.28524590163934427 | 0.06458541885411224  |
|   5    | 0.27803278688524563 | 0.07929270200303598  |
|   6    | 0.27103825136612025 | 0.09235123805461669  |
|   7    |  0.2653395784543324 | 0.10501288977634035  |
|   8    |  0.257172131147541  | 0.11569347192215351  |
|   9    | 0.25336976320582905 | 0.12680733033979547  |
|   10   | 0.24688524590163943 | 0.13657947357699043  |
+--------+---------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 3.561270964397038

Per User RMSE (best)
+--------+--------------------+-------+
| userId |        rmse        | count |
+------

In [None]:
# Overall RMSE of the item-item similarity recommender on the test set (about 3.56 in the run shown below).
item_similarity_model_performance['rmse_overall']

3.561270964397038

In [None]:
# Make the top-10 recommendations for user 1 and store them in a pandas DataFrame.
user1_recomm = item_similarity_model.recommend(users=[1], k=10)
df_user1_recomm = user1_recomm.to_dataframe()

In [None]:
# Look up the title of every recommended movie; pandas joins on the columns the two frames share.
df_user1_recomm = pd.merge(df_user1_recomm, df_movie[['movieId', 'title']], how='inner')

In [None]:
# Display user 1's item-similarity recommendations together with the movie titles.
df_user1_recomm

Unnamed: 0,userId,movieId,score,rank,title
0,1,2918,0.498798,1,Ferris Bueller's Day Off (1986)
1,1,1291,0.430495,2,Indiana Jones and the Last Crusade (1989)
2,1,2028,0.415274,3,Saving Private Ryan (1998)
3,1,1200,0.377388,4,Aliens (1986)
4,1,2762,0.32017,5,"Sixth Sense, The (1999)"
5,1,1968,0.318935,6,"Breakfast Club, The (1985)"
6,1,1527,0.317694,7,"Fifth Element, The (1997)"
7,1,593,0.312066,8,"Silence of the Lambs, The (1991)"
8,1,589,0.3069,9,Terminator 2: Judgment Day (1991)
9,1,1198,0.303259,10,Raiders of the Lost Ark (Indiana Jones and the...


# Matrix Factorization Recommender Collaborative Filtering

### Build a Model-Based (Matrix Factorization) Recommender

In [None]:
# Fit the matrix factorization recommender on the training ratings.
factorization_model = tc.factorization_recommender.create(
    train,
    user_id='userId',
    item_id='movieId',
    target='rating',
)

In [None]:
# Top-10 recommendations for users 1, 2 and 3.
factorization_recomm = factorization_model.recommend(users=[1,2,3],k=10)
# 3 users x 10 recommendations = 30 rows; print all of them (25 would cut off
# the last rows for user 3), matching the item-similarity cell.
factorization_recomm.print_rows(num_rows=30)

+--------+---------+--------------------+------+
| userId | movieId |       score        | rank |
+--------+---------+--------------------+------+
|   1    |   932   | 4.814048570596123  |  1   |
|   1    |   3266  | 4.800309699975395  |  2   |
|   1    |   3836  | 4.741456133328819  |  3   |
|   1    |   3543  | 4.716321942411804  |  4   |
|   1    |   1237  | 4.711041999064827  |  5   |
|   1    |   1235  | 4.6960240723718645 |  6   |
|   1    |   1172  | 4.691343289815331  |  7   |
|   1    |  58559  | 4.690562603079224  |  8   |
|   1    |   1446  | 4.678267058931732  |  9   |
|   1    |   1251  | 4.676683467828179  |  10  |
|   2    |  44195  | 3.939887334607983  |  1   |
|   2    |   5992  |  3.91655277182951  |  2   |
|   2    |   951   | 3.9055243479241373 |  3   |
|   2    |   2318  | 3.9027228029598238 |  4   |
|   2    |  26131  | 3.8925151275624277 |  5   |
|   2    |   5008  | 3.879494511537695  |  6   |
|   2    |   3972  | 3.877348144554043  |  7   |
|   2    |   1213  |

In [None]:
# Evaluate the matrix factorization recommender on the held-out test set.
factorization_model_performance = factorization_model.evaluate(dataset=test)


Precision and recall summary statistics by cutoff
+--------+----------------------+-----------------------+
| cutoff |    mean_precision    |      mean_recall      |
+--------+----------------------+-----------------------+
|   1    | 0.01147540983606558  | 0.0008647283647283646 |
|   2    | 0.013114754098360656 |  0.001155658171456132 |
|   3    | 0.01693989071038252  | 0.0020181397819620662 |
|   4    | 0.017213114754098365 |  0.002761483196831453 |
|   5    | 0.018032786885245903 | 0.0030599899649712127 |
|   6    | 0.017213114754098365 |  0.003273135704613999 |
|   7    | 0.01662763466042155  | 0.0034998354043258746 |
|   8    | 0.015778688524590165 | 0.0036419554529689402 |
|   9    | 0.016029143897996343 |  0.00439066802471688  |
|   10   | 0.01655737704918032  |  0.005341130264706376 |
+--------+----------------------+-----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.9316670165775767

Per User RMSE (best)
+--------+--------------------+-------+
| userId |        

In [None]:
# Overall RMSE of the matrix factorization recommender on the test set (about 0.93 in the run shown below).
factorization_model_performance['rmse_overall']

0.9316670165775767

In [None]:
# Make the top-10 matrix factorization recommendations for user 1 and store them in a pandas DataFrame.
user1_recomm_fact_model = factorization_model.recommend(users=[1], k=10)
df_user1_recomm_fact_model = user1_recomm_fact_model.to_dataframe()

In [None]:
# Look up the title of every recommended movie; pandas joins on the columns the two frames share.
df_user1_recomm_fact_model = pd.merge(
    df_user1_recomm_fact_model,
    df_movie[['movieId', 'title']],
    how='inner',
)

In [None]:
# Display user 1's matrix factorization recommendations together with the movie titles.
df_user1_recomm_fact_model

Unnamed: 0,userId,movieId,score,rank,title
0,1,932,4.814049,1,"Affair to Remember, An (1957)"
1,1,3266,4.80031,2,Man Bites Dog (C'est arrivé près de chez vous)...
2,1,3836,4.741456,3,Kelly's Heroes (1970)
3,1,3543,4.716322,4,Diner (1982)
4,1,1237,4.711042,5,"Seventh Seal, The (Sjunde inseglet, Det) (1957)"
5,1,1235,4.696024,6,Harold and Maude (1971)
6,1,1172,4.691343,7,Cinema Paradiso (Nuovo cinema Paradiso) (1989)
7,1,58559,4.690563,8,"Dark Knight, The (2008)"
8,1,1446,4.678267,9,Kolya (Kolja) (1996)
9,1,1251,4.676683,10,8 1/2 (8½) (1963)


# Compare both Models

In [None]:
# Evaluate both recommenders on the same held-out test set. model_names labels
# each model in the output instead of the default labels such as "M0".
model_performance = tc.recommender.util.compare_models(
    test,
    [item_similarity_model, factorization_model],
    model_names=['Item-item similarity', 'Matrix factorization'],
)

PROGRESS: Evaluate model M0

Precision and recall summary statistics by cutoff
+--------+---------------------+----------------------+
| cutoff |    mean_precision   |     mean_recall      |
+--------+---------------------+----------------------+
|   1    |  0.3655737704918033 | 0.02406579424703182  |
|   2    |  0.3237704918032787 | 0.039910412662857206 |
|   3    |  0.3010928961748634 | 0.05306716833191342  |
|   4    |  0.2852459016393444 | 0.06458541885411224  |
|   5    | 0.27803278688524585 | 0.07929270200303598  |
|   6    |  0.2710382513661201 |  0.0923512380546167  |
|   7    |  0.2653395784543325 | 0.10501288977634035  |
|   8    |  0.257172131147541  |  0.1156934719221535  |
|   9    |  0.2533697632058289 |  0.1268073303397955  |
|   10   | 0.24688524590163938 | 0.13657947357699035  |
+--------+---------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 3.561270964397038

Per User RMSE (best)
+--------+--------------------+-------+
| userId |        r

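**The matrix factorization recommender achieved a much lower overall RMSE (0.93) than the item-item similarity recommender (3.56). Note, however, that the item-item similarity recommender ranked items far better in the evaluation above: its mean precision at cutoff 10 was about 0.25, versus about 0.017 for matrix factorization. RMSE alone therefore does not show that matrix factorization produces the better top-k recommendations.**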