In [3]:
import os
from datetime import datetime
from os import path

import turicreate as tc

In [8]:
# Directory holding the MovieLens CSV files read below (movies.csv, ratings.csv).
# Set the ML20M_DATA_DIR environment variable to point at your own copy;
# the fallback is the original author's local path.
data_dir = os.environ.get('ML20M_DATA_DIR', '/Users/asi.messica/Documents/BGU/ml-20m')

<h1> Load Data </h1> The MovieLens dataset was collected by the GroupLens Research Project at the University of Minnesota. For more information, see http://grouplens.org/datasets/movielens/

In [9]:
# Catalogue of the movies being recommended (columns: movieId, title, genres)
movies_csv = path.join(data_dir, 'movies.csv')
items = tc.SFrame.read_csv(movies_csv)

# User/item interaction log (columns: userId, movieId, rating, timestamp)
ratings_csv = path.join(data_dir, 'ratings.csv')
actions = tc.SFrame.read_csv(ratings_csv)


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,str,str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[int,int,float,int]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


<h1> Prepare Data </h1>

In [10]:
# Prepare the data by removing items that are rare (5 ratings or fewer).
# Count the ratings per movie once; both id sets below derive from this table.
rating_counts = actions.groupby('movieId', tc.aggregate.COUNT).sort('Count')
rare_items = rating_counts[rating_counts['Count'] <= 5]
popular_items = rating_counts[rating_counts['Count'] > 5]

# Keep catalogue rows by inclusion rather than by excluding rare_items:
# a movie with no ratings at all never shows up in the groupby over
# `actions`, so an exclusion filter would leave it in `items`.
items = items.filter_by(popular_items['movieId'], 'movieId')

# Every row of `actions` has a counted movieId, so excluding the rare ids
# is sufficient here.
actions = actions.filter_by(rare_items['movieId'], 'movieId', exclude=True)

# Cast the timestamp column to datetime values.
actions['timestamp'] = actions['timestamp'].astype(datetime)

In [11]:
# Extract year, title, and genre
def _split_title(raw_title):
    """Split a 'Name (YYYY)' title into (name, year).

    The year is only extracted when the title, after trimming surrounding
    whitespace, really ends in a parenthesised run of four digits.
    Otherwise the trimmed title is returned unchanged with a year of None,
    rather than slicing at fixed offsets and producing garbage.
    """
    cleaned = raw_title.strip()
    year = cleaned[-5:-1]
    if len(cleaned) >= 6 and cleaned[-6] == '(' and cleaned[-1] == ')' and year.isdigit():
        return cleaned[:-6].rstrip(), year
    return cleaned, None


# Run the extraction only once: the title and genres columns are rewritten
# in place, so a second pass would no longer find the year suffix and would
# call split() on values that are already lists.
if 'year' not in items.column_names():
    # Year is read from the untouched title before the title is rewritten.
    items['year'] = items['title'].apply(lambda t: _split_title(t)[1], dtype=str)
    items['title'] = items['title'].apply(lambda t: _split_title(t)[0], dtype=str)
    # Genres arrive as one '|'-separated string per movie.
    items['genres'] = items['genres'].apply(lambda g: g.split('|'))

<h1> Train the Model </h1>

In [14]:
# Hold out part of each sampled user's interactions for validation,
# then fit a recommender on the remaining interactions.
training_data, validation_data = tc.recommender.util.random_split_by_user(
    actions,
    user_id='userId',
    item_id='movieId',
)
model = tc.recommender.create(
    training_data,
    user_id='userId',
    item_id='movieId',
)

<h1> Model Predict </h1>

In [15]:
# Recommendations from the trained model; the library defaults
# (users=None, k=10) are spelled out here.
results = model.recommend(users=None, k=10)

<h1> Find Similar Items </h1>

In [33]:
# Nearest-neighbour items from the trained model; the library defaults
# (items=None, k=10) are spelled out here.
similar_items = model.get_similar_items(items=None, k=10)

In [34]:
# Plain-text preview of the similarity table (only the head of the SFrame is printed).
print(similar_items)

+---------+---------+---------------------+------+
| movieId | similar |        score        | rank |
+---------+---------+---------------------+------+
|    2    |   367   |  0.3666577935218811 |  1   |
|    2    |   586   |  0.3523901104927063 |  2   |
|    2    |   500   | 0.34584909677505493 |  3   |
|    2    |   364   |  0.3412216305732727 |  4   |
|    2    |   208   | 0.30882930755615234 |  5   |
|    2    |   588   |  0.3032993674278259 |  6   |
|    2    |   377   |  0.3014238476753235 |  7   |
|    2    |    19   | 0.29971665143966675 |  8   |
|    2    |   153   | 0.29792654514312744 |  9   |
|    2    |   595   |  0.2974637746810913 |  10  |
+---------+---------+---------------------+------+
[175190 rows x 4 columns]
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.


In [24]:
# Plain-text preview of the prepared movie table (movieId, title, genres, year).
print(items)

+---------+-----------------------------+--------------------------------+------+
| movieId |            title            |             genres             | year |
+---------+-----------------------------+--------------------------------+------+
|    1    |          Toy Story          | [Adventure, Animation, Chi...  | 1995 |
|    2    |           Jumanji           | [Adventure, Children, Fantasy] | 1995 |
|    3    |       Grumpier Old Men      |       [Comedy, Romance]        | 1995 |
|    4    |      Waiting to Exhale      |    [Comedy, Drama, Romance]    | 1995 |
|    5    | Father of the Bride Part II |            [Comedy]            | 1995 |
|    6    |             Heat            |   [Action, Crime, Thriller]    | 1995 |
|    7    |           Sabrina           |       [Comedy, Romance]        | 1995 |
|    8    |         Tom and Huck        |     [Adventure, Children]      | 1995 |
|    9    |         Sudden Death        |            [Action]            | 1995 |
|    10   |     