In [1]:
import graphlab
# Select 'ipynb' as the GraphLab Canvas target. Presumably this makes the
# `.show()` calls further down render inside the notebook rather than in a
# separate browser window -- confirm against the Canvas documentation.
graphlab.canvas.set_target('ipynb')

In [2]:
# Read the ratings file, hinting that the "rating" column should be parsed as int.
training_data = graphlab.SFrame.read_csv(
    "ratings.csv",
    column_type_hints=dict(rating=int)
)

# Build a recommender through the generic `graphlab.recommender.create` entry
# point, identifying users and items by the "userId" / "movieId" columns.
# Note that no `target` column is passed in this cell.
id_columns = dict(user_id="userId", item_id="movieId")
model = graphlab.recommender.create(training_data, **id_columns)

[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: /tmp/graphlab_server_1491922569.log


This non-commercial license of GraphLab Create for academic use is assigned to hbharti@umassd.edu and will expire on January 21, 2018.


In [3]:
# Item-similarity recommender over the same observations, using the "rating"
# column as the target and cosine as the similarity measure.
cosine_options = dict(
    user_id="userId",
    item_id="movieId",
    target="rating",
    similarity_type="cosine"
)
model_cosine = graphlab.item_similarity_recommender.create(training_data, **cosine_options)

In [4]:
# Load the rows to score. This reads the same "ratings.csv" file, with the same
# int hint for "rating", that `training_data` was loaded from.
query_data = graphlab.SFrame.read_csv(
    "ratings.csv",
    column_type_hints=dict(rating=int)
)
# Open the Canvas view of the loaded frame.
query_data.show()

In [5]:
# Score `query_data` with the current `model`. The recorded output below shows
# the result is a float array; the values are small raw scores, not numbers on
# the rating scale (a later cell rescales them).
query_result = model.predict(query_data)
# Display the first few predictions as the cell output.
query_result.head()

dtype: float
Rows: 10
[0.0, 0.0, 0.0, 0.09561010897159576, 0.011333334445953368, 0.04767906069755554, 0.024145299196243288, 0.037888890504837035, 0.06853834092617035, 0.03170411288738251]

In [6]:

# Min-max rescale the raw prediction scores onto the range of the observed
# ratings, i.e. [rating_min, rating_max].
#
# Multiplying the normalised score by the maximum rating alone would map the
# lowest score to 0 even when the rating scale starts above 0, so the result
# would not be on the same scale as the original ratings. Adding the rating
# minimum as an offset fixes that and is a no-op when the minimum rating is 0.
score_min = query_result.min()
score_max = query_result.max()
rating_min = query_data['rating'].min()
rating_max = query_data['rating'].max()

score_span = score_max - score_min
if score_span == 0:
    # Every score is identical, so there is no spread to normalise. Fail with
    # a clear message instead of dividing by zero.
    raise ValueError("Cannot rescale predictions: all scores are identical.")

scaled_result = (query_result - score_min) / score_span * (rating_max - rating_min) + rating_min
# Display the first few rescaled predictions as the cell output.
scaled_result.head()

dtype: float
Rows: 10
[0.0, 0.0, 0.0, 1.1290221863325245, 0.1338308906060273, 0.5630232820572095, 0.28512234513082946, 0.4474150114000311, 0.8093423211457434, 0.3743814041513098]

In [7]:
# Ask the current `model` for recommendations with k=5 and no explicit user
# list (users=None). The recorded output shows five ranked items per user.
recommend_result = model.recommend(
    k=5,
    users=None
)
# Display the first rows of the recommendation table as the cell output.
recommend_result.head()

userId,movieId,score,rank
1,2915,0.115134230256,1
1,1282,0.105253386497,2
1,1266,0.104882827401,3
1,2194,0.104440882802,4
1,1276,0.0982173681259,5
2,597,0.233024559523,1
2,380,0.202737523537,2
2,344,0.185276264423,3
2,595,0.179132772904,4
2,231,0.175667363562,5


In [9]:
# Split the observations per user into a training part and a validation part,
# using `max_num_users=100` and `item_test_proportion=0.3`.
split_options = dict(
    user_id="userId",
    item_id="movieId",
    max_num_users=100,
    item_test_proportion=0.3
)
split_frames = graphlab.recommender.util.random_split_by_user(training_data, **split_options)
# The helper returns a pair: (training part, validation part).
training_subset, validation_subset = split_frames

In [10]:
# Train on the training split only, this time with "rating" as the target.
# NOTE: this rebinds `model`, replacing the recommender created earlier in the
# notebook from the full `training_data` without a target.
model = graphlab.recommender.create(
    training_subset,
    item_id="movieId",
    user_id="userId",
    target="rating"
)

In [11]:
# Evaluate the current `model` on the held-out `validation_subset` and keep the
# returned results. Later cells index this object with 'rmse_by_item' and
# 'rmse_by_user'; the recorded output also reports precision/recall by cutoff
# and an overall RMSE.
rmse_results = model.evaluate(validation_subset)



Precision and recall summary statistics by cutoff
+--------+----------------+------------------+
| cutoff | mean_precision |   mean_recall    |
+--------+----------------+------------------+
|   1    |      0.23      | 0.00756728697237 |
|   2    |     0.255      |  0.016514885998  |
|   3    | 0.246666666667 | 0.0269482838084  |
|   4    |     0.2175     | 0.0331238513565  |
|   5    |     0.208      | 0.0398244466642  |
|   6    | 0.208333333333 | 0.0505103028069  |
|   7    | 0.202857142857 | 0.0553536406073  |
|   8    |      0.2       | 0.0643181981469  |
|   9    | 0.196666666667 | 0.0677979157166  |
|   10   |      0.19      | 0.0714384758895  |
+--------+----------------+------------------+
[10 rows x 3 columns]

('\nOverall RMSE: ', 0.9947850868742732)

Per User RMSE (best)
+--------+-------+----------------+
| userId | count |      rmse      |
+--------+-------+----------------+
|  181   |   7   | 0.419303934797 |
+--------+-------+----------------+
[1 rows x 3 columns]


Pe

In [12]:
# Open the Canvas view of the 'rmse_by_item' entry of the evaluation results.
rmse_results['rmse_by_item'].show()


In [14]:
# Open the Canvas view of the 'rmse_by_user' entry of the evaluation results.
rmse_by_user = rmse_results['rmse_by_user']
rmse_by_user.show()
# Echo the complete evaluation results as the cell output.
rmse_results

{'precision_recall_by_user': Columns:
 	userId	int
 	cutoff	int
 	precision	float
 	recall	float
 	count	int
 
 Rows: 1800
 
 Data:
 +--------+--------+-----------+--------+-------+
 | userId | cutoff | precision | recall | count |
 +--------+--------+-----------+--------+-------+
 |   1    |   1    |    0.0    |  0.0   |   6   |
 |   1    |   2    |    0.0    |  0.0   |   6   |
 |   1    |   3    |    0.0    |  0.0   |   6   |
 |   1    |   4    |    0.0    |  0.0   |   6   |
 |   1    |   5    |    0.0    |  0.0   |   6   |
 |   1    |   6    |    0.0    |  0.0   |   6   |
 |   1    |   7    |    0.0    |  0.0   |   6   |
 |   1    |   8    |    0.0    |  0.0   |   6   |
 |   1    |   9    |    0.0    |  0.0   |   6   |
 |   1    |   10   |    0.0    |  0.0   |   6   |
 +--------+--------+-----------+--------+-------+
 [1800 rows x 5 columns]
 Note: Only the head of the SFrame is printed.
 You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.,
 'precision_

In [13]:
# Train a ranking-factorization recommender on the training split. No `target`
# is passed; the recorded output shows RMSE being skipped at evaluation time
# for that reason, leaving only the precision/recall summary.
ranking_columns = dict(user_id="userId", item_id="movieId")
model2 = graphlab.ranking_factorization_recommender.create(training_subset, **ranking_columns)

# Evaluate on the held-out validation split.
precision_recall_results = model2.evaluate(validation_subset)


Precision and recall summary statistics by cutoff
+--------+----------------+------------------+
| cutoff | mean_precision |   mean_recall    |
+--------+----------------+------------------+
|   1    |      0.24      | 0.00599170505285 |
|   2    |     0.235      | 0.0168678657146  |
|   3    |      0.23      |  0.024776117564  |
|   4    |     0.215      | 0.0325161383251  |
|   5    |     0.202      | 0.0355875558969  |
|   6    | 0.191666666667 | 0.0385272215856  |
|   7    |      0.18      | 0.0433203715991  |
|   8    |     0.1775     | 0.0494925641127  |
|   9    | 0.174444444444 | 0.0536338832024  |
|   10   |     0.166      | 0.0549939505559  |
+--------+----------------+------------------+
[10 rows x 3 columns]



###### [WARNING] Model trained without a target. Skipping RMSE computation.
