# Two types of recommender system are built here:
# 1- Popularity-based: recommends the most popular movies (ranked here by mean rating) to every user; it does not use metadata such as genres, producers, or actors.
# 2- Collaborative filtering: the behavior of a group of users is used to make recommendations to other users.
# There are two categories of Collaborative Filtering:
# 1- User-based: In this model products are recommended to a user based on the fact that the products have been liked by users similar to the user.

# 2- Item-based: This model identifies similar items based on users' previous ratings.


In [1]:
import pandas as pd
import numpy as np
import graphlab
import graphlab as gl
from graphlab import SFrame
import matplotlib as mpl
mpl.use('TkAgg')
from matplotlib import pyplot as plt

In [2]:

# Load the ratings file. `read_table` defaults to tab-separated input, and the
# file has no header row, so the column names are supplied explicitly.
# NOTE(review): the counts printed below (100000 / 1682 / 943) look like the
# MovieLens 100K `u.data` file -- confirm.
col_names = ["user_id", "item_id", "rating", "timestamp"]
data = pd.read_table("u.data", names=col_names)

# The timestamp is not used by any recommender in this notebook. Pass `axis`
# by keyword: the positional form `drop("timestamp", 1)` is deprecated and is
# rejected by pandas 2.x.
data = data.drop("timestamp", axis=1)
data.info()

# Distribution of the rating values.
plt.hist(data["rating"])
plt.show()

# Dataset size: total ratings, distinct movies, distinct users.
Number_Ratings = len(data)
Number_Movies = data["item_id"].nunique()
Number_Users = data["user_id"].nunique()
print("__________________________________________________")
print(Number_Ratings)
print(Number_Movies)
print(Number_Users)
print("__________________________________________________")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 3 columns):
user_id    100000 non-null int64
item_id    100000 non-null int64
rating     100000 non-null int64
dtypes: int64(3)
memory usage: 2.3 MB
__________________________________________________
100000
1682
943
__________________________________________________


# Here we split the data into train and test sets: train gets 70% of the data and test gets 30%.

In [3]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` now lives in `sklearn.model_selection`. Fall back to
# the old location only for installations that predate 0.18.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split

# Hold out 30% of the ratings for testing. The seed is fixed so the split (and
# therefore every model trained on `train`) is reproducible between runs.
train, test = train_test_split(data, test_size=0.3, random_state=0)

# Here I'm using SFrame to hold the data.
# SFrame is a tabular, column-mutable dataframe object that can scale to big data, which allows you to work with datasets that are larger than the amount of RAM on your system.

In [4]:
# Construct SFrames: first from an (empty) pandas DataFrame, then from a file.
import pandas
df = pandas.DataFrame()
# NOTE(review): this SFrame wraps the empty DataFrame above and is replaced by
# the assignment on the next line.
sf = SFrame(data=df)
# Load 'u.csv' (path relative to the working directory) into an SFrame.
# NOTE(review): the recorded output shows a single `str` column being inferred,
# which suggests the file may not be comma-delimited -- confirm the delimiter.
# NOTE(review): the recommender cells visible in this file build their SFrames
# from `train`, not from `sf` -- confirm whether `sf` is still needed.
sf = SFrame(data='u.csv')

This non-commercial license of GraphLab Create for academic use is assigned to 215028172@student.kfu.edu.sa and will expire on November 06, 2019.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.1 started. Logging: C:\Users\Acer\AppData\Local\Temp\graphlab_server_1544488322.log.0


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


# In the popularity model, when no target column is provided, popularity is determined by the number of observations involving each item. When a target is provided, popularity is computed using the item’s mean target value.

In [5]:
# Banner so this model's output is easy to find in the log.
print("___________________popularity___________________")

# Fit the popularity baseline on the training split, using the rating column
# as the target; no user or item side data is supplied.
popularity_model = gl.recommender.popularity_recommender.create(
    SFrame(train),
    user_id='user_id',
    item_id='item_id',
    target='rating',
    user_data=None,
    item_data=None,
    random_seed=0,
    verbose=True
)

# Top-5 recommendations for users 1 through 5 (5 users x 5 items = 25 rows).
popularity_recomm = popularity_model.recommend(users=[1, 2, 3, 4, 5], k=5)
popularity_recomm.print_rows(num_rows=25)

___________________popularity___________________


+---------+---------+-------+------+
| user_id | item_id | score | rank |
+---------+---------+-------+------+
|    1    |   1491  |  5.0  |  1   |
|    1    |   1189  |  5.0  |  2   |
|    1    |   1653  |  5.0  |  3   |
|    1    |   1358  |  5.0  |  4   |
|    1    |   119   |  5.0  |  5   |
|    2    |   1491  |  5.0  |  1   |
|    2    |   1189  |  5.0  |  2   |
|    2    |   1653  |  5.0  |  3   |
|    2    |   1358  |  5.0  |  4   |
|    2    |   119   |  5.0  |  5   |
|    3    |   1491  |  5.0  |  1   |
|    3    |   1189  |  5.0  |  2   |
|    3    |   1653  |  5.0  |  3   |
|    3    |   1358  |  5.0  |  4   |
|    3    |   119   |  5.0  |  5   |
|    4    |   1491  |  5.0  |  1   |
|    4    |   1189  |  5.0  |  2   |
|    4    |   1653  |  5.0  |  3   |
|    4    |   1358  |  5.0  |  4   |
|    4    |   119   |  5.0  |  5   |
|    5    |   1491  |  5.0  |  1   |
|    5    |   1189  |  5.0  |  2   |
|    5    |   1653  |  5.0  |  3   |
|    5    |   1358  |  5.0  |  4   |
|

# The idea behind factorization is to represent users and items in a lower-dimensional latent space, using matrix factorization as well as factorization machine models.

In [6]:
# Banner so this model's output is easy to find in the log.
print("_________________facrorization____________________")

# Fit a factorization recommender on the training split with 8 latent factors.
# No user or item side data is supplied.
factorization_model = gl.recommender.factorization_recommender.create(
    SFrame(train),
    user_id='user_id',
    item_id='item_id',
    target='rating',
    user_data=None,
    item_data=None,
    # Model shape and regularization.
    num_factors=8,
    regularization=1e-08,
    linear_regularization=1e-10,
    side_data_factorization=True,
    nmf=False,
    binary_target=False,
    # Optimizer settings.
    # NOTE(review): sgd_step_size=0 presumably lets GraphLab choose the step
    # size automatically -- confirm against the GraphLab documentation.
    max_iterations=50,
    sgd_step_size=0,
    random_seed=0,
    solver='auto',
    verbose=True
)

# Top-5 recommendations for users 1 through 5 (5 users x 5 items = 25 rows).
factorization_recomm = factorization_model.recommend(users=[1, 2, 3, 4, 5], k=5)
factorization_recomm.print_rows(num_rows=25)

_________________facrorization____________________


+---------+---------+---------------+------+
| user_id | item_id |     score     | rank |
+---------+---------+---------------+------+
|    1    |   647   | 5.88846325595 |  1   |
|    1    |   1558  | 5.76875233371 |  2   |
|    1    |   1449  | 5.29550504405 |  3   |
|    1    |   169   | 5.22946226317 |  4   |
|    1    |   408   | 5.19984125811 |  5   |
|    2    |   1142  | 5.22631316073 |  1   |
|    2    |   963   | 5.20112697966 |  2   |
|    2    |   1512  | 5.19474129565 |  3   |
|    2    |   1449  | 5.18102841265 |  4   |
|    2    |   135   | 5.11270611174 |  5   |
|    3    |   525   | 5.18382458109 |  1   |
|    3    |   1167  |  5.0968926372 |  2   |
|    3    |   493   | 5.09543780703 |  3   |
|    3    |   647   | 5.08862523455 |  4   |
|    3    |   165   | 5.02265958208 |  5   |
|    4    |   1512  | 6.85721234698 |  1   |
|    4    |   1558  | 6.82097010035 |  2   |
|    4    |   641   | 6.69114617724 |  3   |
|    4    |   135   | 6.61692289728 |  4   |
|    4    

# Item similarity is used to create a recommender based on the users that items have in common.
# It uses the Jaccard similarity type.
# Jaccard similarity measures the similarity between two sets of elements: the size of their intersection divided by the size of their union.
![](jaccard.png)

In [7]:

# Banner so this model's output is easy to find in the log.
print("___________________item similarity_____________________")

# Fit an item-similarity recommender on the training split, comparing items
# with the Jaccard similarity type.
item_similarity_model = gl.item_similarity_recommender.create(
    SFrame(train),
    user_id='user_id',
    item_id='item_id',
    target='rating',
    similarity_type='jaccard'
)

# Top-5 recommendations for users 1 through 5 (5 users x 5 items = 25 rows).
item_similarity_recomm = item_similarity_model.recommend(users=[1, 2, 3, 4, 5], k=5)
item_similarity_recomm.print_rows(num_rows=25)

___________________item similarity_____________________


+---------+---------+-----------------+------+
| user_id | item_id |      score      | rank |
+---------+---------+-----------------+------+
|    1    |   265   | 0.0966830197926 |  1   |
|    1    |   423   | 0.0956051677138 |  2   |
|    1    |   566   | 0.0952553369187 |  3   |
|    1    |    82   | 0.0940276128905 |  4   |
|    1    |   204   | 0.0915050997839 |  5   |
|    2    |   117   |  0.109276761842 |  1   |
|    2    |   121   |  0.106949445813 |  2   |
|    2    |    7    |  0.106531629729 |  3   |
|    2    |    1    | 0.0974355492481 |  4   |
|    2    |   118   | 0.0944340353788 |  5   |
|    3    |   333   | 0.0938007673796 |  1   |
|    3    |   307   | 0.0928900715183 |  2   |
|    3    |   328   |  0.09112144919  |  3   |
|    3    |   258   | 0.0862946036984 |  4   |
|    3    |   678   | 0.0832795132609 |  5   |
|    4    |   333   | 0.0971478765661 |  1   |
|    4    |   300   | 0.0941385843537 |  2   |
|    4    |    7    | 0.0871936949817 |  3   |
|    4    |  