In [1]:
import turicreate as tc
import numpy as np
import pandas as pd
import sklearn.model_selection as sk_ms

## Prepare Data for Recommender

In [23]:
r_cols = ['rating_id', 'dish_id', 'user_id', 'rating', 'review']

ratings = pd.read_csv('rating.csv', sep=',', names=r_cols, encoding='latin-1')

In [24]:
ratings.head()

Unnamed: 0,rating_id,dish_id,user_id,rating,review
0,rating_id,dish_id,user_id,rating,review
1,1,35,1,4,
2,2,12,1,4,
3,3,50,1,2,
4,4,4,1,3,


In [34]:
#ratings = ratings.drop(0, axis=0)
ratings = ratings.drop(['rating_id','review'], axis=1)
ratings.head()

Unnamed: 0,dish_id,user_id,rating
1,35,1,4
2,12,1,4
3,50,1,2
4,4,1,3
5,9,1,3


In [46]:
ratings['dish_id'] = ratings['dish_id'].astype(int)
ratings['user_id'] = ratings['user_id'].astype(int)
ratings['rating'] = ratings['rating'].astype(int)

In [47]:
rating_train, rating_test = sk_ms.train_test_split(ratings, test_size=0.2, random_state=42, shuffle=True)

In [48]:
rating_train.head()

Unnamed: 0,dish_id,user_id,rating
37,46,4,3
32,27,4,5
9,14,1,3
18,27,2,1
7,44,1,1


In [49]:
rating_test.head()

Unnamed: 0,dish_id,user_id,rating
1,35,1,4
6,39,1,0
35,46,4,1
14,34,2,2
45,15,5,4


## Put Data into Turicreate Popularity Recommender

In [50]:
rating_train.shape, rating_test.shape

((46, 3), (12, 3))

In [51]:
train_data = tc.SFrame(rating_train)
test_data = tc.SFrame(rating_test)

In [52]:
popularity_model = tc.popularity_recommender.create(train_data, user_id='user_id', item_id='dish_id', target='rating')


## Basic Popularity Model

In [57]:
popularity_recomm = popularity_model.recommend(users=[1,2,3,4,5,6],k=5)
popularity_recomm.print_rows(num_rows=30)

+---------+---------+-------+------+
| user_id | dish_id | score | rank |
+---------+---------+-------+------+
|    1    |    33   |  5.0  |  1   |
|    1    |    36   |  5.0  |  2   |
|    1    |    47   |  4.2  |  3   |
|    1    |    34   |  4.0  |  4   |
|    1    |    46   |  4.0  |  5   |
|    2    |    33   |  5.0  |  1   |
|    2    |    36   |  5.0  |  2   |
|    2    |    47   |  4.2  |  3   |
|    2    |    12   |  4.0  |  4   |
|    2    |    34   |  4.0  |  5   |
|    3    |    33   |  5.0  |  1   |
|    3    |    36   |  5.0  |  2   |
|    3    |    12   |  4.0  |  3   |
|    3    |    34   |  4.0  |  4   |
|    3    |    46   |  4.0  |  5   |
|    4    |    33   |  5.0  |  1   |
|    4    |    36   |  5.0  |  2   |
|    4    |    47   |  4.2  |  3   |
|    4    |    12   |  4.0  |  4   |
|    4    |    50   |  3.5  |  5   |
|    5    |    12   |  4.0  |  1   |
|    5    |    34   |  4.0  |  2   |
|    5    |    50   |  3.5  |  3   |
|    5    |    38   |  3.5  |  4   |
|

## Collaborative Filtering Model 

In [59]:
item_sim_model = tc.item_similarity_recommender.create(train_data, user_id='user_id', item_id='dish_id', target='rating', similarity_type='cosine')

In [62]:
#Making recommendations
item_sim_recomm = item_sim_model.recommend(users=[1,2,3,4,5,6],k=5)
item_sim_recomm.print_rows(num_rows=30)

+---------+---------+---------------------+------+
| user_id | dish_id |        score        | rank |
+---------+---------+---------------------+------+
|    1    |    26   |  0.503007709980011  |  1   |
|    1    |    2    |  0.503007709980011  |  2   |
|    1    |    38   |  0.4024061645780291 |  3   |
|    1    |    27   |  0.3568362849099295 |  4   |
|    1    |    34   |  0.2633015087672642 |  5   |
|    2    |    40   |  0.548105640070779  |  1   |
|    2    |    4    |  0.548105640070779  |  2   |
|    2    |    48   |  0.548105640070779  |  3   |
|    2    |    10   |  0.5214931283678327 |  4   |
|    2    |    12   |  0.5030077355248588 |  5   |
|    3    |    7    |  0.6721321940422058 |  1   |
|    3    |    36   |  0.6721321940422058 |  2   |
|    3    |    39   |  0.6721321940422058 |  3   |
|    3    |    35   |  0.6030445439474923 |  4   |
|    3    |    2    |  0.5481056571006775 |  5   |
|    4    |    33   |  0.3215598464012146 |  1   |
|    4    |    14   | 0.2596861

## Convert Data into correct CSV format for Mongo 