In [None]:
import os
from datetime import datetime
from os import path

import turicreate as tc
from sklearn.metrics import mean_absolute_error

# Directory holding the ml-100k data files read below (u1.base, u1.test, u.item).
# Set the ML100K_DIR environment variable to point at another copy of the data;
# when it is unset, the original hard-coded location is used, so existing
# setups keep working unchanged.
data_dir = os.environ.get('ML100K_DIR', '/Users/sivanr/PycharmProjects/eCommerce/ml-100k')

# Rating files: tab-delimited, read without a header row.
r_cols = ['user_id', 'movie_id', 'rating', 'timestamp']
ratings_base = tc.SFrame.read_csv(path.join(data_dir, 'u1.base'), delimiter='\t',  header=False)
ratings_test = tc.SFrame.read_csv(path.join(data_dir, 'u1.test'), delimiter='\t',  header=False)

# Item file: pipe-delimited, read without a header row.
m_cols = ['movie_id', 'movie_title', 'release_date', 'video_release_date',
          'IMDb_URL', 'unknown', 'Action', 'Adventure', 'Animation', 'Children',
          'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir',
          'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

movies = tc.SFrame.read_csv(path.join(data_dir, 'u.item'), delimiter='|',  header=False)


# Changing column names
# The frames were read with header=False, so their current column names are
# mapped positionally onto the descriptive names declared above. The mapping
# built from ratings_base is reused for ratings_test.
# NOTE(review): zip() stops at the shorter sequence, so a column-count mismatch
# would leave some columns un-renamed without raising — confirm counts match.
new_rating_columns = dict(zip(ratings_base.column_names(), r_cols))
new_movies_columns = dict(zip(movies.column_names(), m_cols))
ratings_training = ratings_base.rename(new_rating_columns)
ratings_testing = ratings_test.rename(new_rating_columns)
movies = movies.rename(new_movies_columns)
# Last expression of the cell: display the training ratings frame.
ratings_training

In [None]:
def evaluate_model(model, test_data):
    """Score a trained model by mean absolute error on ``test_data``.

    Args:
        model: Object exposing ``predict(test_data)``.
        test_data: Data passed to ``model.predict``; its ``'rating'`` column
            supplies the true values.

    Returns:
        The result of ``mean_absolute_error(true ratings, predictions)``.
    """
    predicted_ratings = model.predict(test_data)
    actual_ratings = test_data['rating']
    return mean_absolute_error(y_true=actual_ratings, y_pred=predicted_ratings)


In [None]:
# Ranking-factorization recommender: fit on the training ratings, then score
# with MAE on the test ratings. The measured duration spans both the fit and
# the evaluation.
start = datetime.now()
ranking_factorization_model = tc.ranking_factorization_recommender.create(
    ratings_training,
    user_id='user_id',
    item_id='movie_id',
    target='rating',
)
ranking_factorization_mae = evaluate_model(
    ranking_factorization_model,
    ratings_testing,
)
ranking_factorization_duration = datetime.now() - start

print("************* ranking_factorization *****************")
print(
    "MAE: ", ranking_factorization_mae,
    ", duration: ", ranking_factorization_duration,
)


In [None]:
# Factorization recommender: fit on the training ratings, then score with MAE
# on the test ratings. The measured duration spans both the fit and the
# evaluation.
start = datetime.now()
factorization_model = tc.factorization_recommender.create(
    ratings_training,
    user_id='user_id',
    item_id='movie_id',
    target='rating',
)
factorization_mae = evaluate_model(
    factorization_model,
    ratings_testing,
)
factorization_duration = datetime.now() - start

print("************* factorization *****************")
print(
    "MAE: ", factorization_mae,
    ", duration: ", factorization_duration,
)


In [None]:
# Item-similarity recommender: fit on the training ratings, then score with
# MAE on the test ratings. The measured duration spans both the fit and the
# evaluation.
start = datetime.now()
item_similarity_model = tc.item_similarity_recommender.create(
    ratings_training,
    user_id='user_id',
    item_id='movie_id',
    target='rating',
)
item_similarity_mae = evaluate_model(
    item_similarity_model,
    ratings_testing,
)
item_similarity_duration = datetime.now() - start

print("************* item_similarity *****************")
print(
    "MAE: ", item_similarity_mae,
    ", duration: ", item_similarity_duration,
)


In [None]:
# Item-content recommender: built from the movies frame as item data plus the
# training ratings as observations, then scored with MAE on the test ratings.
# The measured duration spans both the fit and the evaluation.
start = datetime.now()
item_content_model = tc.item_content_recommender.create(
    item_data=movies,
    item_id='movie_id',
    observation_data=ratings_training,
    user_id='user_id',
    target='rating',
)
item_content_mae = evaluate_model(
    item_content_model,
    ratings_testing,
)
item_content_duration = datetime.now() - start

print("************* item_content *****************")
print(
    "MAE: ", item_content_mae,
    ", duration: ", item_content_duration,
)


In [None]:
# compare models
# Each display name sits next to its trained model so the two lists passed to
# compare_models are derived from one source and stay in the same order.
labelled_models = [
    ("ranking_factorization", ranking_factorization_model),
    ("factorization", factorization_model),
    ("item_similarity", item_similarity_model),
    ("item_content", item_content_model),
]
tc.recommender.util.compare_models(
    ratings_testing,
    [model for _, model in labelled_models],
    model_names=[name for name, _ in labelled_models],
)
