## SVD

In [4]:
from surprise import SVD, SVDpp
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split

import matplotlib.pyplot as plt

from tqdm import tqdm_notebook

from sklearn.feature_extraction.text import TfidfTransformer, CountVectorizer
from sklearn.neighbors import NearestNeighbors

import pandas as pd
import numpy as np

In [5]:
# Core tables: the movie catalogue and the per-user ratings.
movies = pd.read_csv('../data/movies.csv')
ratings = pd.read_csv('../data/ratings.csv')

# Auxiliary tables: external links and user-supplied tags.
links = pd.read_csv('../data/links.csv')
tags = pd.read_csv('../data/tags.csv')

In [6]:
# Attach every rating to its movie row, renumber the rows, then drop the rows
# that contain missing values (e.g. movies for which the join found no rating).
movies_with_ratings = (
    movies
    .join(ratings.set_index('movieId'), on='movieId')
    .reset_index(drop=True)
    .dropna()
)

In [7]:
# Preview the first rows of the joined movie/rating table.
movies_with_ratings.head()

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1.0,4.0,964982700.0
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5.0,4.0,847435000.0
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7.0,4.5,1106636000.0
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15.0,2.5,1510578000.0
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17.0,4.5,1305696000.0


In [8]:
# Build the (user, item, rating) table handed to Surprise.
# Surprise's Dataset.load_from_df reads the three columns by position, so the
# order is fixed explicitly here instead of relying on dict insertion order.
# The movie title (not movieId) is used as the item id.
dataset = (
    movies_with_ratings
    .loc[:, ['userId', 'title', 'rating']]
    .rename(columns={'userId': 'uid', 'title': 'iid'})
)

In [9]:
# Declare the rating scale (0.5 to 5.0) and wrap the frame as a Surprise dataset.
reader = Reader(rating_scale=(0.5, 5.0))
data = Dataset.load_from_df(df=dataset, reader=reader)

In [10]:
# Hold out 15% of the ratings for evaluation; the fixed seed keeps the split repeatable.
trainset, testset = train_test_split(
    data,
    test_size=0.15,
    random_state=42,
)

In [11]:
%%time
algo = SVD(n_factors=20, n_epochs=20)
algo.fit(trainset)

Wall time: 2.32 s


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x2c0746f0f98>

In [12]:
# Estimate a rating for every (user, item) pair in the held-out test split.
test_pred = algo.test(testset)

In [13]:
# Root-mean-squared error of the test predictions; verbose=True also prints it.
accuracy.rmse(test_pred, verbose=True)

RMSE: 0.8671


0.8671133939993214

In [14]:
# Spot check: estimated rating of one title for one user (ids are the raw
# userId / title values the model was trained on).
algo.predict(uid=2.0, iid='Mortal Kombat (1995)').est

2.5795465740953247

## Last

In [15]:
import numpy as np

from lightfm.datasets import fetch_movielens

# Load the MovieLens data bundled with LightFM's dataset helper.
# NOTE(review): this looks like a different dataset from the CSV files read at
# the top of the notebook, so user ids and titles may not line up between the
# two - confirm before mixing them.
movielens = fetch_movielens()



In [16]:
# List each entry of the fetched dataset with its container type and shape.
for name in movielens:
    entry = movielens[name]
    print(name, type(entry), entry.shape)

train <class 'scipy.sparse.coo.coo_matrix'> (943, 1682)
test <class 'scipy.sparse.coo.coo_matrix'> (943, 1682)
item_features <class 'scipy.sparse.csr.csr_matrix'> (1682, 1682)
item_feature_labels <class 'numpy.ndarray'> (1682,)
item_labels <class 'numpy.ndarray'> (1682,)


In [17]:
# Pull the train / test interaction matrices out of the dataset mapping.
train, test = movielens['train'], movielens['test']

In [18]:
from lightfm import LightFM
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import auc_score

# Baseline LightFM model with default hyper-parameters. The seed makes the
# learned embeddings, and therefore the precision figures, repeatable.
model = LightFM(random_state=42)
model.fit(train, epochs=10)

# Mean precision@10 over users. For the test score, train_interactions is passed
# so that pairs already seen during training are left out of the ranking.
train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10, train_interactions=train).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))

Precision: train 0.43, test 0.13.


In [19]:
# Second LightFM model, this time trained with the WARP ranking loss.
# NOTE: this rebinds `model`, replacing the baseline model defined earlier.
# The seed makes the learned embeddings, and therefore the precision figures,
# repeatable.
model = LightFM(learning_rate=0.05, loss='warp', random_state=42)

model.fit_partial(train, epochs=10)

# Mean precision@10 over users. For the test score, train_interactions is passed
# so that pairs already seen during training are left out of the ranking.
train_precision = precision_at_k(model, train, k=10).mean()
test_precision = precision_at_k(model, test, k=10, train_interactions=train).mean()

print('Precision: train %.2f, test %.2f.' % (train_precision, test_precision))

Precision: train 0.61, test 0.23.


In [31]:
def sample_recommendation(model, data, user_id, top_n=10, rating_model=None, ratings_frame=None):
    """Print the unseen movies with the highest estimated rating for one user.

    Every title in ``data['item_labels']`` that the user has not already rated
    (according to ``ratings_frame``) is scored with
    ``rating_model.predict(uid=user_id, iid=title).est``. The ``top_n`` titles
    with the largest estimates are printed, best first, one per line as
    ``<title> <estimate>``.

    The scores returned by ``model`` only fix the order in which candidates are
    visited; the final ranking is by the rating estimate, so ``model`` can
    affect the output only where estimates tie.

    Parameters
    ----------
    model
        Object exposing ``predict(user_id, item_ids)`` that returns one score
        per item id (the notebook passes a fitted LightFM model).
    data
        Mapping with a ``'train'`` entry whose ``.shape`` is
        ``(n_users, n_items)`` and an ``'item_labels'`` array of titles indexed
        by item position.
    user_id
        Passed to ``model.predict``, compared against ``ratings_frame.userId``
        and passed as ``uid`` to ``rating_model.predict``.
    top_n : int, default 10
        Maximum number of recommendations to print; must be non-negative.
    rating_model : optional
        Object exposing ``predict(uid=..., iid=...)`` whose result has an
        ``est`` attribute. Defaults to the notebook-level ``algo``.
    ratings_frame : optional
        DataFrame with ``userId`` and ``title`` columns listing what each user
        has rated. Defaults to the notebook-level ``movies_with_ratings``.

    Returns
    -------
    None
        The recommendations are printed, not returned.

    Raises
    ------
    ValueError
        If ``top_n`` is negative.

    NOTE(review): the same ``user_id`` is used as the user argument of ``model``
    and as the ``userId`` / ``uid`` value for ``ratings_frame`` and
    ``rating_model``. If those come from different datasets the ids (and the
    titles) may not refer to the same users and movies - confirm.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Fall back to the notebook globals so existing three-argument calls keep working.
    if rating_model is None:
        rating_model = algo
    if ratings_frame is None:
        ratings_frame = movies_with_ratings

    # Titles the user has already rated; a set gives constant-time membership tests.
    seen_titles = set(ratings_frame.loc[ratings_frame.userId == user_id, 'title'])

    # Visit items in descending order of the ranking model's score.
    n_items = data['train'].shape[1]
    ranking_scores = model.predict(user_id, np.arange(n_items))
    ranked_titles = data['item_labels'][np.argsort(-ranking_scores)]

    titles = [title for title in ranked_titles if title not in seen_titles]
    estimates = [rating_model.predict(uid=user_id, iid=title).est for title in titles]

    # Reverse the ascending argsort and slice from the front: this is the same
    # selection as taking the last ``top_n`` entries, but it is also correct for
    # ``top_n == 0`` (a ``[-0:]`` slice would return everything).
    best_first = np.argsort(estimates)[::-1][:top_n]

    print("User %s" % user_id)
    print("     Recommended:")

    for i in best_first:
        print(titles[i], estimates[i])

In [34]:
# Print recommendations for user id 10, using the most recently fitted `model`.
sample_recommendation(model, movielens, 10)

User 10
     Recommended:
Raging Bull (1980) 4.167839841220732
Cool Hand Luke (1967) 4.137794986554103
Lawrence of Arabia (1962) 4.126109363505988
Patton (1970) 4.125933673404083
Shawshank Redemption, The (1994) 4.069476359400483
Philadelphia Story, The (1940) 4.057726388600803
North by Northwest (1959) 4.048459939946542
Streetcar Named Desire, A (1951) 4.040762060999129
Singin' in the Rain (1952) 4.035567026294406
Rear Window (1954) 4.014467586551486
