**The task is to make a hybrid recommendation system.**

In [68]:
from surprise import KNNWithMeans, SVD
from surprise import accuracy
from surprise import Reader
from surprise import Dataset

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

import math

import pandas as pd

from itertools import islice

In [2]:
# Load the movie catalogue and the user ratings from the parent directory.
movies, ratings = map(pd.read_csv, ('../movies.csv', '../ratings.csv'))

In [3]:
# Attach every rating to its movie, then drop rows with any missing value
# (e.g. movies that have no rating at all).
movies_with_ratings = (
    movies
    .join(ratings.set_index('movieId'), on='movieId')
    .reset_index(drop=True)
    .dropna()
)

In [10]:
# Three-column (user, item, rating) frame; the movie title serves as the item id.
dataset = (
    movies_with_ratings[['userId', 'title', 'rating']]
    .rename(columns={'userId': 'uid', 'title': 'iid'})
)

In [5]:
# Bounds of the rating scale, taken from the observed ratings.
min_r, max_r = ratings['rating'].min(), ratings['rating'].max()

In [13]:
# Wrap the ratings frame in a surprise Dataset, using the observed rating bounds.
reader = Reader(rating_scale=(min_r, max_r))
# Columns are passed explicitly in (user, item, rating) order.
data = Dataset.load_from_df(dataset[['uid', 'iid', 'rating']], reader)

In [23]:
# First base model: KNN with means, fitted on every available rating.
algo = KNNWithMeans(min_k=3, k=41)
algo.fit(trainset=data.build_full_trainset())

Computing the msd similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNWithMeans at 0x1ee4776e788>

In [24]:
# Second base model: SVD matrix factorisation, fitted on every available rating.
# SVD starts from randomly initialised factors, so the seed is fixed to make
# the learned model (and everything derived from it) reproducible across runs.
algo2 = SVD(n_factors=17, n_epochs=28, lr_all=0.009, reg_all=0.07, random_state=42)
algo2.fit(data.build_full_trainset())

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1ee488b3d48>

In [27]:
# Inspect the joined frame: one row per (movie, user) rating.
movies_with_ratings

Unnamed: 0,movieId,title,genres,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1.0,4.0,9.649827e+08
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,5.0,4.0,8.474350e+08
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,7.0,4.5,1.106636e+09
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,15.0,2.5,1.510578e+09
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,17.0,4.5,1.305696e+09
...,...,...,...,...,...,...
100849,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,184.0,4.0,1.537109e+09
100850,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,184.0,3.5,1.537110e+09
100851,193585,Flint (2017),Drama,184.0,3.5,1.537110e+09
100852,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,184.0,3.5,1.537110e+09


In [35]:
# preparing dataset for meta model
def get_meta(row):
    """Build one meta-model training row from a single rating.

    Parameters
    ----------
    row : pandas.Series
        A row of ``movies_with_ratings``; its ``userId``, ``title`` and
        ``rating`` fields are read.

    Returns
    -------
    pandas.Series
        ``a1`` and ``a2`` hold the rating estimates of the two base models
        (``algo`` and ``algo2``) for this user/title pair; ``rating`` is the
        true rating, used as the regression target.
    """
    user, title = row['userId'], row['title']

    # Build the Series from a dict with an explicit dtype: a bare ``pd.Series()``
    # triggers a pandas deprecation warning and its default dtype differs
    # between pandas versions.
    return pd.Series(
        {
            'a1': algo.predict(uid=user, iid=title).est,
            'a2': algo2.predict(uid=user, iid=title).est,
            'rating': row['rating'],
        },
        dtype='float64',
    )


# One meta row per rating row.
meta_set = movies_with_ratings.apply(get_meta, axis=1)

In [36]:
# Inspect the meta-features: base-model estimates (a1, a2) next to the true rating.
meta_set

Unnamed: 0,a1,a2,rating
0,4.568164,4.641332,4.0
1,4.049250,3.770081,4.0
2,3.745175,3.754645,4.5
3,3.342836,3.387618,2.5
4,4.527167,4.031425,4.5
...,...,...,...
100849,3.705224,3.651660,4.0
100850,3.705224,3.492670,3.5
100851,3.705224,3.568342,3.5
100852,3.705224,3.601968,3.5


In [38]:
# Hold out 20% of the meta-features to evaluate the blending model. The seed is
# fixed so that the split, and therefore the reported RMSE, is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    meta_set[['a1', 'a2']],
    meta_set['rating'],
    test_size=0.2,
    random_state=42,
)

In [45]:
# Meta-model: a linear blend of the two base estimates without an intercept term.
model = LinearRegression(fit_intercept=False).fit(X_train, y_train)
p = model.predict(X_test)
# RMSE of the blend on the held-out part of the meta set.
math.sqrt(mean_squared_error(y_true=y_test, y_pred=p))

0.723954788669605

In [56]:
# Blended prediction for one (user, movie) pair: a single row holding the
# estimates of both base models, in the order the meta-model expects.
X = [[base.predict(uid=2, iid='Fight Club (1999)').est for base in (algo, algo2)]]
model.predict(X)[0]

4.333897504565961

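The RMSE of the blended model is indeed lower than that of a single base model. Note, however, that both base models were fit on the full dataset, including the rows used here as the meta-model's test split, so this RMSE is an optimistic estimate.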

In [73]:
def recommend_movie(user_id, n=10):
    """Return the top-``n`` movies the user has not rated, ranked by blended score.

    Every candidate title is scored by the two base models (``algo`` and
    ``algo2``); the meta-model ``model`` combines both estimates into the
    final predicted rating.

    Parameters
    ----------
    user_id
        Raw user id as it appears in ``movies_with_ratings['userId']``.
    n : int, default 10
        Maximum number of recommendations to return (non-negative).

    Returns
    -------
    list of (title, predicted_rating) tuples
        Sorted by predicted rating, highest first. Empty when the user has
        already rated every title.
    """
    all_titles = movies_with_ratings['title'].unique()

    # Titles this user has rated. Filtering rows on ``userId != user_id`` is
    # not enough: it keeps every title that anybody else has rated, which
    # includes movies the user has already seen.
    seen = set(
        movies_with_ratings.loc[movies_with_ratings['userId'] == user_id, 'title']
    )
    not_seen = [title for title in all_titles if title not in seen]
    if not not_seen:
        return []

    # Base-model estimates, under the column names the meta-model was fitted on.
    features = pd.DataFrame({
        'a1': [algo.predict(uid=user_id, iid=title).est for title in not_seen],
        'a2': [algo2.predict(uid=user_id, iid=title).est for title in not_seen],
    })

    # One batched meta-model call instead of one predict() per movie.
    scores = model.predict(features)

    # Highest predicted rating first; keep only the first n entries.
    ranked = sorted(zip(not_seen, scores), key=lambda item: item[1], reverse=True)
    return ranked[:n]

In [74]:
# Show the top recommendations for user 5.
recommend_movie(5)

[('Neon Genesis Evangelion: The End of Evangelion (Shin seiki Evangelion Gekijô-ban: Air/Magokoro wo, kimi ni) (1997)',
  4.584931824625935),
 ('Three Billboards Outside Ebbing, Missouri (2017)', 4.492075938910382),
 ('Submarine (2010)', 4.491926920704842),
 ("Adam's Rib (1949)", 4.491428947245444),
 ('Long Goodbye, The (1973)', 4.487095563465912),
 ('Raiders of the Lost Ark: The Adaptation (1989)', 4.470708881849545),
 ("Guess Who's Coming to Dinner (1967)", 4.460227117408882),
 ('Reign Over Me (2007)', 4.453394592375302),
 ('Woman in the Dunes (Suna no onna) (1964)', 4.450363273266603),
 ('Bad Boy Bubby (1993)', 4.4486586603037654)]