# Voorspellen

In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model

pd.set_option('display.max_columns', None)

def save(obj, path):
    with open(path, 'bw') as file:
        pickle.dump(obj, file)

def load(path):
    with open(path, 'br') as file:
        return pickle.load(file)

def drop_seen(user_id, X, ratings):
    seen_ids = ratings[ratings['user_id'] == user_id]['movie_id'].unique()
    return X[~X['movie_id'].isin(seen_ids)]

def join_user(X, user_id):
    return (
        pd
            .DataFrame({'user_id': user_id}, X.index)
            .join(X)
    )

def to_X(user_id, X, ratings):
    return join_user(drop_seen(user_id, X, ratings), user_id)

def to_inputs_cf(X):
    return [X['user_id'], X['movie_id']]

def to_inputs_hy(X):
    return [X['user_id'], X['movie_id'], X.drop(columns=['user_id', 'movie_id'])]

def predict(inputs, model):
    return inputs[1], model.predict(inputs)

def predict_rg(inputs, model):
    movie_ids, ratings = predict(inputs, model)
    return (
        pd
            .DataFrame({'movie_id': movie_ids.values, 'rating': ratings.reshape(-1)})
            .sort_values('rating', ascending=False)
    )

def predict_cl(inputs, model):
    movie_ids, ratings = predict(inputs, model)
    classes = ratings.argmax(axis=1)
    return (
        pd
            .DataFrame(
                {
                    'movie_id': movie_ids.values, # Don't reuse index of movie_ids so ratings can be joined
                    'rating': classes + 1,
                    'confidence': ratings[range(len(ratings)), classes]
                }
            )
            .join(pd.DataFrame(ratings))
            .sort_values(['rating', 'confidence'], ascending=False)
    )

def movie_url(imdb_id):
    return f'https://www.imdb.com/title/tt{imdb_id:07}/'

def join_desc(preds, movies):
    ps = preds.merge(movies, 'left', 'movie_id')
    ps = (
        ps
            .drop(columns='imdb_id')
            .join(ps['imdb_id'].apply(movie_url))
    )
    rating_cols = list(preds.columns)[1:]
    ps = ps[['title', 'imdb_id'] + rating_cols]
    ps.columns = ['title', 'movie_url'] + rating_cols
    return ps

def top(preds, movies, n=5):
    return join_desc(preds, movies).head(n)

def drop_active(X, ratings, z=3, z_left=None, z_right=None):
    ratings_movie = ratings.groupby('movie_id')['rating'].count()
    x, s = ratings_movie.mean(), ratings_movie.std()
    z_l = z_left or ((not z_right) and z) or None
    z_r = z_right or ((not z_left) and z) or None
    l = (z_l and x - s * z_l) or 1
    r = (z_r and x + s * z_r) or ratings_movie.max()
    active_ids = ratings_movie[np.logical_or(ratings_movie < l, ratings_movie > r)].index.values
    return X[~X['movie_id'].isin(active_ids)]

# Data set
Movies, ratings en users.

In [2]:
movies = pd.read_parquet('data/movies_active.parquet')
ratings = pd.read_parquet('data/ratings_active.parquet')
users = pd.read_parquet('data/users_active.parquet')

Features zonder gebruiker.

In [3]:
year_pop_transformer = load('data/train_test_active.pickle')[2]
transform_cols = ['year', 'popularity']
X = movies.drop(columns=['imdb_id', 'title'])
X = (
    pd
        .DataFrame(year_pop_transformer.transform(X[transform_cols]), X.index, transform_cols)
        .join(X.drop(columns=transform_cols))
        .reindex(columns=X.columns)
)

Features voor specifieke gebruikers.

In [4]:
user_id_test = users[users['twitter_id'] == 0]['user_id'].values[0]
user_id_1, user_id_2, user_id_3 = (
    ratings
        .groupby('user_id')['rating']
        .count()
        .sort_values(ascending=False)
        .iloc[:3]
)
X_test = to_X(user_id_test, X, ratings)
X_1 = to_X(user_id_1, X, ratings)
X_2 = to_X(user_id_2, X, ratings)
X_3 = to_X(user_id_3, X, ratings)

# Modellen

In [5]:
movies_cf_r     = load_model('data/movies_cf_r.h5')
movies_cf_r_1   = load_model('data/movies_cf_r_1.h5')
movies_cf_r_2   = load_model('data/movies_cf_r_2.h5')
movies_cf_r_d   = load_model('data/movies_cf_r_d.h5')
movies_cf_r_d_1 = load_model('data/movies_cf_r_d_1.h5')
movies_cf_r_d_2 = load_model('data/movies_cf_r_d_2.h5')
movies_cf_r_d_3 = load_model('data/movies_cf_r_d_3.h5')
movies_h_r      = load_model('data/movies_h_r.h5')
movies_h_r_1    = load_model('data/movies_h_r_1.h5')
movies_cf_c     = load_model('data/movies_cf_c.h5')
movies_cf_c_1   = load_model('data/movies_cf_c_1.h5')
movies_cf_c_2   = load_model('data/movies_cf_c_2.h5')
movies_cf_c_d   = load_model('data/movies_cf_c_d.h5')
movies_cf_c_d_1 = load_model('data/movies_cf_c_d_1.h5')
movies_cf_c_d_2 = load_model('data/movies_cf_c_d_2.h5')
movies_cf_c_d_3 = load_model('data/movies_cf_c_d_3.h5')
movies_h_c      = load_model('data/movies_h_c.h5')
movies_h_c_1    = load_model('data/movies_h_c_1.h5')

In [6]:
predict_cf_r     = lambda X: predict_rg(to_inputs_cf(X), movies_cf_r)
predict_cf_r_1   = lambda X: predict_rg(to_inputs_cf(X), movies_cf_r_1)
predict_cf_r_2   = lambda X: predict_rg(to_inputs_cf(X), movies_cf_r_2)
predict_cf_r_d   = lambda X: predict_rg(to_inputs_cf(X), movies_cf_r_d)
predict_cf_r_d_1 = lambda X: predict_rg(to_inputs_cf(X), movies_cf_r_d_1)
predict_cf_r_d_2 = lambda X: predict_rg(to_inputs_cf(X), movies_cf_r_d_2)
predict_cf_r_d_3 = lambda X: predict_rg(to_inputs_cf(X), movies_cf_r_d_3)
predict_h_r      = lambda X: predict_rg(to_inputs_hy(X), movies_h_r)
predict_h_r_1    = lambda X: predict_rg(to_inputs_hy(X), movies_h_r_1)
predict_cf_c     = lambda X: predict_cl(to_inputs_cf(X), movies_cf_c)
predict_cf_c_1   = lambda X: predict_cl(to_inputs_cf(X), movies_cf_c_1)
predict_cf_c_2   = lambda X: predict_cl(to_inputs_cf(X), movies_cf_c_2)
predict_cf_c_d   = lambda X: predict_cl(to_inputs_cf(X), movies_cf_c_d)
predict_cf_c_d_1 = lambda X: predict_cl(to_inputs_cf(X), movies_cf_c_d_1)
predict_cf_c_d_2 = lambda X: predict_cl(to_inputs_cf(X), movies_cf_c_d_2)
predict_cf_c_d_3 = lambda X: predict_cl(to_inputs_cf(X), movies_cf_c_d_3)
predict_h_c      = lambda X: predict_cl(to_inputs_hy(X), movies_h_c)
predict_h_c_1    = lambda X: predict_cl(to_inputs_hy(X), movies_h_c_1)

# Top tien

In [7]:
top_ten = lambda preds, n=10: top(preds, movies, n)

In [8]:
ratings.groupby('movie_id')['rating'].count().describe()

count    32917.000000
mean        19.511651
std         79.595181
min          1.000000
25%          1.000000
50%          2.000000
75%          7.000000
max       1751.000000
Name: rating, dtype: float64

## movies_cf_r_d

In [9]:
top_ten(predict_cf_r_d(X_test))

Unnamed: 0,title,movie_url,rating
0,Aliens,https://www.imdb.com/title/tt0090605/,4.201241
1,Nema-ye Nazdik,https://www.imdb.com/title/tt0100234/,4.170867
2,In the Name of the Father,https://www.imdb.com/title/tt0107207/,4.166696
3,Singin' in the Rain,https://www.imdb.com/title/tt0045152/,4.14507
4,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,4.131241
5,Ayla: The Daughter of War,https://www.imdb.com/title/tt6316138/,4.115903
6,Schindler's List,https://www.imdb.com/title/tt0108052/,4.106551
7,Shichinin no samurai,https://www.imdb.com/title/tt0047478/,4.100225
8,Se7en,https://www.imdb.com/title/tt0114369/,4.098431
9,Ce que le jour doit à la nuit,https://www.imdb.com/title/tt1934205/,4.096027


In [10]:
top_ten(predict_cf_r_d(X_1))

Unnamed: 0,title,movie_url,rating
0,Aliens,https://www.imdb.com/title/tt0090605/,5.365054
1,In the Name of the Father,https://www.imdb.com/title/tt0107207/,5.337939
2,Nema-ye Nazdik,https://www.imdb.com/title/tt0100234/,5.318475
3,The Dark Knight,https://www.imdb.com/title/tt0468569/,5.311999
4,Singin' in the Rain,https://www.imdb.com/title/tt0045152/,5.293988
5,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,5.293731
6,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,5.282485
7,Se7en,https://www.imdb.com/title/tt0114369/,5.272576
8,Schindler's List,https://www.imdb.com/title/tt0108052/,5.272473
9,Spider-Man: Into the Spider-Verse,https://www.imdb.com/title/tt4633694/,5.269886


## movies_h_r

In [11]:
top_ten(predict_h_r(X_test))

Unnamed: 0,title,movie_url,rating
0,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,4.166014
1,The Godfather,https://www.imdb.com/title/tt0068646/,4.160283
2,Cinema Paradiso,https://www.imdb.com/title/tt0095765/,4.119936
3,The Green Mile,https://www.imdb.com/title/tt0120689/,4.119329
4,Amadeus,https://www.imdb.com/title/tt0086879/,4.114968
5,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,4.111031
6,12 Angry Men,https://www.imdb.com/title/tt0050083/,4.098608
7,Avengers: Endgame,https://www.imdb.com/title/tt4154796/,4.089588
8,Saving Private Ryan,https://www.imdb.com/title/tt0120815/,4.085806
9,"Il buono, il brutto, il cattivo",https://www.imdb.com/title/tt0060196/,4.082156


In [12]:
top_ten(predict_h_r(drop_active(X_test, ratings, z_left=-2.5, z_right=5)))

Unnamed: 0,title,movie_url,rating
0,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,4.166014
1,The Godfather,https://www.imdb.com/title/tt0068646/,4.160283
2,The Green Mile,https://www.imdb.com/title/tt0120689/,4.119329
3,Saving Private Ryan,https://www.imdb.com/title/tt0120815/,4.085805
4,"Il buono, il brutto, il cattivo",https://www.imdb.com/title/tt0060196/,4.082156
5,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,4.066126
6,Schindler's List,https://www.imdb.com/title/tt0108052/,4.05511
7,La vita è bella,https://www.imdb.com/title/tt0118799/,3.997949
8,A Beautiful Mind,https://www.imdb.com/title/tt0268978/,3.979335
9,Incendies,https://www.imdb.com/title/tt1255953/,3.966434


In [13]:
top_ten(predict_h_r(X_1))

Unnamed: 0,title,movie_url,rating
0,Cinema Paradiso,https://www.imdb.com/title/tt0095765/,5.035201
1,It's a Wonderful Life,https://www.imdb.com/title/tt0038650/,5.010557
2,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,4.973618
3,Saving Private Ryan,https://www.imdb.com/title/tt0120815/,4.971828
4,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,4.97144
5,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,4.965374
6,The Godfather,https://www.imdb.com/title/tt0068646/,4.962259
7,Barry Lyndon,https://www.imdb.com/title/tt0072684/,4.959643
8,"Il buono, il brutto, il cattivo",https://www.imdb.com/title/tt0060196/,4.957294
9,Capharnaüm,https://www.imdb.com/title/tt8267604/,4.91197


In [14]:
top_ten(predict_h_r(drop_active(X_1, ratings, z_left=-2.5, z_right=5)))

Unnamed: 0,title,movie_url,rating
0,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,4.973617
1,Saving Private Ryan,https://www.imdb.com/title/tt0120815/,4.971828
2,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,4.965374
3,The Godfather,https://www.imdb.com/title/tt0068646/,4.962259
4,"Il buono, il brutto, il cattivo",https://www.imdb.com/title/tt0060196/,4.957294
5,Schindler's List,https://www.imdb.com/title/tt0108052/,4.894881
6,La vita è bella,https://www.imdb.com/title/tt0118799/,4.879961
7,The Green Mile,https://www.imdb.com/title/tt0120689/,4.868151
8,The Matrix,https://www.imdb.com/title/tt0133093/,4.80175
9,Warrior,https://www.imdb.com/title/tt1291584/,4.780261


## movies_cf_c_d

In [15]:
top_ten(predict_cf_c_d(X_test))

Unnamed: 0,title,movie_url,rating,confidence,0,1,2,3,4
0,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,5,0.592469,0.010816,0.011059,0.095619,0.290036,0.592469
1,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,5,0.585589,0.013634,0.013238,0.099692,0.287847,0.585589
2,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,5,0.577816,0.007367,0.008413,0.089869,0.316534,0.577816
3,12 Angry Men,https://www.imdb.com/title/tt0050083/,5,0.553436,0.007563,0.009025,0.097284,0.332692,0.553436
4,Interstellar,https://www.imdb.com/title/tt0816692/,5,0.544012,0.018856,0.018699,0.118913,0.29952,0.544012
5,The Godfather,https://www.imdb.com/title/tt0068646/,5,0.501128,0.03848,0.034288,0.139136,0.286967,0.501128
6,It's a Wonderful Life,https://www.imdb.com/title/tt0038650/,5,0.500476,0.007329,0.009602,0.108342,0.374252,0.500476
7,Amadeus,https://www.imdb.com/title/tt0086879/,5,0.490163,0.004565,0.006672,0.102764,0.395837,0.490163
8,3 Idiots,https://www.imdb.com/title/tt1187043/,5,0.458696,0.015694,0.019218,0.141233,0.36516,0.458696
9,Avengers: Endgame,https://www.imdb.com/title/tt4154796/,5,0.455843,0.032178,0.033222,0.154008,0.324749,0.455843


In [16]:
top_ten(predict_cf_c_d(X_1))

Unnamed: 0,title,movie_url,rating,confidence,0,1,2,3,4
0,The Dark Knight,https://www.imdb.com/title/tt0468569/,5,0.929196,6.2e-05,4.5e-05,0.003689,0.067007,0.929196
1,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,5,0.926236,7.4e-05,5.4e-05,0.004167,0.069469,0.926236
2,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,5,0.925205,0.000122,8.2e-05,0.005069,0.069521,0.925205
3,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,5,0.924359,0.000168,0.000104,0.00563,0.069739,0.924359
4,12 Angry Men,https://www.imdb.com/title/tt0050083/,5,0.92194,7.5e-05,5.7e-05,0.004407,0.073521,0.92194
5,Interstellar,https://www.imdb.com/title/tt0816692/,5,0.909558,0.000259,0.000171,0.007672,0.082339,0.909558
6,Amadeus,https://www.imdb.com/title/tt0086879/,5,0.908935,4.3e-05,3.9e-05,0.004085,0.086898,0.908935
7,It's a Wonderful Life,https://www.imdb.com/title/tt0038650/,5,0.907147,8.7e-05,7.1e-05,0.005183,0.087512,0.907147
8,3 Idiots,https://www.imdb.com/title/tt1187043/,5,0.898137,0.000209,0.000161,0.007893,0.093601,0.898137
9,The Godfather,https://www.imdb.com/title/tt0068646/,5,0.895719,0.000675,0.000414,0.01193,0.091261,0.895719


## movies_h_c

In [17]:
top_ten(predict_h_c(X_test))

Unnamed: 0,title,movie_url,rating,confidence,0,1,2,3,4
0,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,5,0.476686,0.006269,0.008822,0.116704,0.39152,0.476686
1,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,5,0.476316,0.007159,0.009503,0.120618,0.386403,0.476316
2,The Lost Weekend,https://www.imdb.com/title/tt0037884/,4,0.686821,0.000735,0.004321,0.237499,0.686821,0.070624
3,Dear Zachary: A Letter to a Son About His Father,https://www.imdb.com/title/tt1152758/,4,0.675218,0.001144,0.003872,0.17432,0.675218,0.145446
4,Prison Terminal: The Last Days of Private Jack...,https://www.imdb.com/title/tt3145026/,4,0.666947,0.00212,0.00684,0.214435,0.666947,0.109658
5,Red Army,https://www.imdb.com/title/tt3264102/,4,0.662327,0.001522,0.006948,0.249594,0.662327,0.079609
6,Rush,https://www.imdb.com/title/tt1979320/,4,0.659955,0.002388,0.009631,0.219021,0.659955,0.109004
7,The Killing,https://www.imdb.com/title/tt0049406/,4,0.657428,0.001533,0.007403,0.247972,0.657428,0.085664
8,Life Itself,https://www.imdb.com/title/tt2382298/,4,0.656392,0.001618,0.007122,0.266923,0.656392,0.067945
9,Captain Phillips,https://www.imdb.com/title/tt1535109/,4,0.655441,0.002398,0.009723,0.240693,0.655441,0.091745


In [18]:
top_ten(predict_h_c(drop_active(X_test, ratings, z_left=-2.5, z_right=5)))

Unnamed: 0,title,movie_url,rating,confidence,0,1,2,3,4
0,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,5,0.476686,0.006269,0.008822,0.116704,0.39152,0.476686
1,Catch Me If You Can,https://www.imdb.com/title/tt0264464/,4,0.647723,0.002256,0.008419,0.231324,0.647723,0.110278
2,Reservoir Dogs,https://www.imdb.com/title/tt0105236/,4,0.643887,0.002778,0.009633,0.223438,0.643887,0.120263
3,The King's Speech,https://www.imdb.com/title/tt1504320/,4,0.63551,0.002205,0.008684,0.259122,0.63551,0.094478
4,Philomena,https://www.imdb.com/title/tt2431286/,4,0.633914,0.002278,0.009866,0.269692,0.633914,0.08425
5,Batman Begins,https://www.imdb.com/title/tt0372784/,4,0.624551,0.003713,0.014683,0.249105,0.624551,0.107948
6,Moneyball,https://www.imdb.com/title/tt1210166/,4,0.620673,0.001404,0.008228,0.327639,0.620673,0.042055
7,The Gentlemen,https://www.imdb.com/title/tt8367814/,4,0.610244,0.004927,0.017059,0.269651,0.610244,0.098119
8,Fargo,https://www.imdb.com/title/tt0116282/,4,0.608949,0.004643,0.015323,0.268883,0.608949,0.102201
9,Full Metal Jacket,https://www.imdb.com/title/tt0093058/,4,0.60337,0.004643,0.014436,0.257396,0.60337,0.120155


In [19]:
top_ten(predict_h_c(X_1))

Unnamed: 0,title,movie_url,rating,confidence,0,1,2,3,4
0,The Shawshank Redemption,https://www.imdb.com/title/tt0111161/,5,0.898352,0.000169,0.000151,0.009491,0.091837,0.898352
1,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,5,0.89561,0.000153,0.000147,0.008719,0.09537,0.89561
2,12 Angry Men,https://www.imdb.com/title/tt0050083/,5,0.890904,7.7e-05,8.1e-05,0.007339,0.101598,0.890904
3,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,5,0.873627,0.000165,0.000167,0.010583,0.115459,0.873627
4,Nuit et brouillard,https://www.imdb.com/title/tt0048434/,5,0.867381,7.4e-05,7.3e-05,0.008297,0.124175,0.867381
5,Interstellar,https://www.imdb.com/title/tt0816692/,5,0.864925,0.0007,0.000642,0.016791,0.116942,0.864925
6,The Godfather,https://www.imdb.com/title/tt0068646/,5,0.860444,0.000556,0.000487,0.017779,0.120734,0.860444
7,La vita è bella,https://www.imdb.com/title/tt0118799/,5,0.856317,0.00033,0.000305,0.017392,0.125657,0.856317
8,Amadeus,https://www.imdb.com/title/tt0086879/,5,0.855299,6.5e-05,8.4e-05,0.009223,0.135329,0.855299
9,It's a Wonderful Life,https://www.imdb.com/title/tt0038650/,5,0.84602,0.000196,0.000203,0.015417,0.138164,0.84602


In [20]:
top_ten(predict_h_c(drop_active(X_1, ratings, z_left=-2.5, z_right=5)))

Unnamed: 0,title,movie_url,rating,confidence,0,1,2,3,4
0,The Lord of the Rings: The Return of the King,https://www.imdb.com/title/tt0167260/,5,0.89561,0.000153,0.000147,0.008719,0.09537,0.89561
1,The Godfather: Part II,https://www.imdb.com/title/tt0071562/,5,0.873627,0.000165,0.000167,0.010583,0.115459,0.873627
2,The Godfather,https://www.imdb.com/title/tt0068646/,5,0.860444,0.000556,0.000487,0.017779,0.120734,0.860444
3,La vita è bella,https://www.imdb.com/title/tt0118799/,5,0.856317,0.00033,0.000305,0.017392,0.125657,0.856317
4,Gladiator,https://www.imdb.com/title/tt0172495/,5,0.841923,0.000506,0.000541,0.018279,0.138751,0.841923
5,Schindler's List,https://www.imdb.com/title/tt0108052/,5,0.839328,0.000234,0.00025,0.014759,0.145429,0.839328
6,"Il buono, il brutto, il cattivo",https://www.imdb.com/title/tt0060196/,5,0.819679,0.00016,0.000183,0.013521,0.166457,0.819679
7,Scent of a Woman,https://www.imdb.com/title/tt0105323/,5,0.816542,0.000135,0.000179,0.013643,0.169501,0.816542
8,Saving Private Ryan,https://www.imdb.com/title/tt0120815/,5,0.811873,0.000351,0.000361,0.019844,0.167571,0.811873
9,The Green Mile,https://www.imdb.com/title/tt0120689/,5,0.807107,0.000139,0.000187,0.01414,0.178427,0.807107
