In [1]:
import keras 

model = keras.models.load_model('../data/model-11-0.64.hdf5')

In [2]:
import pandas as pd
import pickle

# load
with open('../data/df.pickle', 'rb') as f:
    df: pd.DataFrame = pickle.load(f)

from keras.models import Model
from keras.layers import Input, Embedding, Dot, Add, Flatten
from keras.regularizers import l2
from keras.optimizers import SGD

N = df.userId.max() + 1 # number of users
M = df.movie_idx.max() + 1 # number of movies
mu = df.rating.mean()
print(N, M, mu)

283228 53889 3.5304452124932677


In [3]:
import numpy as np

# predict rating
user_ids = np.array([0 for _ in range(100)])
movie_ids = np.array([i for i in range(100)])
print(model.predict([user_ids, movie_ids]) + mu)

[[3.5517342]
 [4.3460045]
 [3.0030994]
 [2.7830217]
 [2.3606212]
 [2.690297 ]
 [3.6369572]
 [3.064438 ]
 [2.6969767]
 [2.1831944]
 [3.30249  ]
 [3.4576218]
 [2.4493232]
 [3.1668854]
 [3.4441833]
 [2.515944 ]
 [3.6993132]
 [3.973466 ]
 [3.6087096]
 [2.5335536]
 [2.2830217]
 [3.746786 ]
 [3.007741 ]
 [2.5374885]
 [2.9016745]
 [3.718927 ]
 [3.745581 ]
 [3.1586273]
 [3.8041706]
 [4.2295394]
 [3.674914 ]
 [2.8749766]
 [4.2067094]
 [3.8277094]
 [3.7806222]
 [3.4395213]
 [3.9659388]
 [3.1651542]
 [2.3910422]
 [3.7228518]
 [3.5965934]
 [4.025488 ]
 [2.9073286]
 [3.4503515]
 [2.135878 ]
 [3.5081854]
 [2.946239 ]
 [4.1264796]
 [3.0472953]
 [3.5979276]
 [4.4918065]
 [3.032384 ]
 [3.9848955]
 [3.8730507]
 [2.5493963]
 [3.1329942]
 [1.9783142]
 [3.7592354]
 [4.2920575]
 [3.903016 ]
 [2.9099827]
 [2.4691887]
 [3.3879771]
 [2.7484093]
 [2.4481769]
 [1.9212872]
 [1.2065713]
 [3.333022 ]
 [3.4925435]
 [3.5476432]
 [3.257946 ]
 [1.7805738]
 [3.6508534]
 [3.5303392]
 [2.7538307]
 [2.0587277]
 [2.5245914]

In [4]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_3 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 input_4 (InputLayer)           [(None, 1)]          0           []                               
                                                                                                  
 embedding_4 (Embedding)        (None, 1, 10)        2832280     ['input_3[0][0]']                
                                                                                                  
 embedding_5 (Embedding)        (None, 1, 10)        538890      ['input_4[0][0]']                
                                                                                              

In [5]:
# feature vector
layer = model.layers[3]
embeddings = layer.get_weights()[0]
print(embeddings, type(embeddings), embeddings.shape)
print(embeddings[1], type(embeddings[1]))


[[ 0.03475061 -0.03351479 -0.04821135 ... -0.00899982 -0.01128197
  -0.03497137]
 [-0.21813284 -0.4215871  -1.1773993  ...  0.72377855 -0.8283253
  -0.7409931 ]
 [-0.23580888 -0.8799319  -0.36925396 ...  0.6117956   0.23890261
   0.12726311]
 ...
 [-0.17651837  0.0057981  -0.10137367 ...  0.06385631  0.15607937
  -0.18796821]
 [ 0.00939851  0.00128566  0.04321404 ... -0.03340289 -0.02494802
   0.02705743]
 [-0.04039875  0.00259182 -0.05556618 ... -0.00821041 -0.05843848
   0.03771361]] <class 'numpy.ndarray'> (53889, 10)
[-0.21813284 -0.4215871  -1.1773993   0.3369268   0.8653104  -0.43065664
 -1.2269403   0.72377855 -0.8283253  -0.7409931 ] <class 'numpy.ndarray'>


In [6]:
from sklearn.metrics.pairwise import cosine_similarity

def similariy(i, j):
    return cosine_similarity([embeddings[i]], [embeddings[j]])[0][0]

def similar(movie):
    similar = []
    for i in range(len(embeddings)):
        similar.append((i, similariy(movie, i)))
    return sorted(similar, key=lambda t: t[1], reverse=True)

similar(0)

[(0, 1.0000001),
 (52347, 0.9673236),
 (131, 0.947269),
 (43006, 0.91931534),
 (50500, 0.9146307),
 (16013, 0.91179866),
 (27144, 0.907325),
 (5443, 0.9029605),
 (17596, 0.9020531),
 (27018, 0.90146554),
 (1911, 0.894499),
 (47730, 0.8925148),
 (272, 0.89251196),
 (1411, 0.8902431),
 (33256, 0.88851523),
 (42586, 0.8835111),
 (50233, 0.8829199),
 (17244, 0.8807415),
 (1641, 0.874928),
 (16251, 0.87340367),
 (41878, 0.8698675),
 (40526, 0.86875653),
 (3087, 0.86478245),
 (49447, 0.86447704),
 (17584, 0.86407506),
 (41602, 0.8633564),
 (26535, 0.8619393),
 (21139, 0.86172557),
 (28542, 0.8612796),
 (2490, 0.86058307),
 (24970, 0.8579168),
 (45442, 0.85745883),
 (1446, 0.85693705),
 (6787, 0.85515666),
 (49430, 0.85511076),
 (21870, 0.85267055),
 (30102, 0.85258293),
 (19714, 0.84806997),
 (32771, 0.84743434),
 (5687, 0.8464058),
 (34777, 0.844883),
 (314, 0.84464836),
 (26692, 0.8446162),
 (24185, 0.8439287),
 (46393, 0.8437929),
 (52309, 0.8428738),
 (49755, 0.84214735),
 (36051, 0.8414

In [7]:
sorted(enumerate(cosine_similarity([embeddings[10]], embeddings)[0]), key=lambda x:x[1], reverse=True)

[(10, 1.0),
 (1720, 0.95009136),
 (19912, 0.9114113),
 (3079, 0.89514035),
 (7528, 0.8822774),
 (18418, 0.88028884),
 (26579, 0.87770164),
 (2987, 0.86977553),
 (7524, 0.8661754),
 (10376, 0.861804),
 (11094, 0.861098),
 (15409, 0.85774803),
 (32390, 0.8567215),
 (52310, 0.84833837),
 (15145, 0.84703624),
 (34255, 0.8458506),
 (20289, 0.8452946),
 (3632, 0.8452215),
 (25938, 0.8446585),
 (31842, 0.84117985),
 (4002, 0.8408454),
 (1999, 0.8400413),
 (1915, 0.8362547),
 (27410, 0.8358685),
 (14228, 0.8309961),
 (53584, 0.83076674),
 (2986, 0.8302181),
 (1370, 0.8298567),
 (1998, 0.82973635),
 (49540, 0.8288634),
 (50445, 0.8261908),
 (10521, 0.8250419),
 (15863, 0.8250134),
 (47826, 0.8235644),
 (36266, 0.821484),
 (9334, 0.82105345),
 (14923, 0.8193947),
 (32383, 0.8178133),
 (52569, 0.8132109),
 (5869, 0.81218314),
 (3981, 0.8112756),
 (13605, 0.8085181),
 (24916, 0.8045982),
 (39817, 0.8038558),
 (36305, 0.8035333),
 (49030, 0.80323493),
 (6889, 0.8020142),
 (22635, 0.8014576),
 (1997

In [8]:
similar(1)

[(1, 1.0),
 (3111, 0.98129535),
 (49244, 0.9295303),
 (39882, 0.92706907),
 (52867, 0.92172813),
 (6373, 0.9178695),
 (8762, 0.90562415),
 (4883, 0.90548754),
 (15465, 0.9053206),
 (2352, 0.9035988),
 (8029, 0.89974),
 (22525, 0.8990438),
 (44409, 0.89489615),
 (38364, 0.89015615),
 (47724, 0.886063),
 (31275, 0.88562405),
 (49243, 0.8852348),
 (43762, 0.8847831),
 (22086, 0.8839524),
 (43694, 0.8833426),
 (19100, 0.877581),
 (17877, 0.87646425),
 (50714, 0.8723194),
 (15476, 0.8695186),
 (49681, 0.8650487),
 (11514, 0.8630124),
 (46882, 0.85911703),
 (50487, 0.8588194),
 (51837, 0.8577059),
 (42427, 0.85504526),
 (23297, 0.8489586),
 (11491, 0.84781563),
 (21268, 0.84766316),
 (29594, 0.8445034),
 (39829, 0.8443408),
 (19139, 0.84294355),
 (24932, 0.8425225),
 (38232, 0.8417859),
 (40753, 0.84162056),
 (24722, 0.84150267),
 (30176, 0.8410421),
 (17977, 0.83805144),
 (18601, 0.83728576),
 (34198, 0.837205),
 (3748, 0.83681285),
 (23253, 0.8362069),
 (31354, 0.8354684),
 (44026, 0.83414

In [9]:
from sklearn.metrics.pairwise import cosine_similarity
from sortedcontainers import SortedList

def similariy(i, j):
    return cosine_similarity([embeddings[i]], [embeddings[j]])[0][0]

def similar(movie, max):
    sl = SortedList()
    for i in range(len(embeddings)):
        sl.add((-similariy(movie, i), i))
        if len(sl) > max:
            del sl[-1]
    return sl

similar(0, 10)

SortedList([(-1.0000001, 0), (-0.9673236, 52347), (-0.947269, 131), (-0.91931534, 43006), (-0.9146307, 50500), (-0.91179866, 16013), (-0.907325, 27144), (-0.9029605, 5443), (-0.9020531, 17596), (-0.90146554, 27018)])

In [17]:
model.layers[6].get_weights()[0][1][0]

0.27616036