In [22]:
import numpy as np
import pickle
import random
# Seed the stdlib RNG so the random.sample() draws made in this notebook are
# reproducible.
random.seed(12)

# Base names of the similarity matrices; other cells in this file append a file
# extension to them before opening anything.
similarity_files = ['director-similarity', 'genre-similarity', 'rating-similarity']
# One weight per entry of similarity_files, stored as little-endian float16.
weights = np.array([1., 1., 1.], dtype='<f2')
# NOTE(review): pickle.load can execute arbitrary code - only load a trusted file.
# The context manager closes the handle that a bare open() call would leak.
with open('movie-id-map.pkl', 'rb') as id_map_file:
    movie_to_id = pickle.load(id_map_file)

In [23]:
import pandas as pd
cm = pd.read_csv('final.csv')
allmv = set(cm.movieId)
del cm
# random.sample() needs a sequence: passing a set is deprecated since Python 3.9
# and raises TypeError from 3.11 on. tuple() is the conversion older versions
# applied internally, so the samples drawn are unchanged where it already worked.
allmv = tuple(allmv)
mymv = random.sample(allmv, 3000)  # ValueError if fewer than 3000 distinct ids
inmv = random.sample(allmv, 10)
# Drop the helpers so only mymv and inmv remain in the notebook namespace.
del allmv
del pd

In [24]:
# Build the combined adjacency matrix. calc_adj_mat is defined in a later cell
# of this file, so that cell has to be executed before this one.
adj_mat = calc_adj_mat()

In [4]:
def calc_adj_mat(chunk_rows: int = 1024):
    """Return the weighted sum of the similarity matrices as a float16 array.

    Every name in the global ``similarity_files`` is opened read-only as the
    raw file ``<name>.npy`` through ``np.memmap`` and scaled by the matching
    entry of the global ``weights`` before being added to the result.

    The memmaps are opened in numpy's default C (row-major) order, so the sum
    is accumulated over contiguous blocks of rows rather than one column at a
    time; the element-wise arithmetic, and therefore the result, is the same.

    Args:
        chunk_rows: Number of rows read from the memmap per step (>= 1).
            Bounds the size of the temporary created by the multiplication.

    Returns:
        Square ``'<f2'`` array with ``len(movie_to_id)`` rows and columns.

    Raises:
        ValueError: If ``chunk_rows`` is smaller than 1.
    """
    if chunk_rows < 1:
        raise ValueError('chunk_rows must be at least 1')
    n_movies = len(movie_to_id)
    adj_mat_t = np.zeros((n_movies, n_movies), dtype='<f2')
    for param, weight in zip(similarity_files, weights):
        npf = np.memmap(param + '.npy', mode='r', shape=adj_mat_t.shape, dtype='<f2')
        for start in range(0, n_movies, chunk_rows):
            stop = start + chunk_rows  # slicing clips this at n_movies
            adj_mat_t[start:stop] += npf[start:stop] * weight
        # Drop the reference so the mapping can be released before the next file.
        del npf
    return adj_mat_t

In [60]:
# Score the movies sampled into mymv. pagerank is defined in a later cell of
# this file, so that cell has to be executed before this one.
pagerank(mymv)

In [58]:
def pagerank(movies: list, epsilon:float = 1e-4, maxiterations:int = 1000,
             adjacency=None, id_map=None):
    """Score ``movies`` by power iteration on their similarity sub-matrix.

    The rows and columns belonging to ``movies`` are cut out of the adjacency
    matrix. Starting from the unit vector on the first movie, the vector is
    repeatedly multiplied from the left onto that sub-matrix and rescaled to
    unit Euclidean length. No damping factor is applied.

    Args:
        movies: Movie keys to rank; each must be a key of ``id_map``.
        epsilon: Iteration stops once no component changes by more than this.
        maxiterations: Upper bound on the number of iterations.
        adjacency: Square matrix indexed by the values of ``id_map``.
            Defaults to the notebook-level ``adj_mat``.
        id_map: Mapping from movie key to row/column index.
            Defaults to the notebook-level ``movie_to_id``.

    Returns:
        Dict mapping every entry of ``movies`` to its component of the final
        vector. Empty input gives an empty dict; if the iterate collapses to
        the zero vector, all scores are 0 instead of NaN.
    """
    if adjacency is None:
        adjacency = adj_mat
    if id_map is None:
        id_map = movie_to_id
    # Nothing to rank; the indexing below cannot handle an empty selection.
    if len(movies) == 0:
        return {}

    inds = np.array([id_map[movie] for movie in movies])
    # Column index array against row index array selects the full sub-matrix.
    mat = adjacency[inds[:, None], inds]
    v = np.zeros(mat.shape[0], dtype='<f4')
    oldv = v.copy()
    v[0] = 1
    iteration = 0
    while np.amax(np.abs(oldv - v)) > epsilon and iteration < maxiterations:
        oldv = v.copy()
        v = np.matmul(v, mat)
        norm = np.linalg.norm(v)
        if norm == 0:
            # The iterate vanished (e.g. no links from the current support);
            # dividing would turn every score into NaN.
            break
        v /= norm
        iteration += 1
    return {movie: v[pos] for pos, movie in enumerate(movies)}

In [30]:
# Scratch check of one power-iteration step on a tiny example: multiply a row
# vector onto a matrix from the left, then rescale it to unit Euclidean length.
a = np.reshape(np.arange(9), (3, 3))
v = np.ones(3)
print(a, v, sep='\n')
x = v @ a
print(x, np.linalg.norm(x), x / np.linalg.norm(x), sep='\n')

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[1. 1. 1.]
[ 9. 12. 15.]
21.213203435596427
[0.42426407 0.56568542 0.70710678]


In [5]:
# Convert each compressed similarity archive into a raw float16 file that can
# be memory-mapped. np.memmap writes the bare array bytes without an NPY
# header, so despite the '.npy' suffix the result must be reopened with
# np.memmap (same dtype and shape), not with np.load.
for param in similarity_files:
    # similarity_files may hold names with or without the '.npz' suffix; the
    # archive on disk always carries it. The output stays at param + '.npy',
    # which is the name calc_adj_mat opens.
    archive = param if param.endswith('.npz') else param + '.npz'
    with np.load(archive, mmap_mode='r') as npf:
        arr = npf['arr_0']
        mm = np.memmap(param+'.npy', mode='w+', dtype='<f2', shape=arr.shape)
        mm[:] = arr[:]  # values are cast to float16 on assignment
        mm.flush()
        # Release the mapping before moving on to the next file.
        del mm
    print('done', param)

done director-similarity.npz
done genre-similarity.npz
done rating-similarity.npz


In [5]:
# Add each stored similarity matrix to its own transpose and write the result
# back over the archive it was read from.
# NOTE(review): this is not idempotent - every run adds the transpose again,
# and the diagonal is doubled each time. Presumably the archives start out
# triangular and this cell is meant to run exactly once per file; confirm
# before re-running.
for param in similarity_files:
    # similarity_files may hold names with or without the '.npz' suffix; the
    # archive on disk always carries it (np.savez_compressed appends it).
    archive = param if param.endswith('.npz') else param + '.npz'
    with np.load(archive, mmap_mode='r') as npf:
        arr = npf['arr_0']
    print('loaded', param)
    arr += arr.T
    print('added', param)
    np.savez_compressed(archive, arr)
    print('saved', param)

loaded director-similarity.npz
added director-similarity.npz
saved director-similarity.npz
loaded genre-similarity.npz
added genre-similarity.npz
saved genre-similarity.npz
