In [1]:
import numpy as np
import scipy
from scipy import optimize as op
import numba
import pickle
import random
# Seed Python's `random` module so the random.sample() draws in this notebook are repeatable.
random.seed(12)

In [2]:
# Extension-less base paths of the per-feature similarity matrices; the cells
# that read or write them append '.npz' (archive) or '.npy' (raw memmap).
similarity_files = [
    'all_similarities/director-similarity',
    'all_similarities/genre-similarity',
    'all_similarities/rating-similarity',
    'all_similarities/tags-similarity',
]
# One float16 weight per similarity matrix, all starting at 1.
weights = np.ones(shape=(len(similarity_files),), dtype='f2')
# NOTE: pickle.load can execute arbitrary code; only load a map file you trust.
# The context manager closes the file handle, which a bare open() call leaks.
with open('movie-id-map2.pkl', 'rb') as id_map_file:
    movie_to_id = pickle.load(id_map_file)

In [3]:
import pandas as pd

# Only the set of distinct movie ids is needed from the CSV; drop the frame right away.
cm = pd.read_csv('final.csv')
allmv = set(cm.movieId)
del cm
# random.sample() no longer accepts a set (deprecated in Python 3.9, TypeError
# from 3.11), so sample from a sorted list. Sorting also makes the seeded draw
# independent of set iteration order.
movie_pool = sorted(allmv)
mymv = random.sample(movie_pool, 3000)
# Drawn independently of mymv, so the two samples may overlap.
inmv = random.sample(movie_pool, 10)
del allmv, movie_pool
del pd

In [13]:
# Build the combined adjacency matrix from the current weights.
# NOTE(review): calc_adj_mat is defined in a later cell; on a fresh kernel that cell must run first.
adj_mat = calc_adj_mat(weights)

In [25]:
@numba.jit(forceobj=True, parallel=True, fastmath=True)
def calc_adj_mat(wts):
    """Return the weighted sum of the on-disk similarity matrices.

    Each entry of the module-level ``similarity_files`` is paired with the
    weight at the same position in ``wts``. The file ``<entry>.npy`` is opened
    read-only as a raw little-endian float16 memmap, scaled by its weight and
    accumulated into a float16 array of shape
    ``(len(movie_to_id), len(movie_to_id))``, which is returned.
    """
    n_movies = len(movie_to_id)
    total = np.zeros((n_movies, n_movies), dtype='<f2')
    for base_path, w in zip(similarity_files, wts):
        # Memory-map the layer instead of loading it; it must have the accumulator's shape.
        layer = np.memmap(base_path + '.npy', mode='r', shape=total.shape, dtype='<f2')
        total += layer * w
    return total

In [60]:
# pagerank() requires the adjacency matrix as its second positional argument;
# calling it with only the movie list raises TypeError.
pagerank(mymv, adj_mat)

In [4]:
@numba.jit(forceobj=True, fastmath=True, parallel=True)
def pagerank(movies: list, adj_mat: np.ndarray, epsilon: float = 1e-4, maxiterations: int = 1000):
    """Score ``movies`` by power iteration on their sub-block of ``adj_mat``.

    Args:
        movies: Movie ids; each must be a key of the module-level ``movie_to_id``.
        adj_mat: Square matrix indexed on both axes by the values of ``movie_to_id``.
        epsilon: Iteration stops once no component of the score vector changes
            by more than this between two consecutive iterations.
        maxiterations: Upper bound on the number of iterations.

    Returns:
        Dict mapping each movie id to its component of the final L2-normalised
        score vector. Empty dict when ``movies`` is empty.
    """
    if len(movies) == 0:
        # np.vstack raises on an empty index array; there is nothing to rank.
        return {}
    inds = np.array([movie_to_id[m] for m in movies])
    # Column-vector x row-vector fancy indexing selects the
    # len(movies) x len(movies) sub-matrix for the requested movies.
    mat = adj_mat[np.vstack(inds), inds]
    v = np.zeros(mat.shape[0], dtype='<f4')
    oldv = v.copy()
    # Start from a unit vector on the first movie so the first convergence check passes.
    v[0] = 1
    iterations = 0
    while np.amax(np.abs(oldv - v)) > epsilon and iterations < maxiterations:
        oldv = v.copy()
        v = np.matmul(v, mat)
        norm = np.linalg.norm(v)
        if norm == 0:
            # All scores collapsed to zero; dividing would fill the result with NaN.
            break
        v /= norm
        iterations += 1
    return {movie: score for movie, score in zip(movies, v)}

In [None]:
# `sp` is never imported in this notebook, so `sp.lil_matrix` raised NameError.
# Import the sparse submodule explicitly and use it under its own name.
from scipy import sparse

# Load the first similarity matrix and convert it to a list-of-lists sparse matrix.
nm = similarity_files[0]
with np.load(nm+'.npz', 'r') as npf:
    spm = sparse.lil_matrix(npf['arr_0'])

In [38]:
# Run the fixed-step search on f starting from x = 100.
# NOTE(review): gd and f are defined in later cells; on a fresh kernel those cells must run first.
gd(100)

(-1.134378077582987e-10, 1.2868136229008733e-20)

In [34]:
def gd(start, step=0.001):
    """Walk along the module-level ``f`` in fixed steps until its slope changes sign.

    At each point the backward difference ``f(pt) - f(pt - step)`` and the
    forward difference ``f(pt + step) - f(pt)`` are compared. The walk moves
    left while the backward difference is positive and right otherwise.

    Args:
        start: Initial x value.
        step: Step size used both for the finite differences and for moving.

    Returns:
        Tuple ``(pt, f(pt))`` for the first point where the two differences
        have opposite signs, or where the forward difference is exactly zero.
    """
    pt = start
    while True:
        y = f(pt)
        left = y - f(pt - step)
        right = f(pt + step) - y
        # Compare signs directly instead of dividing: a zero forward difference
        # (flat stretch) used to raise ZeroDivisionError and is now treated as
        # a stationary point.
        if right == 0 or (left < 0 < right) or (right < 0 < left):
            return pt, y
        if left > 0:
            pt -= step
        else:
            pt += step

In [36]:
import math
def f(x):
    """Return the square of ``x``."""
    squared = x * x
    return squared

In [5]:
# Dump every similarity matrix from its .npz archive into a raw float16 file.
# Despite the '.npy' suffix the output has no NumPy header, so it has to be
# read back with np.memmap and an explicit shape and dtype.
for param in similarity_files:
    # similarity_files holds extension-less base paths, so the archive is
    # <param>.npz; loading the bare path fails with file-not-found.
    with np.load(param + '.npz', 'r') as npf:
        arr = npf['arr_0']
        mm = np.memmap(param+'.npy', mode='w+', dtype='<f2', shape=arr.shape)
        mm[:] = arr[:]
        mm.flush()
    print('done', param)

done director-similarity.npz
done genre-similarity.npz
done rating-similarity.npz


In [29]:
# Scratch check: element-wise reciprocal of the 3x3 matrix 0..8, divided by 8.
# The 0 entry has no finite reciprocal, so it shows up as inf in the output.
np.reciprocal(np.arange(9.).reshape((3, 3)))/8

  np.reciprocal(np.arange(9.).reshape((3, 3)))/8


array([[       inf, 0.125     , 0.0625    ],
       [0.04166667, 0.03125   , 0.025     ],
       [0.02083333, 0.01785714, 0.015625  ]])

In [23]:
# Scratch check: norm of the strictly upper triangle (k=1 drops the diagonal,
# and with it the inf from 1/0) of the reciprocal matrix.
np.linalg.norm(np.triu(np.reciprocal(np.arange(9.).reshape((3, 3))), 1))

  np.linalg.norm(np.triu(np.reciprocal(np.arange(9.).reshape((3, 3))), 1))


1.1357816691600546

In [4]:
# Rewrite every similarity memmap as the element-wise reciprocal of its archive,
# scaled by the norm of the strictly upper triangle.
for param in similarity_files:
    with np.load(param + '.npz', 'r+') as npf:
        arr = npf['arr_0']
        # Size the memmap from the data instead of the hard-coded (23843, 23843),
        # so the cell keeps working when the movie set changes.
        mm = np.memmap(param+'.npy', mode='w+', dtype='<f2', shape=arr.shape)
        # NOTE(review): zero entries become inf here (this cell echoed runtime
        # warnings); confirm how zeros in the source data should be handled.
        mm[:] = np.reciprocal(arr, dtype='<f2')
        # np.triu returns a full in-memory copy of the matrix.
        # NOTE(review): the norm is taken over float16 data; check it does not
        # overflow or lose precision on the full matrix.
        mm[:] /= np.linalg.norm(np.triu(mm, 1))
        mm.flush()

  mm[:] = np.reciprocal(npf['arr_0'], dtype='<f2')
  mm[:] /= np.linalg.norm(np.triu(mm, 1))


In [15]:
# Indices of tsim columns whose entries on and below the diagonal sum to zero.
# The bound comes from tsim itself: the hard-coded 25113 disagrees with the
# 23843 used by the memmap cells and raises IndexError if tsim is smaller.
zr = [x for x in range(tsim.shape[1]) if np.sum(tsim[x:, x], dtype='f4') == 0]

In [3]:
# Read the tags similarity matrix out of its .npz archive into memory as `tsim`.
# 'arr_0' is the key of the array inside the archive; the second positional
# argument to np.load is mmap_mode.
with np.load('all_similarities/tags-similarity.npz', 'r') as npf:
    tsim = npf['arr_0']

In [4]:
# Rewrite the tags similarity memmap as the element-wise reciprocal of its
# archive, scaled by the norm of the strictly upper triangle.
with np.load('all_similarities/tags-similarity.npz', 'r+') as npf:
    arr = npf['arr_0']
    # Size the memmap from the data instead of the hard-coded (23843, 23843).
    mm = np.memmap('all_similarities/tags-similarity.npy', mode='w+', dtype='<f2', shape=arr.shape)
    # NOTE(review): zero entries become inf here; confirm how zeros in the
    # source data should be handled.
    mm[:] = np.reciprocal(arr, dtype='<f2')
    # np.triu returns a full in-memory copy of the matrix.
    # NOTE(review): the norm is taken over float16 data; check it does not
    # overflow or lose precision on the full matrix.
    mm[:] /= np.linalg.norm(np.triu(mm, 1))
    mm.flush()

True

In [9]:
# Read the user similarity matrix out of its .npz archive into memory as `usim`;
# weightfn compares the weighted adjacency matrix against it.
with np.load('all_similarities/user_similarity.npz', 'r') as npf:
    usim = npf['arr_0']

In [24]:
@numba.jit(forceobj=True, cache=True, parallel=True, fastmath=True)
def weightfn(x):
    """Return the summed absolute difference between the weighted adjacency matrix and ``usim``.

    ``x`` is passed to ``calc_adj_mat`` as the weight vector. The module-level
    ``usim`` is subtracted from the resulting matrix in place, the absolute
    value is taken in place, and all entries are summed with a float32
    accumulator.
    """
    residual = calc_adj_mat(x)
    # Both steps write into `residual` so no second full-size matrix is allocated.
    np.subtract(residual, usim, out=residual)
    np.absolute(residual, out=residual)
    return residual.sum(dtype='f4')

In [26]:
# Evaluate the objective at the initial all-ones weights.
weightfn(weights)

522617400.0

In [None]:
# `op.optimize` is not a callable optimiser, so calling it raises. Minimise the
# objective over the weights with scipy.optimize.minimize instead.
# weightfn is a sum of absolute values with no gradient available, hence the
# derivative-free Nelder-Mead method. The start point is cast to float64 so the
# simplex is not built in float16.
result = op.minimize(weightfn, weights.astype('f8'), method='Nelder-Mead')
result