In [6]:
# Colab-only line magic: selects the TensorFlow 1.x runtime. It must run before
# `import tensorflow`; the rest of the notebook uses TF1 names (tf.Session,
# tf.train.GradientDescentOptimizer, tf.diag).
%tensorflow_version 1.x
import matplotlib
import os
import tensorflow as tf
import numpy as np
import random

# Seed the Python and NumPy random number generators.
random.seed(0)
np.random.seed(0) 

import h5py
import scipy.io as sio
import scipy.sparse as sp
from scipy.sparse.linalg import eigs
from scipy import linalg as linalg
import matplotlib.pyplot as plt
import networkx as nx
from numpy import linalg as npla
from numpy import matlib
import IPython
# Mount Google Drive; the dataset paths used later live under
# '/content/drive/My Drive/'.
from google.colab import drive
drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


This is the code to replicate the results submitted in the paper. Please download the MATLAB files containing the user and movie graphs from prior work such as Monti et al. or Boyarski et al., and upload these matrices to your Google Drive so that Google Colab can access them.

In the code below, for the Flixster dataset, we set m and n to 2990. For MovieLens 100K, we set m and n to 900 and 1650, respectively. For Synthetic Netflix, we set m and n to 30. Note that the values of p_max and q_max are set to the dimensions of the underlying rating matrix. For Synthetic Netflix, we obtain the best results when the underlying graphs are perturbed, i.e. randomly deleting edges from the graphs helps to improve the performance.


In [0]:
def load_matlab_file(path_file, name_field):
    """Load one variable from an HDF5-based (MATLAB v7.3) ``.mat`` file.

    Parameters
    ----------
    path_file : str
        Path of the ``.mat`` file; it is opened read-only with h5py.
    name_field : str
        Name of the variable to read from the file.

    Returns
    -------
    scipy.sparse.csc_matrix or numpy.ndarray
        A float32 CSC matrix when the variable is stored as a group with
        ``data`` / ``ir`` / ``jc`` members, otherwise a float32 dense array
        (transposed, see below).

    Raises
    ------
    ValueError
        If the variable is a group that has no ``ir`` member. Previously this
        case fell through and failed with an UnboundLocalError on ``return``.
    """
    # The context manager closes the file even when reading raises.
    with h5py.File(path_file, 'r') as db:
        ds = db[name_field]

        # Only groups expose .keys(); a plain dataset raises AttributeError.
        # The try is kept narrow so real errors in the sparse branch are not
        # silently rerouted to the dense branch.
        try:
            members = ds.keys()
        except AttributeError:
            members = None

        if members is None:
            # Transpose in case is a dense matrix because of the row- vs
            # column- major ordering between python and matlab
            out = np.asarray(ds).astype(np.float32).T
        elif 'ir' in members:
            data = np.asarray(ds['data'])
            ir = np.asarray(ds['ir'])  # passed as the CSC `indices` array
            jc = np.asarray(ds['jc'])  # passed as the CSC `indptr` array
            out = sp.csc_matrix((data, ir, jc)).astype(np.float32)
        else:
            raise ValueError(
                "Field %r in %r is a group without an 'ir' member; "
                "cannot interpret it as a sparse matrix" % (name_field, path_file)
            )
    return out

def eigen(A):
    """Eigendecomposition of `A` via ``numpy.linalg.eigh``, sorted ascending.

    Returns a tuple ``(values, vectors)`` where ``values`` is ordered from the
    smallest to the largest eigenvalue and ``vectors[:, k]`` is the eigenvector
    that belongs to ``values[k]``.
    """
    values, vectors = npla.eigh(A)
    # Apply one permutation to both outputs so pairs stay aligned.
    order = np.argsort(values)
    return (values[order], vectors[:, order])

def init_graph_basis(W):
    """Eigendecomposition of the combinatorial Laplacian of a weighted graph.

    Parameters
    ----------
    W : array-like or scipy sparse matrix, shape (k, k)
        Weighted adjacency matrix. Any diagonal entries (self-loops) are
        removed before the Laplacian is built.

    Returns
    -------
    (eig_vals, eig_vecs)
        Eigenvalues in ascending order and the matching eigenvectors as
        columns, as returned by `eigen` for ``L = D - W`` with ``D`` the
        diagonal matrix of row sums of ``W``.
    """
    # np.diag / np.diagflat do not understand scipy sparse matrices (they raise
    # on them), and load_matlab_file may return a csc_matrix, so densify first.
    # Dense inputs (ndarray / np.matrix) go through untouched.
    if sp.issparse(W):
        W = W.toarray()

    W = W - np.diag(np.diag(W))     # drop self-loops
    D = np.diagflat(np.sum(W, 1))   # degree matrix from row sums
    L = D - W
    eig_vals, eig_vecs = eigen(L)
    return eig_vals, eig_vecs


def squared_frobenius_norm(tensor):
    """Return the sum of the squared entries of `tensor` as a scalar tensor."""
    # reduce_sum without an axis argument collapses every dimension.
    return tf.reduce_sum(tf.square(tensor))

In [0]:
# Load the row/column graphs, compute the eigendecomposition of their
# Laplacians, load the rating matrix with its train/test masks, and set the
# hyper-parameters used by the following cells.

# Dataset file on the mounted Google Drive (active choice: Flixster).
path_dataset = '/content/drive/My Drive/Colab Notebooks/training_test_dataset_10_NNs.mat' # Flixster

# Alternative datasets -- uncomment exactly one path_dataset line.
#path_dataset = '/content/drive/My Drive/Colab Notebooks/split_1.mat'
#path_dataset = '/content/drive/My Drive/Colab Notebooks/synthetic_netflix.mat'
W_rows = load_matlab_file(path_dataset, 'W_users')
#.todense()  # Row graph (append to the call above if the field is stored sparse)
W_cols = load_matlab_file(path_dataset, 'W_movies')
#.todense()  # Column graph (append to the call above if the field is stored sparse)
#W_rows = load_matlab_file(path_dataset, 'Wrow').todense()  # Row graph for synthetic Netflix
#W_cols = load_matlab_file(path_dataset, 'Wcol').todense()  # Column graph for synthetic Netflix
# m / n: how many row-graph / column-graph eigenpairs the model cell keeps
# (it slices eig_vecs_row[:, 0:m] and eig_vecs_col[:, 0:n]).
m = 2990  # value used for Flixster
n = 2990


# Eigenvalues / eigenvectors of the Laplacians of the row and column graphs
eig_vals_row, eig_vecs_row = init_graph_basis(W_rows)
eig_vals_col, eig_vecs_col = init_graph_basis(W_cols)

# M: rating matrix; Otraining / Otest are multiplied element-wise with it below
# (presumably 0/1 masks of the observed train / test entries -- confirm
# against the .mat file).
M = load_matlab_file(path_dataset, 'M')
S_training = load_matlab_file(path_dataset, 'Otraining')
S_test = load_matlab_file(path_dataset, 'Otest')
M_training = np.array(M)*np.array(S_training)
M_test = np.array(M)*np.array(S_test)

# Hyper-parameters
lr = 0.000001 # learning rate passed to the gradient-descent optimizer
num_iters = 12000 # intended number of optimisation steps
p_init = 1 # number of leading row eigenvectors used to initialise C
q_init = 1 # number of leading column eigenvectors used to initialise C
p_max = 3000 # C is allocated with shape (p_max, q_max)
q_max = 3000 # value used for Flixster



In [0]:
# ---------------------------------------------------------------------------
# Build the TensorFlow (1.x) graph: X = Phi (P C Q^T) Psi^T, fitted to the
# training entries, plus a penalty on left_mul - right_mul (see E_comm).
# ---------------------------------------------------------------------------

# Spectral coefficient matrix C (p_max x q_max). Only the leading
# p_init x q_init block starts non-zero: it holds the training ratings projected
# onto the first p_init row-graph and q_init column-graph eigenvectors.
C_init = np.zeros([p_max, q_max], dtype = np.float32)
#C_init = np.zeros([m, n], dtype = np.float32)
# Block (slice) assignment: identical to the former C_init[p_init-1, q_init-1]
# write when p_init == q_init == 1, and still correct for larger initial blocks.
C_init[:p_init, :q_init] = np.matmul(np.matmul(np.transpose(eig_vecs_row[:, 0:p_init]), M_training), eig_vecs_col[:, 0:q_init])

# P (m x p_max) and Q (n x q_max) start as rectangular identities and are trainable.
P_init = np.eye(m, p_max)
Q_init = np.eye(n, q_max)

C_tf = tf.Variable(C_init, trainable=True, dtype=tf.float32)
#C_tf = tf.Variable(np.matmul(np.matmul(P_init, C_init), np.transpose(Q_init)), trainable=True, dtype=tf.float32)
P_tf = tf.Variable(P_init, trainable=True, dtype=tf.float32)
Q_tf = tf.Variable(Q_init, trainable=True, dtype=tf.float32)
C_new = tf.matmul(tf.matmul(P_tf, C_tf), tf.transpose(Q_tf))  # (m x n)
#C_new = C_tf

# First m / n Laplacian eigenvectors and eigenvalues of the row / column graphs.
Phi_tf = tf.constant(eig_vecs_row[:,0:m], dtype=tf.float32)
Psi_tf = tf.constant(eig_vecs_col[:,0:n], dtype=tf.float32)

lambda_row_tf = tf.constant(eig_vals_row[0:m], dtype=tf.float32)
lambda_col_tf = tf.constant(eig_vals_col[0:n], dtype=tf.float32)

S_training_tf = tf.constant(S_training, dtype=tf.float32)
S_test_tf = tf.constant(S_test, dtype=tf.float32)
M_training_tf = tf.constant(M_training, dtype=tf.float32)
M_test_tf = tf.constant(M_test, dtype=tf.float32)

# Reconstructed matrix in the original (row x column) domain.
X = tf.matmul(tf.matmul(Phi_tf, C_new), tf.transpose(Psi_tf))

# Data term on the training entries. Uses the tf constants defined above (as
# train_loss does) instead of the raw numpy arrays, so the masks/ratings are not
# embedded in the graph a second time.
E_data = squared_frobenius_norm(tf.multiply(X, S_training_tf) - M_training_tf)

# Not used in this cell; kept in case another cell refers to it.
C_new_t = tf.transpose(C_new)
# C_new @ diag(lambda_col) scales column j by lambda_col[j];
# diag(lambda_row) @ C_new scales row i by lambda_row[i].
# Broadcasting gives the same values without materialising the dense
# n x n / m x m diagonal matrices or paying for two full matrix products.
left_mul = C_new * lambda_col_tf
right_mul = tf.expand_dims(lambda_row_tf, 1) * C_new
E_comm = squared_frobenius_norm(left_mul-right_mul)

# Total objective: data fit plus the weighted E_comm penalty.
E_tot = E_data + .00001*E_comm
optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
opt_op = optimizer.minimize(E_tot)
# Create a session for running Ops on the Graph.
config = tf.ConfigProto(allow_soft_placement=True)
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
sess.run(tf.global_variables_initializer())

# Reported losses: sqrt(sum of squared masked residuals / sum of the mask).
train_loss = tf.sqrt(squared_frobenius_norm(tf.multiply(X, S_training_tf) - M_training_tf)/ tf.reduce_sum(S_training_tf))
#validation_loss = tf.sqrt(squared_frobenius_norm(tf.multiply(S_validation_tf, (X - M))) / tf.reduce_sum(S_validation_tf))
test_loss = tf.sqrt(squared_frobenius_norm(tf.multiply(X, S_test_tf)- M_test_tf)/tf.reduce_sum(S_test_tf))

In [0]:
# Gradient-descent training loop. Runs `num_iters` optimisation steps (set in
# the hyper-parameter cell) and reports the train / test loss every 100 steps.
# The loop variable is called `step` so the builtin `iter` is not shadowed.
for step in range(num_iters):
    if step % 100 == 0:
        train_loss_np, test_loss_np = sess.run([train_loss, test_loss])
        # Replace the previous report instead of appending a new line.
        IPython.display.clear_output()
        print("iter " + str(step) +" ,train loss: "+str(train_loss_np)+", test loss: " + str(test_loss_np) )
        # Optional: visualise the current reconstruction.
        #X_np = sess.run(X)
        #plt.imshow(X_np)
        #plt.title('X')
        #plt.show()
    sess.run(opt_op)