# Collaborative Topic Regression

In [1]:
import numpy as np
from random import sample

In [4]:
def ctm(R_dict,Theta,n_U,n_epoch=20,batch_size=50,lambda_u=0.002,lambda_v=0.2,alpha=0.002):
    '''
    Fit user and item factors with mini-batch stochastic gradient descent.

    For every sampled (user i, item j) pair one gradient step is taken on
        (u.v - Rij)^2 + lambda_u*|u|^2 + lambda_v*|v - theta_j|^2
    so item vectors are pulled towards their topic vectors in Theta.

    input: R_dict, {(i,j): Rij} maps ith user and jth item to the non-zero rating Rij;
                   i and j are used directly as row indices into U and V
           Theta, JxK matrix, jth row is a K-dim topic vector for jth item
                  (updated copies are modified in place, so a float array is expected)
           n_U, number of unique users in R_dict (number of rows of U)
           n_epoch, maximum number of iterations; one mini-batch is drawn per iteration
           batch_size, number of (i,j) pairs sampled without replacement per iteration;
                       if R_dict holds fewer pairs than this, a single pair is drawn instead
           lambda_u, regularization parameter for U
           lambda_v, regularization parameter for V
           alpha, learning rate

    output: (U, V)
            U, a n_UxK matrix, each row is a K-dim representation for ith user
            V, a JxK matrix, each row is a K-dim representation for jth item
            If R_dict is empty the initial values (random U, copy of Theta) are returned.
    '''
    _,K = Theta.shape # Theta must be 2-D; only the latent dimension is needed
    U = np.random.rand(n_U,K)
    V = Theta.copy() # initialize parameters without touching the caller's Theta

    # random.sample needs a real sequence: dict key views are rejected by
    # newer Python 3 releases, so materialize the keys once up front.
    pairs = list(R_dict)
    N = len(pairs)
    if N == 0:
        return (U,V) # nothing to learn from

    if N<batch_size:
        batch_size = 1 # too few ratings for a full batch: fall back to single-sample SGD

    for t in range(n_epoch):
        delta = 0 # accumulated gradient magnitude over this mini-batch
        for i,j in sample(pairs, batch_size):
            r = R_dict[(i,j)]
            u,v = U[i,:],V[j,:] # views into U and V, so in-place updates write through
            err = u.dot(v)-r # prediction error for this rating (r is a scalar)

            # both gradients are evaluated at the current (u, v) before either is updated
            gu = 2*err*v+2*lambda_u*u
            gv = 2*err*u+2*lambda_v*(v-Theta[j,:])
            u -= alpha*gu
            v -= alpha*gv # update parameters

            delta += (np.linalg.norm(gu)+np.linalg.norm(gv))

        # stop early once the gradients of a whole mini-batch are negligible
        if delta < 0.001:
            break
    return (U,V)

# example

In [6]:
n = 5
np.random.seed(59)
R = np.random.randint(0,5,size=(n,n))
# Treat every non-zero cell of R as an observed rating keyed by (user, item);
# np.ndindex walks the cells in row-major order.
R_dict = {cell: R[cell] for cell in np.ndindex(n, n) if R[cell] != 0}

Theta = np.random.rand(n,50)

U,V = ctm(R_dict,Theta,n,n_epoch=1000)
# Squared reconstruction error summed over all n*n cells (zero cells included),
# divided by the number of observed ratings.
((U.dot(V.T)-R)**2).sum()/len(R_dict)

5.7035805547427936

In [11]:
# To make it converge quickly, we should initialize each customer's feature vector with the average topic distribution of all the books he or she has read.
% timeit U,V = ctm(R_dict,Theta,n,n_epoch=1000)

10 loops, best of 3: 30.6 ms per loop


Estimated Rating

In [9]:
# Predicted rating for every (user, item) pair, rounded to one decimal place.
U.dot(V.T).round(1)

array([[ 1.3,  3.9,  2.9,  1.1,  3.9],
       [ 2.7,  5.8,  3. ,  1.7,  4.7],
       [ 1.2,  3.8,  2.2,  1. ,  3. ],
       [ 2.1,  2.1,  2.3,  3.5,  1.1],
       [ 2. ,  5.2,  1.4,  2. ,  3.8]])

True Rating

In [10]:
# Rating matrix generated in the example; zero cells were left out of R_dict, so they were never used for training.
R

array([[1, 4, 3, 1, 0],
       [3, 0, 3, 1, 0],
       [1, 4, 2, 1, 3],
       [2, 2, 2, 4, 1],
       [2, 0, 1, 2, 0]])

In [330]:
'''
def ctm_matrix(R,Theta,U_batch_size=50,V_batch_size=40,n_epoch=20,lambda_u=0.002,lambda_v=0.2,alpha=0.0002):
    I,J = R.shape
    K = Theta.shape[1] # get dimension of parameters
    
    U = np.random.rand(I,K)
    V = Theta.copy() # initialize parameters
    
    n_U = int(np.ceil(I/(U_batch_size*1.0)))
    n_V = int(np.ceil(J/(V_batch_size*1.0)))# number of batches to be iterated
    
    for t in range(n_epoch):
        delta = 0 # change in gradient
        for u_th in range(n_U):
            for v_th in range(n_V): # for each mini_batch, do
                r = R[U_batch_size*(u_th):U_batch_size*(u_th+1),V_batch_size*(v_th):V_batch_size*(v_th+1)] 
                u = U[U_batch_size*(u_th):U_batch_size*(u_th+1),:]
                v = V[V_batch_size*(v_th):V_batch_size*(v_th+1),:]
                theta = Theta[V_batch_size*(v_th):V_batch_size*(v_th+1),:] # get sub parameters
                
                gu = 2*(u.dot(v.T)-r).dot(v)+2*lambda_u*u
                gv = 2*(v.dot(u.T)-r.T).dot(u)+2*lambda_v*(v-theta) # calculate gradient
                u -= alpha*gu
                v -= alpha*gv # update parameters
                
                delta += (np.linalg.norm(gu)+np.linalg.norm(gv)) # calculate change in gradient
                U[U_batch_size*(u_th):U_batch_size*(u_th+1),:] = u
                V[V_batch_size*(v_th):V_batch_size*(v_th+1),:] = v
        
        if delta < 0.001:
            break
    return (U,V)

'''

'\ndef ctm_matrix(R,Theta,U_batch_size=50,V_batch_size=40,n_epoch=20,lambda_u=0.002,lambda_v=0.2,alpha=0.0002):\n    I,J = R.shape\n    K = Theta.shape[1] # get dimension of parameters\n    \n    U = np.random.rand(I,K)\n    V = Theta.copy() # initialize parameters\n    \n    n_U = int(np.ceil(I/(U_batch_size*1.0)))\n    n_V = int(np.ceil(J/(V_batch_size*1.0)))# number of batches to be iterated\n    \n    for t in range(n_epoch):\n        delta = 0 # change in gradient\n        for u_th in range(n_U):\n            for v_th in range(n_V): # for each mini_batch, do\n                r = R[U_batch_size*(u_th):U_batch_size*(u_th+1),V_batch_size*(v_th):V_batch_size*(v_th+1)] \n                u = U[U_batch_size*(u_th):U_batch_size*(u_th+1),:]\n                v = V[V_batch_size*(v_th):V_batch_size*(v_th+1),:]\n                theta = Theta[V_batch_size*(v_th):V_batch_size*(v_th+1),:] # get sub parameters\n                \n                gu = 2*(u.dot(v.T)-r).dot(v)+2*lambda_u*u\n          

# Spark Version CTM

In [None]:
from __future__ import print_function

import numpy as np
from numpy.random import rand
from numpy import matrix
from pyspark.sql import SparkSession