In [1]:
push!(LOAD_PATH,"../src/","../src/models/","../src/models/c_functions/")
using SGMCMC
using MatrixFactorisation

In [2]:
using JLD

In [3]:
# Download and unpack the MovieLens 100k dataset, unless the first training
# split is already on disk. Julia's built-in `download` and `rm` are used so
# the cell does not hard-depend on the `wget` and `rm` shell commands; `unzip`
# is still required on the PATH.
if !isfile("ml-100k/u1.base")
    download("http://files.grouplens.org/datasets/movielens/ml-100k.zip", "ml-100k.zip")
    run(`unzip ml-100k.zip`)
    # the archive is no longer needed once it has been extracted
    rm("ml-100k.zip")
end

In [4]:
# Read both splits from disk, keeping only the first three columns of each table.
trainset, testset = map(("ml-100k/u1.base", "ml-100k/u1.test")) do path
    readdlm(path)[:, 1:3]
end;

In [5]:
# check size: show the (rows, columns) dimensions of the training matrix
size(trainset)

(80000,3)

In [6]:
# Build one model per data split. Only the training model sets the β₀ keyword;
# the test model is constructed without it.
# NOTE(review): the positional arguments 5 and 10000 are interpreted by
# MatrixFactorisationModel, which is not visible here — confirm their meaning.
mfmodel = MatrixFactorisationModel(trainset, 5, 10000; β₀ = 300);
mftest = MatrixFactorisationModel(testset, 5, 10000);

In [7]:
# Gradient for the training model, with λ set to a vector of ones of length 2*mfmodel.d + 2.
grad = DataModel.getgrad(mfmodel; λ = ones(2 * mfmodel.d + 2))
# Initial SGLD state: one standard-normal draw per model parameter, scaled by 0.1.
# NOTE(review): the second argument (1e-6) is presumably the step size — confirm.
s = SGMCMC.SGLDState(randn(MatrixFactorisation.fetchnparams(mfmodel)) .* 0.1, 1e-6)

SGMCMC.SGLDState([0.0553979,0.0376624,0.131276,0.194805,0.0769204,-0.0202916,-0.149733,0.0883006,0.0319501,0.172276  …  0.0740418,0.0996171,0.135453,0.0236933,-0.00795913,-0.0352123,-0.0194444,-0.0594558,-0.121529,0.0694018],0,SGMCMC.s)

In [8]:
#get initial rmse: evaluate the training model at the freshly initialised parameters s.x
MatrixFactorisation.evaluate(mfmodel,s.x)

Dict{Symbol,Float64} with 1 entry:
  :rmse => 1.13006

In [9]:
# Run 10000 SGLD steps on the training model.
# The λ hyperparameters are sampled once up front and the gradient is rebuilt from them.
λ = MatrixFactorisation.lambda_sample(mfmodel, s.x)
grad = DataModel.getgrad(mfmodel; λ = λ)
for i in 1:10000
    sample!(s, grad)
    if i % 10 == 0
        # every 10th step: resample the hyperparameters and rebuild the gradient
        λ = MatrixFactorisation.lambda_sample(mfmodel, s.x)
        grad = DataModel.getgrad(mfmodel; λ = λ)
    end
    # progress is only reported on every 1000th step
    i % 1000 == 0 || continue
    println("iteration $i")
    println("train rmse: ", MatrixFactorisation.evaluate(mfmodel, s.x))
    println("test rmse: ", MatrixFactorisation.evaluate_test(mftest, s.x, mfmodel))
end

iteration 1000
train rmse: Dict(:rmse=>1.08235)
test rmse: Dict(:rmse=>1.1144)
iteration 2000
train rmse: Dict(:rmse=>1.05094)
test rmse: Dict(:rmse=>1.08401)
iteration 3000
train rmse: Dict(:rmse=>1.02909)
test rmse: Dict(:rmse=>1.06355)
iteration 4000
train rmse: Dict(:rmse=>1.01357)
test rmse: Dict(:rmse=>1.04902)
iteration 5000
train rmse: Dict(:rmse=>1.00258)
test rmse: Dict(:rmse=>1.03947)
iteration 6000
train rmse: Dict(:rmse=>0.993667)
test rmse: Dict(:rmse=>1.03135)
iteration 7000
train rmse: Dict(:rmse=>0.987115)
test rmse: Dict(:rmse=>1.02406)
iteration 8000
train rmse: Dict(:rmse=>0.980602)
test rmse: Dict(:rmse=>1.01802)
iteration 9000
train rmse: Dict(:rmse=>0.976747)
test rmse: Dict(:rmse=>1.01435)
iteration 10000
train rmse: Dict(:rmse=>0.972287)
test rmse: Dict(:rmse=>1.0105)


In [10]:
# Switch to the sparse SGLD sampler from Ahn et al.,
# "Large-Scale Distributed Bayesian Matrix Factorization using Stochastic Gradient MCMC".
# The state starts from standard-normal draws scaled by 0.02 and a vector of
# ones of length 2*mfmodel.d + 2.
s = MatrixFactorisation.SparseSGLDState(
    randn(MatrixFactorisation.fetchnparams(mfmodel)) .* 0.02,
    ones(2 * mfmodel.d + 2),
    1e-6;
    niters = 1
)

MatrixFactorisation.SparseSGLDState([0.0188594,-0.00358597,0.0221352,0.0161686,0.0149696,-0.0324228,0.00695328,0.0164148,0.0166288,0.000985349  …  -0.00125548,-0.019739,0.00624112,-0.010793,0.0125397,0.00659125,-0.0258528,0.0489105,-0.00131484,0.006055],[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0  …  0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],[1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0],0,MatrixFactorisation.#13,1)

In [11]:
# Run 10000 steps of the sparse sampler, reporting train/test rmse every 1000 steps.
# NOTE(review): λ and grad are refreshed below, but neither is passed to
# sample_sparse!(mfmodel, s) in this cell — confirm whether the sparse sampler is
# supposed to receive the resampled hyperparameters.
λ = MatrixFactorisation.lambda_sample(mfmodel, s.x)
grad = DataModel.getgrad(mfmodel; λ = λ)
for i in 1:10000
    MatrixFactorisation.sample_sparse!(mfmodel, s)
    if i % 10 == 0
        # every 10th step: resample the hyperparameters and rebuild the gradient
        λ = MatrixFactorisation.lambda_sample(mfmodel, s.x)
        grad = DataModel.getgrad(mfmodel; λ = λ)
    end
    # progress is only reported on every 1000th step
    i % 1000 == 0 || continue
    println("iteration $i")
    println("train rmse: ", MatrixFactorisation.evaluate(mfmodel, s.x))
    println("test rmse: ", MatrixFactorisation.evaluate_test(mftest, s.x, mfmodel))
end

iteration 1000
train rmse: Dict(:rmse=>1.09362)
test rmse: Dict(:rmse=>1.12823)
iteration 2000
train rmse: Dict(:rmse=>1.07379)
test rmse: Dict(:rmse=>1.10971)
iteration 3000
train rmse: Dict(:rmse=>1.05658)
test rmse: Dict(:rmse=>1.09258)
iteration 4000
train rmse: Dict(:rmse=>1.04383)
test rmse: Dict(:rmse=>1.08025)
iteration 5000
train rmse: Dict(:rmse=>1.03352)
test rmse: Dict(:rmse=>1.07034)
iteration 6000
train rmse: Dict(:rmse=>1.02371)
test rmse: Dict(:rmse=>1.06137)
iteration 7000
train rmse: Dict(:rmse=>1.01635)
test rmse: Dict(:rmse=>1.05483)
iteration 8000
train rmse: Dict(:rmse=>1.00999)
test rmse: Dict(:rmse=>1.04833)
iteration 9000
train rmse: Dict(:rmse=>1.00451)
test rmse: Dict(:rmse=>1.0434)
iteration 10000
train rmse: Dict(:rmse=>0.99982)
test rmse: Dict(:rmse=>1.03824)
