# Julia project

In [1]:
using NPZ
using Gen
using Plots
using LinearAlgebra

In [2]:
xs = npzread("Z:/Master I/PP    - Probabilistic Programming/Project/input/input_10x10.npy")

10×10 Matrix{Float64}:
  8.0  6.0   8.0  0.0   4.0   6.0   6.0   6.0   0.0  6.0
  0.0  8.0  10.0  6.0  10.0   8.0   8.0   6.0   0.0  6.0
  0.0  6.0  10.0  0.0   6.0   6.0   8.0   6.0   0.0  8.0
  5.0  5.0   4.0  0.0   4.0   8.0   4.0   0.0   0.0  9.0
  8.0  6.0   6.0  0.0   6.0   6.0   6.0   0.0   0.0  6.0
  6.0  5.0   3.0  0.0   0.0   9.0   5.0   0.0   3.0  7.0
 10.0  0.0   6.0  0.0   6.0   6.0   6.0   0.0   6.0  0.0
 10.0  8.0   0.0  0.0   6.0   8.0   0.0   0.0   6.0  6.0
 10.0  0.0  10.0  0.0  10.0   0.0  10.0  10.0   0.0  8.0
  8.0  0.0   8.0  0.0   8.0  10.0   6.0   0.0  10.0  0.0

## Hierarchical Poisson Factorization

In [3]:
@gen function hpf_model(num_users::Int64,
                        num_items::Int64,
                        k::Int64,
                        a::Float64, c::Float64,
                        a_prim::Float64, b_prim::Float64, c_prim::Float64, d_prim::Float64)
    
    # for each user u
    X_preference = Vector[] # asta i theta care trebuie resimulat practic 
    for u = 1:num_users
        activity = ({(:activity,u)} ~ gamma(a_prim,a_prim/b_prim))
        preference = Float64[]
        for _k = 1:k  # for each component k ( k is like a random number of categories)
            push!(preference,{(:preference,u, _k)} ~ gamma(a,activity))
        end
        push!(X_preference,preference)
    end

    # for each item i 
    X_sample_attribute = Vector[] # asta i beta care trebuie resimulat
    for i = 1:num_items
        popularity = ({(:popularity,i)} ~ gamma(c_prim,c_prim/d_prim))
        attribute = Float64[]
        for _k = 1:k # for each component k ( k is like a random number of categories)
            push!(attribute,{(:attribute, i, _k)} ~ gamma(c,popularity))
        end
        push!(X_sample_attribute, attribute)
    end
    
    # for each user u and item i
    y = Matrix{Float64}(undef,0,num_items)
    for u = 1:num_users
        rating = Float64[]
        for i = 1:num_items
            push!(rating,{(:rating, u, i)} ~ poisson(dot(X_preference[u],X_sample_attribute[i])))
        end
        y = vcat(y, rating')
    end
    
    y
end

DynamicDSLFunction{Any}(Dict{Symbol, Any}(), Dict{Symbol, Any}(), Type[Int64, Int64, Int64, Float64, Float64, Float64, Float64, Float64, Float64], false, Union{Nothing, Some{Any}}[nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing], var"##hpf_model#313", Bool[0, 0, 0, 0, 0, 0, 0, 0, 0], false)

### Test to see if it generates a matrix with ratings

In [4]:
num_users = 10
num_items = 10
k = 10 # pentru primele 10 filme sunt 10 categorii
a, c, a_prim, b_prim, c_prim, d_prim = [0.3 for _ = 1:6];
y = hpf_model(num_users, num_items, k, a, c, a_prim, b_prim, c_prim, d_prim );
y

10×10 Matrix{Float64}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  23.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0   0.0  0.0

In [5]:
trace = Gen.simulate(hpf_model, (num_users, num_items, k, a, c, a_prim, b_prim, c_prim, d_prim,))

Gen.DynamicDSLTrace{DynamicDSLFunction{Any}}(DynamicDSLFunction{Any}(Dict{Symbol, Any}(), Dict{Symbol, Any}(), Type[Int64, Int64, Int64, Float64, Float64, Float64, Float64, Float64, Float64], false, Union{Nothing, Some{Any}}[nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing, nothing], var"##hpf_model#313", Bool[0, 0, 0, 0, 0, 0, 0, 0, 0], false), Trie{Any, Gen.ChoiceOrCallRecord}(Dict{Any, Gen.ChoiceOrCallRecord}((:rating, 6, 10) => Gen.ChoiceOrCallRecord{Int64}(0, -0.0002339270194919225, NaN, true), (:preference, 9, 9) => Gen.ChoiceOrCallRecord{Float64}(0.05758278885700929, 1.0115939047703972, NaN, true), (:rating, 9, 5) => Gen.ChoiceOrCallRecord{Int64}(0, -0.00014007203600753518, NaN, true), (:attribute, 4, 3) => Gen.ChoiceOrCallRecord{Float64}(0.001294617894415501, 4.725567062283881, NaN, true), (:rating, 6, 4) => Gen.ChoiceOrCallRecord{Int64}(0, -0.0006105420200919684, NaN, true), (:attribute, 4, 10) => Gen.ChoiceOrCallRecord{Float64}(0.013716732634152951, 2.27

### See if trace is corectly created

In [44]:
Gen.get_choices(trace)


│
├── (:rating, 6, 10) : 0
│
├── (:preference, 9, 9) : 0.4866740340294369
│
├── (:rating, 9, 5) : 0
│
├── (:attribute, 4, 3) : 0.3656949394884153
│
├── (:rating, 6, 4) : 0
│
├── (:attribute, 4, 10) : 0.01029092583548561
│
├── (:rating, 3, 10) : 0
│
├── (:rating, 4, 6) : 0
│
├── (:attribute, 8, 2) : 0.012665188145778942
│
├── (:activity, 6) : 0.07390284258122365
│
├── (:preference, 5, 5) : 0.012104156632052549
│
├── (:rating, 7, 1) : 0
│
├── (:rating, 3, 4) : 0
│
├── (:activity, 4) : 1.4104443881057465
│
├── (:rating, 7, 8) : 0
│
├── (:activity, 7) : 0.014351758031160936
│
├── (:preference, 8, 6) : 0.0005545263503797618
│
├── (:rating, 10, 7) : 0
│
├── (:rating, 2, 2) : 0
│
├── (:attribute, 7, 5) : 0.00024673750171484894
│
├── (:popularity, 10) : 0.1750755341133708
│
├── (:rating, 7, 2) : 0
│
├── (:attribute, 4, 4) : 0.11291890066067706
│
├── (:preference, 5, 6) : 6.495897080477486e-5
│
├── (:rating, 3, 9) : 0
│
├── (:preference, 1, 6) : 0.0014740892635608867
│
├── (:attribute, 3, 10) :

### Create constraints

In [45]:
function make_constraints(ys::Matrix{Float64})
    constraints = Gen.choicemap()
    for u=1:size(ys)[1]
        for i=1:size(ys)[2]  
        constraints[(:rating, u, i)] = ys[u,i]
        end
    end
    constraints
end;

Check if it works

In [46]:
make_constraints(xs)

│
├── (:rating, 6, 10) : 7.0
│
├── (:rating, 1, 1) : 8.0
│
├── (:rating, 9, 5) : 10.0
│
├── (:rating, 6, 4) : 0.0
│
├── (:rating, 8, 10) : 6.0
│
├── (:rating, 3, 10) : 8.0
│
├── (:rating, 4, 5) : 4.0
│
├── (:rating, 1, 9) : 0.0
│
├── (:rating, 4, 6) : 8.0
│
├── (:rating, 9, 4) : 0.0
│
├── (:rating, 1, 3) : 8.0
│
├── (:rating, 5, 10) : 6.0
│
├── (:rating, 3, 2) : 6.0
│
├── (:rating, 1, 10) : 6.0
│
├── (:rating, 5, 2) : 6.0
│
├── (:rating, 2, 6) : 8.0
│
├── (:rating, 6, 3) : 3.0
│
├── (:rating, 7, 1) : 10.0
│
├── (:rating, 3, 4) : 0.0
│
├── (:rating, 9, 8) : 10.0
│
├── (:rating, 7, 8) : 0.0
│
├── (:rating, 10, 10) : 0.0
│
├── (:rating, 5, 7) : 6.0
│
├── (:rating, 10, 5) : 8.0
│
├── (:rating, 5, 4) : 0.0
│
├── (:rating, 4, 8) : 0.0
│
├── (:rating, 10, 7) : 6.0
│
├── (:rating, 2, 2) : 8.0
│
├── (:rating, 2, 4) : 6.0
│
├── (:rating, 8, 5) : 6.0
│
├── (:rating, 8, 1) : 10.0
│
├── (:rating, 7, 2) : 0.0
│
├── (:rating, 2, 10) : 6.0
│
├── (:rating, 3, 9) : 0.0
│
├── (:rating, 10, 9) : 10.0
│
├─

In [47]:
function block_resimulation_update(tr,num_users, num_items, k)

    # Block 1: Update preference (theta)
    for u = 1:num_users
        for _k = 1:k 
        latent_variable = select(:preference,u,_k)
        (tr, _) = mh(tr, latent_variable)
        end
    end


    # Block 2: Update attribute (beta)
    for i = 1:num_items
        for _k = 1:k
        latent_variable = select(:attribute,i,_k)
        (tr, _) = mh(tr, latent_variable)
        end
    end

    tr

end

block_resimulation_update (generic function with 1 method)

In [48]:
function block_resimulation_inference(n_burnin, n_samples, thin)
    observations = make_constraints(xs)
    (tr, _) = generate(hpf_model, (num_users, num_items, k, a, c, a_prim, b_prim, c_prim, d_prim), observations)
    for iter=1:n_burnin
        tr = block_resimulation_update(tr,num_users,num_items,k)
        if iter % 100 == 0 
            print(iter)
        end

    end
    trs = []
    for iter=1:n_samples
        for itert = 1:thin # reduce the autocorrelation in a sample of generated data by MCMC
            tr = block_resimulation_update(tr,num_users,num_items,k)
        end
        push!(trs, tr)
        if iter % 100 == 0 
            print(iter)
        end
    end

    trs

end;

### TRAIN

In [49]:
n_burnin = 100000
n_samples = 40000
thin = 2
trs = block_resimulation_inference(n_burnin, n_samples, thin);

100

200300

400

500600

700800

9001000

1100

1200

1300

1400

15001600

17001800

19002000

21002200

2300

24002500

26002700

2800

2900

3000

3100

320033003400

35003600

3700

3800

39004000

4100

4200

430044004500

4600

47004800

4900

5000

5100

52005300

5400

550056005700

5800

5900

600061006200

6300

64006500

66006700

6800

6900

7000

7100

7200

73007400

7500

76007700

78007900

8000

8100

8200

830084008500

8600

8700

88008900

9000

91009200

9300

94009500

96009700

98009900

10000

1010010200

10300

104001050010600

10700

108001090011000

11100

1120011300

11400

11500

1160011700

11800

1190012000

1210012200

12300

1240012500

1260012700

12800

1290013000

13100

1320013300

1340013500

13600

13700

1380013900

14000

1410014200

14300

14400

14500

14600

1470014800

14900

1500015100

15200

15300

154001550015600

1570015800

15900

1600016100

1620016300

1640016500

1660016700

16800

1690017000

17100

17200

1730017400

17500

1760017700

17800

1790018000

1810018200

18300

1840018500

18600

1870018800

1890019000

1910019200

19300

1940019500

19600

1970019800

1990020000

20100

20200

20300

20400

2050020600

20700

2080020900

21000

2110021200

21300

2140021500

21600

217002180021900

22000

22100

22200

2230022400

22500

22600

22700

2280022900

23000

2310023200

23300

2340023500

23600

2370023800

2390024000

24100

24200

2430024400

24500

2460024700

2480024900

2500025100

25200

2530025400

2550025600

25700

2580025900

26000

26100

26200

2630026400

26500

26600

2670026800

2690027000

27100

2720027300

27400

27500

2760027700

2780027900

2800028100

2820028300

2840028500

28600

28700

28800

2890029000

2910029200

29300

29400

2950029600

29700

2980029900

3000030100

30200

3030030400

30500

3060030700

30800

3090031000

31100

3120031300

31400

31500

3160031700

3180031900

32000

32100

3220032300

32400

3250032600

3270032800

32900

33000

3310033200

3330033400

33500

33600

3370033800

3390034000

3410034200

34300

3440034500

3460034700

34800

3490035000

3510035200

35300

35400

355003560035700

35800

3590036000

36100

36200

3630036400

36500

366003670036800

3690037000

3710037200

3730037400

3750037600

37700

37800

3790038000

38100

3820038300

38400

38500

3860038700

3880038900

3900039100

39200

393003940039500

39600

3970039800

3990040000

40100

40200

4030040400

4050040600

4070040800

4090041000

41100

41200

41300

4140041500

4160041700

41800

41900

4200042100

4220042300

42400

4250042600

4270042800

42900

4300043100

4320043300

43400

43500

43600

4370043800

43900

440004410044200

44300

4440044500

4460044700

44800

44900

45000

4510045200

4530045400

45500

4560045700

4580045900

46000

4610046200

463004640046500

46600

4670046800

46900

4700047100

47200

4730047400

47500

4760047700

4780047900

48000

48100

48200

48300

4840048500

48600

48700

4880048900

4900049100

4920049300

49400

49500

4960049700

49800

49900

50000

50100

50200

50300

50400

5050050600

50700

5080050900

51000

51100

5120051300

51400

5150051600

51700

5180051900

52000

5210052200

5230052400

52500

5260052700

52800

52900

5300053100

53200

5330053400

53500

5360053700

53800

539005400054100

54200

54300

5440054500

54600

5470054800

54900

5500055100

5520055300

554005550055600

55700

558005590056000

56100

56200

5630056400

56500

5660056700

5680056900

57000

5710057200

5730057400

5750057600

57700

5780057900

58000

5810058200

58300

5840058500

5860058700

58800

58900

5900059100

5920059300

59400

59500

5960059700

598005990060000

60100

6020060300

60400

6050060600

6070060800

6090061000

61100

6120061300

6140061500

61600

61700

6180061900

62000

6210062200

62300

624006250062600

62700

6280062900

63000

63100

6320063300

6340063500

63600

6370063800

63900

64000

64100

6420064300

64400

6450064600

64700

64800

6490065000

65100

65200

65300

65400

65500

6560065700

65800

6590066000

6610066200

66300

6640066500

66600

66700

6680066900

67000

6710067200

6730067400

67500

67600

6770067800

67900

68000

6810068200

6830068400

68500

6860068700

68800

6890069000

6910069200

69300

6940069500

6960069700

6980069900

70000

70100

7020070300

70400

7050070600

7070070800

70900

7100071100

71200

713007140071500

71600

71700

71800

7190072000

72100

7220072300

7240072500

72600

7270072800

72900

7300073100

73200

73300

73400

73500

73600

73700

73800

7390074000

74100

7420074300

74400

7450074600

74700

7480074900

75000

75100

7520075300

75400

75500

75600

7570075800

7590076000

7610076200

76300

7640076500

76600

76700

76800

7690077000

7710077200

77300

77400

77500

7760077700

77800

77900

7800078100

78200

78300

7840078500

78600

78700

7880078900

7900079100

7920079300

7940079500

7960079700

79800

7990080000

80100

8020080300

8040080500

80600

8070080800

8090081000

81100

81200

81300

81400

8150081600

81700

81800

81900

82000

8210082200

8230082400

8250082600

82700

82800

8290083000

8310083200

83300

8340083500

83600

83700

8380083900

84000

8410084200

84300

84400

8450084600

847008480084900

85000

85100

85200

8530085400

85500

85600

8570085800

85900

8600086100

8620086300

86400

8650086600

8670086800

86900

870008710087200

8730087400

87500

8760087700

8780087900

88000

8810088200

88300

8840088500

88600

8870088800

88900

89000

89100

89200

893008940089500

89600

89700

8980089900

90000

9010090200

9030090400

9050090600

9070090800

90900

9100091100

9120091300

91400

9150091600

91700

91800

9190092000

9210092200

92300

92400

9250092600

92700

9280092900

93000

9310093200

93300

9340093500

93600

9370093800

9390094000

94100

9420094300

94400

94500

9460094700

9480094900

9500095100

95200

95300

9540095500

9560095700

95800

95900

96000

96100

9620096300

9640096500

9660096700

96800

9690097000

9710097200

97300

9740097500

97600

9770097800

97900

9800098100

9820098300

98400

98500

98600

98700

988009890099000

99100

9920099300

99400

9950099600

99700

9980099900

100000

100

200300

400500

600

700

800900

1000

11001200

1300

14001500

1600

17001800

1900

20002100

22002300

2400

2500

2600

27002800

2900

30003100

3200

33003400

3500

3600

3700

38003900

4000

41004200

4300

4400

45004600

4700

48004900

50005100

5200

53005400

55005600

57005800

5900

6000

61006200

6300

6400

65006600

67006800

6900

7000

71007200

7300

7400

7500

76007700

7800

79008000

8100

82008300

84008500

8600

87008800

8900

9000

91009200

9300

9400

95009600

97009800

9900

1000010100

102001030010400

10500

10600

1070010800

10900

1100011100

11200

1130011400

11500

11600

11700

11800

1190012000

12100

1220012300

12400

1250012600

127001280012900

13000

13100

1320013300

13400

1350013600

13700

13800

1390014000

14100

1420014300

14400

14500

1460014700

148001490015000

15100

15200

1530015400

15500

1560015700

15800

15900

16000

1610016200

1630016400

1650016600

16700

16800

16900

170001710017200

17300

17400

17500

1760017700

17800

17900

18000

1810018200

1830018400

1850018600

1870018800

18900

19000

1910019200

19300

19400

1950019600

19700

1980019900

2000020100

2020020300

2040020500

20600

2070020800

2090021000

21100

2120021300

21400

21500

21600

2170021800

21900

22000

22100

2220022300

2240022500

22600

22700

2280022900

23000

2310023200

2330023400

2350023600

2370023800

23900

2400024100

242002430024400

2450024600

24700

24800

2490025000

25100

25200

25300

25400

2550025600

2570025800

25900

2600026100

26200

26300

2640026500

2660026700

2680026900

2700027100

2720027300

27400

27500

27600

27700

27800

2790028000

28100

2820028300

2840028500

28600

2870028800

28900

29000

29100

29200

2930029400

29500

29600

29700

29800

299003000030100

3020030300

30400

30500

3060030700

30800

3090031000

31100

3120031300

31400

3150031600

317003180031900

3200032100

32200

32300

32400

325003260032700

3280032900

33000

33100

3320033300

3340033500

33600

33700

33800

33900

3400034100

3420034300

34400

3450034600

3470034800

34900

35000

35100

35200

35300

35400

35500

35600

35700

35800

3590036000

3610036200

36300

36400

36500

3660036700

3680036900

37000

37100

3720037300

37400

3750037600

37700

37800

37900

38000

3810038200

38300

38400

38500

38600

38700

38800

38900

39000

391003920039300

3940039500

3960039700

39800

39900

40000

In [50]:
function create_rating_vector(one_trace,num_users,num_items,k)
    X_preference = Vector[] 
    for u = 1:num_users
        preference = Float64[]
        for _k = 1:k  
            push!(preference,one_trace[(:preference,u,_k)])
        end

        push!(X_preference,preference)
    end

    X_sample_attribute = Vector[] 
    for i = 1:num_items
        attribute = Float64[]
        for _k = 1:k 
            push!(attribute,one_trace[(:attribute, i, _k)])
        end
        push!(X_sample_attribute, attribute)
    end
    y = Matrix{Float64}(undef,0,num_items)
    for u = 1:num_users
        rating = Float64[]
        for i = 1:num_items
            push!(rating,poisson(dot(X_preference[u],X_sample_attribute[i])))
        end
        y = vcat(y, rating')
    end
     
    y

end



create_rating_vector (generic function with 1 method)

Compute matrix of rating for each trace 

In [51]:
final_matrix = zeros(num_users, num_items)
for i=1:n_samples
    y = create_preference_vector(trs[i],num_users,num_items,k)
    final_matrix = final_matrix + y
end
final_matrix/n_samples


10×10 Matrix{Float64}:
 5.0e-5  0.03755   0.286125  0.04735   …  0.0236    0.0148    0.162075
 2.5e-5  0.10555   0.085225  0.1516       0.647425  0.038325  0.602075
 0.0     0.010475  0.03625   0.025075     0.032775  0.005125  0.02295
 0.0     0.00015   0.000875  0.005675     0.000675  5.0e-5    0.0012
 0.0     0.000175  0.000675  0.0001       5.0e-5    0.000125  0.003275
 0.0     0.11015   0.358525  0.028825  …  0.264375  0.0856    1.37005
 0.0     0.0       0.0       0.0          0.0       0.0       0.0
 0.0     7.5e-5    0.00015   7.5e-5       2.5e-5    0.0       5.0e-5
 0.0     0.000775  0.0099    0.007775     0.0039    0.00085   0.00745
 0.0     0.0       5.0e-5    2.5e-5       2.5e-5    0.0       0.0

In [52]:
xs

10×10 Matrix{Float64}:
  8.0  6.0   8.0  0.0   4.0   6.0   6.0   6.0   0.0  6.0
  0.0  8.0  10.0  6.0  10.0   8.0   8.0   6.0   0.0  6.0
  0.0  6.0  10.0  0.0   6.0   6.0   8.0   6.0   0.0  8.0
  5.0  5.0   4.0  0.0   4.0   8.0   4.0   0.0   0.0  9.0
  8.0  6.0   6.0  0.0   6.0   6.0   6.0   0.0   0.0  6.0
  6.0  5.0   3.0  0.0   0.0   9.0   5.0   0.0   3.0  7.0
 10.0  0.0   6.0  0.0   6.0   6.0   6.0   0.0   6.0  0.0
 10.0  8.0   0.0  0.0   6.0   8.0   0.0   0.0   6.0  6.0
 10.0  0.0  10.0  0.0  10.0   0.0  10.0  10.0   0.0  8.0
  8.0  0.0   8.0  0.0   8.0  10.0   6.0   0.0  10.0  0.0