In [12]:
import surprise
import pandas as pd
import numpy as np

In [13]:
dataset = pd.read_csv('ratings.txt', sep=' ', names = ['uid','iid','rate'])

In [15]:
dataset.head()

Unnamed: 0,uid,iid,rate
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


In [11]:
min_rate = dataset.rate.min()
max_rate = dataset.rate.max()

In [12]:
reader = surprise.Reader(rating_scale=(0.5,4))
data = surprise.Dataset.load_from_df(dataset,reader)

In [13]:
alg = surprise.SVDpp()

In [14]:
output = alg.fit(data.build_full_trainset())

In [33]:
pred = alg.predict(uid='50', iid='52')
pred

Prediction(uid='50', iid='52', r_ui=None, est=3.0028030537791928, details={'was_impossible': False})

In [18]:
all_ids = dataset.iid.unique()

In [28]:
uid50_iids = dataset.loc[dataset.uid==50, 'iid']

In [34]:
ids2predict = np.setdiff1d(all_ids, uid50_iids)

In [35]:
ids2predict

array([  14,   15,   16, ..., 2069, 2070, 2071], dtype=int64)

In [39]:
testset = [[50,ids,4.] for ids in ids2predict]

In [40]:
pred = alg.test(testset)

In [41]:
pred

[Prediction(uid=50, iid=14, r_ui=4.0, est=3.197749373548429, details={'was_impossible': False}),
 Prediction(uid=50, iid=15, r_ui=4.0, est=3.3442616237926996, details={'was_impossible': False}),
 Prediction(uid=50, iid=16, r_ui=4.0, est=3.703790484378665, details={'was_impossible': False}),
 Prediction(uid=50, iid=18, r_ui=4.0, est=3.795865348076193, details={'was_impossible': False}),
 Prediction(uid=50, iid=19, r_ui=4.0, est=3.4775042484879326, details={'was_impossible': False}),
 Prediction(uid=50, iid=20, r_ui=4.0, est=3.370949490972873, details={'was_impossible': False}),
 Prediction(uid=50, iid=21, r_ui=4.0, est=3.3638194449987253, details={'was_impossible': False}),
 Prediction(uid=50, iid=22, r_ui=4.0, est=3.50535809065327, details={'was_impossible': False}),
 Prediction(uid=50, iid=23, r_ui=4.0, est=3.6471165251327866, details={'was_impossible': False}),
 Prediction(uid=50, iid=24, r_ui=4.0, est=3.7022263819163164, details={'was_impossible': False}),
 Prediction(uid=50, iid=25

In [45]:
preds_rating = np.array([rate.est for rate in pred])

In [46]:
preds_rating.argmax()

53

In [47]:
pred[53]

Prediction(uid=50, iid=68, r_ui=4.0, est=4, details={'was_impossible': False})

In [49]:
param_grid = {'lr_all':[0.001, 0.01], 'reg_all':[0.1,0.5]}
gs = surprise.model_selection.GridSearchCV(surprise.SVDpp,param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(data)
print(gs.best_params['rmse'])

{'lr_all': 0.01, 'reg_all': 0.1}


In [50]:
alg = surprise.SVDpp(lr_all = 0.01 , reg_all=0.1)
output = surprise.model_selection.cross_validate(alg, data,verbose=True)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.8029  0.7871  0.7968  0.8160  0.7938  0.7993  0.0098  
MAE (testset)     0.6203  0.6072  0.6151  0.6307  0.6135  0.6174  0.0079  
Fit time          23.36   23.40   23.68   24.48   23.31   23.65   0.44    
Test time         0.57    0.57    0.57    0.67    0.55    0.59    0.04    


In [22]:
dataset.head()

Unnamed: 0,uid,iid,rate
0,1,1,2.0
1,1,2,4.0
2,1,3,3.5
3,1,4,3.0
4,1,5,4.0


0           1
1           1
2           1
3           1
4           1
5           1
6           1
7           1
8           1
9           1
10          1
11          1
12          2
13          3
14          3
15          3
16          3
17          3
18          3
19          3
20          3
21          3
22          3
23          3
24          3
25          3
26          3
27          3
28          3
29          3
         ... 
35467    1507
35468    1508
35469    1508
35470    1508
35471    1508
35472    1508
35473    1508
35474    1508
35475    1508
35476    1508
35477    1508
35478    1508
35479    1508
35480    1508
35481    1508
35482    1508
35483    1508
35484    1508
35485    1508
35486    1508
35487    1508
35488    1508
35489    1508
35490    1508
35491    1508
35492    1508
35493    1508
35494    1508
35495    1508
35496    1508
Name: uid, Length: 35497, dtype: int64

In [2]:
import numpy as np
X = np.array([[1, 1], [2, 1], [3, 1.2], [4, 1], [5, 0.8], [6, 1]])
from sklearn.decomposition import NMF
model = NMF(n_components=2, init='random', random_state=0)
W = model.fit_transform(X)
H = model.components_

In [3]:
X

array([[1. , 1. ],
       [2. , 1. ],
       [3. , 1.2],
       [4. , 1. ],
       [5. , 0.8],
       [6. , 1. ]])

In [4]:
W

array([[0.        , 0.46880684],
       [0.55699523, 0.3894146 ],
       [1.00331638, 0.41925352],
       [1.6733999 , 0.22926926],
       [2.34349311, 0.03927954],
       [2.78981512, 0.06911798]])

In [5]:
H

array([[2.09783018, 0.30560234],
       [2.13443044, 2.13171694]])

In [8]:
W.dot(H)

array([[1.00063558, 0.99936347],
       [1.99965977, 1.00034074],
       [2.99965485, 1.20034566],
       [3.9998681 , 1.0001321 ],
       [5.00009002, 0.79990984],
       [6.00008587, 0.999914  ]])

In [10]:
0.55699523*2.09783018 + 0.3894146 *2.13443044

1.9996597796304654

In [1]:
# dense to sparse
from numpy import array
from scipy.sparse import csr_matrix
# create dense matrix
A = array([[1, 0, 0, 1, 0, 0], [0, 0, 2, 0, 0, 1], [0, 0, 0, 2, 0, 0]])
print(A)
# convert to sparse matrix (CSR method)
S = csr_matrix(A)
print(S)
# reconstruct dense matrix
B = S.todense()
print(B)

[[1 0 0 1 0 0]
 [0 0 2 0 0 1]
 [0 0 0 2 0 0]]
  (0, 0)	1
  (0, 3)	1
  (1, 2)	2
  (1, 5)	1
  (2, 3)	2
[[1 0 0 1 0 0]
 [0 0 2 0 0 1]
 [0 0 0 2 0 0]]


In [14]:
print(S)

  (0, 0)	1
  (0, 3)	1
  (1, 2)	2
  (1, 5)	1
  (2, 3)	2


In [15]:
S[(0,1)] =2 

  """Entry point for launching an IPython kernel.


In [17]:
print(S)

  (0, 0)	1
  (0, 1)	2
  (0, 3)	1
  (1, 2)	2
  (1, 5)	1
  (2, 3)	2


In [20]:
row = np.array([0, 0, 1, 2, 2, 2])
col = np.array([0, 2, 2, 0, 1, 2])
data = np.array([1, 2, 3, 4, 5, 6])
a = csr_matrix((data, (row, col)), shape=(3, 3))

In [21]:
a.

array([1, 2, 3, 4, 5, 6], dtype=int32)

In [30]:
np.array(rows)

array([   1,    1,    1, ..., 1508, 1508, 1508], dtype=int64)

In [40]:
rows = np.array(dataset.uid)
clmns = np.array(dataset.iid)
dt = np.array(dataset.rate)
sp = csr_matrix((dt,(rows, clmns))).toarray()

In [41]:
sp

array([[0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 2. , 4. , ..., 0. , 0. , 0. ],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [0. , 0. , 4. , ..., 0. , 0. , 0. ],
       [0. , 1.5, 3. , ..., 0. , 0. , 0. ]])

In [46]:
import numpy as np
from sklearn.decomposition import NMF
model = NMF(n_components=20, init='random', random_state=0)
W = model.fit_transform(sp)
H = model.components_

In [47]:
W.shape

(1509, 20)

In [48]:
H.shape

(20, 2072)

In [49]:
sp.shape

(1509, 2072)

In [55]:
W.dot(H).shape

(1509, 2072)

In [61]:
u0 = W[1,:]
i0 = H[:,2]

In [62]:
u0.dot(i0)

3.9976127637399967