In [1]:
import keras.optimizers
import numpy as np
import tensorflow as tf
import pandas as pd
from math import ceil

from scipy.sparse import csr_matrix, dok_matrix

In [4]:
columns = ['user_id', 'item_id', 'rating', 'timestamp']
ratings = pd.read_csv('../ml-100k/u.data', sep='\t', names=columns)
ratings.drop('timestamp', axis=1, inplace=True)

columns = ['item_id', 'movie title', 'release date', 'video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure', 'Animation', 'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
movies = pd.read_csv('../ml-100k/u.item', sep='|', names=columns, encoding='latin-1')
movies = movies[['item_id', 'movie title']]

n_users = len(ratings['user_id'].unique())
n_items = len(movies['item_id'].unique())

In [5]:
ratings

Unnamed: 0,user_id,item_id,rating
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1
...,...,...,...
99995,880,476,3
99996,716,204,5
99997,276,1090,1
99998,13,225,2


In [6]:
def create_mat(data, users_col, items_col, ratings_col, threshold=None):
    # threshold가 넘은 rating은 긍정으로 판단 => 1로 수정
    if threshold is not None:
        data = data[data[ratings_col] >= threshold]
        data[ratings_col] = 1

    # 해당 'user_id' 부분에 'item_id' 를 count해서 'num_items' column으로 추가한 새로운 DataFrame
    # 결국 user_id가 평가한 갯수를 가지고 있는 DataFrame
    data_user_num_items = (data
                           .groupby('user_id')
                           .agg(**{'num_items': ('item_id', 'count')})
                           .reset_index())
    # 원래 DataFrame에 'user_id'를 기반으로 하여, num_items를 조인해준다.
    data = data.merge(data_user_num_items, on='user_id', how='inner')
    # 평가한 갯수가 1개 초과, 평가하지 않은 user_id는 제거
    data = data[data['num_items'] > 1]

    for col in (items_col, users_col, ratings_col):
        data[col] = data[col].astype('category')

    #cat.codes : index를 0으로 맞춰서 바꿔주는거 같음
    #csr_matrix(data, (row, col)) =>  rating data를 해당 (user_id, item_id) 위치에 놓은 sparse matrix를 만들어준다.
    # user_id = 196, item_id = 242, rating=1 => (196, 242) = 1
    ratings = csr_matrix((data[ratings_col],
                          (data[users_col].cat.codes, data[items_col].cat.codes)))

    ratings.eliminate_zeros()
    return ratings, data

In [7]:
items_col = 'item_id'
users_col = 'user_id'
ratings_col = 'rating'
threshold = 3
X, df = create_mat(ratings, users_col, items_col, ratings_col, threshold)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[ratings_col] = 1


In [8]:
print(X)
print(df)

  (0, 0)	1
  (0, 1)	1
  (0, 2)	1
  (0, 3)	1
  (0, 4)	1
  (0, 5)	1
  (0, 6)	1
  (0, 8)	1
  (0, 9)	1
  (0, 11)	1
  (0, 12)	1
  (0, 13)	1
  (0, 14)	1
  (0, 15)	1
  (0, 16)	1
  (0, 17)	1
  (0, 18)	1
  (0, 19)	1
  (0, 21)	1
  (0, 22)	1
  (0, 23)	1
  (0, 24)	1
  (0, 25)	1
  (0, 27)	1
  (0, 29)	1
  :	:
  (942, 618)	1
  (942, 648)	1
  (942, 665)	1
  (942, 678)	1
  (942, 710)	1
  (942, 714)	1
  (942, 715)	1
  (942, 725)	1
  (942, 732)	1
  (942, 756)	1
  (942, 758)	1
  (942, 785)	1
  (942, 787)	1
  (942, 799)	1
  (942, 807)	1
  (942, 815)	1
  (942, 816)	1
  (942, 830)	1
  (942, 915)	1
  (942, 930)	1
  (942, 1031)	1
  (942, 1061)	1
  (942, 1175)	1
  (942, 1215)	1
  (942, 1308)	1
      user_id item_id rating  num_items
0         196     242      1         34
1         196     393      1         34
2         196     381      1         34
3         196     251      1         34
4         196     655      1         34
...       ...     ...    ...        ...
82515     941     919      1         20
825

In [9]:
print(X.todok()) # dok_sparse : dictionary 형태로 데이터에 접근할 수 있게 데이터를 sparse matrix로 바꿔준다 => data[row, col]

  (0, 0)	1
  (1, 0)	1
  (4, 0)	1
  (5, 0)	1
  (9, 0)	1
  (12, 0)	1
  (15, 0)	1
  (16, 0)	1
  (17, 0)	1
  (19, 0)	1
  (20, 0)	1
  (22, 0)	1
  (24, 0)	1
  (25, 0)	1
  (37, 0)	1
  (40, 0)	1
  (41, 0)	1
  (42, 0)	1
  (43, 0)	1
  (44, 0)	1
  (53, 0)	1
  (55, 0)	1
  (56, 0)	1
  (57, 0)	1
  (62, 0)	1
  :	:
  (827, 1552)	1
  (654, 1553)	1
  (654, 1554)	1
  (654, 1555)	1
  (654, 1556)	1
  (661, 1557)	1
  (674, 1558)	1
  (882, 1559)	1
  (726, 1560)	1
  (732, 1561)	1
  (893, 1561)	1
  (781, 1562)	1
  (781, 1563)	1
  (869, 1563)	1
  (879, 1563)	1
  (781, 1564)	1
  (781, 1565)	1
  (781, 1566)	1
  (834, 1567)	1
  (839, 1568)	1
  (850, 1569)	1
  (853, 1570)	1
  (862, 1571)	1
  (895, 1572)	1
  (915, 1573)	1


In [10]:
X[0].indices

array([  0,   1,   2,   3,   4,   5,   6,   8,   9,  11,  12,  13,  14,
        15,  16,  17,  18,  19,  21,  22,  23,  24,  25,  27,  29,  30,
        31,  32,  37,  38,  39,  41,  42,  43,  44,  45,  46,  47,  48,
        49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,  61,
        63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  74,  75,  76,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  94,  95,  96,  97,  98,  99, 105, 106, 107, 108, 110,
       112, 113, 114, 115, 116, 117, 118, 120, 121, 122, 123, 124, 126,
       127, 128, 129, 131, 132, 133, 134, 135, 136, 138, 140, 143, 145,
       146, 149, 150, 151, 152, 153, 155, 156, 157, 158, 159, 160, 161,
       162, 163, 164, 165, 167, 168, 169, 170, 171, 172, 173, 174, 175,
       176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188,
       189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201,
       202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 213, 21

In [11]:
X.toarray()

array([[1, 1, 1, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0]], dtype=int64)

In [12]:
ratings.loc[ratings.user_id==1][ratings.rating >= 3]

  ratings.loc[ratings.user_id==1][ratings.rating >= 3]


Unnamed: 0,user_id,item_id,rating
202,1,61,4
305,1,189,3
333,1,33,4
334,1,160,4
478,1,20,4
...,...,...,...
89876,1,13,5
92049,1,28,4
92487,1,172,5
94019,1,122,3


In [13]:
def create_train_test(ratings, test_size=0.2, seed=1234):
    # test_size의 비율은 0 <  < 1
    assert test_size < 1.0 and test_size > 0.0

    # train DataFrame을 dok_sparse matrix(dictionary형으로 접근하는) 로 바꿔준다. => 더 효율적이래요
    train = ratings.copy().todok()
    # test DataFrame도 train과 shape이 맞게 만들어준다.(아무 데이터도 들어가 있지 않음)
    test = dok_matrix(train.shape)

    # random seed 지정
    rstate = np.random.RandomState(seed)

    # user_id 의 갯수만큼 반복
    for u in range(ratings.shape[0]):
        # 해당 'user_id'가 평가한 작품의 index를 가져온다.
        split_index = ratings[u].indices
        # 해당 'user_id'가 평가한 작품의 갯수(split_index.shape[0]) 에 test_size 비율을 곱해서 올림을 해준다. => 그걸 test의 갯수(n_splits)만큼으로 사용
        n_splits = ceil(test_size * split_index.shape[0])
        # 테스트의 개수(n_splits)만큼 평가한 작품들 인덱스(split_index) 에서 랜덤으로 선택한다. 중복 선택 안된다.(replace=False)
        test_index = rstate.choice(split_index, size=n_splits, replace=False)
        # 만들어 놓은 빈 test Matrix의 해당 user(u) 해당 index(test_index) 자리에 rating 정보를 대입해준다.
        test[u, test_index] = ratings[u, test_index]
        # train에서는 그 자리의 정보는 지운다.
        train[u, test_index] = 0

    # csr matrix로 변환해서 넘겨준다.
    train, test = train.tocsr(), test.tocsr()
    return train, test

In [14]:
X_train, X_test = create_train_test(X, test_size=0.2, seed=1234)

In [15]:
# indices를 이용하면, 0이 아닌 index들을 모두 반환한다. 그럼 user_id에 해당하는 것은 어떻게 얻어와야하나?
# indptr을 이용 => indptr[user_id] : indptr[user_id+1] => 그럼 user_id에 해당하는 indices 정보들의 대한 index 범위가 나온다.
X_train.indices[X_train.indptr[1] : X_train.indptr[2]]

array([  0,  12,  13,  18,  24,  49, 110, 126, 236, 241, 250, 254, 256,
       257, 268, 271, 272, 273, 274, 275, 278, 279, 282, 283, 284, 285,
       286, 287, 288, 290, 295, 296, 297, 298, 299, 300, 301, 302, 303,
       304, 305, 306, 307, 310, 312], dtype=int32)

In [16]:
X_test.indices[X_test.indptr[1] : X_test.indptr[2]]

array([289, 314, 311, 309, 277, 280,  99, 291, 292, 294, 276, 281],
      dtype=int32)

In [17]:
X_test.indices[X_test.indptr[0] : X_test.indptr[1]]

array([  5, 129,   6, 200,  70,  65, 188,  90, 225,  11,  61, 164,   8,
       261, 205, 256,  38,  87, 177, 162,  75, 157, 124, 112,  41,  84,
       135, 158, 179, 215, 266, 193,  53,  69, 231,  58, 240, 181, 211,
        50, 107,  79, 182, 134], dtype=int32)

In [34]:
def get_train_set(X_train, batch_size=100):
    n_users, n_items = X_train.shape
    sample_pos_items = np.zeros(batch_size, dtype=np.int32)
    sample_neg_items = np.zeros(batch_size, dtype=np.int32)
    sampled_users = np.random.choice(n_users, size=batch_size, replace=False)

    for idx, user in enumerate(sampled_users):
        pos_items = X_train.indices[X_train.indptr[user] : X_train.indptr[user+1]]
        pos_item = np.random.choice(pos_items)
        neg_item = np.random.choice(n_items)
        while neg_item in pos_items:
            neg_item = np.random.choice(n_items)
        sample_pos_items[idx] = pos_item
        sample_neg_items[idx] = neg_item

    return sampled_users, sample_pos_items, sample_neg_items

In [35]:
user, pos_item, neg_item = get_train_set(X_train)

In [36]:
pd.DataFrame({"user" : user, "pos_item": pos_item, "neg_item" : neg_item})

Unnamed: 0,user,pos_item,neg_item
0,220,264,1348
1,897,317,113
2,432,468,748
3,936,274,892
4,268,809,940
...,...,...,...
95,818,343,1103
96,745,0,57
97,675,474,1444
98,625,287,1508


In [None]:
class BPR_Loss(tf.keras.losses.Loss):


In [None]:
class MyOptimizer(tf.keras.optimizers.Optimizer):
    def __init__(self, learning_rate=0.001, name="BPR_OPT", **kwargs):
        super().__init__(name, **kwargs)
        self._set_hyper("learning_rate", kwargs.get("lr", learning_rate))

    def _create_slots(self, var_list):
        super()._create_slots(var_list)

    def _resource_apply_dense(self, grad, handle, apply_state):
        pass

    def _resource_apply_sparse(self, grad, handle, indices, apply_state):
        pass

    def get_config(self):
        pass


In [134]:
class MatrixLayer(tf.keras.Model):
    def __init__(self, n_users, n_items, seed1=1234, seed2=5678, K=20, embedding_depth=50):
        super(MatrixLayer, self).__init__()
        self.u_embedding = tf.keras.layers.Embedding(embedding_depth, K, embeddings_initializer=tf.keras.initializers.RandomNormal(seed=seed1))
        self.i_embedding = tf.keras.layers.Embedding(embedding_depth, K, embeddings_initializer=tf.keras.initializers.RandomNormal(seed=seed2))
        self.reg = tf.Variable(np.random.random(1))

    def call(self, inputs):
        self.user_f = self.u_embedding(np.zeros(n_users))
        self.item_f = self.i_embedding(np.zeros(n_items))


        return self.user_f @ tf.transpose(self.item_f)

    def train_step(self, data):
        x, y = data
        u, i, j = self._get_train_set(x)

        for (a, b, c) in zip(u, i, j):
            user_u = self.user_f[a]
            item_i = self.item_f[b]
            item_j = self.item_f[c]

            with tf.GradientTape() as tape:
                loss = tf.reduce_sum(tf.math.log(user_u * (item_i - item_j)), axis=1)

            y_pred = user_u @ tf.transpose(self.item_f)

            trainable_vars = self.trainable_variables
            gradients = tape.gradient(loss, trainable_vars)
            self.optimizer.apply_gradients(zip(gradients, trainable_vars))
            self.compiled_metrics.update_state(y, y_pred)
        # grad_u = sigmoid_loss * (item_j - item_i) + self.reg * user_u
        # grad_i = sigmoid_loss * (-user_u) + self.reg * item_i
        # grad_j = sigmoid_loss * user_u + self.reg * item_j

        return {m.name : m.result() for m in self.metrics}

    def _get_train_set(X_train, batch_size=100):
        n_users, n_items = X_train.shape
        sample_pos_items = np.zeros(batch_size, dtype=np.int32)
        sample_neg_items = np.zeros(batch_size, dtype=np.int32)
        sampled_users = np.random.choice(n_users, size=batch_size, replace=False)

        for idx, user in enumerate(sampled_users):
            pos_items = X_train.indices[X_train.indptr[user] : X_train.indptr[user+1]]
            pos_item = np.random.choice(pos_items)
            neg_item = np.random.choice(n_items)
            while neg_item in pos_items:
                neg_item = np.random.choice(n_items)
            sample_pos_items[idx] = pos_item
            sample_neg_items[idx] = neg_item

        return sampled_users, sample_pos_items, sample_neg_items



In [135]:
md = MatrixLayer(X_train.shape[0], X_train.shape[1])

In [136]:
print(md(X_train))

tf.Tensor(
[[0.00652106 0.00652106 0.00652106 ... 0.00652106 0.00652106 0.00652106]
 [0.00652106 0.00652106 0.00652106 ... 0.00652106 0.00652106 0.00652106]
 [0.00652106 0.00652106 0.00652106 ... 0.00652106 0.00652106 0.00652106]
 ...
 [0.00652106 0.00652106 0.00652106 ... 0.00652106 0.00652106 0.00652106]
 [0.00652106 0.00652106 0.00652106 ... 0.00652106 0.00652106 0.00652106]
 [0.00652106 0.00652106 0.00652106 ... 0.00652106 0.00652106 0.00652106]], shape=(943, 1682), dtype=float32)


In [146]:
md.trainable_variables

[<tf.Variable 'matrix_layer_9/embedding_29/embeddings:0' shape=(50, 20) dtype=float32, numpy=
 array([[-0.00682165,  0.03804878, -0.08698365,  0.02000416,  0.06398633,
          0.01565262, -0.0046783 ,  0.06139007,  0.0101874 ,  0.0021352 ,
         -0.02615482, -0.0548641 , -0.07185643,  0.02863084,  0.06557573,
         -0.00845705,  0.01562701, -0.04647553, -0.03054772,  0.00185169],
        [-0.01845822,  0.00500589,  0.04147984, -0.01473373, -0.01301052,
         -0.01276167,  0.04798264,  0.01492919,  0.00763836, -0.0407983 ,
         -0.00606241,  0.02299438, -0.01212481,  0.0666387 ,  0.04677532,
         -0.03029574, -0.09772898, -0.00122785,  0.00619325, -0.07178642],
        [ 0.06504812, -0.09849685, -0.00164992, -0.00582678,  0.02359527,
          0.05080794, -0.0268607 ,  0.05285976, -0.02864602, -0.05365189,
          0.02990671, -0.00687724, -0.01487051, -0.01029079, -0.05977713,
          0.05105176, -0.01462513, -0.00306088,  0.03193115, -0.00671104],
        [ 0.057

In [140]:
md.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
           metrics=[tf.keras.metrics.AUC()])

In [143]:
md.fit(X_train, _,  epochs=50)

ValueError: Dimensions 943 and 100 are not compatible

In [125]:
u = tf.keras.layers.Embedding(100, 20, embeddings_initializer=tf.keras.initializers.RandomNormal(seed=1234))(tf.zeros((X.shape[0], )))
i = tf.keras.layers.Embedding(100, 20, embeddings_initializer=tf.keras.initializers.RandomNormal(seed=5678))(tf.zeros((X.shape[1], )))

In [127]:
a, b, c = get_train_set(X_train)

In [128]:
u[a]

TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got array([210,  12, 152, 548,   4, 850, 753, 523, 410, 381, 109, 419, 185,
        14, 606, 176, 449, 916, 220, 154,  42, 783, 831, 586, 724, 806,
       810, 166,   2, 661, 180, 541, 671, 621,  90, 167, 312, 401, 350,
       921, 583, 584,  28, 254, 390, 687, 219,  80, 400, 739, 638,  57,
       926, 161,  66, 560, 620, 477,  38, 446, 382, 680, 120, 789, 745,
       885, 130, 677, 918, 171, 193, 456, 845, 255, 486, 413, 311, 816,
       372,  26, 191, 591,  39, 865, 735,  20, 195,  25, 475, 882,  68,
       796,  10, 421, 670, 559, 835, 158, 521,  43])

In [124]:
print(u[u])
print(i)
print(u @ tf.transpose(i))

IndexError: index 182 is out of bounds for axis 0 with size 100

In [68]:
p = MatrixLayer(X_train.shape[0], X_train.shape[1], seed=1234, K=20)

In [69]:
p(get_train_set(X_train))

TypeError: Exception encountered when calling layer "matrix_layer_7" "                 f"(type MatrixLayer).

'KerasTensor' object is not callable

Call arguments received by layer "matrix_layer_7" "                 f"(type MatrixLayer):
  • inputs=('tf.Tensor(shape=(100,), dtype=int32)', 'tf.Tensor(shape=(100,), dtype=int32)', 'tf.Tensor(shape=(100,), dtype=int32)')

In [66]:
p.summary()

Model: "matrix_layer_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_10 (Embedding)    multiple                  0 (unused)
                                                                 
 embedding_11 (Embedding)    multiple                  0 (unused)
                                                                 
Total params: 0
Trainable params: 0
Non-trainable params: 0
_________________________________________________________________


In [47]:
print(tf.keras.layers.Dot(axes=2)((u, i)))

tf.Tensor(
[[[0.02988671 0.02988671 0.02988671 ... 0.02988671 0.02988671 0.02988671]
  [0.02988671 0.02988671 0.02988671 ... 0.02988671 0.02988671 0.02988671]
  [0.02988671 0.02988671 0.02988671 ... 0.02988671 0.02988671 0.02988671]
  ...
  [0.02988671 0.02988671 0.02988671 ... 0.02988671 0.02988671 0.02988671]
  [0.02988671 0.02988671 0.02988671 ... 0.02988671 0.02988671 0.02988671]
  [0.02988671 0.02988671 0.02988671 ... 0.02988671 0.02988671 0.02988671]]], shape=(1, 943, 1574), dtype=float32)


In [103]:
class BPR(tf.keras.Model):
    def __init__(self, n_users, n_items, K = 20):
        super(BPR, self).__init__()

    def build(self, input_shape):

    def call(self, inputs, training=None, mask=None):


In [60]:
n_users

943

<943x1574 sparse matrix of type '<class 'numpy.int64'>'
	with 65641 stored elements in Compressed Sparse Row format>

In [71]:
batch_size = 100
sample_pos_items = np.zeros(shape=(batch_size, ), dtype=np.int32)
sample_neg_items = np.zeros(shape=(batch_size, ), dtype=np.int32)
sampled_users = np.random.choice(n_users, size=batch_size, replace=False)

In [72]:
for idx, user in enumerate(sampled_users):
    pos_items = X_train.indices[X_train.indptr[user] : X_train.indptr[user+1]]
    pos_item = np.random.choice(pos_items)
    neg_item = np.random.choice(n_items)
    while neg_item in pos_items:
        neg_item = np.random.choice(n_items)
    sample_pos_items[idx] = pos_item
    sample_neg_items[idx] = neg_item

In [73]:
sample_pos_items

array([ 432,  466,  135,  322,  741,  299,   99,  723,  522,  194,  168,
         22,  406,  505,  883,  529,   82,  474,  193, 1019,   49,  235,
        122,   63,  167,  844,  126,  331,   99,  524,  867,  203,  698,
        214,  939,  400,  120,  281,  245,    0,  126,  320,  200,   49,
        221,  341, 1162,  325,  671,  176,  310,  596,   27,  671,  502,
        189,  472,  344,  268,  309,  254,  507,  566,  743, 1184,  212,
        492,  221,  303,  345, 1022,  282,  330,  123,  324,  254,   95,
        773,  242,    6,   13,  267,  182,    8,  275,  391,  259,  110,
        222,  741,  355,  478,   99,  217,  470,  474,  910,  303,  409,
        698])

In [74]:
sample_neg_items

array([ 376, 1365,   15,  860, 1184, 1390,  485,  976,  358,   37,  573,
        466, 1460, 1163, 1091,  654,  419,   85, 1005,  952, 1293, 1120,
       1049,   20,  757,  356, 1374,  470,  704,  394,  275,  998, 1108,
        399,  422, 1563,  277,  413,  526, 1330,  338,  658, 1106,  267,
       1668,  413,  101,  578,  774, 1037, 1545,  281, 1427, 1075,   77,
       1187, 1102,  284, 1260, 1548,  186, 1196, 1081, 1635, 1442,  774,
       1418, 1510,  759, 1214, 1458,  616, 1326, 1070, 1087,  524, 1652,
        375,  417, 1653,  149,  682,  356,  286,   87, 1132, 1254,  190,
       1478,   98, 1064, 1213, 1403, 1631,  851, 1333,  342, 1370,  905,
       1024])

In [137]:
tf.keras.layers.Embedding(X.shape[0], 20)(X[0].toarray())

<tf.Tensor: shape=(1, 1574, 20), dtype=float32, numpy=
array([[[ 0.0065667 ,  0.0373587 , -0.0126505 , ...,  0.00081793,
          0.00276158,  0.02203126],
        [ 0.0065667 ,  0.0373587 , -0.0126505 , ...,  0.00081793,
          0.00276158,  0.02203126],
        [ 0.0065667 ,  0.0373587 , -0.0126505 , ...,  0.00081793,
          0.00276158,  0.02203126],
        ...,
        [-0.01280067, -0.01815722, -0.01978045, ..., -0.00129392,
          0.03975532, -0.02808129],
        [-0.01280067, -0.01815722, -0.01978045, ..., -0.00129392,
          0.03975532, -0.02808129],
        [-0.01280067, -0.01815722, -0.01978045, ..., -0.00129392,
          0.03975532, -0.02808129]]], dtype=float32)>

In [110]:
np.random.choice([1, 2, 3, 4, 5, 6, 7, 8, 9])

1

In [112]:
X_train.indices[X_train.indptr[0] : X_train.indptr[1]]

array([  0,   1,   2,   3,   4,   9,  12,  13,  14,  15,  16,  17,  18,
        19,  21,  22,  23,  24,  25,  27,  29,  30,  31,  32,  37,  39,
        42,  43,  44,  45,  46,  47,  48,  49,  51,  52,  54,  55,  56,
        57,  59,  60,  63,  64,  66,  67,  68,  71,  72,  74,  76,  78,
        80,  81,  82,  83,  85,  86,  88,  89,  91,  92,  94,  95,  96,
        97,  98,  99, 105, 106, 108, 110, 113, 114, 115, 116, 117, 118,
       120, 121, 122, 123, 126, 127, 128, 131, 132, 133, 136, 138, 140,
       143, 145, 146, 149, 150, 151, 152, 153, 155, 156, 159, 160, 161,
       163, 165, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 178,
       180, 183, 184, 185, 186, 187, 189, 190, 191, 192, 194, 195, 196,
       197, 198, 199, 201, 202, 203, 204, 206, 207, 208, 209, 210, 213,
       214, 216, 217, 219, 220, 221, 222, 223, 226, 227, 228, 229, 233,
       234, 235, 237, 238, 239, 241, 245, 247, 248, 249, 250, 252, 255,
       257, 264, 267, 268, 269, 271], dtype=int32)

In [115]:
rstate = np.random.RandomState(1234)
rstate.normal(size=(X.shape[0], 10))

array([[ 0.47143516, -1.19097569,  1.43270697, ..., -0.6365235 ,
         0.01569637, -2.24268495],
       [ 1.15003572,  0.99194602,  0.95332413, ...,  0.28909194,
         1.32115819, -1.54690555],
       [-0.20264632, -0.65596934,  0.19342138, ..., -1.81702723,
        -0.18310854,  1.05896919],
       ...,
       [-1.08122588, -0.71054264,  0.22577435, ..., -1.32841842,
        -0.10509024, -0.30474092],
       [-1.32883294, -0.21492897,  1.02977576, ..., -1.44969975,
        -0.47642839,  0.28240043],
       [-0.10858796,  0.88954734,  0.5372482 , ...,  0.00656127,
         0.48912706, -0.87496395]])

In [88]:
tf.random.set_seed(1234)
user_f = tf.random.uniform(shape=[1, 3])
item_f = tf.random.uniform(shape=[1, 3])

In [91]:
print(user_f)
print(item_f)
print(user_f - item_f)
print(3*(user_f - item_f) + 2)

tf.Tensor([[0.5380393  0.36461866 0.5816301 ]], shape=(1, 3), dtype=float32)
tf.Tensor([[0.3253647  0.1387006  0.64804935]], shape=(1, 3), dtype=float32)
tf.Tensor([[ 0.21267462  0.22591805 -0.06641924]], shape=(1, 3), dtype=float32)
tf.Tensor([[2.6380239 2.6777542 1.8007423]], shape=(1, 3), dtype=float32)


In [78]:
tf.Variable(np.random.random(1))

<tf.Variable 'Variable:0' shape=(1,) dtype=float64, numpy=array([0.03575459])>

In [80]:
a = tf.constant(1)

In [81]:
b=  tf.Variable([[1, 2, 3, 4, 5],
                 [6, 7, 8, 9, 10]])

In [82]:
a + b

<tf.Tensor: shape=(2, 5), dtype=int32, numpy=
array([[ 2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11]])>

In [102]:
tf.Variable(tf.random.uniform(shape=[X_train.shape[0], 20]))

<tf.Variable 'Variable:0' shape=(943, 20) dtype=float32, numpy=
array([[0.9099175 , 0.6676756 , 0.19938636, ..., 0.00350428, 0.72572005,
        0.07293105],
       [0.5367712 , 0.58806586, 0.95469594, ..., 0.4465859 , 0.17873228,
        0.09406638],
       [0.6127943 , 0.6706804 , 0.96378195, ..., 0.73364496, 0.61318934,
        0.85675085],
       ...,
       [0.16013873, 0.80373204, 0.8493011 , ..., 0.40641093, 0.6207274 ,
        0.40679705],
       [0.4282552 , 0.23951876, 0.4762038 , ..., 0.18497705, 0.04024673,
        0.47545183],
       [0.26014614, 0.74357116, 0.95878863, ..., 0.15979683, 0.17786896,
        0.31191266]], dtype=float32)>

In [None]:
class BPR:

    def __init__(self, X_train, embedding_depth=10, K=20):
        super().__init__()
        self.K = K
        self.n_users, self.n_items = X_train.shape
        self.embedding_depth = embedding_depth

        self.build()
    def build(self):
        self.userInputLayer = tf.keras.Input(shape=(1, ), dtype="int32")
        self.itemPosInputLayer = tf.keras.Input(shape=(1, ), dtype="int32")
        self.itemNegInputLayer = tf.keras.Input(shape=(1, ), dtype="int32")

        self.u_embedding = tf.keras.layers.Embedding(self.embedding_depth, self.K, embeddings_initializer=tf.keras.initializers.RandomNormal(seed=1234))(self.userInputLayer)
        self.u_embedding = tf.keras.layers.Flatten()(self.u_embedding)

        self.i_embedding = tf.keras.layers.Embedding(self.embedding_depth, self.K, embeddings_initializer=tf.keras.initializers.RandomNormal(seed=5678))
        self.i_embedding = tf.keras.layers.Flatten(self.i_embedding)



    def get_BPR_Loss(self, matrixList):
        userEmbeddingMatrix, itemPosMatrix, itemNegMatrix = matrixList



In [104]:
u, i, j = get_train_set(X_train)

In [107]:
tf.keras.Input(shape=(1, ), dtype="int32")

<KerasTensor: shape=(None, 1) dtype=int32 (created by layer 'input_3')>

In [112]:
userInput = tf.keras.Input(shape=(1, ), dtype="int32")

p = tf.keras.layers.Embedding(50, 20, embeddings_initializer=tf.keras.initializers.RandomNormal(seed=1234))(userInput)

In [116]:
y = tf.keras.Model(inputs=userInput, outputs=p)

In [121]:
y.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 1)]               0         
                                                                 
 embedding_26 (Embedding)    (None, 1, 20)             1000      
                                                                 
Total params: 1,000
Trainable params: 1,000
Non-trainable params: 0
_________________________________________________________________


InvalidArgumentError: Graph execution error:

Detected at node 'mean_squared_error/SquaredDifference' defined at (most recent call last):
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 196, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\runpy.py", line 86, in _run_code
      exec(code, run_globals)
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\traitlets\config\application.py", line 978, in launch_instance
      app.start()
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelapp.py", line 677, in start
      self.io_loop.start()
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\tornado\platform\asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 595, in run_forever
      self._run_once()
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\asyncio\base_events.py", line 1881, in _run_once
      handle._run()
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\asyncio\events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 461, in dispatch_queue
      await self.process_one()
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 450, in process_one
      await dispatch(*args)
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 357, in dispatch_shell
      await result
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\kernelbase.py", line 652, in execute_request
      reply_content = await reply_content
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\ipkernel.py", line 359, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\ipykernel\zmqshell.py", line 532, in run_cell
      return super().run_cell(*args, **kwargs)
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 2768, in run_cell
      result = self._run_cell(
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 2814, in _run_cell
      return runner(coro)
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3012, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3191, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "C:\Users\Yungi\AppData\Local\Programs\Python\Python310\lib\site-packages\IPython\core\interactiveshell.py", line 3251, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "C:\Users\Yungi\AppData\Local\Temp\ipykernel_116876\2375609823.py", line 1, in <module>
      y.fit(u, tf.ones(100))
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\engine\training.py", line 1564, in fit
      tmp_logs = self.train_function(iterator)
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\engine\training.py", line 1160, in train_function
      return step_function(self, iterator)
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\engine\training.py", line 1146, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\engine\training.py", line 1135, in run_step
      outputs = model.train_step(data)
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\engine\training.py", line 994, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\engine\training.py", line 1052, in compute_loss
      return self.compiled_loss(
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\engine\compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\losses.py", line 272, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "C:\Users\Yungi\AppData\Roaming\Python\Python310\site-packages\keras\losses.py", line 1486, in mean_squared_error
      return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
Node: 'mean_squared_error/SquaredDifference'
required broadcastable shapes
	 [[{{node mean_squared_error/SquaredDifference}}]] [Op:__inference_train_function_2285]

In [131]:
for (u, i, j) in zip(a, b, c):
    print(u, i, j)

210 116 1224
12 305 1344
152 320 112
548 287 57
4 427 626
850 11 1144
753 272 1510
523 477 1520
410 264 232
381 176 869
109 421 821
419 746 395
185 290 684
14 49 526
606 99 697
176 647 1190
449 258 1014
916 585 1164
220 49 1255
154 285 683
42 533 423
783 291 769
831 180 1026
586 313 696
724 741 1210
806 1261 828
810 671 489
166 480 611
2 342 1271
661 92 281
180 810 866
541 89 993
671 862 10
621 515 441
90 203 110
167 859 378
312 131 1352
401 18 94
350 743 1113
921 236 1210
583 39 816
584 29 505
28 305 246
254 218 1540
390 485 965
687 347 157
219 293 1339
80 1046 542
400 493 947
739 320 1498
638 700 462
57 133 101
926 203 739
161 146 1267
66 23 1216
560 1090 799
620 679 395
477 70 1327
38 312 1318
446 16 609
382 8 540
680 683 1115
120 191 771
789 152 58
745 207 1510
885 417 1266
130 123 448
677 99 990
918 743 1090
171 635 1030
193 319 595
456 442 538
845 785 964
255 209 442
486 793 629
413 299 218
311 120 204
816 403 1203
372 378 780
26 245 10
191 6 529
591 148 363
39 241 753
865 887 7


In [133]:
b

array([ 116,  305,  320,  287,  427,   11,  272,  477,  264,  176,  421,
        746,  290,   49,   99,  647,  258,  585,   49,  285,  533,  291,
        180,  313,  741, 1261,  671,  480,  342,   92,  810,   89,  862,
        515,  203,  859,  131,   18,  743,  236,   39,   29,  305,  218,
        485,  347,  293, 1046,  493,  320,  700,  133,  203,  146,   23,
       1090,  679,   70,  312,   16,    8,  683,  191,  152,  207,  417,
        123,   99,  743,  635,  319,  442,  785,  209,  793,  299,  120,
        403,  378,  245,    6,  148,  241,  887,  293,  735,  115,  125,
        428,  906, 1130,  180,  732,  180,  547,  287,  173,  273,   99,
        749])