In [12]:
import torch.nn as nn

# Problem sizes: number of users, number of items and embedding width.
# NOTE(review): the adjacency cell further down hard-codes n_users, n_items = 4000, 1000,
# i.e. the opposite of these two counts — confirm which orientation is intended.
user_num, item_num, latent_dim = 1000, 4000, 100

# One learnable latent_dim-wide vector per user and per item.
embedding_user = nn.Embedding(user_num, latent_dim)
embedding_item = nn.Embedding(item_num, latent_dim)

In [8]:
import os
import math
import time
import numpy as np
import scipy.sparse as sp

import torch
import torch.nn as nn
import torch.nn.functional as F

In [19]:
# Print both table shapes on one line, then display the user table itself.
print(*(table.weight.shape for table in (embedding_user, embedding_item)))
embedding_user.weight

torch.Size([1000, 100]) torch.Size([4000, 100])


Parameter containing:
tensor([[ 0.3122,  1.3069,  0.6989,  ..., -0.1420,  0.2088,  0.7707],
        [ 0.0591, -0.3785,  0.2088,  ..., -0.3123,  0.1208, -0.4062],
        [ 0.1556,  0.2551,  0.4734,  ..., -0.1234,  0.2883,  1.0199],
        ...,
        [ 0.2718, -0.3941, -1.6571,  ..., -2.0296,  0.0838,  0.1728],
        [ 2.0292,  0.0626, -1.2973,  ..., -1.8868, -1.9678, -0.7274],
        [-0.1836, -0.4067,  0.1181,  ..., -1.3339,  0.8144,  0.1789]],
       requires_grad=True)

In [22]:
# Weight initialization: refill both tables with samples from N(mean=0, std=0.01).
# nn.init.normal_ works in place on the tensor it is given, so the result does not
# have to be assigned back to the module.
for table in (embedding_user, embedding_item):
    nn.init.normal_(table.weight, mean=0, std=0.01)

In [108]:
# Stack the user table on top of the item table (user rows first, item rows after).
users_emb, items_emb = embedding_user.weight, embedding_item.weight
all_emb = torch.cat((users_emb, items_emb), dim=0)

# Per-layer list of embeddings; entry 0 is the un-propagated (layer-0) table.
embs = [all_emb]

In [26]:
# Shape of the concatenated table; the recorded output is torch.Size([5000, 100]).
all_emb.shape

torch.Size([5000, 100])

In [85]:
# Toy interaction matrix R (n_users x n_items): every user interacted with every item.
# NOTE(review): the first cell defines user_num = 1000 / item_num = 4000, while this
# cell uses 4000 users and 1000 items — confirm which orientation is intended.
n_users, n_items = 4000, 1000
rating_matrix = sp.csr_matrix(np.ones((n_users, n_items)))

print("generating adjacency matrix")
s = time.time()

# Symmetric bipartite adjacency A = [[0, R], [R^T, 0]] with shape
# (n_users + n_items, n_users + n_items):
#   - rows [0, n_users) x columns [n_users, end) hold the user -> item interactions,
#   - rows [n_users, end) x columns [0, n_users) hold the item -> user interactions.
# Built in one shot with sp.bmat instead of slice-assigning into a LIL matrix and
# converting to DOK, which is very slow for millions of non-zeros.
R = rating_matrix
adj_mat = sp.bmat([[None, R], [R.T, None]], format="csr", dtype=np.float32)

# Node degrees: sum over each row, shape (n_users + n_items, 1).
rowsum = np.array(adj_mat.sum(axis=1))

# D^{-1/2}. A node without interactions has degree 0, which produces inf; those
# entries are reset to 0 (the divide warning is expected and therefore silenced).
with np.errstate(divide="ignore"):
    d_inv = np.power(rowsum, -0.5).flatten()
d_inv[np.isinf(d_inv)] = 0.

# Put the per-node factors on the diagonal of a square sparse matrix.
d_mat = sp.diags(d_inv)

# Symmetric normalization: D^{-1/2} A D^{-1/2}.
norm_adj = d_mat.dot(adj_mat)
norm_adj = norm_adj.dot(d_mat)
norm_adj = norm_adj.tocsr()

end = time.time()
print(f"costing {end - s}s, saved norm_mat...")


generating adjacency matrix


In [61]:
# Small 4x4 integer array used in the next cells to illustrate block slicing.
a1 = np.array(
    [
        [1, 2, 3, 4],
        [11, 22, 33, 44],
        [111, 222, 333, 444],
        [1111, 2222, 3333, 4444],
    ]
)

In [64]:
# Display the full 4x4 array.
a1

array([[   1,    2,    3,    4],
       [  11,   22,   33,   44],
       [ 111,  222,  333,  444],
       [1111, 2222, 3333, 4444]])

In [67]:
# Top-right 2x2 block: rows 0-1, columns 2-3.
a1[:2,2:]

array([[ 3,  4],
       [33, 44]])

In [74]:
# Bottom-left 2x2 block: rows 2-3, columns 0-1.
a1[2:,:2]

array([[ 111,  222],
       [1111, 2222]])

In [96]:
def _convert_sp_mat_to_sp_tensor(X):
    coo = X.tocoo().astype(np.float32)
    row = torch.Tensor(coo.row).long()
    col = torch.Tensor(coo.col).long()
    index = torch.stack([row, col])
    data = torch.FloatTensor(coo.data)
    return torch.sparse.FloatTensor(index, data, torch.Size(coo.shape))

In [97]:
# Turn the normalized adjacency into a torch sparse tensor and coalesce it in one
# step. No device transfer happens here (the `.to(device)` call stays disabled).
Graph = _convert_sp_mat_to_sp_tensor(norm_adj).coalesce()


Do not split the matrix


In [104]:
# Display the sparse graph tensor (indices, values, size and number of non-zeros).
Graph

tensor(indices=tensor([[   0,    0,    0,  ..., 4999, 4999, 4999],
                       [4000, 4001, 4002,  ..., 3997, 3998, 3999]]),
       values=tensor([0.0005, 0.0005, 0.0005,  ..., 0.0005, 0.0005, 0.0005]),
       size=(5000, 5000), nnz=8000000, layout=torch.sparse_coo)

In [109]:
num_layers = 2

# Always restart from the layer-0 embeddings stored in embs[0] and drop any layers
# left over from a previous run, so that re-running this cell gives the same result
# instead of propagating already-propagated values and growing `embs`.
ego_emb = embs[0]
del embs[1:]

all_emb = ego_emb
for _ in range(num_layers):
    # One propagation step: multiply the sparse graph with the current embeddings.
    all_emb = torch.sparse.mm(Graph, all_emb)
    embs.append(all_emb)

In [115]:
# Layer 0 plus the two propagated layers: three entries, all with the same shape.
embs[0].shape, embs[1].shape, embs[2].shape, len(embs)

(torch.Size([5000, 100]), torch.Size([5000, 100]), torch.Size([5000, 100]), 3)

In [118]:
# Layer-0 entry: the concatenated tables (grad_fn is CatBackward in the recorded output).
embs[0]

tensor([[ 5.0557e-03, -1.5728e-02, -9.4589e-03,  ...,  1.0007e-03,
          3.2845e-04, -4.3579e-03],
        [-5.6766e-03,  5.5115e-03, -1.8081e-03,  ...,  2.2576e-02,
         -9.8114e-03, -2.6106e-04],
        [-1.4560e-03,  2.7112e-04, -8.9938e-03,  ...,  9.3871e-03,
          8.3590e-04,  9.1583e-04],
        ...,
        [-2.0350e-02, -4.8160e-03, -6.5907e-04,  ...,  4.9434e-03,
          3.0143e-03, -1.9157e-02],
        [-2.2831e-02,  1.9603e-02,  6.4267e-05,  ..., -1.5745e-03,
         -1.2905e-02, -1.0184e-02],
        [ 1.6405e-02,  1.5115e-03, -8.3334e-05,  ..., -9.0978e-03,
         -2.9270e-03, -6.0976e-04]], grad_fn=<CatBackward>)

In [121]:
# Stack the per-layer embeddings along a new axis 1 and average over that axis to
# get the final representation of every node.
# Note: `embs` is rebound from a list to a tensor here, so this cell is meant to be
# run once after the propagation cell.
embs = torch.stack(embs, dim=1)
lightgcn_out = torch.mean(embs, dim=1)

# Split the rows back into users (first n_users rows) and items (next n_items rows),
# using the counts defined in the adjacency-matrix cell instead of repeating the
# literals 4000 and 1000.
users, items = torch.split(lightgcn_out, [n_users, n_items])

u_embedding = users
i_embedding = items

In [126]:
# Stacked layers; the recorded output is torch.Size([5000, 3, 100]).
embs.shape

torch.Size([5000, 3, 100])

In [127]:
# Mean over the layer axis; the recorded output is torch.Size([5000, 100]).
lightgcn_out.shape

torch.Size([5000, 100])

In [122]:
# First chunk of the split; the recorded output is torch.Size([4000, 100]).
users.shape

torch.Size([4000, 100])

In [123]:
# Second chunk of the split; the recorded output is torch.Size([1000, 100]).
items.shape

torch.Size([1000, 100])

In [None]:
# NOTE(review): `user` and `pos` are not defined in any visible cell and this cell
# has no execution count — presumably index tensors for a training batch; confirm.
# Look up the rows of the propagated user / item tables for the given indices.
user_latent = F.embedding(user, u_embedding)
positive_latent = F.embedding(pos, i_embedding)

# Element-wise product summed over dim 1 (a per-pair dot product when the index
# tensors are 1-D, so that the looked-up tensors are 2-D).
positive_score = (user_latent * positive_latent).sum(dim=1)

In [133]:
# Index batch of shape (2, 4) holding the row ids 1..8.
input2 = torch.arange(1, 9).reshape(2, 4)
# An embedding matrix containing 10 vectors of size 3, filled with random values.
embedding_matrix = torch.rand((10, 3))
# Every index is replaced by the matching row of the embedding matrix.
F.embedding(input2, embedding_matrix)

tensor([[[0.1646, 0.6449, 0.1580],
         [0.6256, 0.6581, 0.5673],
         [0.7738, 0.3639, 0.0376],
         [0.2759, 0.5159, 0.4110]],

        [[0.1426, 0.4004, 0.0435],
         [0.6455, 0.1574, 0.6328],
         [0.0737, 0.5741, 0.0546],
         [0.3497, 0.9031, 0.0952]]])

In [142]:
# The lookup appends the vector size to the index shape: (2, 4) -> (2, 4, 3).
F.embedding(input2, embedding_matrix).shape

torch.Size([2, 4, 3])

In [143]:
# Two independent lookups of the same indices: equal values, separate tensors.
a22, a33 = (F.embedding(input2, embedding_matrix) for _ in range(2))

In [153]:
# Element-wise product and its shape; the shape stays (2, 4, 3).
torch.mul(a22, a33), torch.mul(a22, a33).shape

(tensor([[[0.0271, 0.4159, 0.0250],
          [0.3914, 0.4331, 0.3219],
          [0.5988, 0.1324, 0.0014],
          [0.0761, 0.2661, 0.1689]],
 
         [[0.0203, 0.1603, 0.0019],
          [0.4166, 0.0248, 0.4004],
          [0.0054, 0.3295, 0.0030],
          [0.1223, 0.8156, 0.0091]]]),
 torch.Size([2, 4, 3]))

In [154]:
# sum(1) collapses dimension 1 (size 4 here), leaving shape (2, 3).
torch.mul(a22, a33).sum(1), torch.mul(a22, a33).sum(1).shape

(tensor([[1.0934, 1.2476, 0.5172],
         [0.5647, 1.3302, 0.4143]]),
 torch.Size([2, 3]))

In [158]:
# Same reduction again, showing only the values.
torch.mul(a22, a33).sum(1)

tensor([[1.0934, 1.2476, 0.5172],
        [0.5647, 1.3302, 0.4143]])

In [None]:
# First (4, 3) slice of the element-wise product displayed above, copied by hand.
[ [0.0271, 0.4159, 0.0250],
  [0.3914, 0.4331, 0.3219],
  [0.5988, 0.1324, 0.0014],
  [0.0761, 0.2661, 0.1689]]

In [169]:
# Manual check: the first column of that slice adds up to 1.0934, the first value of sum(1).
sum(np.array([0.0271, 0.3914, 0.5988, 0.0761]))

1.0934000000000001