In [47]:
!pip install scikit-surprise



In [48]:
from surprise import Dataset
import torch
import torch.nn as nn
import numpy as np

data = Dataset.load_builtin('ml-1m')

In [49]:
import pandas as pd

df = pd.DataFrame(data.raw_ratings)

In [None]:
len(df[1])

1000209

In [None]:
class Env():
  def __init__(self, user_item_matrix, num_users, num_items, N):
    self.matrix = user_item_matrix
    self.num_items = num_items
    self.memory = np.ones([num_users, N]) * num_items

  def reset(self, user_id):
    self.user_id = user_id
    self.viewed_items = []
    self.related_items = np.argwhere()

In [50]:
users = df[0].unique()
items = df[1].unique()

In [51]:
class UserMovieEmbedding(nn.Module):
    def __init__(self, users_count, items_count, embedding_dim):
        super().__init__()
        self.user_embeddings = nn.Embedding(users_count, embedding_dim)
        self.item_embeddings = nn.Embedding(items_count, embedding_dim)
        
    def forward(self, user, items):
        user_embedding = self.user_embeddings(user)
        item_embeddings = self.item_embeddings(items)
        return user_embedding, item_embeddings

In [52]:
user = 1

itemsForFirstUser = torch.LongTensor(df[df[0]==str(user)][1].to_numpy(dtype = float))
embedding_dim = len(itemsForFirstUser)

user_movie_embedding = UserMovieEmbedding(len(users), len(items), embedding_dim)
user_embedding, item_embeddings = user_movie_embedding(torch.LongTensor([user]),  torch.LongTensor(np.expand_dims(itemsForFirstUser, axis=0)))

item_embeddings = item_embeddings.permute(1,0,2)

In [62]:
class InnerProductLayer(nn.Module):
    """InnerProduct Layer used in PNN that compute the element-wise
    product or inner product between feature vectors.
      Input shape
        - a list of 3D tensor with shape: ``(batch_size,1,embedding_size)``.
      Output shape
        - 3D tensor with shape: ``(batch_size, N*(N-1)/2 ,1)`` if use reduce_sum. or 3D tensor with shape:
        ``(batch_size, N*(N-1)/2, embedding_size )`` if not use reduce_sum.
      Arguments
        - **reduce_sum**: bool. Whether return inner product or element-wise product
      References
            - [Qu Y, Cai H, Ren K, et al. Product-based neural networks for user response prediction[C]//
            Data Mining (ICDM), 2016 IEEE 16th International Conference on. IEEE, 2016: 1149-1154.]
            (https://arxiv.org/pdf/1611.00144.pdf)"""

    def __init__(self, num_inputs, device='cpu'):
        super(InnerProductLayer, self).__init__()
        self.W = nn.Parameter(torch.diag(torch.rand((num_inputs,1))))
        self.W.requires_grad = True
        self.to(device)

    def forward(self, inputs, user):

        embed_list = inputs
        row = []
        col = []
        num_inputs = len(embed_list)

        print('num_inputs', num_inputs)

        embed_list = torch.matmul(self.W, embed_list)
        embed_list = embed_list.unsqueeze(1)

        for i in range(num_inputs - 1):
            for j in range(i + 1, num_inputs):
                row.append(i)
                col.append(j)

        p = torch.cat([embed_list[idx]
                       for idx in row], dim=1)  # batch num_pairs k
        q = torch.cat([embed_list[idx]
                       for idx in col], dim=1)

        u = user * embed_list

        inner_product = p * q

        u = u.reshape(-1).unsqueeze(0)
        result = torch.cat((u, inner_product), dim=1)
        print('result: ', result)
        return result

In [76]:
class Critic(nn.Module):
  def __init__(self, state_size, action_size):
    super(Critic, self).__init__()

    self.lin1 = nn.Linear(state_size, state_size)
    self.lin2 = nn.Linear(state_size+action_size, state_size)
    self.lin3 = nn.Linear(state_size, 1)

    self.relu = nn.ReLU()

  def forward(self, action, state):
    state = self.relu(self.lin1(state))
    input_concat = torch.cat((action, state), dim=1)

    x = self.lin2(input_concat)
    x = self.relu(x)
    x = self.lin3(x)
    x = self.relu(x)

    return x

In [70]:
product_layer = InnerProductLayer(num_inputs=item_embeddings.shape[0])

state_representation = product_layer(item_embeddings[:10], user_embedding)
print(state_representation.shape)

num_inputs 10
result:  tensor([[-0.1525, -0.5123, -0.3211,  ..., -1.6515,  0.1273, -0.3380]],
       grad_fn=<CatBackward>)
torch.Size([1, 2915])


In [77]:
mock_action = torch.rand((1,embedding_dim))
print(mock_action.shape)

critic = Critic(state_representation.shape[1], mock_action.shape[1])
Q_value = critic(mock_action, state_representation)

print(Q_value)

torch.Size([1, 53])
tensor([[0.0148]], grad_fn=<ReluBackward0>)
