# Política Gaussiana

In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import math

def normal(x, mu, sigma_sq):
    """Evaluate the univariate Gaussian density element-wise.

    Params:
        - x: points at which the density is evaluated (tensor)
        - mu: mean of the distribution (tensor broadcastable with ``x``)
        - sigma_sq: variance of the distribution; must be a tensor because
          ``.sqrt()`` is called on an expression built from it

    Returns:
        Tensor with the density N(x; mu, sigma_sq) for every element.
    """
    # Exponent of the Gaussian kernel: -(x - mu)^2 / (2 * sigma^2).
    exponent = -1*(x-mu).pow(2) / (2*sigma_sq)
    # Normalising constant: 1 / sqrt(2 * pi * sigma^2).
    scale = 1 / (2*sigma_sq*math.pi).sqrt()
    return exponent.exp() * scale

class GaussianPolicy(nn.Module):
    '''
    Diagonal Gaussian policy: maps a batch of user ids to the mean and the
    (pre-softplus) variance of an action distribution with ``d_R`` dimensions.
    '''

    def __init__(self, n_users: int, d_R: int, hidden_dimension: int):
        '''
        Params:
            - n_users: number of users (size of the one-hot input)
            - d_R: dimension of the user representation (action dimension)
            - hidden_dimension: number of neurons in the hidden layer
        '''
        super().__init__()

        self.n_users = n_users
        self.d_R = d_R
        self.hidden_dimension = hidden_dimension

        self.one_hot = F.one_hot

        self.linear1 = nn.Linear(n_users, hidden_dimension)
        self.linear_mu = nn.Linear(hidden_dimension, d_R)
        self.linear_sigma = nn.Linear(hidden_dimension, d_R)

    def forward(self, xb):
        '''
        Params:
            - xb: integer tensor of user ids, each in [0, n_users)

        Returns:
            (mu, sigma_sq): the mean and the raw, unconstrained variance
            output. ``sigma_sq`` may be negative here; ``act`` applies a
            softplus to make it positive.
        '''
        xb = self.one_hot(xb, num_classes=self.n_users).to(torch.float32)
        xb = F.relu(self.linear1(xb))
        mu = self.linear_mu(xb)
        sigma_sq = self.linear_sigma(xb)
        return mu, sigma_sq

    def act(self, xb):
        '''
        Sample one action per user in the batch.

        Params:
            - xb: integer tensor of user ids (batch of users)

        Returns:
            (action, log_prob): ``action`` is detached from the graph;
            ``log_prob`` is the per-dimension Gaussian log-density of that
            action and stays differentiable w.r.t. the policy parameters.
        '''
        mu, sigma_sq = self.forward(xb)
        sigma_sq = F.softplus(sigma_sq)
        # randn_like keeps the noise on the same device/dtype as mu, so the
        # policy keeps working after .to(device).
        eps = torch.randn_like(mu)
        # The sampled action is treated as a constant (score-function
        # estimator), hence the detach.
        action = (mu + sigma_sq.sqrt() * eps).detach()
        # Log-density computed directly in log space instead of log(pdf):
        # avoids the exp -> log round trip that can under/overflow.
        log_prob = (
            -(action - mu).pow(2) / (2 * sigma_sq)
            - 0.5 * torch.log(2 * math.pi * sigma_sq)
        )
        return action, log_prob

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# from torch.distributions.multivariate_normal import MultivariateNormal
# distrib = MultivariateNormal(loc=mean, covariance_matrix=cov)
# distrib.rsample()

# CPR

In [10]:
import pandas as pd
import os
from os.path import join

from src.pr_model import P_R_Network
from src.ps_model import P_S_Network

In [128]:
class CPR:
    """
    Couples a retrieval model (``pr_model``) and a selection model
    (``ps_model``): first retrieve ``k`` candidate items around an action
    centre, then pick ``M`` of them for a given user.
    """

    def __init__(self, pr_model, ps_model, n_items):
        """
        pr_model: model exposing ``item_emb.weight`` and ``w``
        ps_model: callable scoring a list of items for a user
        n_items: number of items (length of the sampled alpha vector) - int
        """
        self.pr_model = pr_model
        # Detached copies: these are only read for scoring, never trained here.
        self.Q = pr_model.item_emb.weight.detach()
        self.wR = pr_model.w.detach().squeeze()

        self.ps_model = ps_model
        self.n_items = n_items

    def sample_alpha_posterior(self):
        """Draw one alpha value per item (currently a standard-normal sample)."""
        return torch.randn(self.n_items)

    def sample_beta_posterior(self, k):
        """Draw ``k`` beta values (currently a standard-normal sample)."""
        return torch.randn(k)

    def gen_r_from_tau(self, tau, k):
        """
        tau: action center - torch.tensor(1, emb_dim)
        k: number of items - int

        Returns the ids of the (at most) k highest-scoring items, best first,
        as a list of ints.
        """
        alpha = self.sample_alpha_posterior()
        score = (tau @ self.Q.T) + (self.wR * alpha)
        # tau is (1, emb_dim), so score is (1, n_items). Flatten before
        # ranking: enumerating the 2-D tensor would only yield its single row
        # and the ranking would always come back as [0].
        score = score.flatten()
        n_top = max(0, min(k, score.numel()))
        if n_top == 0:
            return []
        return torch.topk(score, n_top).indices.tolist()

    def gen_s(self, u, r, M):
        """
        u: user id - int
        r: list of item ids - [int]
        M - items to be selected - int

        Returns the (at most) M items of ``r`` with the highest ps_model
        score, best first.
        """
        if len(r) == 0 or M <= 0:
            return []

        beta = self.sample_beta_posterior(len(r))

        # ps_model is called on a batch of size one.
        u_b = torch.tensor([[u]], dtype=torch.long)
        r_b = torch.tensor([r], dtype=torch.long)
        r_mask_b = torch.ones(1, len(r))
        beta_b = beta.unsqueeze(0)
        # assumes ps_model returns one score per item of r for each batch row
        # -- TODO confirm against P_S_Network
        score = self.ps_model(u_b, r_b, r_mask_b, None, beta_b).detach()[0]
        score = score.flatten()

        # Never index past either sequence if the lengths disagree.
        n_scored = min(len(r), score.numel())
        n_top = min(M, n_scored)
        if n_top == 0:
            return []
        best = torch.topk(score[:n_scored], n_top).indices.tolist()
        return [r[i] for i in best]

# Carregando dados

In [11]:
data_path = 'MIND-small_pp'

# User lookup table: code -> integer index, read from user_d.csv.
user_df = pd.read_csv(join(data_path, 'user_d.csv'))
user_d = dict(zip(user_df['code'], user_df['indice']))

# Item lookup table: code -> integer index, read from item_d.csv.
item_df = pd.read_csv(join(data_path, 'item_d.csv'))
item_d = dict(zip(item_df['code'], item_df['indice']))

In [12]:
# Build the P_R network sized by the two lookup tables (+1 each; presumably
# reserving one extra index -- TODO confirm) and restore its trained weights.
pr_model = P_R_Network(len(user_d) + 1, len(item_d) + 1)
pr_state = torch.load('pr_model_60epochs/pr_model.pth')
pr_model.load_state_dict(pr_state)

<All keys matched successfully>

In [13]:
# Build the P_S network and restore its trained weights.
# NOTE(review): the meaning of the third argument (299) is not visible in this
# notebook -- confirm against P_S_Network.
ps_model = P_S_Network(len(user_d) + 1, len(item_d) + 1, 299)
ps_state = torch.load('ps_model_100epochs/ps_model.pth')
ps_model.load_state_dict(ps_state)

<All keys matched successfully>

In [129]:
# Combine the two trained models; n_items matches the item count given to the models.
cpr = CPR(pr_model=pr_model, ps_model=ps_model, n_items=len(item_d) + 1)

# Treinar Gaussiana


In [131]:
class CPR_Env:
    """
    Environment that turns a batch of actions into rewards by running the
    CPR retrieve/select pipeline and asking the recommender for a loss.
    """

    def __init__(self, cpr, recommender, k, M):
        """
        cpr: object providing gen_r_from_tau and gen_s
        recommender: object providing calculate_loss(u, r, s)
        k: number of items retrieved per action - int
        M: number of items selected among the retrieved ones - int
        """
        self.cpr, self.recommender = cpr, recommender
        self.k, self.M = k, M

    def compute_reward(self, action, u):
        """
        Receives a batch of actions and returns a batch of rewards.
        action: torch.tensor(batch_sz, emb_dim)
        u: torch.tensor(batch_sz)
        reward: torch.tensor(batch_sz)
        """
        batch_sz = action.size(0)
        reward = torch.zeros(batch_sz)
        # Samples are processed one at a time: each row of `action` is an
        # action centre for the user at the same position in `u`.
        for idx, center in enumerate(action):
            user = u[idx].item()
            retrieved = self.cpr.gen_r_from_tau(center.unsqueeze(0), self.k)
            selected = self.cpr.gen_s(user, retrieved, self.M)
            reward[idx] = self.recommender.calculate_loss(user, retrieved, selected)
        return reward

In [132]:
def reinforce_batch(policy, optimizer, u, env):
    """Run one REINFORCE-style update on a batch of users.

    policy: object whose act(u) returns (action, log_prob)
    optimizer: optimizer stepping the policy parameters
    u: batch of user ids passed to both the policy and the environment
    env: object whose compute_reward(action, u) returns one reward per sample

    Returns the scalar loss value as a Python float.
    """
    sampled_action, logp = policy.act(u)
    batch_reward = env.compute_reward(sampled_action, u)

    # Broadcast the per-sample reward over every action dimension, e.g.
    # torch.ones(3, 4) * torch.tensor([1, 2, 3]).unsqueeze(1).
    weighted_logp = logp * batch_reward[:, None]
    loss = -weighted_logp.sum()

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()

In [133]:
# TODO: replace recommender=None with the recommender used to compute the loss.
env = CPR_Env(cpr=cpr, recommender=None, k=5, M=2)

In [134]:
# Number of users sampled per training step.
batch_size = 16

# Policy over 32-dimensional actions with a 16-unit hidden layer, trained with Adam.
policy = GaussianPolicy(n_users=len(user_d) + 1, d_R=32, hidden_dimension=16)
optimizer = torch.optim.Adam(params=policy.parameters(), lr=1e-3)

In [135]:
# Training loop => still has to be fixed so the reward uses the recommender's loss
for ep_i in range(100):
    # Sample a batch of user ids from 1 to len(user_d) inclusive.
    u = torch.randint(low=1, high=len(user_d) + 1, size=(batch_size,))
    reinforce_batch(policy=policy, optimizer=optimizer, u=u, env=env)

AttributeError: 'NoneType' object has no attribute 'calculate_loss'

In [141]:
# Scratch arithmetic: ratio of 202003 to 4150785 (what the two counts
# represent is not recorded in this notebook -- TODO confirm).
202003 /4150785

0.04866621614947534

In [142]:
# Scratch arithmetic: 4150785 divided by 202003 (meaning of the counts is not
# recorded in this notebook -- TODO confirm).
4150785 / 202003  # evaluates to 20.54813542373133

20.54813542373133

In [145]:
# Scratch arithmetic; NOTE(review): 32 may be the action dimension d_R used
# for the policy, and the meaning of 0.011 is unknown -- confirm.
32*0.011

0.352