# 多目标模型之PLE

## 模型搭建

In [1]:
import torch
from torch import nn

## Tower

In [9]:
class Tower(nn.Module):

    def __init__(self, input_size, output_size, hidden_size, drouout=0.4):
        super(Tower, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):

        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropoutd(x)
        x = self.fc2(x)
        x = self.sigmoid(x)
        return x

In [10]:
Tower(4, 5, 4)

Tower(
  (fc1): Linear(in_features=4, out_features=4, bias=True)
  (fc2): Linear(in_features=4, out_features=5, bias=True)
  (relu): ReLU()
  (dropout): Dropout(p=0.5, inplace=False)
  (sigmoid): Sigmoid()
)

## 构建各个专家子网络和共享网络

In [14]:
class Expert_shared(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Expert_shared, self).__init__()

        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Expert_task1(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Expert_task1, self).__init__()

        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Expert_task2(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Expert_task2, self).__init__()

        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Gate_shared(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Gate_shared, self).__init__()
        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Gate_task1(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Gate_task1, self).__init__()
        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Gate_task2(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Gate_task2, self).__init__()
        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)

In [15]:
class GatingNetwork:

    def __init__(self, input_units, units, num_experts, selectors):
        super(GatingNetwork, self).__init__()

        self.experts_shared = nn.ModuleList([Expert_shared(input_units, units)
                                             for i in range(num_experts)])
        self.experts_task1 = nn.ModuleList([Expert_task1(input_units, units)
                                            for i in range(num_experts)])
        self.experts_task2 = nn.ModuleList([Expert_task2(input_units, units)
                                            for i in range(num_experts)])
        self.expert_activation = nn.ReLU()

        self.gate_shared = Gate_shared(input_units, num_experts*3)
        self.gate_task1 = Gate_task1(input_units, selectors*num_experts)
        self.gate_task2 = Gate_task2(input_units, selectors*num_experts)

        self.gate_activation = nn.Softmax(dim=-1)
        self.units = units
        self.num_expers = num_experts

    def forward(self, gate_output_shared_final, gate_output_task1_final, gate_output_task2_final):

        # expert shared
        expert_shared_o = [e(gate_output_shared_final)
                           for e in self.experts_shared]
        expert_shared_tensors = torch.cat(expert_shared_o, dim=0)
        expert_shared_tensors = expert_shared_tensors.view(
            -1, self.num_expers, self.units)
        expert_shared_tensors = self.expert_activation(expert_shared_tensors)
        # expert task1
        expert_task1_o = [e(gate_output_task1_final)
                          for e in self.experts_task1]
        expert_task1_tensors = torch.cat(expert_task1_o, dim=0)
        expert_task1_tensors = expert_task1_tensors.view(
            -1, self.num_expers, self.units)
        expert_task1_tensors = self.expert_activation(expert_task1_tensors)
        # expert task2
        expert_task2_o = [e(gate_output_task2_final)
                          for e in self.experts_task2]
        expert_task2_tensors = torch.cat(expert_task2_o, dim=0)
        expert_task2_tensors = expert_task2_tensors.view(
            -1, self.num_expers, self.units)
        expert_task2_tensors = self.expert_activation(expert_task2_tensors)

        # gate task1
        gate_output_task1 = self.gate_task1(gate_output_task1_final)
        gate_output_task1 = self.gate_activation(gate_output_task1)

        gate_expert_output1 = torch.cat(
            [expert_shared_tensors, expert_task1_tensors], dim=1)
        # B*experts *  B*experts*units
        gate_output_task1 = torch.einsum(
            'be,beu ->beu', gate_output_task1, gate_expert_output1)
        gate_output_task1 = gate_output_task1.sum(dim=1)
        
        # gate task2
        gate_output_task2 = self.gate_task2(gate_output_task2_final)
        gate_output_task2 = self.gate_activation(gate_output_task2)

        gate_expert_output2 = torch.cat(
            [expert_shared_tensors, expert_task2_tensors], dim=1)

        gate_output_task2 = torch.einsum(
            'be,beu ->beu', gate_output_task2, gate_expert_output2)
        gate_output_task2 = gate_output_task2.sum(dim=1)
        
        # gate shared
        gate_output_shared = self.gate_shared(gate_output_shared_final)
        gate_output_shared = self.gate_activation(gate_output_shared)

        gate_expert_output_shared = torch.cat(
            [expert_task1_tensors, expert_shared_tensors, expert_task2_tensors], dim=1)

        gate_output_shared = torch.einsum(
            'be,beu ->beu', gate_output_shared, gate_expert_output_shared)
        gate_output_shared = gate_output_shared.sum(dim=1)

        return gate_output_shared, gate_output_task1, gate_output_task2

In [18]:
model = GatingNetwork(5, 6, 3, 3)
model

<__main__.GatingNetwork at 0x7f4658967730>

## 模型

In [32]:
class PLE(nn.Module):

    def __init__(self, user_feature_dict, item_feature_dict, emb_dim=128, hidden_out_size=128, num_experts=2, selectors=2):
        
        if user_feature_dict is None or item_feature_dict is None:
            Exception("用户特征和物品特征不能为空！")
        if isinstance(user_feature_dict, dict) is False or isinstance(item_feature_dict, dict):
            Exception("输入数据类型必须为字典类型！")

        self.user_feature_dict = user_feature_dict
        self.item_feature_dict = item_feature_dict

        # 共享Embedding(Share bottom)
        user_cate_feature_nums, item_cate_feature_nums = 0, 0

        # 用户特征Embedding编码
        for user_cate, num in self.user_feature_dict.items():
            # 必须为Spase Feature
            if num[0] > 1:
                user_cate_feature_nums += 1
                setattr(self, user_cate, nn.Embedding(num[0], emb_dim))

        # 物品特征
        for item_cate, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_cate_feature_nums += 1
                setattr(self, item_cate, nn.Embedding(num[0], emb_dim))

        # 构建独立任务（tower）
        # Spase feat + Dense feat
        input_size = emb_dim * (user_cate_feature_nums + item_cate_feature_nums) \
            + (len(self.user_feature_dict) - user_cate_feature_nums) \
            + (len(self.item_feature_dict) - item_cate_feature_nums)
        # 实例Multi Layer
        self.gate1 = GatingNetwork(input_size, hidden_out_size, num_experts, selectors)
        self.gate1 = GatingNetwork(hidden_out_size, hidden_out_size, num_experts, selectors)
        
        # 实例Tower
        self.tower = nn.ModuleList([Tower(hidden_out_size, 1, )])
        

SyntaxError: invalid syntax (<ipython-input-32-2410405460c5>, line 1)

## 知识点

### 计算加权求和

In [30]:
a = torch.rand(1, 2)
a

In [24]:
b = torch.rand(1, 2, 4)
b

tensor([[[0.8654, 0.1746, 0.0825, 0.7956],
         [0.2113, 0.0699, 0.1199, 0.3511]]])

In [27]:
c = torch.einsum('be,beu ->beu', a, b)
c

tensor([[[0.2753, 0.0556, 0.0262, 0.2531],
         [0.1834, 0.0607, 0.1041, 0.3049]]])

In [31]:
c.sum(dim=1)

tensor([[0.4587, 0.1163, 0.1303, 0.5580]])

In [28]:
0.3181*0.8654

0.27528374

In [29]:
0.3181*0.1746

0.05554026