# 多目标学习

## 数据
### 人口普查数据简介


数据集信息：
```
Barry Becker 从 1994 年人口普查数据库中提取。使用以下条件提取了一组合理干净的记录： ((AAGE>16) && (AGI>100) && (AFNLWGT>1)&& (HRSWK>0))

预测任务是确定一个人是否赚了超过 50K年。

>50K，<=50K。
属性列表：
年龄：连续。
工作班级：私人、Self-emp-not-inc、Self-emp-inc、联邦政府、地方政府、州政府、无薪、从未工作。
fnlwgt：连续。
教育：学士，一些大学，11th，HS-grad，教授学校，Assoc-acdm，Assoc-voc，9th，7th-8th，12th，硕士，1st-4th，10th，博士，5th-6th，学前班。
教育编号：连续。
婚姻状况：已婚公民配偶、离婚、未婚、分居、丧偶、已婚配偶缺席、已婚 AF 配偶。
职业：技术支持、工艺维修、其他服务、销售、执行管理、专业教授、处理清洁工、机器操作检查、行政文员、农业-捕鱼、运输-搬家、私人住宅- serv，保护性服务，武装部队。
关系：妻子、自己的孩子、丈夫、非家庭成员、其他亲属、未婚。
种族：白人、亚太岛民、美洲印第安人-爱斯基摩人、其他、黑人。
性别：女，男。
资本收益：持续。
资本损失：持续。
每周小时数：连续。
祖国：美国、柬埔寨、英国、波多黎各、加拿大、德国、美国边远地区（关岛-USVI-etc）、印度、日本、希腊、南部、中国、古巴、伊朗、洪都拉斯、菲律宾、意大利、波兰、牙买加、越南、墨西哥、葡萄牙、爱尔兰、法国、多米尼加共和国、老挝、厄瓜多尔、台湾、海地、哥伦比亚、匈牙利、危地马拉、尼加拉瓜、苏格兰、泰国、南斯拉夫、萨尔瓦多、特立纳达和多巴哥、秘鲁、香港，荷兰-荷兰。
```


### EDA

In [4]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
import torch
import torch.nn as nn
from torchsummary import summary
from torch.utils.tensorboard import SummaryWriter
from torch.optim import Adam
import numpy as np
import torchsnooper
from tqdm import tqdm
from sklearn.metrics import roc_auc_score
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import warnings
import sys
print(torch.__version__)
print(torch.version.cuda)
print(torch.backends.cudnn.version())
print(torch.cuda.get_device_name(0))
sys.path.append("..")


warnings.filterwarnings("ignore")
%matplotlib inline

1.8.1+cu102
10.2
7605
GeForce GTX 950M


In [5]:
!ls ../data/

datasets.py  Income  __init__.py


In [14]:
column_names = ['age', 'workclass', 'fnlwgt', 'education', 'education_num', 'marital_status', 'occupation',
                'relationship', 'race', 'sex', 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
                'income_50k']
train_df = pd.read_csv("../data/Income/adult.data", header=None, names=column_names)
train_df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income_50k
0,39,State-gov,77516,Bachelors,13,Never-married,Adm-clerical,Not-in-family,White,Male,2174,0,40,United-States,<=50K
1,50,Self-emp-not-inc,83311,Bachelors,13,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,13,United-States,<=50K
2,38,Private,215646,HS-grad,9,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,40,United-States,<=50K
3,53,Private,234721,11th,7,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,40,United-States,<=50K
4,28,Private,338409,Bachelors,13,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,40,Cuba,<=50K


In [24]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32561 entries, 0 to 32560
Data columns (total 16 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             32561 non-null  int64 
 1   workclass       32561 non-null  object
 2   fnlwgt          32561 non-null  int64 
 3   education       32561 non-null  object
 4   education_num   32561 non-null  int64 
 5   marital_status  32561 non-null  object
 6   occupation      32561 non-null  object
 7   relationship    32561 non-null  object
 8   race            32561 non-null  object
 9   sex             32561 non-null  object
 10  capital_gain    32561 non-null  int64 
 11  capital_loss    32561 non-null  int64 
 12  hours_per_week  32561 non-null  int64 
 13  native_country  32561 non-null  object
 14  income_50k      32561 non-null  object
 15  tag             32561 non-null  int64 
dtypes: int64(7), object(9)
memory usage: 4.0+ MB


In [25]:
test_df = pd.read_csv("../data/Income/adult.test", delimiter=",", names=column_names, header=None
                      )
test_df.head()

Unnamed: 0,age,workclass,fnlwgt,education,education_num,marital_status,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income_50k
0,|1x3 Cross validator,,,,,,,,,,,,,,
1,25,Private,226802.0,11th,7.0,Never-married,Machine-op-inspct,Own-child,Black,Male,0.0,0.0,40.0,United-States,<=50K.
2,38,Private,89814.0,HS-grad,9.0,Married-civ-spouse,Farming-fishing,Husband,White,Male,0.0,0.0,50.0,United-States,<=50K.
3,28,Local-gov,336951.0,Assoc-acdm,12.0,Married-civ-spouse,Protective-serv,Husband,White,Male,0.0,0.0,40.0,United-States,>50K.
4,44,Private,160323.0,Some-college,10.0,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688.0,0.0,40.0,United-States,>50K.


In [26]:
# 设置标记
train_df["tag"] = 1
test_df["tag"] = 0
# 划分数据
test_df.dropna(inplace=True)
# 规范化数据
test_df["income_50k"] = test_df["income_50k"].apply(lambda x: x[:-1])

In [27]:
# 合并数据
data = pd.concat([train_df, test_df])
data.reset_index(inplace=True, drop=True)

### 数据预处理

In [28]:
label_columns = ['income_50k', 'marital_status']

# categorical columns
categorical_columns = ['workclass', 'education', 'occupation',
                       'relationship', 'race', 'sex', 'native_country']
for col in label_columns:
    if col == 'income_50k':
        data[col] = data[col].apply(lambda x: 0 if x == ' <=50K' else 1)
    else:
        data[col] = data[col].apply(
            lambda x: 0 if x == ' Never-married' else 1)

In [29]:
# feature engine
for col in column_names:
    if col not in label_columns + ['tag']:
        if col in categorical_columns:
            le = LabelEncoder()
            data[col] = le.fit_transform(data[col])
        else:
            mm = MinMaxScaler()
            data[col] = mm.fit_transform(data[[col]]).reshape(-1)

data = data[['age', 'workclass', 'fnlwgt', 'education', 'education_num', 'occupation',
             'relationship', 'race', 'sex', 'capital_gain', 'capital_loss', 'hours_per_week', 'native_country',
             'income_50k', 'marital_status', 'tag']]

In [30]:
# 编码用户特征和物品特征
user_feat_dict, item_feat_dict = dict(), dict()

for idx, col in enumerate(data.columns):
    if col not in label_columns + ["tag"]:
        #　用户特征
        if idx < 7:
            if col in categorical_columns:
                user_feat_dict[col] = (data[col].nunique() + 1, idx)
            else:
                user_feat_dict[col] = (1, idx)
        # 物品特征
        else:
            if col in categorical_columns:
                item_feat_dict[col] = (data[col].nunique() + 1, idx)
            else:
                item_feat_dict[col] = (1, idx)

                user_feat_dict, item_feat_dict

In [31]:
# 重新划分数据集
train_data, test_data = data[data["tag"] == 1], data[data["tag"] == 0]

In [32]:
train_data.drop(columns="tag", inplace=True)
test_data.drop(columns="tag", inplace=True)

In [37]:
user_feat_dict

{'age': (1, 0),
 'workclass': (10, 1),
 'fnlwgt': (1, 2),
 'education': (17, 3),
 'education_num': (1, 4),
 'occupation': (16, 5),
 'relationship': (7, 6)}

In [39]:
np.save("../data/Income/user_feat_dict.npy", user_feat_dict)

In [None]:
np.save("../data/Income/item_feat_dict.npy", item_feat_dict)

In [36]:
train_data.head()

Unnamed: 0,age,workclass,fnlwgt,education,education_num,occupation,relationship,race,sex,capital_gain,capital_loss,hours_per_week,native_country,income_50k,marital_status
0,0.30137,7,0.044131,9,0.8,1,1,4,1,0.02174,0.0,0.397959,39,0,0
1,0.452055,6,0.048052,9,0.8,4,0,4,1,0.0,0.0,0.122449,39,0,1
2,0.287671,4,0.137581,11,0.533333,6,1,4,1,0.0,0.0,0.397959,39,0,1
3,0.493151,4,0.150486,1,0.4,6,0,2,1,0.0,0.0,0.397959,39,0,1
4,0.150685,4,0.220635,9,0.8,10,5,2,0,0.0,0.0,0.397959,5,0,1


### 自定义数据格式

In [23]:
class TrainDateSet(Dataset):

    def __init__(self, data):

        self.features = data[0]
        self.label1 = data[1]
        self.label2 = data[2]

    def __getitem__(self, index):

        return self.features[index], self.label1[index], self.label2[index]

    def __len__(self):

        return len(self.features)

In [25]:
# 加载Dataloader
train_datasets = (train_data.iloc[:, :-2].values,
                  train_data.iloc[:, -2].values, train_data.iloc[:, -1].values)
test_datasets = (test_data.iloc[:, :-2].values,
                 test_data.iloc[:, -2].values, test_data.iloc[:, -1].values)
train_datasets = TrainDateSet(train_datasets)
test_datasets = TrainDateSet(test_datasets)

## 算法

### SharedBottom

In [26]:
from layer import DNN

In [44]:
class SharedBottom(nn.Module):

    def __init__(self, user_feature_dict, item_feature_dict, emb_dim=128, activation="relu",
                 bottom_hidden_size=[256, 128], tower_hidden_size=[128, 64], num_tasks=2, tasks_name=["ctr", "cvr"],
                 use_bn=False, dropout_rate=0, seed=1024):
        """

        :param user_feature_dict:
        :param item_feature_dict:
        :param emb_dim:
        :param activation:
        :param bottom_hidden_size:
        :param tower_hidden_size:
        :param num_tasks:
        :param tasks_num:
        :param use_bn:
        :param dropout_rate:
        :param seed:
        """

        super(SharedBottom, self).__init__()
        if user_feature_dict is None or item_feature_dict is None:
            Exception("用户特征和物品特征不能为空！")
        if isinstance(user_feature_dict, dict) is False or isinstance(item_feature_dict, dict):
            Exception("输入数据类型必须为字典类型！")

        self.user_feature_dict = user_feature_dict
        self.item_feature_dict = item_feature_dict
        self.num_tasks = num_tasks
        self.tasks_name = tasks_name

        # 构建Embedding输入
        user_cate_feature_nums, item_cate_feature_nums = 0, 0
        for user_cate, num in self.user_feature_dict.items():
            # 必须为Spase Feature
            if num[0] > 1:
                user_cate_feature_nums += 1
                setattr(self, user_cate, nn.Embedding(num[0], emb_dim))
        # 物品特征
        for item_cate, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_cate_feature_nums += 1
                setattr(self, item_cate, nn.Embedding(num[0], emb_dim))

        # Spase feat + Dense feat
        input_size = emb_dim * (user_cate_feature_nums + item_cate_feature_nums) \
            + (len(self.user_feature_dict) - user_cate_feature_nums) \
            + (len(self.item_feature_dict) - item_cate_feature_nums)
        # 共享层
        self.shared_bottom_layer = DNN(input_dim=input_size, hidden_units=bottom_hidden_size,
                                       activation=activation, use_bn=use_bn, dropout_rate=dropout_rate)
        # 子任务层
        for i in range(num_tasks):
            setattr(self, "tower_{}_dnn".format(tasks_name[i]), nn.Sequential(DNN(input_dim=bottom_hidden_size[-1],
                                                                                  hidden_units=tower_hidden_size,
                                                                                  activation=activation,
                                                                                  use_bn=use_bn,
                                                                                  dropout_rate=dropout_rate),
                                                                              nn.Linear(tower_hidden_size[-1], 1)))

    def forward(self, x):
        user_embed_list, item_embed_list = list(), list()
        for user_feature, num in self.user_feature_dict.items():
            if num[0] > 1:
                user_embed_list.append(
                    getattr(self, user_feature)(x[:, num[1]].long()))
            else:
                user_embed_list.append(x[:, num[1]].unsqueeze(1))
        for item_feature, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_embed_list.append(
                    getattr(self, item_feature)(x[:, num[1]].long()))
            else:
                item_embed_list.append(x[:, num[1]].unsqueeze(1))
        # 拼接向量
        user_embed = torch.cat(user_embed_list, dim=1)
        item_embed = torch.cat(item_embed_list, dim=1)
        dnn_input = torch.cat([user_embed, item_embed], axis=1).float()
        # bottom_layer
        shared_bottom_output = self.shared_bottom_layer(dnn_input)
        # tower_layer
        task_outputs = []
        for i in range(self.num_tasks):
            net = getattr(self, "tower_{}_dnn".format(self.tasks_name[i]))
            dnn_output = net(shared_bottom_output)
            task_outputs.append(dnn_output)

        return task_outputs

In [45]:
a = torch.from_numpy(np.array([[1, 2, 4, 2, 0.5, 0.1],
                               [4, 5, 3, 8, 0.6, 0.43],
                               [6, 3, 2, 9, 0.12, 0.32],
                               [9, 1, 1, 1, 0.12, 0.45],
                               [8, 3, 1, 4, 0.21, 0.67]]))

user_cate_dict = {'user_id': (11, 0), 'user_list': (12, 3), 'user_num': (1, 4)}
item_cate_dict = {'item_id': (8, 1), 'item_cate': (6, 2), 'item_num': (1, 5)}
sharedbottom = SharedBottom(user_cate_dict, item_cate_dict)
sharedbottom

SharedBottom(
  (user_id): Embedding(11, 128)
  (user_list): Embedding(12, 128)
  (item_id): Embedding(8, 128)
  (item_cate): Embedding(6, 128)
  (shared_bottom_layer): DNN(
    (dropout): Dropout(p=0, inplace=False)
    (linears): ModuleList(
      (0): Linear(in_features=514, out_features=256, bias=True)
      (1): Linear(in_features=256, out_features=128, bias=True)
    )
    (activation_layers): ModuleList(
      (0): ReLU(inplace=True)
      (1): ReLU(inplace=True)
    )
  )
  (tower_ctr_dnn): Sequential(
    (0): DNN(
      (dropout): Dropout(p=0, inplace=False)
      (linears): ModuleList(
        (0): Linear(in_features=128, out_features=128, bias=True)
        (1): Linear(in_features=128, out_features=64, bias=True)
      )
      (activation_layers): ModuleList(
        (0): ReLU(inplace=True)
        (1): ReLU(inplace=True)
      )
    )
    (1): Linear(in_features=64, out_features=1, bias=True)
  )
  (tower_cvr_dnn): Sequential(
    (0): DNN(
      (dropout): Dropout(p=0, in

In [46]:
sharedbottom(a)

[tensor([[0.0296],
         [0.0277],
         [0.0427],
         [0.0281],
         [0.0316]], grad_fn=<AddmmBackward>),
 tensor([[-0.0012],
         [-0.0032],
         [-0.0080],
         [ 0.0052],
         [-0.0049]], grad_fn=<AddmmBackward>)]

In [47]:
w = SummaryWriter(log_dir="./log", comment="sharedbottom")
w.add_graph(sharedbottom, a)
w.close()

### ESMM模型
![](./imgs/ESMM.png)

In [16]:
class ESMM(nn.Module):

    def __init__(self, user_feature_dict, item_feature_dict, emb_dim=128, hidden_dim=[128, 64], dropouts=[0.5, 0.5],
                 output_size=1, task_name=["ctr", "cvr"]):
        """

        :param user_feature_dict: 用户特征
        :param item_feature_dict:　物品特征
        :param emb_dim: 128
        :param hidden_dim: [128, 64]
        :param dropout: 0.5
        :param output_size: 1
        :param num_tasks:2
        """
        super(ESMM, self).__init__()

        if user_feature_dict is None or item_feature_dict is None:
            Exception("用户特征和物品特征不能为空！")
        if isinstance(user_feature_dict, dict) is False or isinstance(item_feature_dict, dict):
            Exception("输入数据类型必须为字典类型！")

        self.user_feature_dict = user_feature_dict
        self.item_feature_dict = item_feature_dict
        self.num_tasks = len(task_name)
        self.task_name = task_name

        # 共享Embedding(Share bottom)
        user_cate_feature_nums, item_cate_feature_nums = 0, 0
        # 用户特征Embedding编码
        for user_cate, num in self.user_feature_dict.items():
            # 必须为Spase Feature
            if num[0] > 1:
                user_cate_feature_nums += 1
                setattr(self, user_cate, nn.Embedding(num[0], emb_dim))
        # 物品特征
        for item_cate, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_cate_feature_nums += 1
                setattr(self, item_cate, nn.Embedding(num[0], emb_dim))

        # 构建独立任务（tower）
        # Spase feat + Dense feat
        hidden_size = emb_dim * (user_cate_feature_nums + item_cate_feature_nums) \
            + (len(self.user_feature_dict) - user_cate_feature_nums) \
            + (len(self.item_feature_dict) - item_cate_feature_nums)

        for i in range(self.num_tasks):
            setattr(self, 'task_{}_dnn'.format(
                self.task_name[i]), nn.ModuleList())
            hid_dim = [hidden_size] + hidden_dim
            for j in range(len(hid_dim) - 1):
                getattr(self, 'task_{}_dnn'.format(self.task_name[i])).add_module('hidden_{}'.format(j),
                                                                                  nn.Linear(hid_dim[j], hid_dim[j + 1]))
                getattr(self, 'task_{}_dnn'.format(self.task_name[i])).add_module('batchnorm_{}'.format(j),
                                                                                  nn.BatchNorm1d(hid_dim[j + 1]))
                getattr(self, "task_{}_dnn".format(self.task_name[i])).add_module(
                    "{}_activation".format(task_name[i]), nn.ReLU())
                getattr(self, 'task_{}_dnn'.format(self.task_name[i])).add_module('dropout_{}'.format(j),
                                                                                  nn.Dropout(dropouts[j]))
            getattr(self, 'task_{}_dnn'.format(self.task_name[i])).add_module('task_{}_last_layer'.format(j),
                                                                              nn.Linear(hid_dim[-1], output_size))

    def forward(self, x):
        #         assert x.size()[1] != len(self.item_feature_dict) + len(self.user_feature_dict)
        # 编码Embedding向量
        user_embed_list, item_embed_list = list(), list()
        for user_feature, num in self.user_feature_dict.items():
            if num[0] > 1:
                user_embed_list.append(
                    getattr(self, user_feature)(x[:, num[1]].long()))
            else:
                user_embed_list.append(x[:, num[1]].unsqueeze(1))
        for item_feature, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_embed_list.append(
                    getattr(self, item_feature)(x[:, num[1]].long()))
            else:
                item_embed_list.append(x[:, num[1]].unsqueeze(1))
        # 拼接向量
        user_embed = torch.cat(user_embed_list, dim=1)
        item_embed = torch.cat(item_embed_list, dim=1)
        # hidden_input
        hidden = torch.cat([user_embed, item_embed], axis=1).float()

        # 子网络
        task_outputs = list()
        for i in range(self.num_tasks):
            x = hidden
            # 　Module list
            for mod in getattr(self, 'task_{}_dnn'.format(self.task_name[i])):
                x = mod(x)
            task_outputs.append(x)

        if self.num_tasks == 2:

            pCTCVR = torch.mul(task_outputs[0], task_outputs[1])
            pCVR = task_outputs[0]

            return pCTCVR, pCVR
        elif len(self.num_tasks) == 1:
            return task_outputs
        else:
            Exception("目标数目为：1或２!")

In [17]:
a = torch.from_numpy(np.array([[1, 2, 4, 2, 0.5, 0.1],
                               [4, 5, 3, 8, 0.6, 0.43],
                               [6, 3, 2, 9, 0.12, 0.32],
                               [9, 1, 1, 1, 0.12, 0.45],
                               [8, 3, 1, 4, 0.21, 0.67]]))

user_cate_dict = {'user_id': (11, 0), 'user_list': (12, 3), 'user_num': (1, 4)}
item_cate_dict = {'item_id': (8, 1), 'item_cate': (6, 2), 'item_num': (1, 5)}
esmm = ESMM(user_cate_dict, item_cate_dict)
esmm

ESMM(
  (user_id): Embedding(11, 128)
  (user_list): Embedding(12, 128)
  (item_id): Embedding(8, 128)
  (item_cate): Embedding(6, 128)
  (task_ctr_dnn): ModuleList(
    (hidden_0): Linear(in_features=514, out_features=128, bias=True)
    (batchnorm_0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (ctr_activation): ReLU()
    (dropout_0): Dropout(p=0.5, inplace=False)
    (hidden_1): Linear(in_features=128, out_features=64, bias=True)
    (batchnorm_1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (dropout_1): Dropout(p=0.5, inplace=False)
    (task_1_last_layer): Linear(in_features=64, out_features=1, bias=True)
  )
  (task_cvr_dnn): ModuleList(
    (hidden_0): Linear(in_features=514, out_features=128, bias=True)
    (batchnorm_0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (cvr_activation): ReLU()
    (dropout_0): Dropout(p=0.5, inplace=False)
    (hidden_1): Linea

In [18]:
tasks = esmm(a)
print(tasks)

(tensor([[-0.1273],
        [-0.0721],
        [ 1.5535],
        [-1.1940],
        [ 0.3143]], grad_fn=<MulBackward0>), tensor([[ 0.1851],
        [-0.0953],
        [ 1.0790],
        [-0.7394],
        [-0.7482]], grad_fn=<AddmmBackward>))


In [19]:
# w = SummaryWriter(log_dir="./log", comment="model info")
# w.add_graph(esmm, a)
# w.close()

### MMoE
![](./imgs/mmoe.png)

In [20]:
# @torchsnooper.snoop()
class MMoE(nn.Module):

    def __init__(self, user_feature_dict, item_feature_dict, emb_dim=128, n_expert=3, mmoe_hidden_dim=128,
                 hidden_dim=[128, 64], output_size=1, num_tasks=2, expert_activation=None):
        """

        :param user_feature_dict:
        :param item_feature_dict:
        :param emb_dim:
        :param n_expert:
        :param mmoe_hidden_dim:
        :param hidden_dim:
        :param output_size:
        :param num_tasks:
        """
        super(MMoE, self).__init__()

        if user_feature_dict is None or item_feature_dict is None:
            Exception("用户特征和物品特征不能为空！")
        if isinstance(user_feature_dict, dict) is False or isinstance(item_feature_dict, dict):
            Exception("输入数据类型必须为字典类型！")

        self.user_feature_dict = user_feature_dict
        self.item_feature_dict = item_feature_dict
        self.num_tasks = num_tasks

        # 共享Embedding(Share bottom)
        user_cate_feature_nums, item_cate_feature_nums = 0, 0
        # 用户特征Embedding编码
        for user_cate, num in self.user_feature_dict.items():
            # 必须为Spase Feature
            if num[0] > 1:
                user_cate_feature_nums += 1
                setattr(self, user_cate, nn.Embedding(num[0], emb_dim))
        # 物品特征
        for item_cate, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_cate_feature_nums += 1
                setattr(self, item_cate, nn.Embedding(num[0], emb_dim))

        # 构建独立任务（tower）
        # Spase feat + Dense feat
        hidden_size = emb_dim * (user_cate_feature_nums + item_cate_feature_nums) \
            + (len(self.user_feature_dict) - user_cate_feature_nums) \
            + (len(self.item_feature_dict) - item_cate_feature_nums)

        # 专家网络
        self.erperts = torch.nn.Parameter(torch.rand(
            hidden_size, mmoe_hidden_dim, n_expert), requires_grad=True)
        self.erperts.data.normal_(0, 1)
        self.erperts_bias = torch.nn.Parameter(torch.rand(
            mmoe_hidden_dim, n_expert), requires_grad=True)

        # 门控网络
        self.gates = torch.nn.ParameterList([torch.nn.Parameter(torch.rand(hidden_size, n_expert), requires_grad=True)
                                             for _ in range(num_tasks)])
        for gate in self.gates:
            gate.data.normal_(0, 1,)

        self.gate_bias = torch.nn.ParameterList([torch.nn.Parameter(
            torch.rand(n_expert), requires_grad=True) for _ in range(num_tasks)])

        for i in range(self.num_tasks):
            setattr(self, 'task_{}_dnn'.format(i + 1), nn.ModuleList())
            # input: mmoe_hidden_dim + hidden_dim
            hid_dim = [mmoe_hidden_dim] + hidden_dim
            for j in range(len(hid_dim) - 1):
                getattr(self, 'task_{}_dnn'.format(i + 1)).add_module('hidden_{}'.format(j),
                                                                      nn.Linear(hid_dim[j], hid_dim[j + 1]))
                getattr(self, 'task_{}_dnn'.format(i + 1)).add_module('batchnorm_{}'.format(j),
                                                                      nn.BatchNorm1d(hid_dim[j + 1]))
            getattr(self, 'task_{}_dnn'.format(i + 1)).add_module('task_last_layer',
                                                                  nn.Linear(hid_dim[-1], output_size))

        self.Softmax = nn.Softmax(dim=-1)

    def forward(self, x):

        assert x.size()[1] == len(self.item_feature_dict) + \
            len(self.user_feature_dict)
        # 编码Embedding向量
        user_embed_list, item_embed_list = list(), list()
        for user_feature, num in self.user_feature_dict.items():
            if num[0] > 1:
                user_embed_list.append(
                    getattr(self, user_feature)(x[:, num[1]].long()))
            else:
                user_embed_list.append(x[:, num[1]].unsqueeze(1))
        for item_feature, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_embed_list.append(
                    getattr(self, item_feature)(x[:, num[1]].long()))
            else:
                item_embed_list.append(x[:, num[1]].unsqueeze(1))
        # 拼接向量
        user_embed = torch.cat(user_embed_list, dim=1)
        item_embed = torch.cat(item_embed_list, dim=1)
        # hidden_input
        # B*hidden
        hidden = torch.cat([user_embed, item_embed], dim=1).float()
        # MMoE
        expert_outs = torch.matmul(hidden, self.erperts.permute(
            1, 0, 2)).permute(1, 0, 2)  # B*mmoe_hidden_dim*experts
        expert_outs += self.erperts_bias
        # 门控单元
        gates_out = list()
        for idx, gate in enumerate(self.gates):
            gate_out = torch.mm(hidden, gate)  # B * num_experts
            if self.gate_bias:
                gate_out += self.gate_bias[idx]
            # 归一化
            gate_out = self.Softmax(gate_out)
            gates_out.append(gate_out)
        # 各个模块
        outs = list()
        for gate_out in gates_out:
            expand_gate_out = torch.unsqueeze(
                gate_out, dim=1)  # B * 1 * experts
            weighted_expert_output = expert_outs * \
                expand_gate_out.expand_as(
                    expert_outs)  # B * mmoe_hidden * expert
            # B * mmoe_hidden
            outs.append(torch.sum(weighted_expert_output, 2))

        # task_tower
        task_outputs = list()
        for i in range(self.num_tasks):
            x = outs[i]
            for mod in getattr(self, 'task_{}_dnn'.format(i + 1)):
                x = mod(x)
            task_outputs.append(x)

        return task_outputs

In [21]:
a = torch.from_numpy(np.array([[1, 2, 4, 2, 0.5, 0.1],
                               [4, 5, 3, 8, 0.6, 0.43],
                               [6, 3, 2, 9, 0.12, 0.32],
                               [9, 1, 1, 1, 0.12, 0.45],
                               [8, 3, 1, 4, 0.21, 0.67]]))

user_cate_dict = {'user_id': (11, 0), 'user_list': (12, 3), 'user_num': (1, 4)}
item_cate_dict = {'item_id': (8, 1), 'item_cate': (6, 2), 'item_num': (1, 5)}
mmoe = MMoE(user_cate_dict, item_cate_dict)
mmoe

MMoE(
  (user_id): Embedding(11, 128)
  (user_list): Embedding(12, 128)
  (item_id): Embedding(8, 128)
  (item_cate): Embedding(6, 128)
  (gates): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 514x3]
      (1): Parameter containing: [torch.FloatTensor of size 514x3]
  )
  (gate_bias): ParameterList(
      (0): Parameter containing: [torch.FloatTensor of size 3]
      (1): Parameter containing: [torch.FloatTensor of size 3]
  )
  (task_1_dnn): ModuleList(
    (hidden_0): Linear(in_features=128, out_features=128, bias=True)
    (batchnorm_0): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (hidden_1): Linear(in_features=128, out_features=64, bias=True)
    (batchnorm_1): BatchNorm1d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (task_last_layer): Linear(in_features=64, out_features=1, bias=True)
  )
  (task_2_dnn): ModuleList(
    (hidden_0): Linear(in_features=128, out_features=128, bias=True)
  

In [22]:
outs = mmoe(a)
outs

[tensor([[ 0.2559],
         [ 0.3708],
         [-1.9316],
         [ 1.0627],
         [-0.3213]], grad_fn=<AddmmBackward>),
 tensor([[ 0.2990],
         [-0.6616],
         [ 0.7363],
         [-0.4751],
         [ 0.6892]], grad_fn=<AddmmBackward>)]

In [24]:
# w = SummaryWriter(log_dir="./log", comment="model info")
# w.add_graph(mmoe, a)
# w.close()

### PLE

#### 问题
1. 如何解决负迁移和跷跷板现象（模型角度）？  
答：与MMOE相比，CGC消除了任务的塔式网络与其他任务的特定任务专家之间的连接，使不同类型的专家能够不受干扰地集中精力学习不同的知识
2. 如何设计损失函数（损失函数角度）？

#### 模型架构
说明：多层多任务网络结构中，底层特征抽取结构中，给上层A任务提供的独有专家网络的信号来自于A专家网络和共享网络，但给上层结构提供共享专家网络的信号来源于A+B+共享网络。然后，最后一层连接多塔的结构跟CGC是一样的。PLE采用渐进式分离路由，吸收所有底层专家的信息，提取高层共享知识，逐步分离任务相关参数。  
<img src = "./imgs/ple.png" width = 500 align="middle" />

#### 模型代码
<font color="red">注意</font> ：门控网络的输入输出选择

In [25]:
class Tower(nn.Module):

    def __init__(self, input_size, output_size, hidden_size, drouout=0.5):
        super(Tower, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(drouout)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)

        return x

In [37]:
class Expert_shared(nn.Module):
    def __init__(self, input_shape, output_shape):\
        
        super(Expert_shared, self).__init__()
        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Expert_task1(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Expert_task1, self).__init__()

        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Expert_task2(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Expert_task2, self).__init__()

        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Gate_shared(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Gate_shared, self).__init__()
        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Gate_task1(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Gate_task1, self).__init__()
        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):
        return self.fc1(x)


class Gate_task2(nn.Module):
    def __init__(self, input_shape, output_shape):
        super(Gate_task2, self).__init__()
        self.fc1 = nn.Linear(input_shape, output_shape)

    def forward(self, x):

        return self.fc1(x)

In [38]:
class GatingNetwork(nn.Module):

    def __init__(self, input_units, units, num_experts, selectors):
        super(GatingNetwork, self).__init__()

        self.experts_shared = nn.ModuleList([Expert_shared(input_units, units)
                                             for i in range(num_experts)])
        self.experts_task1 = nn.ModuleList([Expert_task1(input_units, units)
                                            for i in range(num_experts)])
        self.experts_task2 = nn.ModuleList([Expert_task2(input_units, units)
                                            for i in range(num_experts)])
        self.expert_activation = nn.ReLU()

        self.gate_shared = Gate_shared(input_units, num_experts * 3)
        # B*n*hidden B*Expert*Hiddeng
        self.gate_task1 = Gate_task1(input_units, selectors * num_experts)
        self.gate_task2 = Gate_task2(input_units, selectors * num_experts)

        self.gate_activation = nn.Softmax(dim=-1)
        self.units = units
        self.num_expers = num_experts

    def forward(self, gate_output_shared_final, gate_output_task1_final, gate_output_task2_final):
        # expert shared
        expert_shared_o = [e(gate_output_shared_final)
                           for e in self.experts_shared]
        expert_shared_tensors = torch.cat(expert_shared_o, dim=0)
        expert_shared_tensors = expert_shared_tensors.view(
            -1, self.num_expers, self.units)
        expert_shared_tensors = self.expert_activation(expert_shared_tensors)
        # expert task1
        expert_task1_o = [e(gate_output_task1_final)
                          for e in self.experts_task1]
        expert_task1_tensors = torch.cat(expert_task1_o, dim=0)
        expert_task1_tensors = expert_task1_tensors.view(
            -1, self.num_expers, self.units)
        expert_task1_tensors = self.expert_activation(expert_task1_tensors)
        # expert task2
        expert_task2_o = [e(gate_output_task2_final)
                          for e in self.experts_task2]
        expert_task2_tensors = torch.cat(expert_task2_o, dim=0)
        expert_task2_tensors = expert_task2_tensors.view(
            -1, self.num_expers, self.units)
        expert_task2_tensors = self.expert_activation(expert_task2_tensors)

        # gate task1
        # 每一个门控接收来自上一门控的输出作为输入：task1 --> task2
        gate_output_task1 = self.gate_task1(gate_output_task1_final)
        # 获取相对应的权值
        gate_output_task1 = self.gate_activation(gate_output_task1)
        # 选择向量
        gate_expert_output1 = torch.cat(
            [expert_shared_tensors, expert_task1_tensors], dim=1)
        # 计算加权求和
        gate_output_task1 = torch.einsum(
            'be,beu ->beu', gate_output_task1, gate_expert_output1)
        gate_output_task1 = gate_output_task1.sum(dim=1)
        # gate task2
        gate_output_task2 = self.gate_task2(gate_output_task2_final)
        gate_output_task2 = self.gate_activation(gate_output_task2)

        gate_expert_output2 = torch.cat(
            [expert_shared_tensors, expert_task2_tensors], dim=1)

        gate_output_task2 = torch.einsum(
            'be,beu ->beu', gate_output_task2, gate_expert_output2)
        gate_output_task2 = gate_output_task2.sum(dim=1)

        # gate shared
        gate_output_shared = self.gate_shared(gate_output_shared_final)
        gate_output_shared = self.gate_activation(gate_output_shared)

        gate_expert_output_shared = torch.cat(
            [expert_task1_tensors, expert_shared_tensors, expert_task2_tensors], dim=1)

        gate_output_shared = torch.einsum(
            'be,beu ->beu', gate_output_shared, gate_expert_output_shared)
        gate_output_shared = gate_output_shared.sum(dim=1)

        return gate_output_shared, gate_output_task1, gate_output_task2

In [39]:
class PLE(nn.Module):

    def __init__(self, user_feature_dict, item_feature_dict, emb_dim=128, hidden_out_size=64, num_experts=8,
                 selectors=2):

        super(PLE, self).__init__()
        if user_feature_dict is None or item_feature_dict is None:
            Exception("用户特征和物品特征不能为空！")
        if isinstance(user_feature_dict, dict) is False or isinstance(item_feature_dict, dict):
            Exception("输入数据类型必须为字典类型！")

        self.user_feature_dict = user_feature_dict
        self.item_feature_dict = item_feature_dict

        # 共享Embedding(Share bottom)
        user_cate_feature_nums, item_cate_feature_nums = 0, 0

        # 用户特征Embedding编码
        for user_cate, num in self.user_feature_dict.items():
            # 必须为Spase Feature
            if num[0] > 1:
                user_cate_feature_nums += 1
                setattr(self, user_cate, nn.Embedding(num[0], emb_dim))

        # 物品特征
        for item_cate, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_cate_feature_nums += 1
                setattr(self, item_cate, nn.Embedding(num[0], emb_dim))

        # 构建独立任务（tower）
        # Spase feat + Dense feat
        input_size = emb_dim * (user_cate_feature_nums + item_cate_feature_nums) \
            + (len(self.user_feature_dict) - user_cate_feature_nums) \
            + (len(self.item_feature_dict) - item_cate_feature_nums)
        # 实例Multi Layer
        self.gate1 = GatingNetwork(
            input_size, hidden_out_size, num_experts, selectors)

        self.gate2 = GatingNetwork(
            hidden_out_size, hidden_out_size, num_experts, selectors)

        # 实例Tower
        self.towers = nn.ModuleList(
            [Tower(hidden_out_size, 1, 16) for _ in range(num_experts)])

    def forward(self, x):
        user_embed_list, item_embed_list = list(), list()
        for user_feature, num in self.user_feature_dict.items():
            if num[0] > 1:
                user_embed_list.append(
                    getattr(self, user_feature)(x[:, num[1]].long()))
            else:
                user_embed_list.append(x[:, num[1]].unsqueeze(1))
        for item_feature, num in self.item_feature_dict.items():
            if num[0] > 1:
                item_embed_list.append(
                    getattr(self, item_feature)(x[:, num[1]].long()))
            else:
                item_embed_list.append(x[:, num[1]].unsqueeze(1))
        # 拼接向量
        user_embed = torch.cat(user_embed_list, dim=1)
        item_embed = torch.cat(item_embed_list, dim=1)
        # hidden_input
        hidden = torch.cat([user_embed, item_embed], axis=1).float()

        gate_output_shared, gate_output_task1, gate_output_task2 = self.gate1(
            hidden, hidden, hidden)
        _, task1_o, task2_o = self.gate2(
            gate_output_shared, gate_output_task1, gate_output_task2)

        final_output = [tower(task) for tower, task in zip(
            self.towers, [task1_o, task2_o])]

        return final_output

In [40]:
a = torch.from_numpy(np.array([[1, 2, 4, 2, 0.5, 0.1],
                               [4, 5, 3, 8, 0.6, 0.43],
                               [6, 3, 2, 9, 0.12, 0.32],
                               [9, 1, 1, 1, 0.12, 0.45],
                               [8, 3, 1, 4, 0.21, 0.67]]))

user_cate_dict = {'user_id': (11, 0), 'user_list': (12, 3), 'user_num': (1, 4)}
item_cate_dict = {'item_id': (8, 1), 'item_cate': (6, 2), 'item_num': (1, 5)}
ple = PLE(user_cate_dict, item_cate_dict)
ple

PLE(
  (user_id): Embedding(11, 128)
  (user_list): Embedding(12, 128)
  (item_id): Embedding(8, 128)
  (item_cate): Embedding(6, 128)
  (gate1): GatingNetwork(
    (experts_shared): ModuleList(
      (0): Expert_shared(
        (fc1): Linear(in_features=514, out_features=64, bias=True)
      )
      (1): Expert_shared(
        (fc1): Linear(in_features=514, out_features=64, bias=True)
      )
      (2): Expert_shared(
        (fc1): Linear(in_features=514, out_features=64, bias=True)
      )
      (3): Expert_shared(
        (fc1): Linear(in_features=514, out_features=64, bias=True)
      )
      (4): Expert_shared(
        (fc1): Linear(in_features=514, out_features=64, bias=True)
      )
      (5): Expert_shared(
        (fc1): Linear(in_features=514, out_features=64, bias=True)
      )
      (6): Expert_shared(
        (fc1): Linear(in_features=514, out_features=64, bias=True)
      )
      (7): Expert_shared(
        (fc1): Linear(in_features=514, out_features=64, bias=True)
     

In [41]:
print(ple(a))

[tensor([[0.1539],
        [0.1186],
        [0.1474],
        [0.1750],
        [0.1511]], grad_fn=<AddmmBackward>), tensor([[ 0.0960],
        [ 0.0164],
        [ 0.0123],
        [ 0.0523],
        [-0.0026]], grad_fn=<AddmmBackward>)]


In [42]:
# 可视化
# w = SummaryWriter(log_dir="./log", comment="model info")
# w.add_graph(ple, a)
# w.close()

## 训练与评估

In [43]:
# 定义超参数
learning_rate = 0.01
epochs = 100
count = 0
writer = SummaryWriter("../log", comment="mertics")
device = torch.device("cuda")

model = PLE(user_feat_dict, item_feat_dict)
model.to(device)

optimizer = Adam(model.parameters(), lr=learning_rate)
loss_fun = nn.BCEWithLogitsLoss()

train_dataload = DataLoader(train_datasets, batch_size=128, shuffle=True)
test_dataload = DataLoader(test_datasets, batch_size=128)

RuntimeError: CUDA error: device-side assert triggered

In [33]:
for epoch in tqdm(range(epochs)):
    y_train_income_true = []
    y_train_income_predict = []
    y_train_marry_true = []
    y_train_marry_predict = []
    total_loss, count = 0, 0
    for x, y1, y2 in train_dataload:
        x, y1, y2 = x.to(device), y1.to(device), y2.to(device)
        predict = model(x)
        y_train_income_true += list(y1.squeeze().cpu().numpy())
        y_train_marry_true += list(y2.squeeze().cpu().numpy())

        y_train_income_predict += list(
            predict[0].squeeze().cpu().detach().numpy())
        y_train_marry_predict += list(
            predict[1].squeeze().cpu().detach().numpy())

        loss1 = loss_fun(predict[0], y1.unsqueeze(1).float())
        loss2 = loss_fun(predict[1], y2.unsqueeze(1).float())
        loss = loss1 + loss2
        # 梯度更新
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += float(loss)
        count += 1

    y1_auc = roc_auc_score(y_train_income_true, y_train_income_predict)
    y2_auc = roc_auc_score(y_train_marry_true, y_train_marry_predict)
    loss_value = total_loss / count
    print("Epoch %d train loss is %.3f, y1_auc is %.3f and y2_auc is %.3f" % (epoch + 1, loss_value,
                                                                              y1_auc, y2_auc))
    writer.add_scalar("Train loss", loss_value, global_step=epoch + 1)
    writer.add_scalar("Train_y1_auc", y1_auc, global_step=epoch + 1)
    writer.add_scalar("Train_y2_auc", y2_auc, global_step=epoch + 1)

    # 验证
    total_eval_loss = 0
    model.eval()
    count_eval = 0
    y_val_income_true = []
    y_val_marry_true = []
    y_val_income_predict = []
    y_val_marry_predict = []
    for x, y1, y2 in test_dataload:
        x, y1, y2 = x.to(device), y1.to(device), y2.to(device)
        predict = model(x)
        y_val_income_true += list(y1.squeeze().cpu().numpy())
        y_val_marry_true += list(y2.squeeze().cpu().numpy())

        y_val_income_predict += list(
            predict[0].squeeze().cpu().detach().numpy())
        y_val_marry_predict += list(
            predict[1].squeeze().cpu().detach().numpy())
        loss_1 = loss_fun(predict[0], y1.unsqueeze(1).float())
        loss_2 = loss_fun(predict[1], y2.unsqueeze(1).float())
        loss = loss_1 + loss_2
        total_eval_loss += float(loss)
        count_eval += 1

    y1_val_auc = roc_auc_score(y_val_income_true, y_val_income_predict)
    y2_val_auc = roc_auc_score(y_val_marry_true, y_val_marry_predict)
    val_loss_value = total_eval_loss / count_eval
    print("Epoch %d val loss is %.3f, y1_auc is %.3f and y2_auc is %.3f" % (epoch + 1, val_loss_value,
                                                                            y1_auc, y2_auc))
    writer.add_scalar("Val loss", val_loss_value, global_step=epoch + 1)
    writer.add_scalar("Val_y1_auc", y1_val_auc, global_step=epoch + 1)
    writer.add_scalar("Val_y2_auc", y2_val_auc, global_step=epoch + 1)

writer.close()

  0%|          | 0/100 [00:02<?, ?it/s]


RuntimeError: CUDA error: device-side assert triggered

# 知识点

## nn.Sequential和nn.ModuleList
[Pytorch学习（三）： Sequential 和ModuleList学习](https://blog.csdn.net/happyday_d/article/details/85629119?spm=1001.2101.3001.6650.6&utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-6.fixedcolumn&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-6.fixedcolumn)

## BCELoss和BCEWithLogitsLoss的区别
[BCELoss和BCEWithLogitsLoss](https://blog.csdn.net/qq_22210253/article/details/85222093)

$$
BCELoss = − n/1∑(y_{n}×lnx_{n}+(1−y_{n})×ln(1−x_{n}))
$$

In [51]:
import torch
from torch import nn

In [52]:
input_ = torch.rand(3, 3)
input_

tensor([[0.0158, 0.8989, 0.3383],
        [0.8715, 0.3984, 0.7840],
        [0.4525, 0.0914, 0.8614]])

In [53]:
target = torch.FloatTensor([[0, 1, 1], [0, 0, 1], [1, 0, 1]])
activation = nn.Sigmoid()
predict = activation(input_)
predict

tensor([[0.5040, 0.7107, 0.5838],
        [0.7051, 0.5983, 0.6865],
        [0.6112, 0.5228, 0.7029]])

In [54]:
loss = nn.BCELoss()
loss(predict, target)

tensor(0.6305)

In [55]:
# Sigmoid + BCEloss
loss = nn.BCEWithLogitsLoss()
loss(input_, target)

tensor(0.6305)

## einsum函数
[一文学会 Pytorch 中的 einsum](https://zhuanlan.zhihu.com/p/361209187)

In [56]:
import numpy as np
A = torch.tensor([[5], [3]])

B = torch.tensor([[[0, 1, 0],
                   [1, 1, 0],

                   [1, 1, 1]]])

In [None]:
A.shape, B.shape

In [None]:
torch.einsum('ij,jkl->ikl', A, B)

In [None]:
C = B.permute(1, 0, 2)
C

In [None]:
torch.matmul(A, C).permute(1, 0, 2)

## nn.Parameter的使用
[pytorch学习笔记（十六）：Parameters](https://blog.csdn.net/qq_43328040/article/details/107761093?utm_medium=distribute.pc_relevant.none-task-blog-2~default~baidujs_title~default-1.no_search_link&spm=1001.2101.3001.4242.2)

## AUC计算

In [None]:
from sklearn import metrics

In [None]:
a = np.array([[0.8, 0.2], [0.9, 0.1], [0.2, 0.7],
              [0.2, 0.5], [0.2, 0.8], [0.2, 0.8]])
b = np.array([[1, 0], [1, 0], [0, 1], [1, 0], [0, 1], [1, 0]])
print(metrics.roc_auc_score(b, a))

In [None]:
a1 = [0.8, 0.9, 0.2, 0.2, 0.2, 0.2]
b1 = [1, 1, 0, 1, 1, 0]
print(metrics.roc_auc_score(b1, a1))

In [None]:
a2 = [0.2, 0.1, 0.7, 0.5, 0.8, 0.8]
b2 = [0, 0, 1, 0, 1, 0]
print(metrics.roc_auc_score(b2, a2))

## 教学

In [None]:
import torch
import torch.nn as nn
from sklearn.preprocessing import LabelEncoder

In [None]:
feat2 = ["male", "famale"]
feat1 = ["small", "large", "mid", "samler", "larger"]

dense1 = [178, 155]

smaple = ["feat0":male, "feat1":mid, "dense1":178]

# 预处理 str->int
# 编码：labelecode
# multi_onehot
# feat: {A, B}
[1, 1, 0, 0, 0, 0...]
{A}
samlpe_pro = [0, 3, 0.8]

smaple = [[1.1, 1.5, 6.3], [1.2, 0.4, 0.3], [8.6]]

In [None]:
la = LabelEncoder()
feat_ecode = la.fit_transform(feat)
feat_ecode

In [50]:
embedding = nn.Embedding(5, 3)  # 定义一个具有5个单词，维度为3的查询矩阵
print(embedding.weight)  # 展示该矩阵的具体内容
test = torch.LongTensor(feat_ecode)  # 该test矩阵用于被embed，其size为[2, 4]
# 其中的第一行为[0, 2, 0, 1]，表示获取查询矩阵中ID为0, 2, 0, 1的查询向量
# 可以在之后的test输出中与embed的输出进行比较
test = embedding(test[0])
print("Embedding：")
print(test.size())  # 输出embed后test的size，为[2, 4, 3]，增加
# 的3，是因为查询向量的维度为3
print(test)  # 输出embed后的test的内容a

## Python 基础