# 数据迭代器

In [103]:
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

class XDataset(Dataset):
    '''load csv data with feature name ad first row'''
    def __init__(self, datafile):
        super(XDataset,self).__init__()
        self.feature_names=[]
        self.datafile = datafile
        self.data = []
        self._load_data()
    
    def _load_data(self):
        print("start load data from: {} ".format(self.datafile))
        count = 0
        with open(self.datafile) as f:
            self.feature_names = f.readline().strip().split(",")[2:]
            for line in f:
                count +=1
                line = line.strip().split(",")
                line = [int(v) for v in line]
                self.data.append(line)
#         print("self.data.shape", len(self.data))
        print("load data from {} finished".format(self.datafile))
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self,idx):
        line = self.data[idx]
        click = line[0]
        conversion = line[1]
        features = dict(zip(self.feature_names, line[2:]))
        return click,conversion, features


In [99]:
# test
from torch.utils.data import DataLoader

datafile = './data/ctr_cvr.dev'
dataset = XDataset(datafile)

dataloader = DataLoader(dataset, batch_size=1000, shuffle=False)
for i, value in enumerate(dataloader):
    click, conversion, features = value
    print(click.shape)
    print(conversion.shape)
    for key in features.keys():
        print(key, features[key].shape)

start load data from: ./data/ctr_cvr.dev 
load data from ./data/ctr_cvr.dev finished
torch.Size([1000])
torch.Size([1000])
101 torch.Size([1000])
121 torch.Size([1000])
122 torch.Size([1000])
124 torch.Size([1000])
125 torch.Size([1000])
126 torch.Size([1000])
127 torch.Size([1000])
128 torch.Size([1000])
129 torch.Size([1000])
205 torch.Size([1000])
206 torch.Size([1000])
207 torch.Size([1000])
216 torch.Size([1000])
508 torch.Size([1000])
509 torch.Size([1000])
702 torch.Size([1000])
853 torch.Size([1000])
301 torch.Size([1000])
torch.Size([1000])
torch.Size([1000])
101 torch.Size([1000])
121 torch.Size([1000])
122 torch.Size([1000])
124 torch.Size([1000])
125 torch.Size([1000])
126 torch.Size([1000])
127 torch.Size([1000])
128 torch.Size([1000])
129 torch.Size([1000])
205 torch.Size([1000])
206 torch.Size([1000])
207 torch.Size([1000])
216 torch.Size([1000])
508 torch.Size([1000])
509 torch.Size([1000])
702 torch.Size([1000])
853 torch.Size([1000])
301 torch.Size([1000])
torch.Size(

# 模型定义

In [80]:
class Tower(nn.Module):
    def __init__(self, 
                input_dim:int,
                dims=[128,64,32],
                drop_prob=[0.1,0.3,0.3]
                ):
        super(Tower, self).__init__()
        self.dims = dims
        self.drop_prob = drop_prob
        self.layer = nn.Sequential(nn.Linear(input_dim,dims[0]),
                                   nn.ReLU(),nn.Dropout(drop_prob[0]),
                                   nn.Linear(dims[0],dims[1]),
                                   nn.ReLU(),nn.Dropout(drop_prob[1]),
                                   nn.Linear(dims[1],dims[2]),
                                   nn.ReLU(),nn.Dropout(drop_prob[2]),
                                  )
    def forward(self,x):
        x = torch.flatten(x, start_dim=1)
        x = self.layer(x)
        return x

class Attention(nn.Module):
    def __init__(self, dim=32):
        super(Attention, self).__init__()
        self.dim = dim
        self.q_layer = nn.Linear(dim, dim, bias=False)
        self.k_layer = nn.Linear(dim, dim, bias=False)
        self.v_layer = nn.Linear(dim, dim, bias=False)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, inputs):
        Q = self.q_layer(inputs)
        K = self.k_layer(inputs)
        V = self.v_layer(inputs)
        a = torch.sum(torch.mul(Q, K), -1) / torch.sqrt(torch.tensor(self.dim,dtype=torch.float))
        a = self.softmax(a)
        outputs = torch.sum(torch.mul(torch.unsqueeze(a, -1), V), dim=1)
        return outputs

class AITM(nn.Module):
    def __init__(self,
               feature_vocabulary,#: dict[str, int],
               embedding_size: int,
               tower_dims=[128, 64, 32],
               drop_prob=[0.1, 0.3, 0.3]):
        super(AITM, self).__init__()
        self.feature_vocabulary = feature_vocabulary
        self.feature_names = sorted(list(feature_vocabulary.keys()))
        self.embedding_size = embedding_size
        self.embedding_dict = nn.ModuleDict()
        self.__init_weight()

        self.tower_input_size = len(feature_vocabulary) * embedding_size
        self.click_tower = Tower(self.tower_input_size, tower_dims, drop_prob)
        self.conversion_tower = Tower(self.tower_input_size, tower_dims, drop_prob)
        self.attention_layer = Attention(tower_dims[-1])

        self.info_layer = nn.Sequential(nn.Linear(tower_dims[-1], 32), nn.ReLU(),
                                        nn.Dropout(drop_prob[-1]))

        self.click_layer = nn.Sequential(nn.Linear(tower_dims[-1], 1),
                                         nn.Sigmoid())
        self.conversion_layer = nn.Sequential(nn.Linear(tower_dims[-1], 1),
                                              nn.Sigmoid())
     
    def __init_weight(self,):
        for name, size in self.feature_vocabulary.items():
            emb = nn.Embedding(size, self.embedding_size)
            nn.init.normal_(emb.weight, mean=0.0, std=0.01)
            self.embedding_dict[name] = emb
    
    def forward(self,x):
        feature_embedding = []
        for name in self.feature_names:
            embed = self.embedding_dict[name](x[name])
            feature_embedding.append(embed)
        feature_embedding = torch.cat(feature_embedding, 1)
        tower_click = self.click_tower(feature_embedding)

        tower_conversion = torch.unsqueeze(
            self.conversion_tower(feature_embedding), 1)

        info = torch.unsqueeze(self.info_layer(tower_click), 1)

        ait = self.attention_layer(torch.cat([tower_conversion, info], 1))

        click = torch.squeeze(self.click_layer(tower_click), dim=1)
        conversion = torch.squeeze(self.conversion_layer(ait), dim=1)

        return click, conversion
    
    def loss(self,
            click_label, click_pred, conversion_label,conversion_pred,
            constraint_weight=0.6,
            device="cuda"):
#         if torch.cuda.is_available():
#             device = "gpu:0"
#         else:
#             device = "cpu"

        device = torch.device(device)
        click_label = click_label.to(device)
        conversion_label = conversion_label.to(device)
        
        click_loss = nn.functional.binary_cross_entropy(click_pred, 
                                                        click_label)
        conversion_loss = nn.functional.binary_cross_entropy(conversion_pred,
                                                            conversion_label)
        label_constraint = torch.max(conversion_pred-click_pred,
                                        torch.zeros_like(click_label))
        constraint_loss = torch.sum(label_constraint)
        
        loss = click_loss + conversion_loss + constraint_weight * constraint_weight
        return loss
    
        

In [83]:
# model test

import torch


def test_model():

    feature_vocab = {"0": 10, "1": 12, "2": 20}
    embedding_size = 4
    m = AITM(feature_vocab, embedding_size)
    inputs = {
      "0": torch.tensor([[1], [2]]),
      "1": torch.tensor([[2], [3]]),
      "2": torch.tensor([[10], [11]])
    }
    click, conversion = m(inputs)
    print("click_pred:", click.shape)
    print("covnersion_pred:", conversion.shape)

    click_label = torch.tensor([1.0, 1.0])
    conversion_label = torch.tensor([1.0, 0.0])

    loss = m.loss(click_label, click, conversion_label, conversion, device="cpu")
    print("loss: ", loss)


if __name__ == "__main__":
    test_model()

click_pred: torch.Size([2])
covnersion_pred: torch.Size([2])
loss:  tensor(1.7078, grad_fn=<AddBackward0>)


In [59]:
print(torch.cuda.is_available() )# cuda是否可用
print(torch.cuda.device_count() )# gpu数量
print(torch.cuda.current_device())# 当前设备索引, 从0开始
print(torch.cuda.get_device_name(0))# 返回gpu名字


True
2
0
TITAN X (Pascal)


# 训练入口 Train

In [85]:
import sys
import random
import torch
from torch import nn
from torch.utils.data import DataLoader
import numpy as np
from sklearn.metrics import roc_auc_score


In [114]:
# 超参数设置

batch_size = 10 #500
embedding_size = 5
learning_rate = 0.0001
total_epoch = 3 #10
earlystop_epoch = 1

vocabulary_size = {
    '101': 238635,
    '121': 98,
    '122': 14,
    '124': 3,
    '125': 8,
    '126': 4,
    '127': 4,
    '128': 3,
    '129': 5,
    '205': 467298,
    '206': 6929,
    '207': 263942,
    '216': 106399,
    '508': 5888,
    '509': 104830,
    '702': 51878,
    '853': 37148,
    '301': 4
}

model_file = './out/AITM.model'



In [115]:
def get_dataloader(filename, batch_size, shuffle):
    data = XDataset(filename)
    loader = DataLoader(data, batch_size=batch_size, shuffle=shuffle)
    return loader

In [116]:
def cal_auc(label:list,pred:list):
    label = torch.cat(label)
    pred = torch.cat(pred)
    label = label.detach().numpy()
    pred = pred.detach().numpy()
    auc = roc_auc_score(label,pred,label=np.array([0.0, 1.0]))
    return auc
    

def train():
    train_dataloader = get_dataloader("./data/ctr_cvr.train",
                                     batch_size,shuffle=True)
    dev_dataloader = get_dataloader("./data/ctr_cvr.dev",
                                     batch_size,shuffle=True)
    model = AITM(vocabulary_size, embedding_size)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    optimizer = torch.optim.Adam(model.parameters(),
                               lr=learning_rate,
                               weight_decay=1e-6)
    model.to(device)
    
    best_auc = 0.0
    earlystop_count = 0
    best_epoch = 0
    
    for epoch in range(total_epoch):
        total_loss = 0.
        nb_sample = 0
        # 训练
        model.train()
        for step,batch in enumerate(train_dataloader):
            click,conversion,features = batch
            for key in features.keys():
                features[key] = features[key].to(device)
            click_pred, conversion_pred = model(features)
            loss = model.loss(click.float(),
                             click_pred,
                             conversion.float(),
                             conversion_pred,
                             device=device)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # 变成数值
            total_loss += loss.cpu().detach().numpy()
            nb_sample += click.shape[0]
            if step%20000 ==0:
                print('[%d] Train loss on step %d: %.6f' 
                      %(nb_sample, (step+1), total_loss/(step+1)))
            
        # 验证
        print("start validation...")
        click_pred = []
        click_label = []
        conversion_pred = []
        conversion_label = []
        model.eval()
        for step, batch in enumerate(dev_dataloader):
            click, conversion, features = batch
            for key in features.keys():
                features[key] = features[key].to(device)
            
            with torch.no_grad():
                click_prob, conversion_prob = model(features)
            
            click_pred.append(click_prob.cpu())
            conversion_pred.append(conversion_pred.cpu())
            
            click_label.append(click)
            conversion_label.append(conversion)
            
        click_auc = cal_auc(click_label, click_pred)
        conversion_auc = cal_auc(conversion_label, conversion_pred)

        print("Epoch: {} click_auc:{} conversion_auc:{}".format(
        epoch+1, click_auc, conversion_auc))
            
        auc = click_auc + conversion_auc
        if best_acc<auc:
            best_auc = auc
            best_epoch = epoch +1 
            troch.save(model.state_dict(), model_file)
            earlystop_count = 0
        else:
            print("train stop at Epoch %d based on the base validation Epoch %d" %
                (epoch + 1, best_epoch))
            return 
    
def test():
    print("Start Test ...")
    test_loader = get_dataloader('./data/ctr_cvr.test',
                               batch_size=batch_size,
                               shuffle=False)
    model = AITM(vocabulary_size, 5)
    model.load_state_dict(torch.load(model_file))
    model.eval()
    click_List = []
    conversion_list = []
    click_pred_list = []
    conversion_pred_list = []
    
    for i, batch in enumerate(test_loader):
        if i%1000 ==0:
            sys.stdout.write("test step:{}\r".format(i))
            sys.stdout.flush()
        click,conversion,features = batch
        with torch.no_gard():
            click_pred, conversion_pred = moel(features)
        click_list.append(click)
        conversion_list.append(conversion)
        click_pred_list.append(click_pred)
        conversion_pred_list.append(conversion_pred)
        
    click_auc = cal_auc(click_list, click_pred_list)
    conversion_auc = cal_auc(conversion_list, conversion_pred_list)
    print("Test Resutt: click AUC: {} conversion AUC:{}".format(
      click_auc, conversion_auc))
    

In [117]:
if __name__=="__main__":
    train()
    test()

start load data from: ./data/ctr_cvr.train 
load data from ./data/ctr_cvr.train finished
start load data from: ./data/ctr_cvr.dev 
load data from ./data/ctr_cvr.dev finished
[10] Train loss on step 1: 1.679778
[2010] Train loss on step 201: 1.591234
[4010] Train loss on step 401: 1.255069
[6010] Train loss on step 601: 1.021385
[8010] Train loss on step 801: 0.896552
[10010] Train loss on step 1001: 0.827602
[12010] Train loss on step 1201: 0.779701
[14010] Train loss on step 1401: 0.745888
[16010] Train loss on step 1601: 0.720511
[18010] Train loss on step 1801: 0.701214
[20010] Train loss on step 2001: 0.685085
[22010] Train loss on step 2201: 0.672638
[24010] Train loss on step 2401: 0.660547
[26010] Train loss on step 2601: 0.650254
[28010] Train loss on step 2801: 0.643052
[30010] Train loss on step 3001: 0.637154
[32010] Train loss on step 3201: 0.630877
[34010] Train loss on step 3401: 0.625400
[36010] Train loss on step 3601: 0.619663
[38010] Train loss on step 3801: 0.614679


IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [23]:
class AITM(nn.Module):
    def __init__(self,
               feature_vocabulary,#: dict[str, int],
               embedding_size,#: int,
               tower_dims=[128, 64, 32],
               drop_prob=[0.1, 0.3, 0.3]):
        super(AITM, self).__init__()
        self.feature_vocabulary = feature_vocabulary
        self.feature_names = sorted(list(feature_vocabulary.keys()))
        self.embedding_size = embedding_size
        self.embedding_dict = nn.ModuleDict()
        self.__init_weight()
        
        self.towner_input_size = len(feature_vocabulary) * embedding_size 
        self.click_tower = Tower(self.tower_input_size, towner_dims, drop_prob)
        self.conversion_towner = Tower(self.tower_input_size, tower_dims, drop_prob)
        self.attention_layer = Attention(towner_dims[-1])
        
        
        self.info_layer = nn.Sequential(nn.Linear(tower_dims[-1],32), nn.ReLU(),
                                       nn.Dropout(drop_prob[-1])
                                       )
        self.click_layer = nn.Sequential(nn.Linear(tower_dims[-1],1), nn.Sigmoid())
        self.conversion_layer = nn.Sequential(nn.Linear(tower_dims[-1],1), nn.Sigmoid())
     
    def __init_weight(self,):
        for name,size in self.feature_vocabulary.items():
            emb = nn.Embedding(size, self.embedding_size)
            nn.init.normal_(emb.weight, mean=0.0, std=0.01)
            self.embedding_dict[name] = emb
    
    def forward(self,x):
        feature_embedding = []
        for name in self.feature_names:
            embed = self.embedding_dict[name](x[name])
            feature_embedding.append(embed)
        feature_embedding = torch.cat(feature_embedding,1)
        
        tower_click = self.click_tower(feature_embedding)
        
        tower_conversion = torch.unsqueeze(self.conversion_tower(feature_embedding,1))
        
        info = torch.unsequeeze(self.info_layer(tower_click),1)
        
        ait = self.attention_layer(torch.cat([tower_conversion,info],1))
        
        click = torch.squeeze(self.click_layer(tower_click),dim=1)
        conversion = torch.squeeze(self.conversion_layer(ait), dim=1)
        
        return click,conversion
    
    def loss(self,
            click_label, click_pred, conversion_label,conversion_pred,
            constraint_weight=0.6,
            device="gpu:1"):
        click_label = click_label.to(device)
        conversion_label = conversion_label.to(device)
        
        click_loss = nn.functional.binary_cross_entropy(click_pred, 
                                                        click_label)
        conversion_loss = nn.functional.binary_cross_entropy(conversion_pred,
                                                            conversion_label)
        label_constraint = torch.maximum(conversion_pred,click_pred,
                                        torch.seros_like(click_label))
        constraint_loss = torch.sum(label_constraint)
        
        loss = click_loss + conversion_loss + constraint_weight * constraint_weight
        return loss
    
       

# 在线服务

In [None]:
import torch

vocabulary_size = {
    '101': 238635,
    '121': 98,
    '122': 14,
    '124': 3,
    '125': 8,
    '126': 4,
    '127': 4,
    '128': 3,
    '129': 5,
    '205': 467298,
    '206': 6929,
    '207': 263942,
    '216': 106399,
    '508': 5888,
    '509': 104830,
    '702': 51878,
    '853': 37148,
    '301': 4
}

embedding_size = 5
model_file = './out/AITM.model'
model = AITM(vocabulary_size, embedding_size)
model.load_state_dict(torch.load(model_file))
example = {key: torch.tensor([[0]]) for key in vocabulary_size}
traced_script_module = torch.jit.trace(model, example)
traced_script_module.save("./out/AITM.model.pt")
