In [1]:
from utils.datasets import read_avazu, read_avazu_ml, read_criteo

In [2]:
# Read the Avazu dataset and build the train / validation / test dataloaders.
# NOTE(review): `unique_values` appears to be the per-field vocabulary sizes
# (it is summed further down to size the embedding table) — confirm against
# utils.datasets.read_avazu.
(
    features,
    label,
    train_dataloader,
    val_dataloader,
    test_dataloader,
    unique_values,
) = read_avazu(
    '/root/autodl-tmp/',
    batch_size=4096,
    shuffle=True,
    num_workers=16,
)

start reading avazu...
finish reading avazu.
features: hour
mix: 0 max: 239
features: C1
mix: 0 max: 6
features: banner_pos
mix: 0 max: 6
features: site_id
mix: 0 max: 4736
features: site_domain
mix: 0 max: 7744
features: site_category
mix: 0 max: 25
features: app_id
mix: 0 max: 8551
features: app_domain
mix: 0 max: 558
features: app_category
mix: 0 max: 35
features: device_id
mix: 0 max: 2686407
features: device_ip
mix: 0 max: 6729485
features: device_model
mix: 0 max: 8250
features: device_type
mix: 0 max: 4
features: device_conn_type
mix: 0 max: 3
features: C14
mix: 0 max: 2625
features: C15
mix: 0 max: 7
features: C16
mix: 0 max: 8
features: C17
mix: 0 max: 434
features: C18
mix: 0 max: 3
features: C19
mix: 0 max: 67
features: C20
mix: 0 max: 171
features: C21
mix: 0 max: 59
[240, 7, 7, 4737, 7745, 26, 8552, 559, 36, 2686408, 6729486, 8251, 5, 4, 2626, 8, 9, 435, 4, 68, 172, 60]


In [3]:
import torch
import torch.nn as nn
import numpy as np
class mlp(nn.Module):
    """Embedding + multi-layer perceptron model for categorical inputs.

    All fields share a single embedding table. Field ``i`` is shifted by
    ``offsets[i]`` (the cumulative size of the preceding fields) so that its
    local ids address a disjoint slice of that table. The per-field embeddings
    are flattened and passed through a stack of
    ``Linear -> BatchNorm1d -> ReLU -> Dropout`` blocks and, optionally, a final
    ``Linear(*, 1)``. A sigmoid is always applied to the result, including when
    ``output_layer`` is False.

    Args:
        input_dim (int): width of the flattened embeddings. The embedding size
            is fixed at 16, so this must be ``len(unique_values) * 16``.
        unique_values (sequence of int): number of distinct ids in each field.
        embed_dims (sequence of int): output width of each hidden block.
        dropout (float): dropout probability used in every hidden block.
        output_layer (bool): whether to append the final ``Linear(*, 1)``.

    Shape:
        - Input: `(batch_size, num_fields)` integer ids
        - Output: `(batch_size, 1)` or `(batch_size, embed_dims[-1])`
    """

    def __init__(self, input_dim, unique_values, embed_dims=[16, 8], dropout=0.2, output_layer=True):
        super().__init__()

        self.embedding = nn.Embedding(sum(unique_values), 16)
        torch.nn.init.xavier_uniform_(self.embedding.weight.data)
        # Start row of every field inside the shared embedding table.
        self.offsets = np.array((0, *np.cumsum(unique_values)[:-1]))
        # Keep a tensor copy as a NON-persistent buffer: it follows the module
        # across .to(device) calls, so forward() no longer rebuilds (and copies
        # host -> device) the offsets on every batch, yet it is not written to
        # state_dict, so existing checkpoints still load unchanged.
        self.register_buffer(
            "_offsets_tensor",
            torch.tensor(self.offsets, dtype=torch.long).unsqueeze(0),
            persistent=False,
        )

        self.mlps = nn.ModuleList()
        self.out_layer = output_layer
        for embed_dim in embed_dims:
            self.mlps.append(
                nn.Sequential(
                    nn.Linear(input_dim, embed_dim),
                    nn.BatchNorm1d(embed_dim),
                    nn.ReLU(),
                    nn.Dropout(p=dropout),
                )
            )
            # The next block consumes what this one produced.
            input_dim = embed_dim
        if self.out_layer:
            self.out = nn.Linear(input_dim, 1)

    def forward(self, x):
        """
        :param x: Integer tensor of size ``(batch_size, num_fields)`` holding
            the per-field ids (each id local to its own field)
        :return: Float tensor of sigmoid activations, ``(batch_size, 1)`` when
            the output layer is enabled
        """
        # Shift local ids into the shared table, then look them up.
        x = self.embedding(x + self._offsets_tensor)
        # (batch, num_fields, 16) -> (batch, num_fields * 16)
        x = x.reshape(x.shape[0], -1)
        for layer in self.mlps:
            x = layer(x)
        if self.out_layer:
            x = self.out(x)
        return torch.sigmoid(x)

In [4]:
class LR(nn.Module):
    """Logistic-regression head.

    A single affine projection of the input onto one output unit, optionally
    squashed through a sigmoid.

    Args:
        input_dim (int): number of input features of the linear projection.
        sigmoid (bool): when True the output is passed through a sigmoid,
            otherwise the raw linear output is returned.

    Shape:
        - Input: `(batch_size, input_dim)`
        - Output: `(batch_size, 1)`
    """

    def __init__(self, input_dim, sigmoid=False):
        super().__init__()
        self.fc = nn.Linear(input_dim, 1, bias=True)
        self.sigmoid = sigmoid

    def forward(self, x):
        logit = self.fc(x)
        return torch.sigmoid(logit) if self.sigmoid else logit
class CrossNetwork(nn.Module):
    """Cross network from the Deep & Cross Network (DCN) paper.

    Each layer computes ``x_next = x0 * w(x) + b + x`` where ``x0`` is the
    original input, ``w`` is a bias-free linear map onto a single scalar and
    ``b`` is a learned bias vector initialised to zeros.

    Args:
        input_dim (int): size of the last dimension of the input tensor
        num_layers (int): number of stacked cross layers

    Shape:
        - Input: `(batch_size, *)`
        - Output: `(batch_size, *)`

    """

    def __init__(self, input_dim, num_layers):
        super().__init__()
        self.num_layers = num_layers
        projections, biases = [], []
        for _ in range(num_layers):
            projections.append(torch.nn.Linear(input_dim, 1, bias=False))
            biases.append(torch.nn.Parameter(torch.zeros((input_dim,))))
        self.w = torch.nn.ModuleList(projections)
        self.b = torch.nn.ParameterList(biases)

    def forward(self, x):
        """
        :param x: Float tensor whose last dimension is ``input_dim``
        :return: tensor of the same shape as ``x``
        """
        base = x
        for project, bias in zip(self.w, self.b):
            # Scalar interaction term per sample, broadcast over the features.
            x = base * project(x) + bias + x
        return x
class _Dice(nn.Module):
    """Dice activation: ``p(x) * x + (1 - p(x)) * alpha * x``.

    ``p(x)`` is the sigmoid of the batch-normalised input (no affine terms) and
    ``alpha`` is a learned per-feature slope initialised to zero.
    """

    def __init__(self, num_features, eps=1e-8):
        super().__init__()
        self.bn = nn.BatchNorm1d(num_features, eps=eps, affine=False)
        self.alpha = nn.Parameter(torch.zeros(num_features))

    def forward(self, x):
        gate = torch.sigmoid(self.bn(x))
        return gate * x + (1 - gate) * self.alpha * x


class MLP(nn.Module):
    """Multi Layer Perceptron Module, it is the most widely used module for
    learning feature. Every hidden ``Linear`` is followed by `BatchNorm1d`,
    the chosen activation and `Dropout`.

    Args:
        input_dim (int): input size of the first Linear Layer.
        output_layer (bool): whether this MLP module is the output layer. If `True`, then append one Linear(*,1) module.
        dims (list): output size of each hidden Linear Layer (default=[16, 16]); `None` means no hidden layer.
        dropout (float): probability of an element to be zeroed (default = 0).
        activation (str): the activation function, case-insensitive, one of `[sigmoid, relu, prelu, dice, softmax]` (default='relu').

    Raises:
        ValueError: if `activation` is not one of the supported names (only
            checked when at least one hidden layer is built).

    Shape:
        - Input: `(batch_size, input_dim)`
        - Output: `(batch_size, 1)` or `(batch_size, dims[-1])`
    """

    def __init__(self, input_dim, output_layer=True, dims=[16,16], dropout=0, activation="relu"):
        super().__init__()
        if dims is None:
            dims = []
        layers = list()
        for i_dim in dims:
            layers.append(nn.Linear(input_dim, i_dim))
            layers.append(nn.BatchNorm1d(i_dim))
            # Previously `activation` was ignored and ReLU was always used.
            layers.append(self._make_activation(activation, i_dim))
            layers.append(nn.Dropout(p=dropout))
            input_dim = i_dim
        if output_layer:
            layers.append(nn.Linear(input_dim, 1))
        self.mlp = nn.Sequential(*layers)

    @staticmethod
    def _make_activation(activation, num_features):
        """Build a fresh activation module for a layer of width `num_features`."""
        name = str(activation).lower()
        if name == "relu":
            return nn.ReLU(inplace=True)
        if name == "sigmoid":
            return nn.Sigmoid()
        if name == "prelu":
            return nn.PReLU()
        if name == "softmax":
            # Normalise across the feature dimension of a (batch, features) input.
            return nn.Softmax(dim=1)
        if name == "dice":
            return _Dice(num_features)
        raise ValueError(
            "unsupported activation %r; expected one of "
            "['sigmoid', 'relu', 'prelu', 'dice', 'softmax']" % (activation,)
        )

    def forward(self, x):
        return self.mlp(x)
class DCN(torch.nn.Module):
    """Deep & Cross Network

    All fields share one embedding table (field ``i`` is shifted by
    ``offsets[i]``). The flattened embeddings are fed in parallel to a
    `CrossNetwork` and an `MLP` without output layer; both outputs are
    concatenated and mapped to a single sigmoid probability by `LR`.

    Args:
        unique_values (sequence of int): number of distinct ids in each field.
        n_cross_layers (int): number of layers of the cross network.
        embed_dim (int): embedding size of every field (default=16).
        mlp_dims (sequence of int): hidden sizes of the deep branch
            (default=(16, 16)); an empty sequence makes the deep branch an
            identity over the embeddings.

    Shape:
        - Input: `(batch_size, num_fields)` integer ids
        - Output: `(batch_size, 1)`
    """

    def __init__(self, unique_values, n_cross_layers, embed_dim=16, mlp_dims=(16, 16)):
        super().__init__()
        mlp_dims = list(mlp_dims)
        self.dims = embed_dim * len(unique_values)
        self.embedding = torch.nn.Embedding(sum(unique_values), embed_dim)
        self.cn = CrossNetwork(self.dims, n_cross_layers)
        self.mlp = MLP(self.dims, output_layer=False, dims=mlp_dims)
        # Width of the deep branch output; derived from mlp_dims instead of the
        # former hard-coded 16 so the two can no longer drift apart.
        mlp_out_dim = mlp_dims[-1] if mlp_dims else self.dims
        self.linear = LR(self.dims + mlp_out_dim)
        # Start row of every field inside the shared embedding table.
        self.offsets = np.array((0, *np.cumsum(unique_values)[:-1]))
        # Non-persistent buffer: moves with .to(device) and avoids rebuilding the
        # offsets tensor on every forward pass, without touching state_dict.
        self.register_buffer(
            "_offsets_tensor",
            torch.tensor(self.offsets, dtype=torch.long).unsqueeze(0),
            persistent=False,
        )

    def forward(self, x):
        """
        :param x: Integer tensor of size ``(batch_size, num_fields)``
        :return: Float tensor of size ``(batch_size, 1)`` with sigmoid outputs
        """
        b = x.shape[0]
        # Shift local ids into the shared table, look up, and flatten per sample.
        embed_x = self.embedding(x + self._offsets_tensor).reshape(b, -1)
        cn_out = self.cn(embed_x)
        mlp_out = self.mlp(embed_x)
        x_stack = torch.cat([cn_out, mlp_out], dim=1)
        y = self.linear(x_stack)
        return torch.sigmoid(y)

In [4]:
from tqdm.notebook import tqdm
from sklearn.metrics import roc_auc_score, log_loss
# ---- configuration ----
MAX_EPOCHS = 100               # upper bound on training epochs
LOG_EVERY = 100                # batches between running-loss updates on the progress bar
MAX_PATIENCE = 3               # stop once more than this many epochs pass without a better AUC
CHECKPOINT_PATH = "model.pth"  # weights of the best validation epoch


def _collect_predictions(model, dataloader, device, desc):
    """Run `model` over `dataloader` in eval mode without gradients.

    Args:
        model: module mapping a batch of inputs to one prediction per sample.
        dataloader: iterable yielding ``(x, y)`` tensor batches.
        device: device the inputs are moved to before the forward pass.
        desc: label shown on the progress bar.

    Returns:
        ``(targets, predicts)``: two flat Python lists with one entry per sample.
    """
    model.eval()
    targets, predicts = list(), list()
    with torch.no_grad():
        for x, y in tqdm(dataloader, desc=desc, smoothing=0, mininterval=1.0):
            y_pred = model(x.to(device))
            # Flatten so the metric receives 1-D score / label vectors.
            targets.extend(y.reshape(-1).tolist())
            predicts.extend(y_pred.reshape(-1).tolist())
    return targets, predicts


# train
# Fall back to CPU so the cell still runs on machines without a GPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = mlp(input_dim=len(features)*16, unique_values=unique_values).to(device)
# model = DCN(unique_values, 2).to(device)
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.0001, weight_decay=1e-5)
criterion = torch.nn.BCELoss()
best_auc = 0.0
patience = 0  # consecutive epochs without a validation-AUC improvement

for epoch in range(MAX_EPOCHS):
    model.train()
    total_loss = 0
    tk0 = tqdm(train_dataloader, desc="train", smoothing=0, mininterval=1.0)
    for i, (x, y) in enumerate(tk0):
        optimizer.zero_grad()
        x = x.to(device)
        y = y.to(device)
        y_pred = model(x)
        loss = criterion(y_pred, y.float().reshape(-1, 1))
        total_loss += loss.item()
        loss.backward()
        optimizer.step()
        if (i + 1) % LOG_EVERY == 0:
            # Show the mean loss of the last LOG_EVERY batches, then restart the window.
            tk0.set_postfix(loss=total_loss / LOG_EVERY)
            total_loss = 0

    targets, predicts = _collect_predictions(model, val_dataloader, device, "validation")
    auc = roc_auc_score(targets, predicts)
    print('epoch ',epoch, ' auc ', auc)
    if auc > best_auc:
        best_auc = auc
        patience = 0
        # Keep only the weights of the best validation epoch.
        torch.save(model.state_dict(), CHECKPOINT_PATH)
    else:
        patience += 1
        if patience > MAX_PATIENCE:
            print('early stop. best auc:', best_auc)
            break


# test
# Restore the best checkpoint; map_location keeps this working when the
# checkpoint was written on a different device than the current one.
model.load_state_dict(torch.load(CHECKPOINT_PATH, map_location=device))
targets, predicts = _collect_predictions(model, test_dataloader, device, "test")
auc = roc_auc_score(targets, predicts)
print('test auc:', auc)

train:   0%|          | 0/7897 [00:00<?, ?it/s]

validation:   0%|          | 0/988 [00:00<?, ?it/s]

epoch  0  auc  0.7796314166598058


train:   0%|          | 0/7897 [00:00<?, ?it/s]

validation:   0%|          | 0/988 [00:00<?, ?it/s]

epoch  1  auc  0.78402917347738


train:   0%|          | 0/7897 [00:00<?, ?it/s]

validation:   0%|          | 0/988 [00:00<?, ?it/s]

epoch  2  auc  0.7819189192129827


train:   0%|          | 0/7897 [00:00<?, ?it/s]

validation:   0%|          | 0/988 [00:00<?, ?it/s]

epoch  3  auc  0.78032092596661


train:   0%|          | 0/7897 [00:00<?, ?it/s]

validation:   0%|          | 0/988 [00:00<?, ?it/s]

epoch  4  auc  0.779318800044741


train:   0%|          | 0/7897 [00:00<?, ?it/s]

validation:   0%|          | 0/988 [00:00<?, ?it/s]

epoch  5  auc  0.7789293513186053
early stop. best auc: 0.78402917347738


test:   0%|          | 0/988 [00:00<?, ?it/s]

test auc: 0.7844399425526216
