# script to do experiments described in paper: Deep Interest Evolution Network for Click-Through Rate Prediction

## how to run
please download data.tar.gz, data1.tar.gz and data2.tar.gz from https://github.com/mouna99/dien and decompress them to the same folder with this script.


In [1]:
SEQ_MAX_LEN = 100 # maximum sequence length
BATCH_SIZE = 128
MBEDDING_DIM = 18
DNN_HIDDEN_SIZE = [200, 80]
DNN_DROPOUT = 0.0
TEST_RUN = False
EPOCH = 1

In [2]:
%matplotlib inline

import itertools
from collections import Counter

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score

from prediction_flow.features import Number, Category, Sequence, Features
from prediction_flow.transformers.column import (
    StandardScaler, CategoryEncoder, SequenceEncoder)

from prediction_flow.pytorch.data import Dataset
from prediction_flow.pytorch import WideDeep, DeepFM, DNN, DIN, DIEN, AttentionGroup, EmbeddingRef

from prediction_flow.pytorch.functions import fit, predict, create_dataloader_fn

In [3]:
train_df = pd.read_csv(
    "./local_train.csv", sep='\t')

valid_df = pd.read_csv(
    "./local_test.csv", sep='\t')

In [4]:
if TEST_RUN:
    train_df = train_df.sample(1000)
    valid_df = valid_df.sample(1000)

In [5]:
train_df.head()

Unnamed: 0,label,uid,mid,cat,hist_mids,hist_cats,neg_hist_mids,neg_hist_cats
840224,0,A21T1KXN9MOBWL,1577782100,Books,1908147016B00APARP6MB00DUUMC8U0982582900,BooksHistoricalBiographicalBooks,0765326574055356092109704029021935711121,BooksBooksBooksBooks
444413,1,A1IITSYATPP1OW,1499646704,Books,14767182021491276029B00DRL96CC099214220214...,BooksBooksLiterature & FictionBooksBooksB...,B00L70DD2E16016253751463607946034529863209...,Weight LossBooksBooksBooksBooksBooksBooks
51871,1,A9GY8PLOTA94I,141448125X,Books,082542537900605149570805098542060623840907...,BooksBooksBooksBooksBooksBooksBooksBook...,159052748806700592181936608588061550579116...,BooksBooksBooksBooksBooksBooksBooksBook...
575727,1,A2T9MK9KT26RRR,0671318373,Books,B00760GICG05533840310316069159039332597005...,Literature & FictionBooksBooksBooksBooksB...,0793584973B00B0XERBE0972838708033371829114...,BooksLiterature & FictionBooksBooksBooksB...
254549,1,A1Q5JZF6CBQ1PM,1495979237,Books,193840400914767352470425265951042526596X14...,BooksBooksBooksBooksBooksBooksBooksBook...,1423107233B0094Z4NIC1600594654034540476904...,BooksContemporary WomenBooksBooksBooksBoo...


In [6]:
valid_df.head()

Unnamed: 0,label,uid,mid,cat,hist_mids,hist_cats,neg_hist_mids,neg_hist_cats
46841,1,A1AEK70KPFQQFB,781758513,Books,0060847344006113837100613638470323066607,BooksBooksBooksBooks,B00G3GNUXO061589849119300048181603582649,United StatesBooksBooksBooks
52846,0,AUTQ8523N1DD,1476783284,Books,1420103423142011037307860213650446559245,BooksBooksBooksBooks,0505525755188431346916052942251573441449,BooksBooksBooksBooks
32948,0,A28J1OWDX35U77,778315568,Books,044639230808986219920345432282006057453415...,BooksBooksBooksBooksBooksBooksBooksBooks,145550956614000714960786303611044050683214...,BooksBooksBooksBooksBooksLiterature & Fic...
66158,0,AFX1VHGZKFGWV,1455510084,Books,042525727404252572820061348171014062119905...,BooksBooksBooksBooksBooks,16006612970375714472B009VM0L8E159059908X00...,BooksBooksContemporaryBooksBooks
50146,0,A2RQDQWUUKMAQ0,545096766,Books,098319633803073871350061147958031257164X04...,BooksBooksBooksBooksBooksBooksSingle Aut...,074147453004252172130345456300048623908XB0...,BooksBooksBooksBooksIndividual ArtistsBoo...


# EDA

In [7]:
def scale_eda(df):
    print(df.shape)
    print(df.uid.nunique())
    print(df.mid.nunique())
    print(df.groupby('label', as_index=False).uid.count())

In [8]:
scale_eda(train_df)
scale_eda(valid_df)

(1000, 8)
1000
987
   label  uid
0      0  510
1      1  490
(1000, 8)
997
987
   label  uid
0      0  506
1      1  494


In [9]:
train_df.values[0][4].split("")

['1908147016', 'B00APARP6M', 'B00DUUMC8U', '0982582900']

**This data set is well balanced. Each user has two samples, pos sample and neg sample.**

In [10]:
unique_cats = Counter(train_df.cat.values.tolist())

In [11]:
unique_cats_in_hist = Counter(
    itertools.chain(*train_df.hist_cats.apply(lambda x: x.split("")).values.tolist()))

In [12]:
print(len(unique_cats), len(unique_cats_in_hist),
      len(np.intersect1d(list(unique_cats.keys()), list(unique_cats_in_hist.keys()))))

65 251 49


**All categorys also appear in history categorys.**

In [13]:
unique_mids = Counter(train_df.mid.values.tolist())

In [14]:
unique_mids_in_hist = Counter(
    itertools.chain(*train_df.hist_mids.apply(lambda x: x.split("")).values.tolist()))

In [15]:
print(len(unique_mids), len(unique_mids_in_hist),
      len(np.intersect1d(list(unique_mids.keys()), list(unique_mids_in_hist.keys()))))

987 12329 147


**Most mids appears in history mids.**

In [16]:
print("There are {}% mid overlap between train and valid".format(
    100 * len(np.intersect1d(train_df.mid.unique(), valid_df.mid.unique())) / len(valid_df.mid.unique())))

There are 2.43161094224924% mid overlap between train and valid


In [17]:
print("There are {}% mid overlap between train and valid".format(
    100 * len(np.intersect1d(train_df.cat.unique(), valid_df.cat.unique())) / len(valid_df.cat.unique())))

There are 45.0% mid overlap between train and valid


# define features

In [18]:
cat_enc = SequenceEncoder(sep="", min_cnt=1, max_len=SEQ_MAX_LEN)

In [19]:
cat_enc.fit(train_df.hist_cats.values)

<prediction_flow.transformers.column.sequence_encoder.SequenceEncoder at 0x14c1e9550>

In [20]:
cat_word2idx, cat_idx2word = cat_enc.word2idx, cat_enc.idx2word

In [21]:
print(len(cat_word2idx))

253


In [22]:
mid_enc = SequenceEncoder(sep="", min_cnt=1, max_len=SEQ_MAX_LEN)

In [23]:
mid_enc.fit(np.vstack([train_df.mid.values, train_df.hist_mids.values]))

<prediction_flow.transformers.column.sequence_encoder.SequenceEncoder at 0x14a375da0>

In [24]:
mid_word2idx, mid_idx2word = mid_enc.word2idx, mid_enc.idx2word

In [25]:
print(len(mid_word2idx))

13171


In [26]:
number_features = []

category_features = [
    Category('mid', CategoryEncoder(min_cnt=1, word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Category('cat', CategoryEncoder(min_cnt=1, word2idx=cat_word2idx, idx2word=cat_idx2word)),
]

sequence_features = [
    Sequence('hist_mids', SequenceEncoder(sep="", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Sequence('hist_cats', SequenceEncoder(sep="", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=cat_word2idx, idx2word=cat_idx2word))
]

features, train_loader, valid_loader = create_dataloader_fn(
    number_features, category_features, sequence_features, BATCH_SIZE, train_df, 'label', valid_df, 4)

In [27]:
def evaluation(df, dataloader):
    preds = predict(model, dataloader)
    return roc_auc_score(df['label'], preds.ravel())

In [28]:
embedding_ref = EmbeddingRef(
    {'hist_mids': 'mid', 'hist_cats': 'cat'})

din_attention_groups = [
    AttentionGroup(
        name='group1',
        pairs=[{'ad': 'mid', 'pos_hist': 'hist_mids'},
               {'ad': 'cat', 'pos_hist': 'hist_cats'}],
        hidden_layers=[80, 40], att_dropout=0.0)]

gru_attention_groups = [
    AttentionGroup(
        name='group1',
        pairs=[{'ad': 'mid', 'pos_hist': 'hist_mids'},
               {'ad': 'cat', 'pos_hist': 'hist_cats'}],
        hidden_layers=[80, 40], att_dropout=0.0, gru_type='GRU')]

aigru_attention_groups = [
    AttentionGroup(
        name='group1',
        pairs=[{'ad': 'mid', 'pos_hist': 'hist_mids'},
               {'ad': 'cat', 'pos_hist': 'hist_cats'}],
        hidden_layers=[80, 40], att_dropout=0.0, gru_type='AIGRU')]

agru_attention_groups = [
    AttentionGroup(
        name='group1',
        pairs=[{'ad': 'mid', 'pos_hist': 'hist_mids'},
               {'ad': 'cat', 'pos_hist': 'hist_cats'}],
        hidden_layers=[80, 40], att_dropout=0.0, gru_type='AGRU')]

augru_attention_groups = [
    AttentionGroup(
        name='group1',
        pairs=[{'ad': 'mid', 'pos_hist': 'hist_mids'},
               {'ad': 'cat', 'pos_hist': 'hist_cats'}],
        hidden_layers=[80, 40], att_dropout=0.0, gru_type='AUGRU')]

models = [
    DNN(features, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
        final_activation='sigmoid', dropout=DNN_DROPOUT,
        embedding_ref=embedding_ref),
    
    WideDeep(features,
             wide_features=['mid', 'hist_mids', 'cat', 'hist_cats'],
             deep_features=['mid', 'hist_mids', 'cat', 'hist_cats'],
             cross_features=[('mid', 'hist_mids'), ('cat', 'hist_cats')],
             num_classes=2, embedding_size=MBEDDING_DIM, hidden_layers=DNN_HIDDEN_SIZE,
             final_activation='sigmoid', dropout=DNN_DROPOUT, embedding_ref=embedding_ref),
    
    DeepFM(features, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE, 
           final_activation='sigmoid', dropout=DNN_DROPOUT,
           embedding_ref=embedding_ref),
    
    DIN(features, din_attention_groups, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
        final_activation='sigmoid', dropout=DNN_DROPOUT, embedding_ref=embedding_ref),
    
    DIEN(features, gru_attention_groups, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
         final_activation='sigmoid', dropout=DNN_DROPOUT, embedding_ref=embedding_ref),
    
    DIEN(features, aigru_attention_groups, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
         final_activation='sigmoid', dropout=DNN_DROPOUT, embedding_ref=embedding_ref),
    
    DIEN(features, agru_attention_groups, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
         final_activation='sigmoid', dropout=DNN_DROPOUT, embedding_ref=embedding_ref),
    
    DIEN(features, augru_attention_groups, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
         final_activation='sigmoid', dropout=DNN_DROPOUT, embedding_ref=embedding_ref)
]

In [29]:
scores = []
for model in models:
    print(model)
    loss_func = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer, mode='min', factor=0.5, patience=5)
    
    fit(EPOCH, model, loss_func, optimizer,
        train_loader, valid_loader, scheduler, notebook=True, auxiliary_loss_rate=1)
    
    scores.append(evaluation(valid_df, valid_loader))

DNN(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (pooling:hist_mids): MaxPooling()
  (pooling:hist_cats): MaxPooling()
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
      (batchnorm0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation0): ReLU()
      (dense1): Linear(in_features=200, out_features=80, bias=True)
      (batchnorm1): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation1): ReLU()
    )
  )
  (final_layer): Linear(in_features=80, out_features=1, bias=True)
)


HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

WideDeep(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (pooling:hist_mids): SumPooling()
  (pooling:hist_cats): SumPooling()
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
      (batchnorm0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation0): ReLU()
      (dense1): Linear(in_features=200, out_features=80, bias=True)
      (batchnorm1): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation1): ReLU()
    )
  )
  (final_layer): Linear(in_features=188, out_features=1, bias=True)
)


HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

DeepFM(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (pooling:hist_mids): MaxPooling()
  (pooling:hist_cats): MaxPooling()
  (fm): FM()
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
      (batchnorm0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation0): ReLU()
      (dense1): Linear(in_features=200, out_features=80, bias=True)
      (batchnorm1): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation1): ReLU()
    )
  )
  (final_layer): Linear(in_features=81, out_features=1, bias=True)
)


HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

DIN(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (attention_pooling:group1): Attention(
    (mlp): MLP(
      (_sequential): Sequential(
        (dense0): Linear(in_features=144, out_features=80, bias=True)
        (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation0): PReLU(num_parameters=1)
        (dense1): Linear(in_features=80, out_features=40, bias=True)
        (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation1): PReLU(num_parameters=1)
      )
    )
    (fc): Linear(in_features=40, out_features=1, bias=True)
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
      (batchnorm0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation0): PReLU(num_parameters=1)
      (dense1): Linear(

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

DIEN(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (interest_evolution): GRU(36, 36, batch_first=True)
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=1)
          (dense1): Linear(in_features=80, out_features=40, bias=True)
          (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation1): PReLU(num_parameters=1)
        )
      )
      (fc): Linear(in_features=40, out_features=1, bias=True)
    )
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
 

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

DIEN(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=1)
          (dense1): Linear(in_features=80, out_features=40, bias=True)
          (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation1): PReLU(num_parameters=1)
        )
      )
      (fc): Linear(in_features=40, out_features=1, bias=True)
    )
    (interest_evolution): GRU(36, 36, batch_first=True)
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
 

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

DIEN(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=1)
          (dense1): Linear(in_features=80, out_features=40, bias=True)
          (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation1): PReLU(num_parameters=1)
        )
      )
      (fc): Linear(in_features=40, out_features=1, bias=True)
    )
    (interest_evolution): DynamicGRU(
      (rnn): AttentionGRUCell()
    )
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_featur

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

DIEN(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=1)
          (dense1): Linear(in_features=80, out_features=40, bias=True)
          (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation1): PReLU(num_parameters=1)
        )
      )
      (fc): Linear(in_features=40, out_features=1, bias=True)
    )
    (interest_evolution): DynamicGRU(
      (rnn): AttentionUpdateGateGRUCell()
    )
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, 

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

In [30]:
print(scores)

[0.5343889520090892, 0.4807652301931478, 0.476430606007265, 0.4800511273623401, 0.518710694339985, 0.5068409851018547, 0.5230373173737018, 0.4922608855675218]


In [31]:
number_features = []

category_features = [
    Category('mid', CategoryEncoder(min_cnt=1, word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Category('cat', CategoryEncoder(min_cnt=1, word2idx=cat_word2idx, idx2word=cat_idx2word)),
]

sequence_features = [
    Sequence('hist_mids', SequenceEncoder(sep="", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Sequence('hist_cats', SequenceEncoder(sep="", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=cat_word2idx, idx2word=cat_idx2word)),
    Sequence('neg_hist_mids', SequenceEncoder(sep="", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Sequence('neg_hist_cats', SequenceEncoder(sep="", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=cat_word2idx, idx2word=cat_idx2word))
]

features, train_loader, valid_loader = create_dataloader_fn(
    number_features, category_features, sequence_features, BATCH_SIZE, train_df, 'label', valid_df, 4)

In [32]:
augru_attention_groups_with_neg = [
    AttentionGroup(
        name='group1',
        pairs=[{'ad': 'mid', 'pos_hist': 'hist_mids', 'neg_hist': 'neg_hist_mids'},
               {'ad': 'cat', 'pos_hist': 'hist_cats', 'neg_hist': 'neg_hist_cats'}],
        hidden_layers=[80, 40], att_dropout=0.0, gru_type='AUGRU')]

In [33]:
models = [
    DIEN(features, augru_attention_groups_with_neg, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
         final_activation='sigmoid', dropout=DNN_DROPOUT, use_negsampling=True,
         embedding_ref=embedding_ref)
]

In [34]:
scores = []
for model in models:
    print(model)
    loss_func = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer, mode='min', factor=0.5, patience=5)
    
    fit(EPOCH, model, loss_func, optimizer,
        train_loader, valid_loader, scheduler, notebook=True, auxiliary_loss_rate=1)
    
    scores.append(evaluation(valid_df, valid_loader))

DIEN(
  (embedding:mid): Embedding(13171, 18, padding_idx=0)
  (embedding:cat): Embedding(253, 18, padding_idx=0)
  (embedding:neg_hist_mids): Embedding(13171, 18, padding_idx=0)
  (embedding:neg_hist_cats): Embedding(253, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (auxiliary_net): AuxiliaryNet(
      (_sequential): Sequential(
        (dense0): Linear(in_features=72, out_features=50, bias=True)
        (activation0): Sigmoid()
        (dense1): Linear(in_features=50, out_features=20, bias=True)
        (activation1): Sigmoid()
        (final_layer): Linear(in_features=20, out_features=1, bias=True)
      )
    )
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8, style=ProgressStyle(description_width='initial…

HBox(children=(IntProgress(value=0, description='valid', max=8, style=ProgressStyle(description_width='initial…

In [35]:
print(scores)

[0.5005380774831576]
