# Script to run the experiments described in the paper: Deep Interest Evolution Network for Click-Through Rate Prediction

## how to run
Please download data.tar.gz, data1.tar.gz and data2.tar.gz from https://github.com/mouna99/dien and decompress them into the same folder as this notebook.


In [1]:
# ---- Experiment configuration: edit here, then Restart & Run All ----
SEQ_MAX_LEN = 100  # maximum sequence length
BATCH_SIZE = 128  # batch size handed to create_dataloader_fn
EMBEDDING_DIM = 18  # embedding size passed to every model
# Misspelled legacy name; kept as an alias because later cells reference it.
MBEDDING_DIM = EMBEDDING_DIM
DNN_HIDDEN_SIZE = [200, 80]  # hidden layer sizes passed to every model
DNN_DROPOUT = 0.0  # dropout value passed to every model
TEST_RUN = False  # True -> train/valid frames are down-sampled to 1000 rows
EPOCH = 1  # epoch count passed to fit()

In [2]:
%matplotlib inline

import itertools
from collections import Counter

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score

from prediction_flow.features import Number, Category, Sequence, Features
from prediction_flow.transformers.column import (
    StandardScaler, CategoryEncoder, SequenceEncoder)

from prediction_flow.pytorch.data import Dataset
from prediction_flow.pytorch import WideDeep, DeepFM, DNN, DIN, DIEN, AttentionGroup, EmbeddingRef

from prediction_flow.pytorch.functions import fit, predict, create_dataloader_fn

In [3]:
# Both splits are tab-separated files read from the notebook's directory;
# the training file is read first, then the validation file.
train_df, valid_df = (
    pd.read_csv(csv_path, sep='\t')
    for csv_path in ("./local_train.csv", "./local_test.csv")
)

In [4]:
# Smoke-test mode: shrink both splits so the rest of the notebook runs quickly.
if TEST_RUN:
    # A fixed random_state makes the subset reproducible across kernel
    # restarts; min() keeps the cell valid for frames with fewer than 1000
    # rows (including a re-run after the frames were already reduced).
    train_df = train_df.sample(min(1000, len(train_df)), random_state=0)
    valid_df = valid_df.sample(min(1000, len(valid_df)), random_state=0)

In [5]:
# Preview the first rows of the training split (head() with its default row count).
train_df.head()

Unnamed: 0,label,uid,mid,cat,hist_mids,hist_cats,neg_hist_mids,neg_hist_cats
0,0,AZPJ9LUT0FEPY,B00AMNNTIA,Literature & Fiction,030774443400622483910470530707097892462215...,BooksBooksBooksBooksBooks,B0085JHNRG0807592072B00K6WT986007147533808...,EspionageBooksLiterature & FictionBooksBooks
1,1,AZPJ9LUT0FEPY,0800731603,Books,030774443400622483910470530707097892462215...,BooksBooksBooksBooksBooks,082542673119357261880985350962072786265008...,BooksBooksBooksBooksBooks
2,0,A2NRV79GKAU726,B003NNV10O,Russian,0814472869007146207415839423000812538366B0...,BooksBooksBooksBooksBakingBooksBooks,1609801385B00GY4OTHQ1479256447B009FRSCTKB0...,BooksLiterature & FictionBooksLiterature & ...
3,1,A2NRV79GKAU726,B000UWJ91O,Books,0814472869007146207415839423000812538366B0...,BooksBooksBooksBooksBakingBooksBooks,0373362463B00I9FTOWI1591797101052595318318...,"BooksHealth, Fitness & DietingBooksBooksBo..."
4,0,A2GEQVDX2LL4V3,0321334094,Books,0743596870037428099114391406340976475731,BooksBooksBooksBooks,08129818980312306644B004QGYE5Q0762313056,BooksBooksGayBooks


In [6]:
# Preview the first rows of the validation split (head() with its default row count).
valid_df.head()

Unnamed: 0,label,uid,mid,cat,hist_mids,hist_cats,neg_hist_mids,neg_hist_cats
0,0,A3BI7R43VUZ1TY,B00JNHU0T2,Literature & Fiction,0989464105B00B01691C14778097321608442845,BooksLiterature & FictionBooksBooks,0140114335141659786705171482930425198855,BooksBooksBooksBooks
1,1,A3BI7R43VUZ1TY,0989464121,Books,0989464105B00B01691C14778097321608442845,BooksLiterature & FictionBooksBooks,00616625850310328195B00HIBIAS80800732049,BooksBooksTravelers & ExplorersBooks
2,0,A2Z3AHJPXG3ZNP,B0072YSPJ0,Literature & Fiction,147831096014922314521477603425B00FRKLA6Q,BooksBooksBooksUrban,B00BA99IJE159963593300608978481595554386,EroticaBooksBooksBooks
3,1,A2Z3AHJPXG3ZNP,B00G4I4I5U,Urban,147831096014922314521477603425B00FRKLA6Q,BooksBooksBooksUrban,B007K1OF1C1590771540B00DZV25QS1600571476,BreedsBooksLiterature & FictionBooks
4,0,A2KDDPJUNWC5CA,0316228532,Books,0141326085031026622X0316077046098864917914...,BooksBooksBooksBooksBooks,B00LANEVEWB009HUVEGIB00A90PN120671692658B0...,Urban LifeSports MedicineContemporary Women...


# EDA

In [7]:
def scale_eda(df):
    """Print basic size statistics for one data split.

    Printed in order: the frame's shape, the number of distinct ``uid``
    values, the number of distinct ``mid`` values, and the count of
    non-null ``uid`` entries per ``label`` value.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns ``label``, ``uid`` and ``mid``.
    """
    n_users = df['uid'].nunique()
    n_items = df['mid'].nunique()
    uid_count_per_label = df.groupby('label', as_index=False)['uid'].count()

    print(df.shape)
    print(n_users)
    print(n_items)
    print(uid_count_per_label)

In [8]:
# Same size summary for both splits, training split first.
for split_df in (train_df, valid_df):
    scale_eda(split_df)

(1086120, 8)
543060
261895
   label     uid
0      0  543060
1      1  543060
(121216, 8)
60608
75053
   label    uid
0      0  60608
1      1  60608


In [9]:
# Show the first row's click history as a list of item ids.
# The separator is invisible when rendered and can look like an empty string,
# but "...".split("") raises ValueError. It is spelled out here as "\x02",
# the control character the DIEN data set is assumed to join history items
# with -- TODO confirm against the raw files.
# The column is selected by name rather than via .values[0][4], which copied
# the whole frame into an object array and depended on column position.
train_df.hist_mids.iloc[0].split("\x02")

['0307744434', '0062248391', '0470530707', '0978924622', '1590516400']

**This data set is well balanced: each user has exactly two samples, one positive and one negative.**

In [10]:
# Occurrence count of every candidate-item category in the training split.
unique_cats = Counter(train_df['cat'].tolist())

In [11]:
# Occurrence count of every category that appears in any user's history.
# The separator is written explicitly as "\x02" (assumed DIEN history
# separator -- TODO confirm); an empty separator makes str.split raise
# ValueError. chain.from_iterable streams the per-row lists instead of
# unpacking one argument per training row into chain(*...).
unique_cats_in_hist = Counter(
    itertools.chain.from_iterable(
        hist.split("\x02") for hist in train_df.hist_cats))

In [12]:
# Distinct target categories, distinct history categories, and how many
# categories the two sets share.
cats_in_both = np.intersect1d(list(unique_cats.keys()),
                              list(unique_cats_in_hist.keys()))
print(len(unique_cats), len(unique_cats_in_hist), len(cats_in_both))

1459 1600 1459


**All categories also appear in the history categories.**

In [13]:
# Occurrence count of every candidate item id (mid) in the training split.
unique_mids = Counter(train_df['mid'].tolist())

In [14]:
# Occurrence count of every item id that appears in any user's history.
# The separator is written explicitly as "\x02" (assumed DIEN history
# separator -- TODO confirm); an empty separator makes str.split raise
# ValueError. chain.from_iterable streams the per-row lists instead of
# unpacking one argument per training row into chain(*...).
unique_mids_in_hist = Counter(
    itertools.chain.from_iterable(
        hist.split("\x02") for hist in train_df.hist_mids))

In [15]:
# Distinct target mids, distinct history mids, and how many ids the two
# sets share.
mids_in_both = np.intersect1d(list(unique_mids.keys()),
                              list(unique_mids_in_hist.keys()))
print(len(unique_mids), len(unique_mids_in_hist), len(mids_in_both))

261895 367788 261701


**Most mids also appear in the history mids.**

In [16]:
# Share of the validation split's distinct mids that also occur as a target
# mid in the training split, expressed as a percentage.
valid_mid_values = valid_df.mid.unique()
mids_seen_in_train = np.intersect1d(train_df.mid.unique(), valid_mid_values)
mid_overlap_pct = 100 * len(mids_seen_in_train) / len(valid_mid_values)
print("There are {}% mid overlap between train and valid".format(mid_overlap_pct))

There are 86.27769709405354% mid overlap between train and valid


In [17]:
# Share of the validation split's distinct categories that also occur as a
# target category in the training split, expressed as a percentage.
# The message previously said "mid" although the `cat` column is measured.
print("There are {}% cat overlap between train and valid".format(
    100 * len(np.intersect1d(train_df.cat.unique(), valid_df.cat.unique())) / len(valid_df.cat.unique())))

There are 97.91208791208791% mid overlap between train and valid


# define features

In [18]:
# Encoder for category sequences. The separator is written explicitly as
# "\x02" (assumed DIEN history separator, invisible when rendered -- TODO
# confirm against the raw files) instead of an empty string.
cat_enc = SequenceEncoder(sep="\x02", min_cnt=1, max_len=SEQ_MAX_LEN)

In [19]:
# Fit the category vocabulary on the training histories only; the value
# returned by fit() is left as the cell's displayed result.
cat_enc.fit(train_df['hist_cats'].values)

<prediction_flow.transformers.column.sequence_encoder.SequenceEncoder at 0x12ecaf470>

In [20]:
# Keep the fitted category vocabulary so later encoders can reuse it.
cat_word2idx = cat_enc.word2idx
cat_idx2word = cat_enc.idx2word

In [21]:
# Size of the category vocabulary. In the recorded run this equals the row
# count of the `cat` embedding table in the model summaries printed later.
print(len(cat_word2idx))

1602


In [22]:
# Encoder for item-id sequences. The separator is written explicitly as
# "\x02" (assumed DIEN history separator, invisible when rendered -- TODO
# confirm against the raw files) instead of an empty string.
mid_enc = SequenceEncoder(sep="\x02", min_cnt=1, max_len=SEQ_MAX_LEN)

In [23]:
# Fit one vocabulary over both the target mids and the history mids of the
# training split. np.vstack stacks the two columns into a 2-row array.
# NOTE(review): this relies on SequenceEncoder.fit accepting a 2-D array --
# confirm against the library.
mid_fit_input = np.vstack([train_df['mid'].values, train_df['hist_mids'].values])
mid_enc.fit(mid_fit_input)

<prediction_flow.transformers.column.sequence_encoder.SequenceEncoder at 0x133284eb8>

In [24]:
# Keep the fitted item-id vocabulary so later encoders can reuse it.
mid_word2idx = mid_enc.word2idx
mid_idx2word = mid_enc.idx2word

In [25]:
# Size of the item-id vocabulary. In the recorded run this equals the row
# count of the `mid` embedding table in the model summaries printed later.
print(len(mid_word2idx))

367984


In [26]:
# Feature definitions for the models without negative sampling.
# Every encoder reuses the vocabularies fitted above, so a target id and the
# same id inside a history sequence map to the same index.
number_features = []

category_features = [
    Category('mid', CategoryEncoder(min_cnt=1, word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Category('cat', CategoryEncoder(min_cnt=1, word2idx=cat_word2idx, idx2word=cat_idx2word)),
]

# The separator is written explicitly as "\x02" (assumed DIEN history
# separator, invisible when rendered -- TODO confirm against the raw files)
# instead of an empty string.
sequence_features = [
    Sequence('hist_mids', SequenceEncoder(sep="\x02", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Sequence('hist_cats', SequenceEncoder(sep="\x02", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=cat_word2idx, idx2word=cat_idx2word))
]

# NOTE(review): the trailing positional 4 is presumably the number of
# data-loader workers -- confirm against create_dataloader_fn's signature.
features, train_loader, valid_loader = create_dataloader_fn(
    number_features, category_features, sequence_features, BATCH_SIZE, train_df, 'label', valid_df, 4)

In [27]:
def evaluation(df, dataloader):
    """Return the ROC AUC of the current global ``model`` on one split.

    The function reads the module-level name ``model`` (it is not a
    parameter), so it scores whichever model that name is bound to when
    the call happens.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``label`` column holds the ground truth.
    dataloader
        Loader passed to ``predict``. Its rows must be in the same order
        as ``df`` for the scores to line up with the labels.

    Returns
    -------
    float
        Value returned by ``roc_auc_score``.
    """
    predicted = predict(model, dataloader).ravel()
    return roc_auc_score(df['label'], predicted)

In [28]:
# History sequences are declared to reference the embeddings of their
# target-side counterparts.
embedding_ref = EmbeddingRef(
    {'hist_mids': 'mid', 'hist_cats': 'cat'})


def _history_attention_groups(**group_options):
    """Build the one-group attention spec shared by DIN and the DIEN variants.

    Each call returns a fresh single-element list whose group pairs the
    target ``mid``/``cat`` with the ``hist_mids``/``hist_cats`` sequences.
    ``group_options`` are forwarded unchanged to ``AttentionGroup`` (used
    here only for ``gru_type``).
    """
    return [
        AttentionGroup(
            name='group1',
            pairs=[{'ad': 'mid', 'pos_hist': 'hist_mids'},
                   {'ad': 'cat', 'pos_hist': 'hist_cats'}],
            hidden_layers=[80, 40], att_dropout=0.0, **group_options)]


din_attention_groups = _history_attention_groups()
gru_attention_groups = _history_attention_groups(gru_type='GRU')
aigru_attention_groups = _history_attention_groups(gru_type='AIGRU')
agru_attention_groups = _history_attention_groups(gru_type='AGRU')
augru_attention_groups = _history_attention_groups(gru_type='AUGRU')

# Keyword arguments that every model below receives unchanged.
_shared_model_kwargs = dict(
    final_activation='sigmoid', dropout=DNN_DROPOUT, embedding_ref=embedding_ref)

# Construction order is kept: DNN, WideDeep, DeepFM, DIN, then DIEN with
# GRU, AIGRU, AGRU and AUGRU. Later cells report scores in this order.
models = [
    DNN(features, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE, **_shared_model_kwargs),

    WideDeep(features,
             wide_features=['mid', 'hist_mids', 'cat', 'hist_cats'],
             deep_features=['mid', 'hist_mids', 'cat', 'hist_cats'],
             cross_features=[('mid', 'hist_mids'), ('cat', 'hist_cats')],
             num_classes=2, embedding_size=MBEDDING_DIM, hidden_layers=DNN_HIDDEN_SIZE,
             **_shared_model_kwargs),

    DeepFM(features, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE, **_shared_model_kwargs),

    DIN(features, din_attention_groups, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
        **_shared_model_kwargs),
]
models += [
    DIEN(features, attention_groups, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
         **_shared_model_kwargs)
    for attention_groups in (gru_attention_groups, aigru_attention_groups,
                             agru_attention_groups, augru_attention_groups)
]

In [29]:
# Train every model in `models` in turn and record its validation AUC.
# scores[i] belongs to models[i].
scores = []
# The loop variable must be named `model`: evaluation() reads that global
# name instead of taking the model as an argument.
for model in models:
    print(model)
    # Fresh loss, optimizer and scheduler per model, so nothing carries over
    # from one model to the next. BCELoss matches the models'
    # final_activation='sigmoid'.
    loss_func = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    # Halves the learning rate (factor=0.5) once the monitored value stops
    # decreasing (mode='min') for more than `patience` scheduler steps.
    # NOTE(review): how often fit() steps the scheduler is not visible here.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer, mode='min', factor=0.5, patience=5)
    
    # NOTE(review): auxiliary_loss_rate presumably weights DIEN's auxiliary
    # loss, and notebook=True presumably selects notebook progress widgets --
    # confirm against prediction_flow's fit().
    fit(EPOCH, model, loss_func, optimizer,
        train_loader, valid_loader, scheduler, notebook=True, auxiliary_loss_rate=1)
    
    scores.append(evaluation(valid_df, valid_loader))

DNN(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (pooling:hist_mids): MaxPooling()
  (pooling:hist_cats): MaxPooling()
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
      (batchnorm0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation0): ReLU()
      (dense1): Linear(in_features=200, out_features=80, bias=True)
      (batchnorm1): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation1): ReLU()
    )
  )
  (final_layer): Linear(in_features=80, out_features=1, bias=True)
)


HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

WideDeep(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (pooling:hist_mids): SumPooling()
  (pooling:hist_cats): SumPooling()
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
      (batchnorm0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation0): ReLU()
      (dense1): Linear(in_features=200, out_features=80, bias=True)
      (batchnorm1): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation1): ReLU()
    )
  )
  (final_layer): Linear(in_features=188, out_features=1, bias=True)
)


HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

DeepFM(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (pooling:hist_mids): MaxPooling()
  (pooling:hist_cats): MaxPooling()
  (fm): FM()
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
      (batchnorm0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation0): ReLU()
      (dense1): Linear(in_features=200, out_features=80, bias=True)
      (batchnorm1): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation1): ReLU()
    )
  )
  (final_layer): Linear(in_features=81, out_features=1, bias=True)
)


HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

DIN(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (attention_pooling:group1): Attention(
    (mlp): MLP(
      (_sequential): Sequential(
        (dense0): Linear(in_features=144, out_features=80, bias=True)
        (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation0): PReLU(num_parameters=1)
        (dense1): Linear(in_features=80, out_features=40, bias=True)
        (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation1): PReLU(num_parameters=1)
      )
    )
    (fc): Linear(in_features=40, out_features=1, bias=True)
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)
      (batchnorm0): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (activation0): PReLU(num_parameters=1)
      (dense1): Linea

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

DIEN(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (interest_evolution): GRU(36, 36, batch_first=True)
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=1)
          (dense1): Linear(in_features=80, out_features=40, bias=True)
          (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation1): PReLU(num_parameters=1)
        )
      )
      (fc): Linear(in_features=40, out_features=1, bias=True)
    )
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

DIEN(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=1)
          (dense1): Linear(in_features=80, out_features=40, bias=True)
          (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation1): PReLU(num_parameters=1)
        )
      )
      (fc): Linear(in_features=40, out_features=1, bias=True)
    )
    (interest_evolution): GRU(36, 36, batch_first=True)
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_features=200, bias=True)

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

DIEN(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=1)
          (dense1): Linear(in_features=80, out_features=40, bias=True)
          (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation1): PReLU(num_parameters=1)
        )
      )
      (fc): Linear(in_features=40, out_features=1, bias=True)
    )
    (interest_evolution): DynamicGRU(
      (rnn): AttentionGRUCell()
    )
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72, out_feat

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

DIEN(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_parameters=1)
          (dense1): Linear(in_features=80, out_features=40, bias=True)
          (batchnorm1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation1): PReLU(num_parameters=1)
        )
      )
      (fc): Linear(in_features=40, out_features=1, bias=True)
    )
    (interest_evolution): DynamicGRU(
      (rnn): AttentionUpdateGateGRUCell()
    )
  )
  (mlp): MLP(
    (_sequential): Sequential(
      (dense0): Linear(in_features=72

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

In [30]:
# Validation AUCs in the order of `models`: DNN, WideDeep, DeepFM, DIN,
# DIEN(GRU), DIEN(AIGRU), DIEN(AGRU), DIEN(AUGRU).
print(scores)

[0.6959951799469094, 0.6801293430003456, 0.6877024650842773, 0.7230858916723679, 0.7165479448511594, 0.7239262178293835, 0.7261833117355622, 0.7041949819671834]


In [31]:
# Feature definitions for DIEN with negative sampling: the same features as
# before plus the negative history sequences. This rebinds `features`,
# `train_loader` and `valid_loader`.
number_features = []

category_features = [
    Category('mid', CategoryEncoder(min_cnt=1, word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Category('cat', CategoryEncoder(min_cnt=1, word2idx=cat_word2idx, idx2word=cat_idx2word)),
]

# The separator is written explicitly as "\x02" (assumed DIEN history
# separator, invisible when rendered -- TODO confirm against the raw files)
# instead of an empty string.
sequence_features = [
    Sequence('hist_mids', SequenceEncoder(sep="\x02", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Sequence('hist_cats', SequenceEncoder(sep="\x02", min_cnt=1, max_len=SEQ_MAX_LEN,
                                          word2idx=cat_word2idx, idx2word=cat_idx2word)),
    Sequence('neg_hist_mids', SequenceEncoder(sep="\x02", min_cnt=1, max_len=SEQ_MAX_LEN,
                                              word2idx=mid_word2idx, idx2word=mid_idx2word)),
    Sequence('neg_hist_cats', SequenceEncoder(sep="\x02", min_cnt=1, max_len=SEQ_MAX_LEN,
                                              word2idx=cat_word2idx, idx2word=cat_idx2word))
]

# NOTE(review): the trailing positional 4 is presumably the number of
# data-loader workers -- confirm against create_dataloader_fn's signature.
features, train_loader, valid_loader = create_dataloader_fn(
    number_features, category_features, sequence_features, BATCH_SIZE, train_df, 'label', valid_df, 4)

In [32]:
# AUGRU attention group whose pairs also name the negative history sequence
# for each target feature.
_pairs_with_neg = [
    {'ad': 'mid', 'pos_hist': 'hist_mids', 'neg_hist': 'neg_hist_mids'},
    {'ad': 'cat', 'pos_hist': 'hist_cats', 'neg_hist': 'neg_hist_cats'},
]
_augru_group_with_neg = AttentionGroup(
    name='group1',
    pairs=_pairs_with_neg,
    hidden_layers=[80, 40], att_dropout=0.0, gru_type='AUGRU')
augru_attention_groups_with_neg = [_augru_group_with_neg]

In [33]:
# Rebind `models` to the single DIEN variant trained with negative sampling.
# `embedding_ref` is the object created in the earlier model-definition cell.
_dien_with_neg = DIEN(
    features, augru_attention_groups_with_neg, 2, MBEDDING_DIM, DNN_HIDDEN_SIZE,
    final_activation='sigmoid', dropout=DNN_DROPOUT, use_negsampling=True,
    embedding_ref=embedding_ref)
models = [_dien_with_neg]

In [34]:
# Train the negative-sampling DIEN and record its validation AUC. This resets
# `scores`, so the earlier results are no longer held in that name.
scores = []
# The loop variable must be named `model`: evaluation() reads that global
# name instead of taking the model as an argument.
for model in models:
    print(model)
    # BCELoss matches the model's final_activation='sigmoid'.
    loss_func = nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-5)
    # Halves the learning rate (factor=0.5) once the monitored value stops
    # decreasing (mode='min') for more than `patience` scheduler steps.
    # NOTE(review): how often fit() steps the scheduler is not visible here.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer, mode='min', factor=0.5, patience=5)
    
    # NOTE(review): auxiliary_loss_rate presumably weights DIEN's auxiliary
    # loss, and notebook=True presumably selects notebook progress widgets --
    # confirm against prediction_flow's fit().
    fit(EPOCH, model, loss_func, optimizer,
        train_loader, valid_loader, scheduler, notebook=True, auxiliary_loss_rate=1)
    
    scores.append(evaluation(valid_df, valid_loader))

DIEN(
  (embedding:mid): Embedding(367984, 18, padding_idx=0)
  (embedding:cat): Embedding(1602, 18, padding_idx=0)
  (embedding:neg_hist_mids): Embedding(367984, 18, padding_idx=0)
  (embedding:neg_hist_cats): Embedding(1602, 18, padding_idx=0)
  (attention_pooling:group1): Interest(
    (interest_extractor): GRU(36, 36, batch_first=True)
    (auxiliary_net): AuxiliaryNet(
      (_sequential): Sequential(
        (dense0): Linear(in_features=72, out_features=50, bias=True)
        (activation0): Sigmoid()
        (dense1): Linear(in_features=50, out_features=20, bias=True)
        (activation1): Sigmoid()
        (final_layer): Linear(in_features=20, out_features=1, bias=True)
      )
    )
    (attention): Attention(
      (mlp): MLP(
        (_sequential): Sequential(
          (dense0): Linear(in_features=144, out_features=80, bias=True)
          (batchnorm0): BatchNorm1d(80, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (activation0): PReLU(num_paramet

HBox(children=(IntProgress(value=0, description='training routine', max=1, style=ProgressStyle(description_wid…

HBox(children=(IntProgress(value=0, description='train', max=8486, style=ProgressStyle(description_width='init…

HBox(children=(IntProgress(value=0, description='valid', max=947, style=ProgressStyle(description_width='initi…

In [35]:
# Validation AUC of the single model trained above: DIEN with AUGRU and
# use_negsampling=True.
print(scores)

[0.7231377737568615]
