In [1]:
!pip install deepctr-torch
import pandas as pd
import torch
from sklearn.metrics import log_loss, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler

from deepctr_torch.inputs import SparseFeat, DenseFeat, get_feature_names
from deepctr_torch.models import *

Collecting deepctr-torch
[?25l  Downloading https://files.pythonhosted.org/packages/e7/6b/65430c8704c8608e939085a83a4997a0b60b286ab43f0f393176bfc239e1/deepctr_torch-0.2.6-py3-none-any.whl (63kB)
[K     |█████▏                          | 10kB 26.8MB/s eta 0:00:01[K     |██████████▎                     | 20kB 33.0MB/s eta 0:00:01[K     |███████████████▍                | 30kB 20.3MB/s eta 0:00:01[K     |████████████████████▌           | 40kB 16.5MB/s eta 0:00:01[K     |█████████████████████████▊      | 51kB 14.7MB/s eta 0:00:01[K     |██████████████████████████████▉ | 61kB 13.2MB/s eta 0:00:01[K     |████████████████████████████████| 71kB 6.8MB/s 
Installing collected packages: deepctr-torch
Successfully installed deepctr-torch-0.2.6


In [2]:
from deepctr_torch.layers import *
from deepctr_torch.inputs import *

In [3]:
from deepctr_torch.models.basemodel import BaseModel

In [4]:
class DeepAFM(BaseModel):
    """CTR model that sums a linear term with optional FM, DNN and attentional-FM terms.

    The logit starts from ``self.linear_model(X)``; each enabled component adds
    its own logit, and the sum is passed through ``self.out`` (provided by
    ``BaseModel``) to produce the prediction.

    Args:
        linear_feature_columns: feature columns handed to ``BaseModel`` for the linear part.
        dnn_feature_columns: feature columns handed to ``BaseModel``; they also size the
            DNN input and are the columns read in ``forward``.
        use_fm: if true, add a plain ``FM`` term over the concatenated sparse embeddings.
        dnn_hidden_units: hidden layer widths of the DNN; an empty sequence (or no
            ``dnn_feature_columns``) disables the DNN branch.
        l2_reg_linear, l2_reg_embedding: forwarded to ``BaseModel``.
        l2_reg_dnn: L2 strength registered for the DNN weights (parameters whose name
            contains ``'bn'`` are skipped) and for the DNN output projection.
        init_std, seed: forwarded to ``BaseModel``; ``init_std`` is also given to the DNN
            and ``seed`` to the AFM layer.
        dnn_dropout, dnn_activation, dnn_use_bn: forwarded to ``DNN``.
        use_attention: if true, add an ``AFMLayer`` term over the sparse embeddings.
        attention_factor, afm_dropout: forwarded to ``AFMLayer``.
        l2_reg_att: forwarded to ``AFMLayer`` and used as the L2 strength for its
            ``attention_W`` parameter.
        task, device, gpus: forwarded to ``BaseModel``; ``device`` is also where the
            sub-layers and finally the whole model are moved.
    """

    def __init__(self, linear_feature_columns, dnn_feature_columns, use_fm=False, dnn_hidden_units=(256, 128),
                 l2_reg_linear=0.00001, l2_reg_embedding=0.00001, l2_reg_dnn=0, init_std=0.0001, seed=1024,
                 dnn_dropout=0, dnn_activation='relu', dnn_use_bn=False, use_attention=True, attention_factor=8,
                 l2_reg_att=1e-5, afm_dropout=0, task='binary', device='cpu', gpus=None):
        super(DeepAFM, self).__init__(linear_feature_columns, dnn_feature_columns, l2_reg_linear=l2_reg_linear,
                                      l2_reg_embedding=l2_reg_embedding, init_std=init_std, seed=seed, task=task,
                                      device=device, gpus=gpus)
        self.use_attention = use_attention
        self.use_fm = use_fm
        self.use_dnn = len(dnn_feature_columns) > 0 and len(dnn_hidden_units) > 0

        if use_fm:
            # Stored under its own attribute: the attentional layer below owns `self.fm`,
            # and sharing one attribute would let one layer overwrite the other when
            # both `use_fm` and `use_attention` are enabled.
            self.plain_fm = FM()

        if self.use_dnn:
            self.dnn = DNN(self.compute_input_dim(dnn_feature_columns), dnn_hidden_units,
                           activation=dnn_activation, l2_reg=l2_reg_dnn, dropout_rate=dnn_dropout,
                           use_bn=dnn_use_bn, init_std=init_std, device=device)
            # Projects the last hidden layer down to a single logit.
            self.dnn_linear = nn.Linear(dnn_hidden_units[-1], 1, bias=False).to(device)

            self.add_regularization_weight(
                filter(lambda x: 'weight' in x[0] and 'bn' not in x[0], self.dnn.named_parameters()),
                l2=l2_reg_dnn)
            self.add_regularization_weight(self.dnn_linear.weight, l2=l2_reg_dnn)

        if use_attention:
            # The attribute name `fm` is kept for the AFM layer so the parameter names
            # of an attention-only model stay the same.
            self.fm = AFMLayer(self.embedding_size, attention_factor, l2_reg_att, afm_dropout, seed, device)
            self.add_regularization_weight(self.fm.attention_W, l2=l2_reg_att)

        self.to(device)

    def forward(self, X):
        """Return the prediction for batch ``X`` as the output layer applied to the summed logits."""
        sparse_embedding_list, dense_value_list = self.input_from_feature_columns(
            X, self.dnn_feature_columns, self.embedding_dict)
        logit = self.linear_model(X)
        # Both interaction layers operate on sparse embeddings only.
        has_sparse = len(sparse_embedding_list) > 0

        if self.use_fm and has_sparse:
            # Plain FM takes a single tensor: the embeddings concatenated along dim 1.
            logit += self.plain_fm(torch.cat(sparse_embedding_list, dim=1))

        if self.use_dnn:
            dnn_input = combined_dnn_input(sparse_embedding_list, dense_value_list)
            logit += self.dnn_linear(self.dnn(dnn_input))

        if self.use_attention and has_sparse:
            # The AFM layer takes the list of embeddings itself, not a concatenated tensor.
            logit += self.fm(sparse_embedding_list)

        return self.out(logit)

In [5]:
# Mount Google Drive so the files under /content/drive can be read and written by later cells.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
# Working directory on the mounted drive (trailing slash required: file names are appended by `+`).
path = '/content/drive/MyDrive/cs547/'

# Column names: 26 categorical fields C1..C26 and 13 numeric fields I1..I13.
sparse_features = ['C' + str(i) for i in range(1, 27)]
dense_features = ['I' + str(i) for i in range(1, 14)]
header_list = dense_features + sparse_features

data = pd.read_csv(path + 'criteo_sampled_data.csv')

# Show how many rows fall into each value of the `label` column.
from collections import Counter
Counter(data['label'])

Counter({0: 446178, 1: 153822})

In [7]:
target = ['label']

# Missing categorical values become their own '-1' category; missing numeric values become 0.
data[sparse_features] = data[sparse_features].fillna('-1')
data[dense_features] = data[dense_features].fillna(0)

# Replace every categorical column by integer ids, fitting a fresh encoder per column.
for feat in sparse_features:
    lbe = LabelEncoder()
    data[feat] = lbe.fit_transform(data[feat])

# Rescale every numeric column to the [0, 1] range.
mms = MinMaxScaler(feature_range=(0, 1))
data[dense_features] = mms.fit_transform(data[dense_features])

In [8]:
# One 4-dimensional embedding per categorical field; the vocabulary size is the largest
# encoded id plus one.
fixlen_feature_columns = [
    SparseFeat(feat, vocabulary_size=data[feat].max() + 1, embedding_dim=4)
    for feat in sparse_features
]
# Each numeric field enters as a single dense value.
fixlen_feature_columns += [DenseFeat(feat, 1) for feat in dense_features]

# The linear part and the deep/interaction parts share the same feature columns.
dnn_feature_columns = fixlen_feature_columns
linear_feature_columns = fixlen_feature_columns

feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

In [9]:
# Model input: a mapping from each feature name to the matching column of `data`.
train_model_input = {name: data[name] for name in feature_names}

In [10]:
# Use the first CUDA device when it is both requested and available; otherwise stay on the CPU.
use_cuda = True
device = 'cuda:0' if use_cuda and torch.cuda.is_available() else 'cpu'
if device != 'cpu':
    print('cuda ready...')

cuda ready...


In [11]:
# Build the model with its default options (DNN + attentional FM, no plain FM) on the chosen device.
model = DeepAFM(
    linear_feature_columns,
    dnn_feature_columns,
    task='binary',
    device=device,
)

In [13]:
# First training stage: Adagrad with learning rate 1e-3, tracking log-loss and AUC.
optim = torch.optim.Adagrad(model.parameters(), lr=1e-3)
model.compile(
    optimizer=optim,
    loss="binary_crossentropy",
    metrics=["binary_crossentropy", "auc"],
)

In [14]:
# Two warm-up epochs; `validation_split=0.2` holds out 20% of the rows for validation.
history = model.fit(
    train_model_input,
    data[target].values,
    batch_size=256,
    epochs=2,
    verbose=2,
    validation_split=0.2,
)

cuda:0
Train on 480000 samples, validate on 120000 samples, 1875 steps per epoch
Epoch 1/2
42s - loss:  0.5305 - binary_crossentropy:  0.5305 - auc:  0.7055 - val_binary_crossentropy:  0.5108 - val_auc:  0.7266
Epoch 2/2
42s - loss:  0.4894 - binary_crossentropy:  0.4894 - auc:  0.7557 - val_binary_crossentropy:  0.4979 - val_auc:  0.7432


In [25]:
# Second training stage: keep training the same model with a 10x smaller learning rate.
# A new Adagrad instance is created, so the optimizer state from the first stage is not reused.
optim = torch.optim.Adagrad(model.parameters(), lr=1e-4)
model.compile(
    optimizer=optim,
    loss="binary_crossentropy",
    metrics=["binary_crossentropy", "auc"],
)
history = model.fit(
    train_model_input,
    data[target].values,
    batch_size=256,
    epochs=20,
    verbose=2,
    validation_split=0.2,
)

cuda:0
Train on 480000 samples, validate on 120000 samples, 1875 steps per epoch
Epoch 1/20
43s - loss:  0.4616 - binary_crossentropy:  0.4616 - auc:  0.7882 - val_binary_crossentropy:  0.4864 - val_auc:  0.7618
Epoch 2/20
43s - loss:  0.4564 - binary_crossentropy:  0.4564 - auc:  0.7950 - val_binary_crossentropy:  0.4862 - val_auc:  0.7621
Epoch 3/20
42s - loss:  0.4533 - binary_crossentropy:  0.4533 - auc:  0.7990 - val_binary_crossentropy:  0.4861 - val_auc:  0.7623
Epoch 4/20
42s - loss:  0.4508 - binary_crossentropy:  0.4508 - auc:  0.8021 - val_binary_crossentropy:  0.4860 - val_auc:  0.7624
Epoch 5/20
42s - loss:  0.4486 - binary_crossentropy:  0.4486 - auc:  0.8047 - val_binary_crossentropy:  0.4860 - val_auc:  0.7625
Epoch 6/20
43s - loss:  0.4466 - binary_crossentropy:  0.4466 - auc:  0.8071 - val_binary_crossentropy:  0.4860 - val_auc:  0.7626
Epoch 7/20
43s - loss:  0.4448 - binary_crossentropy:  0.4448 - auc:  0.8093 - val_binary_crossentropy:  0.4861 - val_auc:  0.7626
Ep

In [None]:
# NOTE(review): this cell has no execution count, so it was not run in the saved session.
# Running it trains the current model for another 20 epochs and replaces `history`.
history = model.fit(
    train_model_input,
    data[target].values,
    batch_size=256,
    epochs=20,
    verbose=2,
    validation_split=0.2,
)

In [27]:
import json

# Save the metrics recorded by the latest `fit` call next to the data.
# The `with` block closes the file, so the JSON is fully written before the cell finishes.
with open(path + 'deepafm.json', 'w') as history_file:
    json.dump(history.history, history_file)

In [None]:
# Display the metrics recorded by the most recent `model.fit` call.
history.history