In [7]:
%matplotlib inline
import torch
import torchvision
from IPython import display
from matplotlib import pyplot as plt
import numpy as np
import random
import tqdm
import sys
sys.path.insert(0, '../../')
from utils import data_utils
from model import dfm

roc_auc_score

In [19]:
data_set = data_utils.AvazuDataSet('../../data/avazu_sample.txt')
feat_dims = data_set.get_feat_dims()
feat_dims

[999,
 1,
 4,
 2,
 122,
 109,
 10,
 75,
 15,
 7,
 127,
 909,
 341,
 4,
 3,
 105,
 3,
 3,
 68,
 4,
 27,
 54,
 21]

In [21]:
### Train
def run_train(model, optimizer, data_loader, criterion, device, log_interval=1000):
    model.train()
    total_loss = 0
    for i, (fields, target) in enumerate(tqdm.tqdm(data_loader, smoothing=0, mininterval=1.0)):
        fields, target = fields.to(device), target.to(device)
        y = model(fields)
        loss = criterion(y, target.float())
        model.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        if (i + 1) % log_interval == 0:
            print('    - loss:', total_loss / log_interval)
            total_loss = 0

def run_test(model, data_loader, device):
    model.eval()
    targets, predicts = list(), list()
    with torch.no_grad():
        for fields, target in tqdm.tqdm(data_loader, smoothing=0, mininterval=1.0):
            fields, target = fields.to(device), target.to(device)
            y = model(fields)
            targets.extend(target.tolist())
            predicts.extend(y.tolist())
    return roc_auc_score(targets, predicts)

In [22]:
train_length = int(len(data_set) * 0.8)
valid_length = int(len(data_set) * 0.1)
test_length = len(data_set) - train_length - valid_length
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(
    data_set, (train_length, valid_length, test_length))
print(len(train_dataset), len(valid_dataset), len(test_dataset))

799 99 101


In [24]:
# config
config = {
    'batch_size': 32,
    'lr':1e-3,
    'weight_decay': 1e-6,
    'epoch':3,
    'device':'cpu'
}
device = torch.device(config['device'])

In [25]:
train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True,num_workers=8)
valid_data_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=8)
test_data_loader = torch.utils.data.DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=True, num_workers=8)

criterion = torch.nn.BCELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])

In [27]:
model = dfm.DeepFactorizationMachineModel(feat_dims, 16, [16, 16], 0.2)
print(model)

DeepFactorizationMachineModel(
  (embedding): FeaturesEmbedding(
    (embedding): Embedding(3013, 16)
  )
  (linear): FeaturesLinear(
    (fc): Embedding(3013, 1)
  )
  (fm): FactorizationMachine()
  (mlp): MultiLayerPerceptron(
    (mlp): Sequential(
      (0): Linear(in_features=368, out_features=16, bias=True)
      (1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU()
      (3): Dropout(p=0.2, inplace=False)
      (4): Linear(in_features=16, out_features=16, bias=True)
      (5): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (6): ReLU()
      (7): Dropout(p=0.2, inplace=False)
      (8): Linear(in_features=16, out_features=1, bias=True)
    )
  )
)


In [28]:
for epoch_i in range(config['epoch']):
    run_train(model, optimizer, train_data_loader, criterion, device)
    auc = run_test(model, valid_data_loader, device)
    print('epoch:', epoch_i, 'validation: auc:', auc)

100%|██████████| 25/25 [00:01<00:00, 21.98it/s]
100%|██████████| 4/4 [00:00<00:00, 25.48it/s]


NameError: name 'roc_auc_score' is not defined