In [1]:
import os

# Configure the root logger at INFO level so that library log records
# (such as the numba and nvtabular INFO lines seen in later cell outputs)
# are displayed in the notebook.
import logging
logging.basicConfig(level=logging.INFO)

import cudf
import dask_cudf
import nvtabular as nvt
from nvtabular import dataset as ds

In [2]:
# Factory for the Dask client used by the dataset transform in the next cell.
# Only GPU 0 is listed; the returned object also exposes `part_size`, which the
# transform step reads.
# NOTE(review): "/romeyn/data" is a hard-coded absolute path that is repeated
# when the dataset is created — consider one shared, configurable constant.
dask_client_fn = ds.create_multi_gpu_dask_client_fn(
    gpu_ids=[0],
    part_mem_frac=0.10,
    dashboard_port="8787",
    dask_dir=os.path.join("/romeyn/data", "dask-space"),
)

INFO:numba.cuda.cudadrv.driver:init


In [3]:
# Create the Criteo dataset object rooted at the data directory, handing it the
# Dask client factory built in the previous cell.
# NOTE(review): "/romeyn/data" is a hard-coded absolute path (also used for the
# Dask scratch directory) — consider a single configurable constant.
dataset = ds.Criteo("/romeyn/data", client_fn=dask_client_fn)
# The partition size comes from the client factory. The log line printed by this
# cell suggests a previously cached transform is reused when available.
data = dataset.transform(part_size=dask_client_fn.part_size, for_training=True)

# Display the result: a namespace with `train` and `eval` output directories.
data

INFO:nvtabular:Loading Transforming dataset from cache...


namespace(train='/romeyn/data/criteo/transformed/394830a4d1d4ecdacdf28690de306e42_12490ee037e0aaafc0cc37e584695fb8',
          eval='/romeyn/data/criteo/transformed/110888a6405644d371f2d19cbda9a35f_12490ee037e0aaafc0cc37e584695fb8')

In [4]:
# The environment variable is set before the imports below.
# NOTE(review): presumably it is read by the TensorFlow data-loading code to cap
# the fraction of GPU memory TensorFlow may claim — confirm, and keep this
# assignment ahead of the imports if so.
os.environ["TF_MEMORY_ALLOCATION"] = "0.7"

import tensorflow as tf
from merlin_models import tf as mtf

In [6]:
# Options shared by the training and evaluation loaders. The batch size matches
# the leading dimension (10000) of the shapes displayed by this cell.
# NOTE(review): `separate_labels=True` presumably yields labels apart from the
# feature dict — confirm against the DataLoader API.
dataloader_kwargs = {"batch_size": 10000, "separate_labels": True}

# Build one loader per transformed split, training first and then evaluation.
train_data, eval_data = (
    mtf.DataLoader.from_directory(split_dir, **dataloader_kwargs)
    for split_dir in (data.train, data.eval)
)

# Display the per-column tensor shapes of a training batch.
train_data.output_shapes

{'C1': TensorShape([10000, 1]),
 'C2': TensorShape([10000, 1]),
 'C3': TensorShape([10000, 1]),
 'C4': TensorShape([10000, 1]),
 'C5': TensorShape([10000, 1]),
 'C6': TensorShape([10000, 1]),
 'C7': TensorShape([10000, 1]),
 'C8': TensorShape([10000, 1]),
 'C9': TensorShape([10000, 1]),
 'C10': TensorShape([10000, 1]),
 'C11': TensorShape([10000, 1]),
 'C12': TensorShape([10000, 1]),
 'C13': TensorShape([10000, 1]),
 'C14': TensorShape([10000, 1]),
 'C15': TensorShape([10000, 1]),
 'C16': TensorShape([10000, 1]),
 'C17': TensorShape([10000, 1]),
 'C18': TensorShape([10000, 1]),
 'C19': TensorShape([10000, 1]),
 'C20': TensorShape([10000, 1]),
 'C21': TensorShape([10000, 1]),
 'C22': TensorShape([10000, 1]),
 'C23': TensorShape([10000, 1]),
 'C24': TensorShape([10000, 1]),
 'C25': TensorShape([10000, 1]),
 'C26': TensorShape([10000, 1]),
 'I1': TensorShape([10000, 1]),
 'I2': TensorShape([10000, 1]),
 'I3': TensorShape([10000, 1]),
 'I4': TensorShape([10000, 1]),
 'I5': TensorShape([100

In [7]:
# MLP blocks with layer widths [64, 16] and [256, 128, 64]. The bottom MLP's
# final width (16) equals the embedding `dim=16` shown in the model repr below.
# NOTE(review): presumably the two must match for the DLRM interaction — confirm.
bottom_mlp = mtf.MLPBlock([64, 16])
top_mlp = mtf.MLPBlock([256, 128, 64])
# Build the DLRM block from the training loader's column group.
dlrm_block = mtf.DLRMBlock.from_column_group(train_data.columns, bottom_mlp, top_mlp=top_mlp)

# The head is derived from the same column group and then attached to the block
# to form the full model.
head = mtf.Head.from_column_group(train_data.columns)
model = dlrm_block.to_model(head=head)

# Display the model structure.
model

DLRMBlockWithHead(
  (block): DLRMBlock(
    (stack_features): MergeTabular(
      (to_merge): List(
        (0): EmbeddingFeatures(
          (convert_to_sparse): AsSparseFeatures()
          (embeddings): Dict(
            (C1): TableConfig(vocabulary_size=9999999, dim=16, initializer=None, optimizer=None, combiner='mean', name='C1')
            (C2): TableConfig(vocabulary_size=29427, dim=16, initializer=None, optimizer=None, combiner='mean', name='C2')
            (C3): TableConfig(vocabulary_size=15127, dim=16, initializer=None, optimizer=None, combiner='mean', name='C3')
            (C4): TableConfig(vocabulary_size=7295, dim=16, initializer=None, optimizer=None, combiner='mean', name='C4')
            (C5): TableConfig(vocabulary_size=19901, dim=16, initializer=None, optimizer=None, combiner='mean', name='C5')
            (C6): TableConfig(vocabulary_size=3, dim=16, initializer=None, optimizer=None, combiner='mean', name='C6')
            (C7): TableConfig(vocabulary_size=6465, 