**About :** Trains a matrix-factorization embedding model on lagged session items and saves the L2-normalized embeddings.

**TODO**:

In [None]:
cd ../src

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os

# Expose only GPU 0 to CUDA. This is set before cudf / torch are imported in the cells below.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
import os
import gc
import sys
import cudf
import json
import glob
import pickle
import warnings
import itertools
import numpy as np
import pandas as pd

from tqdm import tqdm
from datetime import datetime
from collections import Counter
from numerize.numerize import numerize

from merlin.io import Dataset
from torch.optim import SparseAdam
from merlin.loader.torch import Loader

# Silence FutureWarning messages for the rest of the notebook.
warnings.simplefilter(action="ignore", category=FutureWarning)

In [None]:
from params import *

from utils.load import load_sessions
from utils.metrics import get_coverage

### Params

In [None]:
# Data split to work on: "val" or "test". It selects the input parquet folders
# and is embedded in the names of the files written by this notebook.
MODE = "test"

In [None]:
# Glob patterns of the parquet folders to load, keyed by the supported MODE values.
_PATTERNS_BY_MODE = {
    "val": ("../output/full_train_parquet/*", "../output/val_parquet/*"),
    "test": ("../output/full_train_val_parquet/*", "../output/test_parquet/*"),
}

if MODE not in _PATTERNS_BY_MODE:
    raise NotImplementedError

# Concatenate the matches of both patterns, keeping the pattern order.
files = []
for _pattern in _PATTERNS_BY_MODE[MODE]:
    files += glob.glob(_pattern)

In [None]:
# Cached output of this cell; the preprocessing below is skipped when it already exists.
proc_path = f"../output/matrix_factorization/train-proc-1_{MODE}.parquet"

if not os.path.exists(proc_path):
    # Create the output folder up front so a missing directory fails here
    # and not after the expensive processing.
    os.makedirs(os.path.dirname(proc_path), exist_ok=True)

    # NOTE(review): `train` exposes `.to_pandas()` below, so load_sessions
    # presumably returns a cudf DataFrame - confirm.
    train = load_sessions(files)

    train = train.sort_values(["session", "ts"], ascending=[True, True]).reset_index(
        drop=True
    )

    # Gap to the previous event of the same session, clipped to [0, 24 * 60 * 60].
    # The first event of a session has no predecessor and is compared against 0.
    # NOTE(review): the int32 cast and the constants suggest `ts` is in seconds - confirm.
    train["ts_diff"] = (
        train.groupby("session")["ts"].shift(1).fillna(0).astype("int32")
    )
    train["ts_diff"] = train["ts"] - train["ts_diff"]
    train["ts_diff"] = train["ts_diff"].clip(0, 24 * 60 * 60)

    # Binarise the gap: 1 where it is at least 2 * 60 * 60, else 0.
    train.loc[train.ts_diff < 2 * 60 * 60, "ts_diff"] = 0
    train.loc[train.ts_diff >= 2 * 60 * 60, "ts_diff"] = 1

    # Running count of large gaps inside a session, turned into a global
    # sub-session id by combining it with the session id.
    # NOTE(review): `session * 128 + subses` can collide between neighbouring
    # sessions if a session ever accumulates 129 or more flagged gaps - verify
    # against the data before relying on it.
    train["subses"] = train.groupby("session")["ts_diff"].cumsum()
    train["subses"] = (train["session"] * 128 + train["subses"]).factorize()[0]

    # lag0 is the current aid, lagK the aid K events earlier in the same
    # sub-session; -1 marks a missing predecessor.
    for lag in range(11):
        train[f"lag{lag}"] = (
            train.groupby("subses")["aid"].shift(lag).fillna(-1).astype("int32")
        )

    del train["ts_diff"], train["aid"]

    # Keep only rows of type 0 that have at least one previous event (lag1) in
    # their sub-session.
    train = train.loc[(train["type"] == 0) & (train["lag1"] >= 0)].reset_index(
        drop=True
    )
    del train["type"]

    # Time bucket relative to the earliest remaining timestamp, in steps of
    # 8 * 60 * 60, stored as int8.
    train["hour"] = ((train["ts"] - train["ts"].min()) // (8 * 60 * 60)).astype("int8")
    del train["ts"], train["subses"]

    gc.collect()
    train.to_pandas().to_parquet(proc_path)
    gc.collect()

### Utils
- TODO : Cart -> Buy / Buy -> Buy

In [None]:
import torch
import torch.nn as nn
from sklearn.metrics import roc_auc_score

In [None]:
class MatrixFactorization(nn.Module):
    """
    Scores a target id against a row of context ids with a shared embedding table.

    Every context id and the target id are embedded and L2-normalized. The
    per-column cosine similarities between context and target are then mixed
    by a linear layer and squashed with a sigmoid.

    Args:
        n_aids (int, optional): Number of item ids. Defaults to 1855602 + 1.
        n_factors (int, optional): Embedding dimension. Defaults to 32.
        n_extra (int, optional): Extra embedding rows appended after the item
            ids for non-item context ids. Defaults to 127, which together with
            the default n_aids gives the previously hard-coded 1855602 + 128 rows.
        n_feats (int, optional): Number of context columns consumed by the
            linear head. Defaults to 11.
    """

    def __init__(self, n_aids=1855602 + 1, n_factors=32, n_extra=127, n_feats=11):
        super().__init__()
        # The table size now follows n_aids instead of ignoring it.
        self.aid_emb = nn.Embedding(n_aids + n_extra, n_factors, sparse=False)
        self.aid_emb.weight.data.normal_(mean=0.0, std=0.001)
        self.head = nn.Linear(n_feats, 1)

    def forward(self, lags, targets):
        """
        Computes the match probability of each target with its context row.

        Args:
            lags (torch.LongTensor [batch x n_feats]): Context ids.
            targets (torch.LongTensor): One target id per row (batch elements in total).

        Returns:
            torch.Tensor [batch x 1]: Sigmoid scores.
        """
        # Embed each target once and broadcast it over the context columns.
        # This gives the same values as repeating the target ids n_feats times
        # before the lookup, without materialising the repeated embeddings.
        tgt = self.aid_emb(targets.reshape(-1))
        tgt = torch.nn.functional.normalize(tgt, p=2.0, dim=1, eps=1e-12)
        tgt = tgt.unsqueeze(1)

        ctx = self.aid_emb(lags)
        ctx = torch.nn.functional.normalize(ctx, p=2.0, dim=2, eps=1e-12)

        # Cosine similarity per context column.
        sims = (ctx * tgt).sum(2)

        out = self.head(sims)
        return out.sigmoid()

In [None]:
def train_loop(model, iterator, optimizer, clip, device="cuda"):
    """
    Runs one training epoch with in-batch negative sampling.

    Each batch is scored twice: against its own targets (label 1) and against
    the same targets in a random order (label 0). Both halves are optimised
    jointly with binary cross-entropy.

    Args:
        model (nn.Module): Called as model(data, target_ids), returns probabilities.
        iterator (sized iterable): Yields (data, target) tensor pairs; target[:, 0] is used.
        optimizer (torch.optim.Optimizer): Optimizer stepped once per batch.
        clip (float): Unused, gradient clipping is disabled.
        device (str, optional): Device batches are moved to. Defaults to "cuda".

    Returns:
        float: Mean batch loss over the epoch.
    """
    bce = nn.BCELoss().to(device)
    model.train()
    gc.collect()

    running_loss = 0.0
    with tqdm(enumerate(iterator), total=len(iterator), miniters=100) as pbar:
        for step, (lags, positives) in pbar:
            lags = lags.to(device)
            positives = positives.to(device)

            optimizer.zero_grad()
            pos_scores = model(lags, positives[:, 0])

            # Negatives: the batch targets, randomly re-assigned to other rows.
            negatives = positives[torch.randperm(positives.shape[0])]
            neg_scores = model(lags, negatives[:, 0])

            scores = torch.cat([pos_scores, neg_scores])
            labels = torch.cat(
                [torch.ones_like(pos_scores), torch.zeros_like(neg_scores)]
            )
            loss = bce(scores, labels)

            # Drop the batch references before the backward pass.
            del lags, positives, negatives

            loss.backward()
            # Gradient clipping is intentionally not applied (see `clip`).
            optimizer.step()

            running_loss += loss.item()
            pbar.set_description(f"Loss {running_loss / (step + 1):.5f}")

    gc.collect()
    return running_loss / len(iterator)


def valid_loop(model, iterator, device="cuda", seed=0):
    """
    Evaluates the model with in-batch negatives and reports loss and AUC.

    Each batch is scored against its own targets (label 1) and against the same
    targets in a shuffled order (label 0). The shuffles come from a dedicated,
    seeded generator. Repeated calls over the same non-shuffled iterator then
    see the same negatives, which keeps the metrics comparable between epochs
    and leaves the global RNG stream used for training untouched.

    Args:
        model (nn.Module): Called as model(data, target_ids), returns probabilities.
        iterator (sized iterable): Yields (data, target) tensor pairs; target[:, 0] is used.
        device (str, optional): Device batches are moved to. Defaults to "cuda".
        seed (int or None, optional): Seed of the negative-sampling generator.
            None falls back to the global RNG. Defaults to 0.

    Returns:
        float: Mean batch loss.
        float: ROC AUC over all positive and negative scores.
    """
    ypred = []
    ytarget = []
    epoch_loss = 0.0

    generator = None
    if seed is not None:
        generator = torch.Generator()
        generator.manual_seed(seed)

    model.eval()
    criterion = nn.BCELoss().to(device)
    with torch.no_grad():
        with tqdm(enumerate(iterator), total=len(iterator), miniters=50) as pbar:
            for i, (data, target) in pbar:
                data, target = data.to(device), target.to(device)

                output_pos = model(data, target[:, 0])

                # Shuffle target to build negative samples
                perm = torch.randperm(target.shape[0], generator=generator)
                target = target[perm]
                output_neg = model(data, target[:, 0])

                outputs = torch.cat([output_pos, output_neg])
                targets = torch.cat(
                    [torch.ones_like(output_pos), torch.zeros_like(output_neg)]
                )
                loss = criterion(outputs, targets)

                ypred.append(outputs.cpu().numpy())
                ytarget.append(targets.cpu().numpy())
                del data, target

                epoch_loss += loss.item()
                cumloss = epoch_loss / (i + 1)
                pbar.set_description(f"Loss {cumloss:.5f}")

    ypred = np.concatenate(ypred)
    ytarget = np.concatenate(ytarget)
    gc.collect()

    auc = roc_auc_score(ytarget.flatten(), ypred.flatten())

    return epoch_loss / len(iterator), auc

### Main

In [None]:
# Load the cached lag table written by the preprocessing cell.
train = pd.read_parquet(f"../output/matrix_factorization/train-proc-1_{MODE}.parquet")

# Missing lags are stored as -1; clamp them to 0 so every lag is a valid embedding index.
# NOTE(review): padding therefore shares the embedding row of id 0 - confirm this is intended.
for col in [f"lag{k}" for k in range(1, 11)]:
    train[col] = train[col].clip(lower=0)

# Move the hour ids past the item id range (1855602 + 1 ids) so both share one embedding table.
train["hour"] = train["hour"].astype("int32") + 1855602 + 1

In [None]:
%load_ext lab_black

In [None]:
# Sessions with session % 400 == 11 are held out for validation.
holdout = (train.session % 400) == 11
valid = train.loc[holdout].copy().reset_index()
train = train.loc[~holdout].reset_index()

# Model inputs: lag1 .. lag10 followed by the hour id (11 columns, in this order).
feature_cols = [f"lag{k}" for k in range(1, 11)] + ["hour"]

TRAIN = train[feature_cols].values.copy()
VALID = valid[feature_cols].values.copy()

# The id to predict is the current item (lag0), kept as a single-column 2D array.
TRAIN_TARGET = train[["lag0"]].values.copy()
VALID_TARGET = valid[["lag0"]].values.copy()

# Only the numpy copies are needed from here on.
del train, valid, holdout
gc.collect()

In [None]:
# 128-dimensional embeddings for 1855602 + 1 item ids, moved to the GPU.
model = MatrixFactorization(n_aids=1855602 + 1, n_factors=128)
model = model.to("cuda")

# Adam over all model parameters.
optimizer = torch.optim.Adam(model.parameters(), lr=2.5e-5)

In [None]:
# lags = (1855603 * torch.rand(9, 11)).long().to("cuda")
# targets = (1855603 * torch.rand(9)).long().to("cuda")
# model(lags, targets)

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Batch size: 32 * 32 * 32 = 32768 rows.
BS = 32 * 32 * 32

# Training data: shuffled every epoch, incomplete last batch dropped.
train_ds = TensorDataset(
    torch.as_tensor(TRAIN).to(torch.long),
    torch.as_tensor(TRAIN_TARGET).to(torch.long),
)
train_dl = DataLoader(
    dataset=train_ds,
    batch_size=BS,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
    drop_last=True,
)

# Validation data: fixed order, every row kept.
valid_ds = TensorDataset(
    torch.as_tensor(VALID).to(torch.long),
    torch.as_tensor(VALID_TARGET).to(torch.long),
)
valid_dl = DataLoader(
    dataset=valid_ds,
    batch_size=BS,
    shuffle=False,
    num_workers=4,
    pin_memory=True,
    drop_last=False,
)

In [None]:
EPOCHS = 15

for epoch in range(1, EPOCHS + 1):
    # One pass over the training data; the returned mean training loss is not used.
    train_loop(model, train_dl, optimizer, 1000.0)

    valloss, auc = valid_loop(model, valid_dl)

    # Checkpoint after every epoch; the validation AUC is part of the file name.
    checkpoint_path = (
        f"../output/matrix_factorization/model_giba_{epoch}_{auc:.4f}_v10.pt"
    )
    torch.save(model.state_dict(), checkpoint_path)

    # The reported loss is the validation loss.
    print(f"Epoch {epoch:02d}/{EPOCHS:02d} \t loss={valloss:.3f} \t val_auc={auc:.4f}")

In [None]:
# Destination of the exported embedding matrix.
embed_path = f"../output/matrix_factorization/embed_giba_{MODE}.npy"

# L2-normalize every row of the embedding table and bring it to host memory.
with torch.no_grad():
    embeddings = (
        torch.nn.functional.normalize(model.aid_emb.weight, p=2.0, dim=1, eps=1e-12)
        .detach()
        .cpu()
        .numpy()
    )

np.save(embed_path, embeddings)
print(f"Saved embeddings of shape {embeddings.shape} to " + embed_path)

Done