# **Import packages and open data**

In [1]:
import polars as pl
import numpy as np
from tqdm import tqdm
from scipy.sparse import csr_matrix
# import implicit

RANDOM_STATE = 42
N_PREDICTIONS = 100

In [2]:
import os
import sys
import wandb

import gc

import seaborn as sns
import matplotlib.pyplot as plt

import torch
torch.cuda.empty_cache()

import pytorch_lightning
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import transformers
from transformers import BertModel, BertForMaskedLM, BertConfig, BertTokenizerFast, get_linear_schedule_with_warmup, AdamW

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from typing import Sequence
from functools import partial
from collections import Counter

In [3]:
os.listdir ('/kaggle/input/')

['recsysdata']

In [4]:
pytorch_lightning.seed_everything(56, workers=True)

56

In [5]:
train = pl.read_parquet("/kaggle/input/recsysdata/hh_recsys_train_hh.pq")
test = pl.read_parquet("/kaggle/input/recsysdata/hh_recsys_test_hh.pq")
vac = pl.read_parquet("/kaggle/input/recsysdata/hh_recsys_vacancies.pq")

In [6]:
train_df = train
test_df = test
vac_df = vac

In [7]:
vac_df.shape

(2734129, 13)

In [8]:
vac_df = vac_df[:vac_df.shape[0]//10]

# **Work with vacancies**

In [5]:
vac_df.head()

vacancy_id,name,company.id,description,keySkills.keySkill,compensation.from,compensation.to,compensation.currencyCode,area.id,area.regionId,employment,workSchedule,workExperience
str,str,str,str,list[str],i64,i64,str,str,str,str,str,str
"""v_862116""","""Смотритель муз…","""c_162972""","""<strong>Обязан…","[""Пользователь ПК"", ""Работа в команде"", … ""PR-консультирование""]",16500,,"""RUR""","""a_4761""","""ar_33""","""full""","""fullDay""","""noExperience"""
"""v_288642""","""Ведущий менедж…","""c_208672""","""<p><strong>Воз…","[""Активные продажи"", ""Холодные продажи"", … ""Организация мероприятий""]",50000,,"""RUR""","""a_744""","""ar_2""","""full""","""fullDay""","""noExperience"""
"""v_1840054""","""Бухгалтер (по …","""c_198109""","""<strong>Обязан…",,50000,65000.0,"""RUR""","""a_6223""","""ar_78""","""full""","""fullDay""","""between3And6"""
"""v_2346232""","""Пекарь (Токсов…","""c_6137""","""<p><strong>Для…",,38500,42000.0,"""RUR""","""a_4795""","""ar_51""","""full""","""fullDay""","""noExperience"""
"""v_312507""","""Торговый предс…","""c_206699""","""<p>Компания ТД…","[""Продуктивность"", ""Клиентоориентированность"", … ""Развитие продаж""]",60000,,"""RUR""","""a_6837""","""ar_4""","""full""","""fullDay""","""between1And3"""


In [6]:
vac_df.describe()

statistic,vacancy_id,name,company.id,description,keySkills.keySkill,compensation.from,compensation.to,compensation.currencyCode,area.id,area.regionId,employment,workSchedule,workExperience
str,str,str,str,str,f64,f64,f64,str,str,str,str,str,str
"""count""","""273412""","""273412""","""273412""","""273412""",150175.0,206248.0,123408.0,"""221587""","""273412""","""271270""","""273412""","""273412""","""273412"""
"""null_count""","""0""","""0""","""0""","""0""",123237.0,67164.0,150004.0,"""51825""","""0""","""2142""","""0""","""0""","""0"""
"""mean""",,,,,,72191.681102,102458.696681,,,,,,
"""std""",,,,,,2185000.0,2827500.0,,,,,,
"""min""","""v_1000005""","""""Оператор call…","""c_1""",""" <p align=""ce…",,1.0,10.0,"""BYR""","""a_1""","""ar_0""","""full""","""flexible""","""between1And3"""
"""25%""",,,,,,40000.0,50000.0,,,,,,
"""50%""",,,,,,55000.0,70000.0,,,,,,
"""75%""",,,,,,80000.0,120000.0,,,,,,
"""max""","""v_999983""","""财务总监 Финансовы…","""c_99996""","""❗️❗️ Требуются…",,991788366.0,991788366.0,"""UZS""","""a_999""","""ar_99""","""volunteer""","""shift""","""noExperience"""


In [7]:
vac_df.shape[0]//25_000, vac_df.shape[0]%25_000

(10, 23412)

In [9]:
area_id = sorted(vac_df["area.id"].unique().to_list())
empl = sorted(vac_df["employment"].unique().to_list())
work_sch = sorted(vac_df["workSchedule"].unique().to_list())
work_exp = sorted(vac_df["workExperience"].unique().to_list())
companyid = sorted(vac_df["company.id"].unique().to_list())
area2idx = {area_id[i] : i for i in range(len(area_id))}
empl2idx = {empl[i] : i for i in range(len(empl))}
sch2idx = {work_sch[i] : i for i in range(len(work_sch))}
exp2idx = {work_exp[i] : i for i in range(len(work_exp))}
company2id = {companyid[i] : i for i in range(len(companyid))}
features = pl.DataFrame()
for v in tqdm(range(1, 10)):
    lb = (v-1)*25_000
    rb = v*25_000
    cur = vac_df[lb:rb].drop("name", "compensation.currencyCode", "area.regionId", "description", "keySkills.keySkill")
    cur = cur.with_columns(cur["compensation.from"].fill_null(0).alias("compensation.from"))
    cur = cur.with_columns(cur["compensation.to"].fill_null(0).alias("compensation.to"))
    cur = cur.with_columns(pl.Series("compensation.to", [cur["compensation.to"][i] if cur["compensation.to"][i]!=0 else cur["compensation.from"][i] for i in range(25_000)]))
    cur = cur.with_columns(cur["area.id"].replace(area2idx).cast(int).alias("area.id"))
    cur = cur.with_columns(cur["employment"].replace(empl2idx).cast(int).alias("employment"))
    cur = cur.with_columns(cur["workSchedule"].replace(sch2idx).cast(int).alias("workSchedule"))
    cur = cur.with_columns(cur["workExperience"].replace(exp2idx).cast(int).alias("workExperience"))
    cur = cur.with_columns(cur["company.id"].replace(company2id).cast(int).alias("company.id"))
    features = pl.concat([features, cur])

lb = v*25_000
rb = lb + 23412
cur = vac_df[lb:rb].drop("name", "compensation.currencyCode", "area.regionId", "description", "keySkills.keySkill")
cur = cur.with_columns(cur["compensation.from"].fill_null(0).alias("compensation.from"))
cur = cur.with_columns(cur["compensation.to"].fill_null(0).alias("compensation.to")).with_columns(pl.Series("compensation.to", [cur["compensation.to"][i] if cur["compensation.to"][i]!=0 else cur["compensation.from"][i] for i in range(23_412)]).alias("compensation.to"))
cur = cur.with_columns(cur["area.id"].replace(area2idx).cast(int).alias("area.id"))
cur = cur.with_columns(cur["employment"].replace(empl2idx).cast(int).alias("employment"))
cur = cur.with_columns(cur["workSchedule"].replace(sch2idx).cast(int).alias("workSchedule"))
cur = cur.with_columns(cur["workExperience"].replace(exp2idx).cast(int).alias("workExperience"))
cur = cur.with_columns(cur["company.id"].replace(company2id).cast(int).alias("company.id"))
features = pl.concat([features, cur])
features.head()


100%|██████████| 9/9 [00:02<00:00,  3.55it/s]


vacancy_id,company.id,compensation.from,compensation.to,area.id,employment,workSchedule,workExperience
str,i64,i64,i64,i64,i64,i64,i64
"""v_862116""",20547,16500,16500,2596,0,2,3
"""v_288642""",35568,50000,50000,4183,0,2,3
"""v_1840054""",31975,50000,65000,3606,0,2,1
"""v_2346232""",69501,38500,42000,2621,0,2,3
"""v_312507""",34901,60000,60000,4029,0,2,0


# **Preparing data**

In [10]:
pad_id = 0
unk_id = 1
mask_id = 2
num_special_tokens = 3

n_positions = 256
trim_length = 64

mask_prob = 0.5
accumulation_steps = 4
min_seq_len = 8
min_item_cnt = 8

In [15]:
train_df.shape[0]%50_000

13064

In [11]:
train = pl.DataFrame()
for i in tqdm(range(1, 69)):
    train = pl.concat([train, train_df[(i-1)*50_000:i*50_000].explode(columns=["vacancy_id", "action_type", "action_dt"]).sort("action_dt")])
train = pl.concat([train, train_df[i*50_000:i*50_000+13064].explode(columns=["vacancy_id", "action_type", "action_dt"]).sort("action_dt")])
train.head()

100%|██████████| 68/68 [00:52<00:00,  1.30it/s]


user_id,session_id,vacancy_id,action_type,action_dt
str,str,str,i64,datetime[ns]
"""u_370846""","""s_24698241""","""v_697571""",1,2023-11-01 00:00:00.919
"""u_461521""","""s_7559925""","""v_2514797""",2,2023-11-01 00:00:06.973
"""u_332204""","""s_6570164""","""v_742810""",2,2023-11-01 00:00:12.594
"""u_229153""","""s_23936793""","""v_1411424""",2,2023-11-01 00:00:13.518
"""u_1128173""","""s_14266530""","""v_248154""",2,2023-11-01 00:00:16.114


In [16]:
vacancies = train["vacancy_id"].value_counts().filter((pl.col("count") >= min_item_cnt))["vacancy_id"]
vacancy2id = {}
for vacancy in vacancies:
    vacancy2id[vacancy] = len(vacancy2id)

In [12]:
val_labels = (
    test
    .filter(pl.col("user_id").is_in(train["user_id"]))
    .explode(("vacancy_id", "action_type", "action_dt"))
    .sort("action_dt")
    .filter(pl.col("action_type")==1)
    .group_by(("user_id", "session_id"))
    .agg(pl.col("vacancy_id").first())
    .drop("session_id")
)
print(len(val_labels))
val_labels.head()

26792


user_id,vacancy_id
str,str
"""u_263188""","""v_1649522"""
"""u_923086""","""v_1646563"""
"""u_805160""","""v_246267"""
"""u_698161""","""v_1994688"""
"""u_921533""","""v_1474871"""


In [13]:
val_history = (
    train
    .sort("action_dt")
    .filter(pl.col("user_id").is_in(val_labels["user_id"]))
)
print(len(val_history))
val_history.head()

1834492


user_id,session_id,vacancy_id,action_type,action_dt
str,str,str,i64,datetime[ns]
"""u_229843""","""s_6902690""","""v_2519518""",2,2023-11-01 00:00:00.019
"""u_760080""","""s_6503120""","""v_896765""",2,2023-11-01 00:00:00.557
"""u_597605""","""s_30172193""","""v_382447""",3,2023-11-01 00:00:02.039
"""u_39029""","""s_4994612""","""v_403085""",2,2023-11-01 00:00:04.003
"""u_306107""","""s_33418353""","""v_1221017""",2,2023-11-01 00:00:05.773


In [17]:
val_labels = val_labels.filter(pl.col("vacancy_id").is_in(vacancies))
print(len(val_labels))
val_history = val_history.filter(pl.col("user_id").is_in(val_labels["user_id"]))
print(len(val_history))

13165
798900


In [18]:
val_data = pl.concat([
    val_history.select(("user_id", "vacancy_id", "action_dt")),
    val_labels
], how="diagonal")
val_data = val_data.group_by("user_id").tail(n_positions)

In [63]:
train.shape

(21285044, 5)

In [19]:
train = train.filter(pl.col("vacancy_id").is_in(vac_df["vacancy_id"]))

In [65]:
train.shape

(2081578, 5)

In [20]:
train = train.filter(pl.col("vacancy_id").is_in(train["vacancy_id"].value_counts().filter(pl.col("count") >= min_item_cnt)["vacancy_id"]))
train = train.filter(pl.col("user_id").is_in(train["user_id"].value_counts().filter(pl.col("count") >= min_item_cnt)["user_id"]))
train.head()

user_id,session_id,vacancy_id,action_type,action_dt
str,str,str,i64,datetime[ns]
"""u_757137""","""s_9550840""","""v_513154""",1,2023-11-01 00:03:08.382
"""u_757137""","""s_9550840""","""v_1111908""",1,2023-11-01 00:04:05.463
"""u_1063336""","""s_26465030""","""v_1447898""",2,2023-11-01 00:06:48.406
"""u_1063336""","""s_26465030""","""v_1447898""",1,2023-11-01 00:07:19.600
"""u_845515""","""s_13111736""","""v_2064498""",2,2023-11-01 00:11:14.598


In [90]:
train.shape

(1013297, 5)

In [21]:
val_data = val_data.with_columns(val_data["vacancy_id"].replace(vacancy2id, default=1))
train = train.with_columns(train["vacancy_id"].replace(vacancy2id, default=1))

In [72]:
val_data.head()

user_id,vacancy_id,action_dt
str,i64,datetime[ns]
"""u_196319""",18589,2023-11-10 05:43:23.505
"""u_196319""",1,2023-11-14 11:33:35.645
"""u_196319""",1,2023-11-14 11:34:04.537
"""u_196319""",1,2023-11-14 11:36:37.802
"""u_196319""",44815,


In [91]:
train.head()

user_id,session_id,vacancy_id,action_type,action_dt
str,str,i64,i64,datetime[ns]
"""u_757137""","""s_9550840""",18016,1,2023-11-01 00:03:08.382
"""u_757137""","""s_9550840""",47385,1,2023-11-01 00:04:05.463
"""u_1063336""","""s_26465030""",1558,2,2023-11-01 00:06:48.406
"""u_1063336""","""s_26465030""",1558,1,2023-11-01 00:07:19.600
"""u_845515""","""s_13111736""",15360,2,2023-11-01 00:11:14.598


In [74]:
features.head()

vacancy_id,company.id,compensation.from,compensation.to,area.id,employment,workSchedule,workExperience
str,str,i64,i64,i64,i64,i64,i64
"""v_862116""","""c_162972""",16500,16500,2596,0,2,3
"""v_288642""","""c_208672""",50000,50000,4183,0,2,3
"""v_1840054""","""c_198109""",50000,65000,3606,0,2,1
"""v_2346232""","""c_6137""",38500,42000,2621,0,2,3
"""v_312507""","""c_206699""",60000,60000,4029,0,2,0


# **Dataset**

In [93]:
train.head()

user_id,session_id,vacancy_id,action_type,action_dt
str,str,i64,i64,datetime[ns]
"""u_757137""","""s_9550840""",18016,1,2023-11-01 00:03:08.382
"""u_757137""","""s_9550840""",47385,1,2023-11-01 00:04:05.463
"""u_1063336""","""s_26465030""",1558,2,2023-11-01 00:06:48.406
"""u_1063336""","""s_26465030""",1558,1,2023-11-01 00:07:19.600
"""u_845515""","""s_13111736""",15360,2,2023-11-01 00:11:14.598


In [96]:
train.group_by("user_id").agg(pl.col("vacancy_id")).shape

(59370, 2)

In [22]:
class SpecialTokens:
    def __init__(self, tokens):
        assert len(tokens) == len(set(tokens))
        assert 'pad_token' in tokens
        assert 'mask_token' in tokens
        self.n_tokens = len(tokens)
        for i, x in enumerate(tokens):
            setattr(self, x, i)
    
    def __len__(self):
        return self.n_tokens

In [98]:
train[0]

user_id,session_id,vacancy_id,action_type,action_dt
str,str,i64,i64,datetime[ns]
"""u_757137""","""s_9550840""",18016,1,2023-11-01 00:03:08.382


In [103]:
# datalike = train.group_by("user_id").agg(pl.col("vacancy_id"))

In [106]:
# datalike.head()

user_id,vacancy_id
str,list[i64]
"""u_1022147""","[48322, 48322, … 48322]"
"""u_137725""","[24963, 38643, … 28606]"
"""u_979346""","[17791, 26840, … 4106]"
"""u_1169447""","[8143, 39201, … 16589]"
"""u_939923""","[4241, 48289, … 7957]"


In [57]:
class SadDataset(Dataset):
    def __init__(self,
                 data,
                 force_last_token_mask_prob: float,
                special_tokens: Sequence[str],
                mask_prob: float):
        self.force_last_token_mask_prob = force_last_token_mask_prob
        self.data = data.group_by("user_id").agg(pl.col("vacancy_id"))
        self.special_tokens = SpecialTokens(special_tokens)
        self.mask_prob = mask_prob
        
    def __getitem__(self, index):
        return self._preprocess_sequence(self.data[index])
    def __len__(self):
        return len(self.data)
    def _preprocess_sequence(self, datalike):
        sequence_ids = datalike["vacancy_id"][0]
        input_ids = torch.LongTensor(sequence_ids).clone()
        labels = torch.LongTensor(sequence_ids).clone()
        
        if np.random.random() < self.force_last_token_mask_prob:
            mask = torch.zeros(sequence_ids.shape)
            mask[0] = True
        else:
            mask = torch.rand(sequence_ids.shape)
            mask = (mask < self.mask_prob)
        mask *= (input_ids > len(self.special_tokens)) # don't mask special tokens
        if mask.sum() == 0:
            mask = torch.zeros(sequence_ids.shape)
            mask[0] = True
            mask *= (input_ids > len(self.special_tokens)) # don't mask special tokens
        mask_indices = mask.nonzero().flatten()
        input_ids[mask_indices] = self.special_tokens.mask_token
        labels[input_ids != self.special_tokens.mask_token] = -100 # calculate loss only for masked tokens
        inputs = {'input_ids': input_ids,
                  'labels': labels}
        
#         return inputs
        return input_ids, labels

In [70]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [58]:
dataset = SadDataset(
    train,
    special_tokens=['cls_token', 'sep_token', 'mask_token', 'pad_token'],
    mask_prob=0.2,
    force_last_token_mask_prob=1.0,
    )
val_dataset = SadDataset(
    val_data,
    special_tokens=['cls_token', 'sep_token', 'mask_token', 'pad_token'],
    mask_prob=0.2,
    force_last_token_mask_prob=1.0,
    )

In [135]:
val_dataset[0]

{'input_ids': tensor([    1,     1,     1,     1,  8799,     1, 47083]),
 'labels': tensor([-100, -100, -100, -100, -100, -100, -100])}

In [136]:
dataset[0]

{'input_ids': tensor([    2, 42196, 24829, 24829,  9713,  9713, 47699, 47699, 48595, 48595,
         40116, 40116,  7438,  7438,  4950,  4950, 48595, 41434, 41434, 43569,
          9713,  4950,  9713,  9713]),
 'labels': tensor([42196,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100])}

In [59]:
def collate_fn(batch):
    batch_i, batch_l = [], []
    for i, l in batch:
        batch_i.append(i)
        batch_l.append(l)
    batch_i = nn.utils.rnn.pad_sequence(batch_i, batch_first=True)
    batch_l = nn.utils.rnn.pad_sequence(batch_l, batch_first=True, padding_value=-100)
    return  batch_i, batch_l

In [60]:
dataloader = DataLoader(dataset,
                        batch_size=16,
                        num_workers=4,
                        drop_last=True,
                        shuffle=True,
                        pin_memory=True,
                       collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset,
                            batch_size=16,
                            num_workers=4,
                            drop_last=False,
                            shuffle=False,
                            pin_memory=True,
                           collate_fn=collate_fn)

# **DistilBert🤓**

In [74]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from tqdm.auto import tqdm
from transformers import get_scheduler

In [67]:
EPOCHS = 1
LR = 0.00005

In [75]:
# download pretrained models
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=4).to(device)
# for w in model.distilbert.parameters():
#     w._trainable= False
# for w in model.classifier.parameters():
#     w._trainable = True

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [153]:
optimizer = optim.AdamW(model.parameters(), lr=LR)
num_training_steps = EPOCHS * len(dataloader)
scheduler = get_scheduler(
    name="linear", optimizer=optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

In [155]:
num_training_steps

3710

In [163]:
dataloader.dataset[0]

{'input_ids': tensor([    2, 42196, 24829, 24829,  9713,  9713, 47699, 47699, 48595, 48595,
         40116, 40116,  7438,  7438,  4950,  4950, 48595, 41434, 41434, 43569,
          9713,  4950,  9713,  9713]),
 'labels': tensor([42196,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,  -100,
          -100,  -100,  -100,  -100])}

In [160]:
for batch in dataloader:
    print(batch)
    break

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
  File "/tmp/ipykernel_33/2160510880.py", line 6, in collate_fn
    batch_i = nn.utils.rnn.pad_sequence(batch_i, batch_first=True)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/utils/rnn.py", line 400, in pad_sequence
    return torch._C._nn.pad_sequence(sequences, batch_first, padding_value)
TypeError: expected Tensor as element 0 in argument 0, but got str


In [154]:
progress_bar = tqdm(range(num_training_steps))

model.train()
for epoch in range(EPOCHS):
    train_loss = 0
    for batch in dataloader:
        batch = {k: v.to(device) for k, v in batch.items()}
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        train_loss += loss.item()

        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        progress_bar.update(1)
    train_loss /= len(dataloader)
    print(f'EPOCH {epoch}: train_loss={train_loss}')

  0%|          | 0/3710 [00:00<?, ?it/s]

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/worker.py", line 308, in _worker_loop
    data = fetcher.fetch(index)
  File "/opt/conda/lib/python3.10/site-packages/torch/utils/data/_utils/fetch.py", line 54, in fetch
    return self.collate_fn(data)
  File "/tmp/ipykernel_33/2160510880.py", line 6, in collate_fn
    batch_i = nn.utils.rnn.pad_sequence(batch_i, batch_first=True)
  File "/opt/conda/lib/python3.10/site-packages/torch/nn/utils/rnn.py", line 400, in pad_sequence
    return torch._C._nn.pad_sequence(sequences, batch_first, padding_value)
TypeError: expected Tensor as element 0 in argument 0, but got str


In [None]:
from datasets import load_metric

metric = load_metric("accuracy")
model.eval()
for batch in tqdm(test_dataloader):
    batch = {k: v.to(device) for k, v in batch.items()}
    with torch.no_grad():
        outputs = model(**batch)

    logits = outputs.logits
    predictions = torch.argmax(logits, dim=-1)
    metric.add_batch(predictions=predictions, references=batch["labels"])

metric.compute()