In [1]:
import pandas as pd
import polars as pl
import numpy as np
import scipy.sparse as sp
import implicit
import faiss
from tqdm import tqdm
from typing import List, Any
from gensim.models import Word2Vec
import optuna
import random
from tqdm import tqdm
from lightfm import LightFM

  from .autonotebook import tqdm as notebook_tqdm


## Читаем датасет

In [2]:
data = pl.read_parquet('train.parquet')
data

user_id,artist_id
str,str
"""d705b538-1bd8-…","""69c71d72-7ed8-…"
"""d705b538-1bd8-…","""30bf469f-9abd-…"
"""d705b538-1bd8-…","""a26c9335-2459-…"
"""d705b538-1bd8-…","""69c903b5-dff0-…"
"""d705b538-1bd8-…","""af8eef9d-13aa-…"
"""d705b538-1bd8-…","""293a86ee-6ce7-…"
"""d705b538-1bd8-…","""348f4909-1c48-…"
"""d705b538-1bd8-…","""ad2bf122-726e-…"
"""d705b538-1bd8-…","""cc97fc57-30b5-…"
"""d705b538-1bd8-…","""3000b3a4-7435-…"


## Метрики

В этом задании нашей задачей будем оптимизация метрик ndcg@20. Тем не менее, такая метрику сложно интерпретировать и поэтому вам также будет доступно значение метрики hitrate@20, с которой вы уже познакомились в прошлой домашке

In [3]:
TOP_K = 20


def user_hitrate(y_relevant: List[str], y_recs: List[str], k: int = TOP_K) -> int:
    return int(len(set(y_relevant).intersection(y_recs[:k])) > 0)

def user_ndcg(y_rel: List[Any], y_rec: List[Any], k: int = 10) -> float:
    """
    :param y_rel: relevant items
    :param y_rec: recommended items
    :param k: number of top recommended items
    :return: ndcg metric for user recommendations
    """
    dcg = sum([1. / np.log2(idx + 2) for idx, item in enumerate(y_rec[:k]) if item in y_rel])
    idcg = sum([1. / np.log2(idx + 2) for idx, _ in enumerate(zip(y_rel, np.arange(k)))])
    return dcg / idcg

В этом датасете идентификаторы представлены в виде строк, но для работы с ними может быть проще сделать преобразование в числа (например, для алгоритмов матричной факторизации)

In [4]:
user_mapping = {k: v for v, k in enumerate(data['user_id'].unique())}
user_mapping_inverse = {k: v for v, k in user_mapping.items()}

artist_mapping = {k: v for v, k in enumerate(data['artist_id'].unique())}
artist_mapping_inverse = {k: v for v, k in artist_mapping.items()}

In [5]:
grouped_df_with_inds = (
    data
    .with_columns([
        pl.col('user_id').apply(user_mapping.get),
        pl.col('artist_id').apply(artist_mapping.get),
    ])
    # для каждого пользователя оставим последние 3 объекта в качестве тестовой выборки,
    # а остальное будем использовать для тренировки
    .groupby('user_id')
    .agg([
        pl.col('artist_id').apply(lambda x: x[:-3]).alias('train_item_ids'),
        pl.col('artist_id').apply(lambda x: x[-3:]).alias('test_item_ids'),
    ])
)

grouped_df_with_inds

user_id,train_item_ids,test_item_ids
i64,list[i64],list[i64]
756,"[426, 61818, … 8207]","[52169, 46389, 22828]"
30808,"[3786, 43152, … 9217]","[2749, 27358, 16795]"
33158,"[42945, 64201, … 86454]","[64472, 84018, 3579]"
25134,"[86931, 10041, … 84018]","[88205, 62143, 13484]"
39188,"[31711, 53927, … 68013]","[50293, 79856, 115]"
47304,"[85041, 4616, … 27580]","[6261, 35180, 5579]"
49328,"[89114, 66966, … 53338]","[25677, 22715, 5443]"
3044,"[43817, 21957, … 86208]","[40085, 63778, 82886]"
34362,"[11703, 35921, … 58648]","[75968, 35243, 33621]"
47888,"[42208, 1494, … 9478]","[46998, 69395, 63879]"


In [6]:
median_seq_len = int(grouped_df_with_inds['train_item_ids'].apply(len).median())
print(f"средняя длина сессии {median_seq_len}")

средняя длина сессии 42


In [7]:
max_seq_len = int(grouped_df_with_inds['train_item_ids'].apply(len).max())
print(f"средняя длина сессии {max_seq_len}")

средняя длина сессии 116


In [8]:
min_seq_len = int(grouped_df_with_inds['train_item_ids'].apply(len).max())
print(f"средняя длина сессии {min_seq_len}")

средняя длина сессии 116


In [9]:
# соберем строчки для разреженной матрицы
rows = []
cols = []
values = []
for user_id, train_ids, _ in grouped_df_with_inds.rows():
    rows.extend([user_id] * len(train_ids))
    values.extend([1] * len(train_ids))
    cols.extend(train_ids)

user_item_data = sp.csr_matrix((values, (rows, cols)))

In [10]:
grouped_df_for_sub = (
    data
    .with_columns([
        pl.col('user_id').apply(user_mapping.get),
        pl.col('artist_id').apply(artist_mapping.get),
    ])
    # для каждого пользователя оставим последние 3 объекта в качестве тестовой выборки,
    # а остальное будем использовать для тренировки
    .groupby('user_id')
    .agg([
        pl.col('artist_id').alias('hist_item_ids'),
    ])
)

grouped_df_for_sub

user_id,hist_item_ids
i64,list[i64]
7658,"[84711, 31632, … 74233]"
25424,"[62215, 29293, … 17431]"
39780,"[18358, 20394, … 426]"
37262,"[18752, 51752, … 47748]"
41694,"[61995, 6262, … 13449]"
11596,"[89574, 81882, … 5424]"
10550,"[1346, 1933, … 24193]"
41670,"[64884, 3007, … 25192]"
28034,"[30323, 55684, … 7132]"
44508,"[12707, 38720, … 19071]"


In [11]:
# соберем строчки для разреженной матрицы для сабмишна
rows = []
cols = []
values = []
for user_id, train_ids in grouped_df_for_sub.rows():
    rows.extend([user_id] * len(train_ids))
    values.extend([1] * len(train_ids))
    cols.extend(train_ids)

full_user_item_data = sp.csr_matrix((values, (rows, cols)))

## Бейзлайны

В качестве простого бейзлайна будем рекомендовать самый популярных артистов

Мы хотим сначала провалидировать такое решение, а значит в качестве популярных артистов мы возьмем только тех, кто чаще встречается в `train_item_ids`

In [10]:
top_artists = (
    grouped_df_with_inds
    .select(pl.col('train_item_ids').alias('artist_id'))
    .explode('artist_id')
    .groupby('artist_id')
    .count()
    .sort('count', descending=True)
    .head(TOP_K + median_seq_len)
)['artist_id'].to_list()

In [11]:
ndcg_list = []
hitrate_list = []

for user_id, user_history, y_rel in grouped_df_with_inds.rows():
    y_rec = top_artists.copy()
    
    ndcg_list.append(user_ndcg(y_rel, y_rec))
    hitrate_list.append(user_hitrate(y_rel, y_rec))
    
print(f'NDCG@{TOP_K} = {np.mean(ndcg_list):.5f}, Hitrate@{TOP_K} = {np.mean(hitrate_list):.5f}')

NDCG@20 = 0.01401, Hitrate@20 = 0.10248


Не забываем про фильтрацию просмотренного (для разных доменов и подходов это не всегда улучши рекомендации, но в данном случае дало прирост)

In [12]:
ndcg_list = []
hitrate_list = []

for user_id, user_history, y_rel in grouped_df_with_inds.rows():
    y_rec = [artist_id for artist_id in top_artists if artist_id not in user_history]
    
    ndcg_list.append(user_ndcg(y_rel, y_rec))
    hitrate_list.append(user_hitrate(y_rel, y_rec))
    
print(f'NDCG@{TOP_K} = {np.mean(ndcg_list):.5f}, Hitrate@{TOP_K} = {np.mean(hitrate_list):.5f}')

NDCG@20 = 0.01740, Hitrate@20 = 0.11684


## Построим файл с рекомендациями

Для построения рекомендаций теперь можем учесть все возможные данные

In [13]:
top_artists = (
    data
    .groupby('artist_id')
    .count()
    .sort('count', descending=True)
    .head(TOP_K + median_seq_len)
)['artist_id'].to_list()

In [14]:
submission = []

for user_id, user_history in data.groupby('user_id').agg(pl.col('artist_id')).rows():
    y_rec = top_artists.copy()
    
    submission.append((user_id, y_rec))
    
submission = pl.DataFrame(submission, schema=('user_id', 'y_rec'))
submission.write_parquet('sample_submission.parquet')
submission

user_id,y_rec
str,list[str]
"""0444e715-665f-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""973c3b33-0fcc-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""8d7a9d61-db33-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""ac74f8c7-6e45-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""c28d106e-2b07-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""ba7c87b2-7811-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""9b5a4f0c-39f0-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""8e8e2904-02b6-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""12971653-1ed2-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"
"""1298ff82-e532-…","[""5cd0ffb5-0cf2-4ecd-8c5b-ca2102e33198"", ""12289298-d9dc-4b1d-bc27-16480829de75"", … ""98e69a29-ee83-41f1-924e-08a50a32efdc""]"


Не забывайте, что в файле с рекомендациями должны быть **исходные идентификаторы (строки)**, а не преобразованные в числа!

## W2V

In [12]:
def evaluate_model(model, df = grouped_df_with_inds):
    ndcg_list = []
    hitrate_list = []
    for user_id, user_history, y_rel in df.rows():  
        model_preds = model.predict_output_word(
                train_ids[:-model.window], topn=(TOP_K + len(user_history))
            )
    
        if model_preds is None:
            hitrate_list.append(0)
            ndcg_list.append(0)
            continue
        y_rec = [artist_id[0] for artist_id in model_preds if artist_id not in user_history]
        ndcg_list.append(user_ndcg(y_rel, y_rec))
        hitrate_list.append(user_hitrate(y_rel, y_rec))  
    return np.mean(ndcg_list), np.mean(hitrate_list)

In [13]:
grouped_df_with_inds_sampled = grouped_df_with_inds.sample(1_000) 

In [None]:
SEED = 42

def set_seed(seed):
    np.random.seed(seed)
    random.seed(seed)

def objective(trial):
    sg = trial.suggest_categorical('sg', [0, 1])
    window = trial.suggest_int('window', 1, 10)
    ns_exponent = trial.suggest_float('ns_exponent', -3, 3)
    negative = trial.suggest_int('negative', 3, 20)
    min_count = trial.suggest_int('min_count', 0, 20)
    vector_size = trial.suggest_categorical('vector_size', [16, 32, 64, 128])
    
    print({
        'sg': sg,
        'window_len': window,
        'ns_exponent': ns_exponent,
        'negative': negative,
        'min_count': min_count,
        'vector_size': vector_size,
    })
    
    set_seed(SEED)
    model = Word2Vec(
        grouped_df_with_inds['train_item_ids'].to_list(),
        window=window,
        sg=sg,
        hs=0,
        min_count=min_count,
        vector_size=vector_size,
        negative=negative,
        ns_exponent=ns_exponent,
        seed=SEED,
        epochs=10,
    )
    
    mean_ndcg, mean_hitrate = evaluate_model(model, grouped_df_with_inds_sampled)
    
    
    print(f'MAP@{TOP_K} = {mean_ndcg:.4f} Hitrate@{TOP_K} = {mean_hitrate:.4f}')
    return mean_ndcg
    
    
study = optuna.create_study(directions=('maximize',))
study.optimize(objective, n_trials=40)

study.best_params

In [29]:
best_w2v = Word2Vec(
    grouped_df_for_sub['hist_item_ids'].to_list(),
    sg=0,
    vector_size=32,
    min_count=18,
    epochs=30,
    negative=17,
    window=10,
    ns_exponent=0.05,
    seed=42,
    hs=0
)
mean_ndcg, mean_hitrate = evaluate_model(best_w2v, grouped_df_with_inds_sampled)
print(f'MAP@{TOP_K} = {mean_ndcg:.4f} Hitrate@{TOP_K} = {mean_hitrate:.4f}')

MAP@20 = 0.0007 Hitrate@20 = 0.0150


In [25]:
submission = []
model = best_w2v
for user_id, user_history in tqdm(grouped_df_for_sub.rows()):
    model_preds = model.predict_output_word(
                user_history, topn=(TOP_K + len(user_history))
            )
    
    y_rec = [artist_mapping_inverse[artist_id[0]] for artist_id in model_preds if artist_id not in user_history]
    
    user_mapped = user_mapping_inverse[user_id]
    submission.append((user_mapped, y_rec))
    
submission = pl.DataFrame(submission, schema=('user_id', 'y_rec'))
submission.write_parquet('w2v_sample_submission.parquet')
submission

100%|██████████| 50000/50000 [03:16<00:00, 253.92it/s]


user_id,y_rec
str,list[str]
"""713a1223-f289-…","[""0f692d11-9a2a-4964-bfaa-3841b4d537cb"", ""277c978f-65fd-4d35-9784-feed640f11b6"", … ""1254fab8-02e2-44d2-b9dc-99427c4142eb""]"
"""b0a343b6-c7a5-…","[""750b1eb3-0ca9-4876-9443-1b96c2169215"", ""ed437fe5-c894-4ddf-b2ce-d1f91988e32d"", … ""b62f62a6-4e33-4a42-91df-e7651374df00""]"
"""5fbfea6f-0c70-…","[""693aefb0-cbaf-4163-8aff-72a22781f8fa"", ""4e1063ab-2b1d-471d-aaa6-cbca88b3cf76"", … ""c9ba26c2-62a4-4b48-a6d7-24e326932b43""]"
"""a2614007-ced2-…","[""ae8e44f9-3b2a-4044-9e63-3d513b4f5cd9"", ""101ceee2-3430-4b63-ac68-f576b9b6cd51"", … ""5e0fa633-c2b1-48e6-b68b-843da0c9d648""]"
"""a9c2109f-2931-…","[""a662889e-d41f-4acd-9712-a983e3c3d91d"", ""e3682d95-ca4c-4749-8882-a3bdaaff0649"", … ""67195fbe-c97a-4f9f-91bc-b1e8cca05364""]"
"""ffa7abbe-864f-…","[""0f692d11-9a2a-4964-bfaa-3841b4d537cb"", ""f3400a8e-d5b3-496a-bd7f-d60f3a0fd397"", … ""3c2346ec-77d9-413f-9c0e-b389b15d93c5""]"
"""49be3592-56ff-…","[""a26c9335-2459-4c89-a00c-fdecbeb2c8c4"", ""69c71d72-7ed8-42c4-b9ec-c33976a310b9"", … ""8586fa4d-5e83-4c53-be27-0efa58ae6499""]"
"""4372c259-eef6-…","[""e7d5fa27-5258-4cd2-91e3-04059d8eb13c"", ""f3400a8e-d5b3-496a-bd7f-d60f3a0fd397"", … ""b6908c95-ed36-4bc1-bd0a-0ad196a2e387""]"
"""c9a7e0b0-9cdd-…","[""e0d0391a-7454-4d3e-a690-950204ef59bf"", ""a6cdae3d-16ae-430a-8716-b82f64ed758d"", … ""4b4890ce-1e14-49ad-93f0-936446c3b812""]"
"""0363a944-4dc6-…","[""c60fe0f5-2e52-45b6-bdb9-13b44f382ca5"", ""94b8478a-913b-4560-a633-1cf4b086647a"", … ""9b8a5964-fede-421a-bd5e-cb7ec011b1ae""]"


#### ALS

In [26]:

RANDOM_STATE = 42

def set_seed():
    random.seed(RANDOM_STATE)
    np.random.seed(RANDOM_STATE)

In [27]:
def get_recommendations(user_embs: np.array, item_embs: np.array, k: int = TOP_K):
    # строим индекс объектов
    index = faiss.IndexFlatIP(item_embs.shape[1])
    index.add(item_embs)

    # строим рекомендации с помощью dot-product расстояния
    # с запасом, чтобы после фильтрации просмотренных осталось как минимум TOP_K
    return index.search(user_embs, TOP_K * 3)

In [28]:
def evaluate_model(model, df):
    # строим рекомендации в виде KNN алгоритма поверх эмбеддингов пользователей и артистов
    _, recs = get_recommendations(
        model.user_factors,
        model.item_factors,
        TOP_K + median_seq_len
    )
    
    # будем отслеживать как ndcg, так и hitrate метрики
    ndcg_list = []
    hitrate_list = []
    
    for user_id, user_history, y_rel in df.select(
        'user_id', 'train_item_ids', 'test_item_ids'
    ).rows():
        y_rec = [
                item_id
                # чтобы точно хватило рекомендаций, добавим еще топовых артистов в конец списка
                for item_id in list(recs[user_id])
                if item_id not in user_history
            ]
        hitrate_list.append(user_hitrate(y_rel, y_rec))
        ndcg_list.append(user_ndcg(y_rel, y_rec))
        
    mean_ndcg = np.mean(ndcg_list)
    mean_hitrate = np.mean(hitrate_list)
    return mean_ndcg, mean_hitrate

In [None]:
def objective(trial):
    factors = trial.suggest_int('factors', 8, 128)
    iterations = trial.suggest_int('iterations', 5, 100)
    alpha = trial.suggest_float('alpha', 0.1, 5.0)
    regularization = trial.suggest_float('regularization', 1e-3, 1e-1)
        
    print({
        'factors': factors,
        'iterations': iterations,
        'alpha': alpha,
        'regularization': regularization,
    })
    
    set_seed()
    als_model = implicit.als.AlternatingLeastSquares(
        factors=factors,
        iterations=iterations,
        random_state=RANDOM_STATE,
        alpha=alpha,
        regularization=regularization
    )
    als_model.fit(user_item_data)
    
    mean_ndcg, mean_hitrate = evaluate_model(als_model, grouped_df_with_inds_sampled)
    print(f'NDCG@{TOP_K} = {mean_ndcg}, Hitrate@{TOP_K} = {mean_hitrate}')
    return mean_ndcg
    
    
study = optuna.create_study(directions=('maximize',))
# тут запускается всего 5 итераций, что может быть очень мало для хороших результатов
study.optimize(objective, n_trials=40)

study.best_params

[I 2024-08-12 22:22:37,646] A new study created in memory with name: no-name-855a6342-d80c-4f13-a6f6-140f64d79ae7


{'factors': 57, 'iterations': 68, 'alpha': 3.5424363552369966, 'regularization': 0.08438954638083798}


100%|██████████| 68/68 [16:17<00:00, 14.38s/it]
[I 2024-08-12 22:39:18,855] Trial 0 finished with value: 0.06330376932238622 and parameters: {'factors': 57, 'iterations': 68, 'alpha': 3.5424363552369966, 'regularization': 0.08438954638083798}. Best is trial 0 with value: 0.06330376932238622.


NDCG@20 = 0.06330376932238622, Hitrate@20 = 0.346
{'factors': 81, 'iterations': 10, 'alpha': 0.38241970268979175, 'regularization': 0.09388649514745363}


100%|██████████| 10/10 [02:29<00:00, 14.91s/it]
[I 2024-08-12 22:42:13,315] Trial 1 finished with value: 0.03926633485939325 and parameters: {'factors': 81, 'iterations': 10, 'alpha': 0.38241970268979175, 'regularization': 0.09388649514745363}. Best is trial 0 with value: 0.06330376932238622.


NDCG@20 = 0.03926633485939325, Hitrate@20 = 0.247
{'factors': 17, 'iterations': 92, 'alpha': 0.6954211934201072, 'regularization': 0.002047605639633383}


100%|██████████| 92/92 [10:22<00:00,  6.77s/it]
[I 2024-08-12 22:52:55,313] Trial 2 finished with value: 0.041756014839206115 and parameters: {'factors': 17, 'iterations': 92, 'alpha': 0.6954211934201072, 'regularization': 0.002047605639633383}. Best is trial 0 with value: 0.06330376932238622.


NDCG@20 = 0.041756014839206115, Hitrate@20 = 0.247
{'factors': 119, 'iterations': 48, 'alpha': 3.5862085957756022, 'regularization': 0.0960049490259032}


100%|██████████| 48/48 [15:10<00:00, 18.97s/it]
[I 2024-08-12 23:08:35,957] Trial 3 finished with value: 0.0638755979459333 and parameters: {'factors': 119, 'iterations': 48, 'alpha': 3.5862085957756022, 'regularization': 0.0960049490259032}. Best is trial 3 with value: 0.0638755979459333.


NDCG@20 = 0.0638755979459333, Hitrate@20 = 0.353
{'factors': 26, 'iterations': 22, 'alpha': 1.666230408714568, 'regularization': 0.08667541424511797}


100%|██████████| 22/22 [02:52<00:00,  7.84s/it]
[I 2024-08-12 23:11:48,539] Trial 4 finished with value: 0.05324983287982135 and parameters: {'factors': 26, 'iterations': 22, 'alpha': 1.666230408714568, 'regularization': 0.08667541424511797}. Best is trial 3 with value: 0.0638755979459333.


NDCG@20 = 0.05324983287982135, Hitrate@20 = 0.303
{'factors': 68, 'iterations': 54, 'alpha': 4.418667990190787, 'regularization': 0.01606067706081465}


100%|██████████| 54/54 [10:23<00:00, 11.55s/it]
[I 2024-08-12 23:22:37,110] Trial 5 finished with value: 0.06690879504157918 and parameters: {'factors': 68, 'iterations': 54, 'alpha': 4.418667990190787, 'regularization': 0.01606067706081465}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.06690879504157918, Hitrate@20 = 0.359
{'factors': 92, 'iterations': 73, 'alpha': 1.9911581010196562, 'regularization': 0.09182906195638411}


100%|██████████| 73/73 [19:26<00:00, 15.98s/it]
[I 2024-08-12 23:42:30,163] Trial 6 finished with value: 0.06050029654230965 and parameters: {'factors': 92, 'iterations': 73, 'alpha': 1.9911581010196562, 'regularization': 0.09182906195638411}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.06050029654230965, Hitrate@20 = 0.346
{'factors': 18, 'iterations': 100, 'alpha': 1.1360723629745229, 'regularization': 0.05450445690822245}


100%|██████████| 100/100 [11:23<00:00,  6.83s/it]
[I 2024-08-12 23:54:13,715] Trial 7 finished with value: 0.047772194868587504 and parameters: {'factors': 18, 'iterations': 100, 'alpha': 1.1360723629745229, 'regularization': 0.05450445690822245}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.047772194868587504, Hitrate@20 = 0.278
{'factors': 102, 'iterations': 55, 'alpha': 0.38347828096244996, 'regularization': 0.08640738821618109}


100%|██████████| 55/55 [16:08<00:00, 17.60s/it]
[I 2024-08-13 00:10:49,544] Trial 8 finished with value: 0.03766623738635565 and parameters: {'factors': 102, 'iterations': 55, 'alpha': 0.38347828096244996, 'regularization': 0.08640738821618109}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.03766623738635565, Hitrate@20 = 0.212
{'factors': 49, 'iterations': 54, 'alpha': 0.27716156883349685, 'regularization': 0.08824373086734844}


100%|██████████| 54/54 [12:33<00:00, 13.95s/it]
[I 2024-08-13 00:23:46,757] Trial 9 finished with value: 0.03804619652358699 and parameters: {'factors': 49, 'iterations': 54, 'alpha': 0.27716156883349685, 'regularization': 0.08824373086734844}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.03804619652358699, Hitrate@20 = 0.22
{'factors': 42, 'iterations': 34, 'alpha': 4.744440222362728, 'regularization': 0.006010236210257692}


100%|██████████| 34/34 [06:52<00:00, 12.13s/it]
[I 2024-08-13 00:31:01,477] Trial 10 finished with value: 0.062182033514756326 and parameters: {'factors': 42, 'iterations': 34, 'alpha': 4.744440222362728, 'regularization': 0.006010236210257692}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.062182033514756326, Hitrate@20 = 0.342
{'factors': 122, 'iterations': 38, 'alpha': 3.4473786221405587, 'regularization': 0.027122121881937147}


100%|██████████| 38/38 [13:08<00:00, 20.76s/it]
[I 2024-08-13 00:44:41,466] Trial 11 finished with value: 0.06531247580790345 and parameters: {'factors': 122, 'iterations': 38, 'alpha': 3.4473786221405587, 'regularization': 0.027122121881937147}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.06531247580790345, Hitrate@20 = 0.355
{'factors': 128, 'iterations': 38, 'alpha': 4.797915829682014, 'regularization': 0.022531824345812516}


100%|██████████| 38/38 [10:44<00:00, 16.95s/it]
[I 2024-08-13 00:55:57,953] Trial 12 finished with value: 0.06524316189501804 and parameters: {'factors': 128, 'iterations': 38, 'alpha': 4.797915829682014, 'regularization': 0.022531824345812516}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.06524316189501804, Hitrate@20 = 0.357
{'factors': 74, 'iterations': 68, 'alpha': 3.2597491224851525, 'regularization': 0.027315314666303703}


100%|██████████| 68/68 [15:31<00:00, 13.70s/it]
[I 2024-08-13 01:11:55,246] Trial 13 finished with value: 0.06389482274657038 and parameters: {'factors': 74, 'iterations': 68, 'alpha': 3.2597491224851525, 'regularization': 0.027315314666303703}. Best is trial 5 with value: 0.06690879504157918.


NDCG@20 = 0.06389482274657038, Hitrate@20 = 0.361
{'factors': 106, 'iterations': 30, 'alpha': 4.17929210635546, 'regularization': 0.030950376772445956}


100%|██████████| 30/30 [08:52<00:00, 17.74s/it]
[I 2024-08-13 01:21:15,447] Trial 14 finished with value: 0.06791989334015246 and parameters: {'factors': 106, 'iterations': 30, 'alpha': 4.17929210635546, 'regularization': 0.030950376772445956}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.06791989334015246, Hitrate@20 = 0.367
{'factors': 100, 'iterations': 8, 'alpha': 4.148036621533336, 'regularization': 0.04288013023597671}


100%|██████████| 8/8 [01:55<00:00, 14.43s/it]
[I 2024-08-13 01:23:37,743] Trial 15 finished with value: 0.06608140025333727 and parameters: {'factors': 100, 'iterations': 8, 'alpha': 4.148036621533336, 'regularization': 0.04288013023597671}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.06608140025333727, Hitrate@20 = 0.347
{'factors': 67, 'iterations': 29, 'alpha': 2.7329435821447756, 'regularization': 0.016694777468894886}


100%|██████████| 29/29 [05:34<00:00, 11.54s/it]
[I 2024-08-13 01:29:37,717] Trial 16 finished with value: 0.06349249402204303 and parameters: {'factors': 67, 'iterations': 29, 'alpha': 2.7329435821447756, 'regularization': 0.016694777468894886}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.06349249402204303, Hitrate@20 = 0.344
{'factors': 89, 'iterations': 20, 'alpha': 4.277227363918833, 'regularization': 0.039441262930327056}


100%|██████████| 20/20 [05:58<00:00, 17.90s/it]
[I 2024-08-13 01:36:02,660] Trial 17 finished with value: 0.06590262319955105 and parameters: {'factors': 89, 'iterations': 20, 'alpha': 4.277227363918833, 'regularization': 0.039441262930327056}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.06590262319955105, Hitrate@20 = 0.367
{'factors': 62, 'iterations': 48, 'alpha': 4.978026986973774, 'regularization': 0.013004366338293771}


100%|██████████| 48/48 [11:37<00:00, 14.52s/it]
[I 2024-08-13 01:48:02,801] Trial 18 finished with value: 0.06697815505112653 and parameters: {'factors': 62, 'iterations': 48, 'alpha': 4.978026986973774, 'regularization': 0.013004366338293771}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.06697815505112653, Hitrate@20 = 0.365
{'factors': 35, 'iterations': 43, 'alpha': 4.871478494485655, 'regularization': 0.00955757921341882}


100%|██████████| 43/43 [06:05<00:00,  8.50s/it]
[I 2024-08-13 01:54:28,859] Trial 19 finished with value: 0.06154546454959164 and parameters: {'factors': 35, 'iterations': 43, 'alpha': 4.871478494485655, 'regularization': 0.00955757921341882}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.06154546454959164, Hitrate@20 = 0.352
{'factors': 58, 'iterations': 81, 'alpha': 3.9942623030181412, 'regularization': 0.0018217853167680244}


100%|██████████| 81/81 [18:43<00:00, 13.87s/it]
[I 2024-08-13 02:13:35,046] Trial 20 finished with value: 0.06562423000562359 and parameters: {'factors': 58, 'iterations': 81, 'alpha': 3.9942623030181412, 'regularization': 0.0018217853167680244}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.06562423000562359, Hitrate@20 = 0.351
{'factors': 67, 'iterations': 59, 'alpha': 4.972518462610493, 'regularization': 0.014246221516102109}


100%|██████████| 59/59 [11:08<00:00, 11.33s/it]
[I 2024-08-13 02:25:07,119] Trial 21 finished with value: 0.0677818037969061 and parameters: {'factors': 67, 'iterations': 59, 'alpha': 4.972518462610493, 'regularization': 0.014246221516102109}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.0677818037969061, Hitrate@20 = 0.362
{'factors': 107, 'iterations': 60, 'alpha': 4.790487331899216, 'regularization': 0.01794149953486915}


100%|██████████| 60/60 [17:10<00:00, 17.18s/it]
[I 2024-08-13 02:42:45,750] Trial 22 finished with value: 0.06729728773843249 and parameters: {'factors': 107, 'iterations': 60, 'alpha': 4.790487331899216, 'regularization': 0.01794149953486915}. Best is trial 14 with value: 0.06791989334015246.


NDCG@20 = 0.06729728773843249, Hitrate@20 = 0.359
{'factors': 111, 'iterations': 61, 'alpha': 4.490832982144006, 'regularization': 0.03246000536313355}


 69%|██████▉   | 42/61 [12:36<05:30, 17.41s/it]

In [29]:
best_params = {'factors': 106, 'iterations': 30, 'alpha': 4.17929210635546, 'regularization': 0.030950376772445956}


In [30]:
fin_als_model = implicit.als.AlternatingLeastSquares(
        random_state=RANDOM_STATE,
    **best_params
    )
fin_als_model.fit(full_user_item_data)

100%|██████████| 30/30 [10:46<00:00, 21.56s/it]


In [31]:
submission = []
_, model_preds = get_recommendations(
        fin_als_model.user_factors,
        fin_als_model.item_factors,
        TOP_K + median_seq_len
    )
for user_id, user_history in tqdm(grouped_df_for_sub.rows()):
    y_rec = [artist_mapping_inverse[artist_id] for artist_id in list(model_preds[user_id]) if artist_id not in user_history]
    user_mapped = user_mapping_inverse[user_id]
    submission.append((user_mapped, y_rec))
    
submission = pl.DataFrame(submission, schema=('user_id', 'y_rec'))
submission.write_parquet('als_sample_submission.parquet')
submission

100%|██████████| 50000/50000 [00:06<00:00, 7749.53it/s]


user_id,y_rec
str,list[str]
"""713a1223-f289-…","[""277c978f-65fd-4d35-9784-feed640f11b6"", ""a040d9c1-5e56-462b-bc5b-6ab6cc5b2844"", … ""ce9b8a75-3e4a-4cee-90ad-85eb28409070""]"
"""b0a343b6-c7a5-…","[""ed437fe5-c894-4ddf-b2ce-d1f91988e32d"", ""ae8e44f9-3b2a-4044-9e63-3d513b4f5cd9"", … ""13145656-b46b-4dba-875f-9b6f7bf5d72e""]"
"""5fbfea6f-0c70-…","[""4536ac75-4cc9-40d5-9cd3-0880fedd748c"", ""299638ac-4f88-4a50-a05f-be46a60c49cd"", … ""36b78ebe-ca4e-4a54-97e9-167e98f26f27""]"
"""a2614007-ced2-…","[""d989225d-6423-4eaa-a3d7-c65c78dd43d3"", ""5013361c-4240-442e-855e-19f0185aa442"", … ""ab189245-1685-4f57-9c11-7ebcad118163""]"
"""a9c2109f-2931-…","[""e3682d95-ca4c-4749-8882-a3bdaaff0649"", ""bc6f0982-2adc-41ce-a02e-e82b4fad02a3"", … ""ed6fe0ef-a143-40c2-a91a-1b1616af63d8""]"
"""ffa7abbe-864f-…","[""79e81ed7-39fd-4046-a942-e3a0ded32039"", ""0f692d11-9a2a-4964-bfaa-3841b4d537cb"", … ""ebb32beb-22d5-404e-8b3d-c60370cc633c""]"
"""49be3592-56ff-…","[""b6908c95-ed36-4bc1-bd0a-0ad196a2e387"", ""87a7508f-415e-4080-8bff-8ff94cfec4a6"", … ""392d5791-06c0-4eb5-8b8c-be660a1d634e""]"
"""4372c259-eef6-…","[""ce4b8396-8061-4225-a1ed-0528f1ca3d1a"", ""ef2c57a7-d053-496d-928c-160833e0bb13"", … ""a8d46026-b06b-40b6-bd07-92592f81bbc7""]"
"""c9a7e0b0-9cdd-…","[""8097f9bc-0641-463a-9295-f577bc852cbd"", ""5013361c-4240-442e-855e-19f0185aa442"", … ""226ac710-61d9-4083-8e31-41c354483c86""]"
"""0363a944-4dc6-…","[""a6cdae3d-16ae-430a-8716-b82f64ed758d"", ""fbd6e44d-d56a-45d5-bf6d-49ec4bef01b1"", … ""1c50ce14-9b6a-47f1-af0f-11992de06535""]"


#### LFM

In [63]:
lightfm_model = LightFM(    
    no_components=45,
    loss='warp',)
lightfm_model.fit(user_item_data, epochs=100, verbose=True)
evaluate_model(lightfm_model, grouped_df_with_inds_sampled
    )

Epoch: 100%|██████████| 100/100 [10:23<00:00,  6.23s/it]


(0.03379106941506322, 0.231)

In [13]:
def get_recommendations(user_embs: np.array, item_embs: np.array, k: int = TOP_K):
    # строим индекс объектов
    index = faiss.IndexFlatIP(item_embs.shape[1])
    index.add(item_embs)

    # строим рекомендации с помощью dot-product расстояния
    # с запасом, чтобы после фильтрации просмотренных осталось как минимум TOP_K
    return index.search(user_embs, TOP_K * 3)

In [14]:
def evaluate_model(model, df):
    # строим рекомендации в виде KNN алгоритма поверх эмбеддингов пользователей и артистов
    _, recs = get_recommendations(
        model.user_embeddings,
        model.item_embeddings,
        TOP_K + median_seq_len
    )
    
    # будем отслеживать как ndcg, так и hitrate метрики
    ndcg_list = []
    hitrate_list = []
    
    for user_id, user_history, y_rel in df.select(
        'user_id', 'train_item_ids', 'test_item_ids'
    ).rows():
        y_rec = [
                item_id
                # чтобы точно хватило рекомендаций, добавим еще топовых артистов в конец списка
                for item_id in list(recs[user_id])
                if item_id not in user_history
            ]
        hitrate_list.append(user_hitrate(y_rel, y_rec))
        ndcg_list.append(user_ndcg(y_rel, y_rec))
        
    mean_ndcg = np.mean(ndcg_list)
    mean_hitrate = np.mean(hitrate_list)
    return mean_ndcg, mean_hitrate

In [None]:
def objective(trial):
    no_components = trial.suggest_int('no_components', 8, 64)
    epochs = trial.suggest_int('epochs', 5, 10)
    alpha = trial.suggest_float('alpha', 0., 1e-3)
        
    print({
        'no_components': no_components,
        'alpha': alpha,
    })
    
    set_seed()
    try:
        # модель может бросить исключение, если регуляризация слишком сильная
        lightfm_model = LightFM(
            no_components=no_components,
            loss="warp",
            item_alpha=alpha,
            user_alpha=alpha,
            k=TOP_K,
            random_state=RANDOM_STATE,
        )
        lightfm_model.fit(user_item_data, epochs=epochs, verbose=True)
    except:
        return None
    
    mean_hitrate = evaluate_model(lightfm_model, grouped_df_with_inds_sampled
    )
    print(f'Hitrate@{TOP_K} = {mean_hitrate}')
    return mean_hitrate
    

study = optuna.create_study(directions=('maximize',))
study.optimize(objective, n_trials=5)

study.best_params

In [21]:
fin_lfm_model = LightFM(
    random_state=42,
        no_components=70,
    loss='warp',
    # k=TOP_K,
    # **study.best_params
    )
fin_lfm_model.fit(full_user_item_data, epochs=200, verbose=True)


Epoch: 100%|██████████| 200/200 [27:24<00:00,  8.22s/it]


<lightfm.lightfm.LightFM at 0x7fe41bee92b0>

In [23]:
submission = []
_, model_preds = get_recommendations(
        fin_lfm_model.user_embeddings,
        fin_lfm_model.item_embeddings,
        TOP_K + median_seq_len
    )
for user_id, user_history in tqdm(grouped_df_for_sub.rows()):
    y_rec = [artist_mapping_inverse[artist_id] for artist_id in list(model_preds[user_id]) if artist_id not in user_history]
    user_mapped = user_mapping_inverse[user_id]
    submission.append((user_mapped, y_rec))
    
submission = pl.DataFrame(submission, schema=('user_id', 'y_rec'))
submission.write_parquet('lfm_sample_submission.parquet')
submission

100%|██████████| 50000/50000 [00:05<00:00, 8873.27it/s]


user_id,y_rec
str,list[str]
"""ff5e5c2c-1537-…","[""70e3a077-aaad-4465-b79e-6715737e2f04"", ""7d66b5d1-3be2-46b7-920c-3e579aa8e9fd"", … ""9f5d6343-cc6f-417c-91c5-226e8d261187""]"
"""fcfabec9-f501-…","[""9ab3cad6-d492-4fa4-8683-c50756b51371"", ""bd85ef81-6bc2-4d28-acfa-de354921b11f"", … ""295bd604-37cd-4456-bb05-fe96966611e8""]"
"""60746760-f451-…","[""400fbdf5-3f36-472b-a943-e52e9a2f6dec"", ""0daff9a2-b902-4e64-a90c-8b5f0cb3d0b0"", … ""f7cb238b-ba69-48ce-ae3f-333d3619d497""]"
"""1124f339-61f5-…","[""f022290f-de4b-44be-b021-229d3eb5628a"", ""c34aaa22-16bf-4c3e-8cba-f422ef193288"", … ""5758feb5-5288-41b9-b586-70d343e391f3""]"
"""da08589a-9801-…","[""5c346b00-1039-41b4-9639-7bdc29b8cf02"", ""d2ea9497-3057-4ef8-909e-26a18cd6b6b8"", … ""88a38c77-efca-4aa8-ac0c-6f344985f473""]"
"""c0ac9573-4b8e-…","[""288e8dbe-182b-4b0a-a2da-03244077087e"", ""77091edc-2cd2-4cef-84ff-7c9a5c87516e"", … ""b2f8f6ff-9705-40d6-b954-e80aa6b2ddb4""]"
"""90394ff5-d454-…","[""6dc62a0a-5226-494f-bedf-249bc30ecb4f"", ""01943870-3d38-4d10-a16c-2efd44b7c249"", … ""aecbacdc-bebc-47e0-bcd9-5e4a3708ff05""]"
"""58a2217b-7b2a-…","[""08b167f8-231c-49a5-8d57-65a10f5f8d8d"", ""9949924a-8d58-420d-af3c-bd444085f48a"", … ""f0e56855-988a-4a99-8e06-1140d522097e""]"
"""1d55262a-bed9-…","[""0d0013d0-9fa6-4984-8ef9-a59c3f6b6460"", ""5cfeb0ed-c5ca-464b-9008-e95315dc6315"", … ""758ad232-ed4f-41c3-82f2-55641caaad4e""]"
"""f4b7b9fa-8409-…","[""92f2e07f-24b5-41bd-9d87-15c355cbba08"", ""38779559-5271-4f7b-919c-71cd08bf33ef"", … ""71c5c296-fe1f-46ea-a93b-3a7872e9976e""]"


##### Комбинирование

In [32]:
w2v_preds = pl.read_parquet('w2v_sample_submission.parquet')
als_preds = pl.read_parquet('als_sample_submission.parquet')
lfm_preds = pl.read_parquet('lfm_sample_submission.parquet')

In [37]:
temperature = 40

submissions = ['w2v', 'als', 'lfm']
# веса получены по рузальтатам оценки в LMS
submission_weights = np.array([0.0419,  0.0805,  0.0451])
# в качестве итоговых весов используется softmax с температурой
# чем выше температура, тем больше будет перекос в сторону сильного решения
submission_weights = np.exp(submission_weights * temperature) / \
    np.exp(submission_weights * temperature).sum()
print(submission_weights)

submissions_df = [
    (
        pl.read_parquet(f'{submission}_sample_submission.parquet')
        .with_columns([pl.col('y_rec').alias(f'{submission}_rec')])
        .drop('y_rec')
    )
    for submission in submissions
]

[0.14663103 0.68671504 0.16665393]


In [40]:
joined_submission_df = submissions_df[0]
for df in submissions_df[1:]:
    joined_submission_df = joined_submission_df.join(df, 'user_id')
joined_submission_df.head()

user_id,w2v_rec,als_rec,lfm_rec
str,list[str],list[str],list[str]
"""ff5e5c2c-1537-…","[""9e809110-0e15-4d6d-97b8-7649a99d2bdd"", ""9b8a5964-fede-421a-bd5e-cb7ec011b1ae"", … ""7c5b4e7e-4929-4d34-a55b-e28e28a8ea3c""]","[""ecf1a315-88ef-474d-9976-113f4989bfb1"", ""a20d6155-6010-4221-9a3d-ae629ab36033"", … ""6f1b8a85-c127-40a6-84be-d659aefc99e8""]","[""70e3a077-aaad-4465-b79e-6715737e2f04"", ""7d66b5d1-3be2-46b7-920c-3e579aa8e9fd"", … ""9f5d6343-cc6f-417c-91c5-226e8d261187""]"
"""fcfabec9-f501-…","[""148a5aca-2f10-4577-afe9-ca83e9e56bad"", ""42cee962-0f50-4728-b887-01cb7a207075"", … ""93bf07d3-5233-4270-a3c4-9815e9d786da""]","[""35a3b882-19ee-421c-8135-3bd7f7773b4c"", ""4f0b2eeb-ae54-46d9-b5fa-6f4d315dbe6b"", … ""f251e755-a8c7-424e-a5d6-a245d027c832""]","[""9ab3cad6-d492-4fa4-8683-c50756b51371"", ""bd85ef81-6bc2-4d28-acfa-de354921b11f"", … ""295bd604-37cd-4456-bb05-fe96966611e8""]"
"""60746760-f451-…","[""400fbdf5-3f36-472b-a943-e52e9a2f6dec"", ""abc5ae9c-edd7-4b10-9448-b28b5072ed87"", … ""e76e1635-3220-4549-8d03-88fba0bd1ba0""]","[""400fbdf5-3f36-472b-a943-e52e9a2f6dec"", ""858d2c2a-b2ad-44fe-a0ed-1f228e81fe10"", … ""58d46cd3-89cb-4813-8e96-210e00ea44a1""]","[""400fbdf5-3f36-472b-a943-e52e9a2f6dec"", ""0daff9a2-b902-4e64-a90c-8b5f0cb3d0b0"", … ""f7cb238b-ba69-48ce-ae3f-333d3619d497""]"
"""1124f339-61f5-…","[""da33b22a-7c7d-43b3-b23c-ea33e11acae5"", ""f14d0a11-c600-44b5-8417-dc13d58f3d7d"", … ""771bbc4a-aea7-4dff-8e07-956103bc7a7d""]","[""d9c0f58a-d2d4-4f51-945e-eeb93155fb32"", ""42cee962-0f50-4728-b887-01cb7a207075"", … ""04d1389c-ca3d-405c-9d49-621336239a0c""]","[""f022290f-de4b-44be-b021-229d3eb5628a"", ""c34aaa22-16bf-4c3e-8cba-f422ef193288"", … ""5758feb5-5288-41b9-b586-70d343e391f3""]"
"""da08589a-9801-…","[""2027c6aa-3608-4db5-9c0c-79f74090c245"", ""a20d6155-6010-4221-9a3d-ae629ab36033"", … ""17aebad2-c1fe-47f9-a452-ea87ada80eff""]","[""a20d6155-6010-4221-9a3d-ae629ab36033"", ""0e8c98f2-e72c-453c-b7f2-d8376a80cf2f"", … ""734d6169-9de7-41cc-82c2-1bf5698caad2""]","[""5c346b00-1039-41b4-9639-7bdc29b8cf02"", ""d2ea9497-3057-4ef8-909e-26a18cd6b6b8"", … ""88a38c77-efca-4aa8-ac0c-6f344985f473""]"


In [45]:
for user_id, *submission_recs in tqdm(joined_submission_df.rows()):
    submission_recs

100%|██████████| 50000/50000 [00:00<00:00, 1320117.59it/s]


In [49]:
from collections import defaultdict

In [50]:
submission = []
for user_id, *submission_recs in tqdm(joined_submission_df.rows()):
    item_id_weight = defaultdict(float)

    for i, y_rec in enumerate(submission_recs):
        for pos, item_id in enumerate(y_rec):
            # в качестве веса используем вес позиции в метрике ndcg и метрику качества метода
            item_id_weight[item_id] += submission_weights[i] * 1 / np.log2(pos + 2)

    y_rec = [
        item_id
        for item_id, _ in sorted(item_id_weight.items(), key=lambda x: -x[1])
    ][:TOP_K]
    
    submission.append((user_id, y_rec))
    
submission = pl.DataFrame(submission, schema=('user_id', 'y_rec'))
submission.write_parquet('weighted_submission.parquet')
submission

100%|██████████| 50000/50000 [00:23<00:00, 2133.38it/s]


user_id,y_rec
str,list[str]
"""ff5e5c2c-1537-…","[""ecf1a315-88ef-474d-9976-113f4989bfb1"", ""a20d6155-6010-4221-9a3d-ae629ab36033"", … ""1bf3b917-ebd7-407e-a36b-61f2a449b9d9""]"
"""fcfabec9-f501-…","[""35a3b882-19ee-421c-8135-3bd7f7773b4c"", ""4f0b2eeb-ae54-46d9-b5fa-6f4d315dbe6b"", … ""0e5fe2f7-a00a-42e7-96e3-2e036eab84b8""]"
"""60746760-f451-…","[""400fbdf5-3f36-472b-a943-e52e9a2f6dec"", ""858d2c2a-b2ad-44fe-a0ed-1f228e81fe10"", … ""4abe7731-7e74-4d86-80c8-bb43d832af3a""]"
"""1124f339-61f5-…","[""d9c0f58a-d2d4-4f51-945e-eeb93155fb32"", ""42cee962-0f50-4728-b887-01cb7a207075"", … ""25c6813c-1a83-4479-9587-df9c4458ee49""]"
"""da08589a-9801-…","[""a20d6155-6010-4221-9a3d-ae629ab36033"", ""0e8c98f2-e72c-453c-b7f2-d8376a80cf2f"", … ""e3d909e2-d1f6-4197-9a46-946ca9686de3""]"
"""c0ac9573-4b8e-…","[""24a968fc-b05f-4079-9a70-471f30facca3"", ""0f3684df-3b34-4324-b228-9eb2e53619b8"", … ""ab189245-1685-4f57-9c11-7ebcad118163""]"
"""90394ff5-d454-…","[""8868eefb-4f5b-4d3a-8e71-f3e0d2a71179"", ""8ce3d372-83e6-4001-bb6e-840dbdcd891b"", … ""7f207da8-3297-4f6b-938f-a7e3c87e4c10""]"
"""58a2217b-7b2a-…","[""8412f042-a50b-4dbb-98e5-20421c068777"", ""545b5b92-e96f-40d9-9c19-6f21c91861ae"", … ""f00a572a-1dfa-404f-8614-f12659303b32""]"
"""1d55262a-bed9-…","[""c4f88140-a914-4c43-9d51-ae2cc7488eab"", ""5cfeb0ed-c5ca-464b-9008-e95315dc6315"", … ""bd3d98d4-2b4b-4a7f-9daf-ce68f7a9a469""]"
"""f4b7b9fa-8409-…","[""1d763d72-7915-46df-9924-5a8d8b3c5da6"", ""3bcf5b33-5912-4666-be5e-cbae8d1d363a"", … ""2e8fa39d-3770-4ea7-89f6-3430f8d61bab""]"
