In [1]:
import pandas as pd, numpy as np, polars as pl
from tqdm.notebook import tqdm
import os, sys, pickle, glob, gc
from collections import Counter
from pandarallel import pandarallel
from lightgbm import LGBMRanker
import lightgbm as lgb

# Start the pandarallel worker pool. The pool is capped at the number of CPUs
# this machine reports so smaller hosts are not oversubscribed; 28 stays the
# upper bound, and `or 1` covers os.cpu_count() returning None.
pandarallel.initialize(nb_workers=min(28, os.cpu_count() or 1), progress_bar=False)



INFO: Pandarallel will run on 28 workers.
INFO: Pandarallel will use Memory file system to transfer data between the main process and workers.


### 1 - Init variables

In [2]:
# Input locations. All three files sit in ./data and share the .pgt suffix:
# the candidate file plus the cart and order rerank model results.
# No click rerank result is configured:
# click_rerank_result = './data/'
candidate_file, cart_rerank_result, order_rerank_result = (
    f'./data/{stem}.pgt'
    for stem in ('candidates', 'cart_rerank_result', 'order_rerank_result')
)

In [3]:
%%time
# Load the candidates generated by the co-visitation matrix.
candidates = pl.read_parquet(candidate_file)
candidates

CPU times: user 3.29 s, sys: 913 ms, total: 4.2 s
Wall time: 8.5 s


session_type,labels,__index_level_0__
str,str,i64
"""12899779_click...","""59625 1253524 ...",0
"""12899780_click...","""1142000 736515...",1
"""12899781_click...","""918667 199008 ...",2
"""12899782_click...","""1007613 595994...",3
"""12899783_click...","""1817895 607638...",4
"""12899784_click...","""1190477 22981 ...",5
"""12899785_click...","""1497876 775584...",6
"""12899786_click...","""955252 1632910...",7
"""12899787_click...","""1024433 168275...",8
"""12899788_click...","""1663048 125991...",9


In [4]:
%%time
# loading rerank results
cart_rerank_aids = pl.read_parquet(cart_rerank_result)
order_rerank_aids = pl.read_parquet(order_rerank_result)

CPU times: user 523 ms, sys: 147 ms, total: 671 ms
Wall time: 1.15 s


In [5]:
# Show the first three rows of the cart rerank output to check its columns.
cart_rerank_aids.head(3)

session_type,labels,__index_level_0__
str,str,i64
"""14571581_carts...","""285653""",0
"""14571580_carts...","""989688 1246483...",1
"""14571578_carts...","""538003 326064 ...",2


In [6]:
# Show the first three rows of the order rerank output to check its columns.
order_rerank_aids.head(3)

session_type,labels,__index_level_0__
str,str,i64
"""14571581_order...","""1791780""",0
"""14571580_order...","""871658 1627186...",1
"""14571579_order...","""1286038 105174...",2


In [11]:
# Assemble the submission: cart and order predictions come from the rerank
# model results; every other row falls back to the generated candidates.
submission_columns = ['session_type', 'labels']

# Project to the submission columns *before* stacking so the parquet index
# column (`__index_level_0__`) never has to line up between the inputs.
reranked = pl.concat([
    cart_rerank_aids.select(submission_columns),
    order_rerank_aids.select(submission_columns),
])

# The anti-join keeps every candidate row whose session_type has no reranked
# counterpart: the click rows (presumably none of them are reranked), plus
# any cart/order row the rerank outputs do not cover. Previously only click
# rows were taken from the candidates, so uncovered cart/order rows vanished
# from the submission.
fallback = candidates.select(submission_columns).join(
    reranked.select('session_type'), on='session_type', how='anti'
)

submission_file = pl.concat([reranked, fallback])

submission_file

session_type,labels
str,str
"""14571581_carts...","""285653"""
"""14571580_carts...","""989688 1246483..."
"""14571578_carts...","""538003 326064 ..."
"""14571577_carts...","""932022 842555"""
"""14571576_carts...","""337471 1198015..."
"""14571575_carts...","""1257071"""
"""14571574_carts...","""1124532 290117..."
"""14571573_carts...","""599995 853357"""
"""14571572_carts...","""1679224 982938..."
"""14571571_carts...","""343267 871938"""


In [13]:
# Write the assembled submission frame to submission.csv in the working directory.
submission_file.write_csv('submission.csv')