Radek posted about this [here](https://www.kaggle.com/competitions/h-and-m-personalized-fashion-recommendations/discussion/309220), and linked to a GitHub repo with the code.

I just transferred that code here to Kaggle notebooks, that's all.

In [1]:
import numpy as np

def apk(actual, predicted, k=10):
    """
    Computes the average precision at k.

    This function computes the average precision at k between two lists of
    items.

    Parameters
    ----------
    actual : list
             A list of elements that are to be predicted (order doesn't matter)
    predicted : list
                A list of predicted elements (order does matter)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
            The average precision at k over the input lists

    """
    if len(predicted)>k:
        predicted = predicted[:k]

    score = 0.0
    num_hits = 0.0

    for i,p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i+1.0)

    if not actual:
        return 0.0

    return score / min(len(actual), k)

def mapk(actual, predicted, k=10):
    """
    Computes the mean average precision at k.

    This function computes the mean average prescision at k between two lists
    of lists of items.

    Parameters
    ----------
    actual : list
             A list of lists of elements that are to be predicted 
             (order doesn't matter in the lists)
    predicted : list
                A list of lists of predicted elements
                (order matters in the lists)
    k : int, optional
        The maximum number of predicted elements

    Returns
    -------
    score : double
            The mean average precision at k over the input lists

    """
    return np.mean([apk(a,p,k) for a,p in zip(actual, predicted)])

In [2]:
from sklearn.base import BaseEstimator, TransformerMixin
import numpy as np

# https://www.kaggle.com/c/h-and-m-personalized-fashion-recommendations/discussion/308635
def customer_hex_id_to_int(series):
    return series.str[-16:].apply(hex_id_to_int)

def hex_id_to_int(str):
    return int(str[-16:], 16)

def article_id_str_to_int(series):
    return series.astype('int32')

def article_id_int_to_str(series):
    return '0' + series.astype('str')

class Categorize(BaseEstimator, TransformerMixin):
    def __init__(self, min_examples=0):
        self.min_examples = min_examples
        self.categories = []
        
    def fit(self, X):
        for i in range(X.shape[1]):
            vc = X.iloc[:, i].value_counts()
            self.categories.append(vc[vc > self.min_examples].index.tolist())
        return self

    def transform(self, X):
        data = {X.columns[i]: pd.Categorical(X.iloc[:, i], categories=self.categories[i]).codes for i in range(X.shape[1])}
        return pd.DataFrame(data=data)


def calculate_apk(list_of_preds, list_of_gts):
    # for fast validation this can be changed to operate on dicts of {'cust_id_int': [art_id_int, ...]}
    # using 'data/val_week_purchases_by_cust.pkl'
    apks = []
    for preds, gt in zip(list_of_preds, list_of_gts):
        apks.append(apk(gt, preds, k=12))
    return np.mean(apks)

def eval_sub(sub_csv, skip_cust_with_no_purchases=True):
    sub=pd.read_csv(sub_csv)
    validation_set=pd.read_parquet('../../data/validation_ground_truth.parquet')

    apks = []

    no_purchases_pattern = []
    for pred, gt in zip(sub.prediction.str.split(), validation_set.prediction.str.split()):
        if skip_cust_with_no_purchases and (gt == no_purchases_pattern): continue
        apks.append(apk(gt, pred, k=12))
    return np.mean(apks)

In [3]:
import pandas as pd

In [4]:
%%time

transactions = pd.read_parquet('../../data/h-and-m-personalized-fashion-recommendations/transactions_train.parquet')
customers = pd.read_parquet('../../data/h-and-m-personalized-fashion-recommendations/customers.parquet')
articles = pd.read_parquet('../../data/h-and-m-personalized-fashion-recommendations/articles.parquet')

# sample = 0.05
# transactions = pd.read_parquet(f'data/transactions_train_sample_{sample}.parquet')
# customers = pd.read_parquet(f'data/customers_sample_{sample}.parquet')
# articles = pd.read_parquet(f'data/articles_train_sample_{sample}.parquet')

CPU times: total: 11.8 s
Wall time: 7.95 s


# Prune

In [5]:
from pruning.prune_outdated_items import prune_outdated_items

articles, transactions = prune_outdated_items(articles, transactions, cutoff_days=365)

# Add my own features

## Age group

In [6]:
%%time

from features.add_age_group import add_age_group

customers['age_group'] = add_age_group(customers)

CPU times: total: 31.2 ms
Wall time: 25 ms


## Gender

In [7]:
%%time

from features.add_gender import add_gender

customers['gender'] = add_gender(customers, transactions, articles)

CPU times: total: 20.5 s
Wall time: 20.6 s


# Gender score

In [8]:
%%time

from features.add_gender import add_gender_scores_to_articles

articles['gender_score'] = add_gender_scores_to_articles(articles)

CPU times: total: 15.6 ms
Wall time: 11.3 ms


# Seasonal Score

In [9]:
%%time

from features.add_season_score import add_season_scores
from utils.season import Seasons

for season in Seasons.seasons:
    season.set_max_score_day_range(15)

transactions = transactions.merge(add_season_scores(transactions), on='article_id', how='left')

CPU times: total: 50.4 s
Wall time: 50.8 s


In [10]:
transactions.head()

Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id,spring,summer,fall,winter
0,2018-09-20,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,541518023,0.030492,2,202,28,326,769
1,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687003,0.016932,2,0,0,314,444
2,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687004,0.016932,2,47,89,322,253
3,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687001,0.016932,2,21,17,293,170
4,2018-09-20,00083cda041544b2fbb0e0d2905ad17da7cf1007526fb4...,501323011,0.053373,1,296,193,426,737


# Unique customer popularity

In [11]:
%%time

from features.add_article_unique_customers_popularity import add_article_unique_customers_popularity

articles['unique_customer_popularity'] = add_article_unique_customers_popularity(articles, transactions)

CPU times: total: 30.8 s
Wall time: 31.1 s


# Prepare data

In [12]:
transactions['customer_id'] = customer_hex_id_to_int(transactions['customer_id'])
transactions.t_dat = pd.to_datetime(transactions.t_dat, format='%Y-%m-%d')
transactions['week'] = (transactions['t_dat'] - transactions['t_dat'].min()).dt.days // 7
transactions.article_id = article_id_str_to_int(transactions.article_id)
articles.article_id = article_id_str_to_int(articles.article_id)
transactions.week = transactions.week.astype('int8')
transactions.sales_channel_id = transactions.sales_channel_id.astype('int8')
transactions.price = transactions.price.astype('float32')

customers.customer_id = customer_hex_id_to_int(customers.customer_id)
for col in ['FN', 'Active', 'age']:
    customers[col].fillna(-1, inplace=True)
    customers[col] = customers[col].astype('int8')

customers.club_member_status = Categorize().fit_transform(customers[['club_member_status']]).club_member_status
customers.postal_code = Categorize().fit_transform(customers[['postal_code']]).postal_code
customers.fashion_news_frequency = Categorize().fit_transform(customers[['fashion_news_frequency']]).fashion_news_frequency
customers.gender = Categorize().fit_transform(customers[['gender']]).gender

for col in articles.columns:
    if articles[col].dtype == 'object':
        articles[col] = Categorize().fit_transform(articles[[col]])[col]

for col in articles.columns:
    if articles[col].dtype == 'int64':
        articles[col] = articles[col].astype('int32')

transactions.sort_values(['t_dat', 'customer_id'], inplace=True)

In [13]:
test_week = transactions.week.max() + 1
transactions = transactions[transactions.week > transactions.week.max() - 10]

# Generating candidates

### Last purchase candidates

In [14]:
%%time

c2weeks = transactions.groupby('customer_id')['week'].unique()

CPU times: total: 9.92 s
Wall time: 9.93 s


In [15]:
transactions.groupby('week')['t_dat'].agg(['min', 'max'])

Unnamed: 0_level_0,min,max
week,Unnamed: 1_level_1,Unnamed: 2_level_1
95,2020-07-16,2020-07-22
96,2020-07-23,2020-07-29
97,2020-07-30,2020-08-05
98,2020-08-06,2020-08-12
99,2020-08-13,2020-08-19
100,2020-08-20,2020-08-26
101,2020-08-27,2020-09-02
102,2020-09-03,2020-09-09
103,2020-09-10,2020-09-16
104,2020-09-17,2020-09-22


In [16]:
c2weeks

customer_id
28847241659200          [95, 96, 101, 102]
41318098387474                        [98]
116809474287335                 [100, 103]
200292573348128          [95, 96, 98, 101]
248294615847351                       [95]
                               ...        
18446624797007271432                  [95]
18446630855572834764                 [103]
18446662237889060501                  [99]
18446705133201055310                 [102]
18446737527580148316                 [104]
Name: week, Length: 432676, dtype: object

In [17]:
%%time

c2weeks2shifted_weeks = {}

for c_id, weeks in c2weeks.items():
    c2weeks2shifted_weeks[c_id] = {}
    for i in range(weeks.shape[0]-1):
        c2weeks2shifted_weeks[c_id][weeks[i]] = weeks[i+1]
    c2weeks2shifted_weeks[c_id][weeks[-1]] = test_week

CPU times: total: 328 ms
Wall time: 335 ms


In [18]:
# c2weeks2shifted_weeks[28847241659200]

In [19]:
candidates_last_purchase = transactions.copy()

In [20]:
%%time

weeks = []
for i, (c_id, week) in enumerate(zip(transactions['customer_id'], transactions['week'])):
    weeks.append(c2weeks2shifted_weeks[c_id][week])
    
candidates_last_purchase.week=weeks

CPU times: total: 12.9 s
Wall time: 13 s


In [21]:
# candidates_last_purchase[candidates_last_purchase['customer_id']==272412481300040]

In [22]:
# transactions[transactions['customer_id']==272412481300040]

### Bestsellers candidates

In [23]:
mean_price = transactions \
    .groupby(['week', 'article_id'])['price'].mean()

In [24]:
mean_price

week  article_id
95    108775015     0.003842
      108775044     0.008246
      110065001     0.006085
      110065002     0.006085
      111565001     0.004194
                      ...   
104   952267001     0.013732
      952938001     0.048288
      953450001     0.016932
      953763001     0.021847
      956217002     0.059034
Name: price, Length: 195337, dtype: float32

In [25]:
sales = transactions \
    .groupby('week')['article_id'].value_counts() \
    .groupby('week').rank(method='dense', ascending=False) \
    .groupby('week').head(12).rename('bestseller_rank').astype('int8')

In [26]:
sales

week  article_id
95    866731001      1
      760084003      2
      600886001      3
      706016001      4
      610776002      5
                    ..
104   915529005      8
      915529003      9
      714790020     10
      448509014     10
      762846027     11
Name: bestseller_rank, Length: 120, dtype: int8

In [27]:
sales.loc[95]

article_id
866731001     1
760084003     2
600886001     3
706016001     4
610776002     5
372860002     6
717490064     7
827968001     8
817354001     9
877278002    10
547780003    11
866731003    12
Name: bestseller_rank, dtype: int8

In [28]:
bestsellers_previous_week = pd.merge(sales, mean_price, on=['week', 'article_id']).reset_index()
bestsellers_previous_week.week += 1

In [29]:
bestsellers_previous_week.pipe(lambda df: df[df['week']==96])

Unnamed: 0,week,article_id,bestseller_rank,price
0,96,866731001,1,0.024921
1,96,760084003,2,0.025112
2,96,600886001,3,0.023237
3,96,706016001,4,0.03322
4,96,610776002,5,0.008284
5,96,372860002,6,0.013181
6,96,717490064,7,0.008278
7,96,827968001,8,0.016509
8,96,817354001,9,0.021523
9,96,877278002,10,0.025014


In [30]:
unique_transactions = transactions \
    .groupby(['week', 'customer_id']) \
    .head(1) \
    .drop(columns=['article_id', 'price']) \
    .copy()

In [31]:
unique_transactions

Unnamed: 0,t_dat,customer_id,sales_channel_id,spring,summer,fall,winter,week
26628568,2020-07-16,1520973890714130,2,371,892,111,0,95
26628122,2020-07-16,1827730561464445,2,778,1735,626,504,95
26626529,2020-07-16,2269114073915007,1,0,31,122,0,95
26653971,2020-07-16,5082210391606505,2,0,45,123,0,95
26628186,2020-07-16,6930054433895293,1,0,15,224,0,95
...,...,...,...,...,...,...,...,...
29301924,2020-09-22,18435221511488011015,1,0,0,569,0,104
29323970,2020-09-22,18436859303155335645,1,19,27,1532,82,104
29328987,2020-09-22,18437941771381362708,2,0,0,642,0,104
29317758,2020-09-22,18438270306572912089,1,0,0,647,0,104


In [32]:
transactions.drop_duplicates(['week', 'customer_id'])

Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id,spring,summer,fall,winter,week
26628568,2020-07-16,1520973890714130,808691001,0.025407,2,371,892,111,0,95
26628122,2020-07-16,1827730561464445,764621001,0.011847,2,778,1735,626,504,95
26626529,2020-07-16,2269114073915007,876416001,0.022017,1,0,31,122,0,95
26653971,2020-07-16,5082210391606505,892254002,0.050831,2,0,45,123,0,95
26628186,2020-07-16,6930054433895293,624486091,0.010153,1,0,15,224,0,95
...,...,...,...,...,...,...,...,...,...,...
29301924,2020-09-22,18435221511488011015,573085055,0.033881,1,0,0,569,0,104
29323970,2020-09-22,18436859303155335645,801447001,0.030492,1,19,27,1532,82,104
29328987,2020-09-22,18437941771381362708,907188001,0.050831,2,0,0,642,0,104
29317758,2020-09-22,18438270306572912089,751471043,0.033881,1,0,0,647,0,104


In [33]:
candidates_bestsellers = pd.merge(
    unique_transactions,
    bestsellers_previous_week,
    on='week',
)

In [34]:
test_set_transactions = unique_transactions.drop_duplicates('customer_id').reset_index(drop=True)
test_set_transactions.week = test_week

In [35]:
test_set_transactions

Unnamed: 0,t_dat,customer_id,sales_channel_id,spring,summer,fall,winter,week
0,2020-07-16,1520973890714130,2,371,892,111,0,105
1,2020-07-16,1827730561464445,2,778,1735,626,504,105
2,2020-07-16,2269114073915007,1,0,31,122,0,105
3,2020-07-16,5082210391606505,2,0,45,123,0,105
4,2020-07-16,6930054433895293,1,0,15,224,0,105
...,...,...,...,...,...,...,...,...
432671,2020-09-22,18410229429441241008,2,0,0,593,0,105
432672,2020-09-22,18417769707947924979,2,1629,1763,1008,626,105
432673,2020-09-22,18418054986721795659,2,0,0,476,0,105
432674,2020-09-22,18421175435799911749,2,0,0,647,0,105


In [36]:
candidates_bestsellers_test_week = pd.merge(
    test_set_transactions,
    bestsellers_previous_week,
    on='week'
)

In [37]:
candidates_bestsellers = pd.concat([candidates_bestsellers, candidates_bestsellers_test_week])
candidates_bestsellers.drop(columns='bestseller_rank', inplace=True)

In [38]:
candidates_bestsellers

Unnamed: 0,t_dat,customer_id,sales_channel_id,spring,summer,fall,winter,week,article_id,price
0,2020-07-23,375055163245029,2,1800,1688,1547,1421,96,866731001,0.024921
1,2020-07-23,375055163245029,2,1800,1688,1547,1421,96,760084003,0.025112
2,2020-07-23,375055163245029,2,1800,1688,1547,1421,96,600886001,0.023237
3,2020-07-23,375055163245029,2,1800,1688,1547,1421,96,706016001,0.033220
4,2020-07-23,375055163245029,2,1800,1688,1547,1421,96,610776002,0.008284
...,...,...,...,...,...,...,...,...,...,...
5192107,2020-09-22,18438270306572912089,1,0,0,647,0,105,915529005,0.033418
5192108,2020-09-22,18438270306572912089,1,0,0,647,0,105,915529003,0.033438
5192109,2020-09-22,18438270306572912089,1,0,0,647,0,105,714790020,0.050074
5192110,2020-09-22,18438270306572912089,1,0,0,647,0,105,448509014,0.041563


# Combining transactions and candidates / negative examples

In [39]:
transactions['purchased'] = 1

In [40]:
data = pd.concat([transactions, candidates_last_purchase, candidates_bestsellers])
data.purchased.fillna(0, inplace=True)

In [41]:
data

Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id,spring,summer,fall,winter,week,purchased
26628568,2020-07-16,1520973890714130,808691001,0.025407,2,371,892,111,0,95,1.0
26628122,2020-07-16,1827730561464445,764621001,0.011847,2,778,1735,626,504,95,1.0
26628123,2020-07-16,1827730561464445,764621001,0.011847,2,778,1735,626,504,95,1.0
26628124,2020-07-16,1827730561464445,890683001,0.050831,2,0,850,594,0,95,1.0
26628125,2020-07-16,1827730561464445,890683001,0.050831,2,0,850,594,0,95,1.0
...,...,...,...,...,...,...,...,...,...,...,...
5192107,2020-09-22,18438270306572912089,915529005,0.033418,1,0,0,647,0,105,0.0
5192108,2020-09-22,18438270306572912089,915529003,0.033438,1,0,0,647,0,105,0.0
5192109,2020-09-22,18438270306572912089,714790020,0.050074,1,0,0,647,0,105,0.0
5192110,2020-09-22,18438270306572912089,448509014,0.041563,1,0,0,647,0,105,0.0


In [42]:
data.drop_duplicates(['customer_id', 'article_id', 'week'], inplace=True)

In [43]:
data.purchased.mean()

0.13569719100413363

### Add bestseller information

In [44]:
data = pd.merge(
    data,
    bestsellers_previous_week[['week', 'article_id', 'bestseller_rank']],
    on=['week', 'article_id'],
    how='left'
)

In [45]:
data = data[data.week != data.week.min()]
data.bestseller_rank.fillna(999, inplace=True)

In [46]:
data = pd.merge(data, articles, on='article_id', how='left')
data = pd.merge(data, customers, on='customer_id', how='left')

In [47]:
data.sort_values(['week', 'customer_id'], inplace=True)
data.reset_index(drop=True, inplace=True)

In [48]:
data.columns

Index(['t_dat', 'customer_id', 'article_id', 'price', 'sales_channel_id',
       'spring', 'summer', 'fall', 'winter', 'week', 'purchased',
       'bestseller_rank', 'product_code', 'prod_name', 'product_type_no',
       'product_type_name', 'product_group_name', 'graphical_appearance_no',
       'graphical_appearance_name', 'colour_group_code', 'colour_group_name',
       'perceived_colour_value_id', 'perceived_colour_value_name',
       'perceived_colour_master_id', 'perceived_colour_master_name',
       'department_no', 'department_name', 'index_code', 'index_name',
       'index_group_no', 'index_group_name', 'section_no', 'section_name',
       'garment_group_no', 'garment_group_name', 'detail_desc', 'gender_score',
       'unique_customer_popularity', 'FN', 'Active', 'club_member_status',
       'fashion_news_frequency', 'age', 'postal_code', 'age_group', 'gender'],
      dtype='object')

In [49]:
train = data[data.week != test_week]
test = data[data.week==test_week].drop_duplicates(['customer_id', 'article_id', 'sales_channel_id']).copy()

In [50]:
train_baskets = train.groupby(['week', 'customer_id'])['article_id'].count().values

In [51]:
columns_to_use = ['article_id', 'product_type_no', 'graphical_appearance_no', 'colour_group_code', 'perceived_colour_value_id',
'perceived_colour_master_id', 'department_no', 'index_code',
'index_group_no', 'section_no', 'garment_group_no', 'FN', 'Active',
'club_member_status', 'fashion_news_frequency', 'age', 'postal_code', 'bestseller_rank', 'age_group', 'gender', 'gender_score', 'spring', 'summer', 'fall', 'winter', 'unique_customer_popularity']

In [52]:
%%time

train_X = train[columns_to_use]
train_y = train['purchased']

test_X = test[columns_to_use]

CPU times: total: 312 ms
Wall time: 316 ms


In [53]:
train_X.head()

Unnamed: 0,article_id,product_type_no,graphical_appearance_no,colour_group_code,perceived_colour_value_id,perceived_colour_master_id,department_no,index_code,index_group_no,section_no,...,postal_code,bestseller_rank,age_group,gender,gender_score,spring,summer,fall,winter,unique_customer_popularity
0,887770001,253,1010016,9,4,5,1510,,1,6,...,57896,999.0,20-30,-1,1.0,546,689,388,0,
1,762846001,259,1010016,10,3,9,1515,,1,11,...,57896,999.0,20-30,-1,1.0,1331,1757,1547,865,
2,829308001,273,1010016,9,4,5,8310,,26,5,...,57896,999.0,20-30,-1,0.0,0,866,647,0,
3,866731001,273,1010016,9,4,5,8310,,26,5,...,57896,1.0,20-30,-1,0.0,546,689,388,0,
4,760084003,272,1010016,9,4,5,1747,0.0,2,53,...,57896,2.0,20-30,-1,0.0,546,689,388,0,3240.0


# Model training

In [54]:
from lightgbm.sklearn import LGBMRanker

In [55]:
ranker = LGBMRanker(
    objective="lambdarank",
    metric="ndcg",
    boosting_type="dart",
    n_estimators=1,
    importance_type='gain',
    verbose=10
)

In [56]:
%%time

ranker = ranker.fit(
    train_X,
    train_y,
    group=train_baskets,
)

[LightGBM] [Debug] Dataset::GetMultiBinFromSparseFeatures: sparse rate 0.835338
[LightGBM] [Debug] Dataset::GetMultiBinFromAllFeatures: sparse rate 0.172423
[LightGBM] [Debug] init for col-wise cost 0.091350 seconds, init for row-wise cost 0.350681 seconds
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.215035 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Debug] Using Dense Multi-Val Bin
[LightGBM] [Info] Total Bins 2369
[LightGBM] [Info] Number of data points in the train set: 11162232, number of used features: 26
[LightGBM] [Debug] Trained a tree with leaves = 31 and depth = 10
CPU times: total: 24.6 s
Wall time: 5.04 s


In [57]:
for i in ranker.feature_importances_.argsort()[::-1]:
    print(columns_to_use[i], ranker.feature_importances_[i]/ranker.feature_importances_.sum())

bestseller_rank 0.9871361085107653
winter 0.003121858529387469
article_id 0.0029367596494931725
fall 0.002351930986418698
summer 0.0009292734815739098
product_type_no 0.0008133587362265103
spring 0.0006743839739739422
colour_group_code 0.0006192835772573335
department_no 0.000554091697275655
index_code 0.0004894704752742737
section_no 0.00022387365677871272
graphical_appearance_no 0.00014960672557506373
perceived_colour_value_id 0.0
perceived_colour_master_id 0.0
index_group_no 0.0
unique_customer_popularity 0.0
garment_group_no 0.0
FN 0.0
club_member_status 0.0
fashion_news_frequency 0.0
age 0.0
postal_code 0.0
age_group 0.0
gender 0.0
gender_score 0.0
Active 0.0


# Calculate predictions

In [58]:
%time

test['preds'] = ranker.predict(test_X)

c_id2predicted_article_ids = test \
    .sort_values(['customer_id', 'preds'], ascending=False) \
    .groupby('customer_id')['article_id'].apply(list).to_dict()

bestsellers_last_week = \
    bestsellers_previous_week[bestsellers_previous_week.week == bestsellers_previous_week.week.max()]['article_id'].tolist()

CPU times: total: 0 ns
Wall time: 0 ns


# Create submission

In [59]:
sub = pd.read_csv('../../data/h-and-m-personalized-fashion-recommendations/sample_submission.csv')

In [60]:
%%time
preds = []
for c_id in customer_hex_id_to_int(sub.customer_id):
    pred = c_id2predicted_article_ids.get(c_id, [])
    pred = pred + bestsellers_last_week
    preds.append(pred[:12])

CPU times: total: 2.5 s
Wall time: 2.5 s


In [61]:
preds = [' '.join(['0' + str(p) for p in ps]) for ps in preds]
sub.prediction = preds

In [62]:
sub.to_csv(f'../../data/output/submission.csv.gz', index=False)

In [63]:
import datetime

str(datetime.datetime.now())

'2023-12-22 23:50:41.888392'