In [1]:
import pickle
import pandas as pd
import numpy as np
from functools import lru_cache

In [None]:
!aicrowd login

In [2]:
task = 'task2'

In [3]:
# Cache loading of data for multiple calls

@lru_cache(maxsize=1)
def read_product_data():
    return pd.read_csv('../data/raw-data/products_train.csv')

@lru_cache(maxsize=1)
def read_train_data():
    return pd.read_csv('../data/raw-data/sessions_train.csv')

@lru_cache(maxsize=3)
def read_test_data(task):
    return pd.read_csv(f'../data/raw-data/sessions_test_{task}.csv')

In [4]:
train_sessions = read_train_data()
train_sessions.sample(5)

Unnamed: 0,prev_items,next_item,locale
3567470,['B07F1R2JXD' 'B07F1RRQXQ' 'B07F1LKYFS' 'B07F1...,B0B12CNZ93,IT
2373311,['B0050O7O2S' 'B0746RGDQT' 'B06W57X25S' 'B0842...,B000ICLLPS,UK
3008028,['B00QH7SYSA' 'B08TPTN1B9' 'B09NMQQD57' 'B08TP...,B00QH7T20Y,UK
2629488,['B098JN6LJ6' 'B09D77BQVX' 'B08PF4QV45' 'B09VN...,B07LH5LZF6,UK
2926504,['B07JDSHD4Z' 'B07JG9QZ2B' 'B08K8YTC6B' 'B07XY...,B09V2KN837,UK


In [5]:
test_sessions = read_test_data(task)
test_sessions.sample(5)

Unnamed: 0,prev_items,locale
13644,['B08N5J3W2Y' 'B08D4MK3Z9'],FR
22872,['B07T93CQ51' 'B07DWRCL44' 'B07SN57178'],IT
9666,['B09BP6XRMC' 'B09BP6XRMC' 'B08WX2M9VR' 'B0984...,FR
26137,['B093BTYCRY' 'B08P95B5FR' 'B07YPZ27WT' 'B09NQ...,IT
1505,['B08PL893K8' 'B08PL4426W' 'B08PL6WB1V' 'B077T...,ES


In [7]:
locale = []
result = []

In [10]:
LOCALE = "IT" #change locale name in order for another locale recommendations
with open(f"../SR-GNN/test-result/{LOCALE}-recs.pkl", "rb") as f:
    recs = pickle.load(f)

for l in recs:
    result.append(l)

for i in range(len(recs)):
    locale.append(LOCALE)

In [11]:
predictions = pd.DataFrame(list(zip(locale, result)),
               columns =['locale', 'next_item_prediction'])
predictions

Unnamed: 0,locale,next_item_prediction
0,ES,"[B08CB4Q2YT, B09NQGVSPD, B01LQQQWG2, B07SP4WQV..."
1,ES,"[B08CB4Q2YT, B09NQGVSPD, B01LQQQWG2, B085VKKV7..."
2,ES,"[B08CB4Q2YT, B09NQGVSPD, B01LQQQWG2, B07SP4WQV..."
3,ES,"[B08CB4Q2YT, B09NQGVSPD, B01LQQQWG2, B07SP4WQV..."
4,ES,"[B08CB4Q2YT, B09NQGVSPD, B01LQQQWG2, B00CBE2LY..."
...,...,...
13987,FR,"[B0774TPFL5, B08YYQ57DV, B01ELH6ADI, B07CRNJTB..."
13988,FR,"[B0774TPFL5, B08YYQ57DV, B01ELH6ADI, B07CRNJTB..."
13989,FR,"[B0774TPFL5, B08YYQ57DV, B01ELH6ADI, B07CRNJTB..."
13990,FR,"[B0774TPFL5, B08YYQ57DV, B01ELH6ADI, B07CRNJTB..."


In [12]:
def check_predictions(predictions, check_products=False):
    """
    These tests need to pass as they will also be applied on the evaluator
    """
    test_locale_names = test_sessions['locale'].unique()
    for locale in test_locale_names:
        sess_test = test_sessions.query(f'locale == "{locale}"')
        preds_locale =  predictions[predictions['locale'] == sess_test['locale'].iloc[0]]
        assert sorted(preds_locale.index.values) == sorted(sess_test.index.values), f"Session ids of {locale} doesn't match"

        if check_products:
            # This check is not done on the evaluator
            # but you can run it to verify there is no mixing of products between locales
            # Since the ground truth next item will always belong to the same locale
            # Warning - This can be slow to run
            products = read_product_data().query(f'locale == "{locale}"')
            predicted_products = np.unique( np.array(list(preds_locale["next_item_prediction"].values)) )
            assert np.all( np.isin(predicted_products, products['id']) ), f"Invalid products in {locale} predictions"

In [13]:
check_predictions(predictions, True)

AssertionError: Invalid products in ES predictions

In [None]:
# Its important that the parquet file you submit is saved with pyarrow backend
predictions.to_parquet(f'submission_{task}.parquet', engine='pyarrow')

## Submit to AIcrowd ðŸš€

In [None]:
# You can submit with aicrowd-cli, or upload manually on the challenge page.
!aicrowd submission create -c task-2-next-product-recommendation-for-underrepresented-languages -f "submission_task2.parquet"