# OTTO Upload
This notebook was used to upload results to the Kaggle system. Depending on the boolean variables load_clicks, load_carts and load_orders, it can load results for one or all of the models. When results are uploaded for only some of the models, the slots for the other models' predictions are filled with values from the sample submission file provided by the competition organizers.

This notebook was also used to check the predictions made at the candidate generation stage. The code used to process the top-20 generated candidates is kept here, but it is commented out.
## Imports and definitions

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Transform the session column into the format required by the organizers.
def prepare_2_upload(df, type_string, prediction_column):
    """Build a two-column submission frame from a predictions frame.

    Args:
        df: Frame exposing ``session`` either as its index or as a column,
            plus the column named by ``prediction_column``.
        type_string: Suffix appended to the stringified session id
            (for example ``'_clicks'``).
        prediction_column: Name of the column holding the predictions.

    Returns:
        A new frame with exactly the columns ``session_type`` and ``labels``;
        the caller's frame is left untouched.
    """
    # reset_index returns a fresh frame and turns an index-held `session`
    # into a regular column.
    flat = df.reset_index()
    flat = flat.assign(session_type=flat['session'].map(str) + type_string)
    flat = flat.assign(labels=flat[prediction_column])
    return flat.loc[:, ['session_type', 'labels']]

## Imports from candidate generation notebooks.

In [3]:
# Disabled cell: everything below is wrapped in a triple-quoted string, so it is
# evaluated as a plain string expression and none of the statements run.
# If re-enabled, it would read the click-candidates parquet file, explode the
# 'click_predictions' column to one row per item, cast the items to str, join
# them back per session with single spaces, and format the result with
# prepare_2_upload using the '_clicks' suffix.
'''
# load candidates
df_candidates = pd.read_parquet('/kaggle/input/otto-click-candidates-validation/candidates_test_20.parquet')
df_candidates = df_candidates.explode('click_predictions').reset_index(drop=True)
df_candidates['click_predictions'] = df_candidates['click_predictions'].apply(str)
df_candidates = (df_candidates.groupby('session').agg({'click_predictions': lambda x: " ".join(x)}))
df_candidates = prepare_2_upload(df_candidates, '_clicks', 'click_predictions')
'''

'\n# load candidates\ndf_candidates = pd.read_parquet(\'/kaggle/input/otto-click-candidates-validation/candidates_test_20.parquet\')\ndf_candidates = df_candidates.explode(\'click_predictions\').reset_index(drop=True)\ndf_candidates[\'click_predictions\'] = df_candidates[\'click_predictions\'].apply(str)\ndf_candidates = (df_candidates.groupby(\'session\').agg({\'click_predictions\': lambda x: " ".join(x)}))\ndf_candidates = prepare_2_upload(df_candidates, \'_clicks\', \'click_predictions\')\n'

In [4]:
# Disabled cell: everything below is wrapped in a triple-quoted string, so it is
# evaluated as a plain string expression and none of the statements run.
# If re-enabled, it would read the cart-candidates parquet file, explode the
# 'cart_predictions' column to one row per item, cast the items to str, join
# them back per session with single spaces, and format the result with
# prepare_2_upload using the '_carts' suffix.
'''
#cart-candidates
df_candidates = pd.read_parquet('/kaggle/input/otto-validate-candidates-carts/candidates_cart_test20.parquet')
df_candidates = df_candidates.explode('cart_predictions').reset_index(drop=True)
df_candidates['cart_predictions'] = df_candidates['cart_predictions'].apply(str)
df_candidates = (df_candidates.groupby('session').agg({'cart_predictions': lambda x: " ".join(x)}))
df_candidates = prepare_2_upload(df_candidates, '_carts', 'cart_predictions')
'''

'\n#cart-candidates\ndf_candidates = pd.read_parquet(\'/kaggle/input/otto-validate-candidates-carts/candidates_cart_test20.parquet\')\ndf_candidates = df_candidates.explode(\'cart_predictions\').reset_index(drop=True)\ndf_candidates[\'cart_predictions\'] = df_candidates[\'cart_predictions\'].apply(str)\ndf_candidates = (df_candidates.groupby(\'session\').agg({\'cart_predictions\': lambda x: " ".join(x)}))\ndf_candidates = prepare_2_upload(df_candidates, \'_carts\', \'cart_predictions\')\n'

In [5]:
# Disabled cell: everything below is wrapped in a triple-quoted string, so it is
# evaluated as a plain string expression and none of the statements run.
# If re-enabled, it would read a candidates parquet file from the orders
# dataset, explode the 'order_predictions' column to one row per item, cast the
# items to str, join them back per session with single spaces, and format the
# result with prepare_2_upload using the '_orders' suffix.
# NOTE(review): the file name inside the orders dataset path is
# 'candidates_cart_test20.parquet' -- confirm this is the real file name and
# not a copy-paste leftover from the carts cell before re-enabling.
'''
#order-candidates
df_candidates = pd.read_parquet('/kaggle/input/otto-validate-candidates-orders/candidates_cart_test20.parquet')
df_candidates = df_candidates.explode('order_predictions').reset_index(drop=True)
df_candidates['order_predictions'] = df_candidates['order_predictions'].apply(str)
df_candidates = (df_candidates.groupby('session').agg({'order_predictions': lambda x: " ".join(x)}))
df_candidates = prepare_2_upload(df_candidates, '_orders', 'order_predictions')
'''


'\n#order-candidates\ndf_candidates = pd.read_parquet(\'/kaggle/input/otto-validate-candidates-orders/candidates_cart_test20.parquet\')\ndf_candidates = df_candidates.explode(\'order_predictions\').reset_index(drop=True)\ndf_candidates[\'order_predictions\'] = df_candidates[\'order_predictions\'].apply(str)\ndf_candidates = (df_candidates.groupby(\'session\').agg({\'order_predictions\': lambda x: " ".join(x)}))\ndf_candidates = prepare_2_upload(df_candidates, \'_orders\', \'order_predictions\')\n'

## Import predictions made by reranking models.

In [6]:
# Clicks model: set the flag to False to leave these predictions out of the upload.
load_clicks = True

if load_clicks:
    # Read the stored predictions and immediately convert them to the
    # two-column (session_type, labels) submission layout.
    df_candidates_clicks = prepare_2_upload(
        pd.read_parquet('/kaggle/input/otto-model-clicks/click_predictions.parquet'),
        '_clicks',
        'click_predictions',
    )


In [7]:
# Carts model: set the flag to False to leave these predictions out of the upload.
load_carts = True

if load_carts:
    # Read the stored predictions and immediately convert them to the
    # two-column (session_type, labels) submission layout.
    df_candidates_carts = prepare_2_upload(
        pd.read_parquet('/kaggle/input/otto-model-carts-predict/gbdt_predictions.parquet'),
        '_carts',
        'cart_predictions',
    )

In [8]:
# Orders model: set the flag to False to leave these predictions out of the upload.
load_orders = True

if load_orders:
    # Read the stored predictions and immediately convert them to the
    # two-column (session_type, labels) submission layout.
    df_candidates_orders = prepare_2_upload(
        pd.read_parquet('/kaggle/input/otto-orders-combine/gbdt_predictions_from_both_cvs.parquet'),
        '_orders',
        'order_predictions',
    )

## Combine and export the predictions.

In [9]:
# Combine predictions from the enabled models and the sample submission file
# into a single dataframe.
df_sample = pd.read_csv('/kaggle/input/otto-recommender-system/sample_submission.csv')

# Gather only the prediction frames whose load flag is switched on.
loaded_predictions = []
if load_clicks:
    loaded_predictions.append(df_candidates_clicks)
if load_carts:
    loaded_predictions.append(df_candidates_carts)
if load_orders:
    loaded_predictions.append(df_candidates_orders)

if loaded_predictions:
    # Each frame carries its own session_type suffix (_clicks/_carts/_orders),
    # so the frames can be stacked row-wise and merged in ONE step.
    # Merging them one after another made the resulting column layout depend on
    # the number of enabled models, and with three models it relied on duplicate
    # 'labels_x'/'labels_y' column names, which pandas >= 2.0 rejects with a
    # MergeError.
    # After this single merge the frame always has the columns
    # session_type, labels_x (sample values) and labels_y (model predictions).
    df_predictions = pd.concat(loaded_predictions, ignore_index=True)
    df_sample = pd.merge(df_sample, df_predictions, how='left', on='session_type')

  


In [10]:
# Collapse all label columns of df_sample into a single 'labels' column.
# Expected layout: column 0 is session_type, column 1 holds the labels from the
# sample submission, and every column after that (if any) holds model
# predictions added by the merge step.
# Columns are addressed by position rather than by name, so this works for any
# number of merged prediction columns (including none, and including frames
# whose merged columns ended up with duplicate names).
combined_labels = df_sample.iloc[:, 1]
# Walk the prediction columns from last to first so that the left-most
# prediction column has the highest priority; the sample labels are only used
# where no model supplied a value.
for position in reversed(range(2, df_sample.shape[1])):
    combined_labels = df_sample.iloc[:, position].fillna(combined_labels)

df_sample = pd.DataFrame({
    'session_type': df_sample['session_type'],
    'labels': combined_labels,
})

In [11]:
# Write the final submission file; the row index is not part of the required format.
df_sample.to_csv(path_or_buf='submission.csv', index=False)