In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import os
from matplotlib import pyplot as plt
import sys
import time

import random

sys.path.append('/home/juravlik/PycharmProjects/kaggle_hnm_recsys/')

from lightgbm import LGBMRanker, LGBMClassifier

from scripts.utils import combine_train_sets_and_labels
from scripts.metrics.mapk import mapk

from scripts.utils import create_one_hot_encoding

pd.set_option('display.max_columns', 500)

import warnings
warnings.filterwarnings("ignore")

In [2]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df

In [3]:
path_to_set = '/home/juravlik/PycharmProjects/kaggle_hnm_recsys/data/train_set_and_labels/test_4/'
selected_customers = None #list(pd.read_parquet('/home/juravlik/PycharmProjects/kaggle_hnm_recsys/data/train_set_and_labels/test_2/week_0__part_0.parquet')['customer_id'].unique())
weeks_for_train = [1,2,3,4,5,6,7]

# sample_weight_dict = {1: 1, 2: 0.7, 3: 0.7, 4: 0.7, 5: 0.4, 6: 0.4, 7: 0.3}



In [4]:
for i in weeks_for_train:
    if i == weeks_for_train[0]:
        df_train = pd.read_parquet(os.path.join(path_to_set, 'week_{}.parquet'.format(i)))
    else:
        df_train = pd.concat([df_train,
                              pd.read_parquet(os.path.join(path_to_set, 'week_{}.parquet'.format(i)))],
                              ignore_index=True)
    
    if selected_customers:
        df_train = df_train[df_train['customer_id'].isin(selected_customers)]

In [5]:
df_train = reduce_mem_usage(df_train)

Memory usage of dataframe is 5679.92 MB
Memory usage after optimization is: 4197.48 MB
Decreased by 26.1%


In [6]:
df_train

Unnamed: 0,customer_id,article_id,weeks_before_sub,score_ARulesRecommender,score_GruRecommender,score_ItemsPurchasedTogetherRecommender,score_KaggleCustomerAgeRecommender,score_KaggleExponentialDecayRecommender,score_KaggleTrendingRecommender,score_KMeansRecommender,score_LastPurchasesPopularity,score_LightFMRecommender,score_PopularByGroupsRecommender,score_SVDRecommender,colour_Beige,colour_Black,colour_Blue,colour_Bluish Green,colour_Brown,colour_Green,colour_Grey,colour_Khaki green,colour_Lilac Purple,colour_Metal,colour_Mole,colour_Orange,colour_Pink,colour_Red,colour_Turquoise,colour_Unknown,colour_White,colour_Yellow,colour_Yellowish Green,colour_undefined,product_Accessories,product_Bags,product_Cosmetic,product_Fun,product_Furniture,product_Garment Full body,product_Garment Lower body,product_Garment Upper body,product_Garment and Shoe care,product_Interior textile,product_Items,product_Nightwear,product_Shoes,product_Socks & Tights,product_Stationery,product_Swimwear,product_Underwear,product_Underwear/nightwear,product_Unknown,product_name_Accessories,product_name_Bags,product_name_Cosmetic,product_name_Fun,product_name_Furniture,product_name_Garment Full body,product_name_Garment Lower body,product_name_Garment Upper body,product_name_Garment and Shoe care,product_name_Interior textile,product_name_Items,product_name_Nightwear,product_name_Shoes,product_name_Socks & Tights,product_name_Stationery,product_name_Swimwear,product_name_Underwear,product_name_Underwear/nightwear,product_name_Unknown,article__frequency_purchases,article__mean_sales_channel_id,article__num_days_from_first_purchase,article__num_days_from_last_purchase,article__num_purchased_customers,article__num_unique_purchased_customers,article__num_purchased_customers_last90days,article__num_unique_purchased_customers_last90days,article__num_purchased_customers_last30days,article__num_unique_purchased_customers_last30days,article__num_purchased_customers_last7days,article__num_unique_purchased_customers_last7days,article__num_purchased_customers_last1days,article__num_unique_purchased_customers_last1days,article__unique_ratio,article__unique_ratio_last30days,article__unique_ratio_last7days,article__unique_ratio_last1days,article__mean_price,article__last_price,article__last_price_ratio,age,club_member_status_ACTIVE,club_member_status_LEFT CLUB,club_member_status_PRE-CREATE,fashion_news_frequency_Monthly,fashion_news_frequency_Regularly,sex_Woman,sex_Man,sex_Divided,have_children,sport_person,customer__mean_price,customer__mean_sales_channel_id,customer__num_days_from_first_purchase,customer__num_days_from_last_purchase,customer__num_purchased_articles,customer__num_unique_purchased_articles,customer__num_purchased_articles_last90days,customer__num_unique_purchased_articles_last90days,customer__num_purchased_articles_last30days,customer__num_unique_purchased_articles_last30days,customer__num_purchased_articles_last7days,customer__num_unique_purchased_articles_last7days,customer__unique_ratio,customer__unique_ratio_last30days,customer__unique_ratio_last7days,customer_article__num_days_from_last_purchase,customer_article__num_purchased,customer_article__num_purchased_last90days,customer_article__num_purchased_last30days,customer_article__num_purchased_last7days,label
0,1123150,90046,1,5348.0,,1.000000,1.000000,1.000000,1.000000,157798.687500,1.000000,,,0.948242,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.125000,2.000000,6.0,0.0,48.0,39.0,48.0,39.0,48.0,39.0,48.0,39.0,0.0,0.0,0.812500,0.812500,0.812500,0.000000,0.013359,0.013542,1.013672,20.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.024094,1.958008,725.0,4.0,424.0,360.0,41.0,34.0,5.0,4.0,5.0,4.0,0.849121,0.799805,0.799805,4,2,2,2,2,0.0
1,1236614,90046,1,5348.0,,0.625000,0.625000,0.750000,0.111084,54385.339844,0.142822,,,0.964355,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.125000,2.000000,6.0,0.0,48.0,39.0,48.0,39.0,48.0,39.0,48.0,39.0,0.0,0.0,0.812500,0.812500,0.812500,0.000000,0.013359,0.013542,1.013672,31.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.027069,1.420898,726.0,5.0,88.0,80.0,29.0,27.0,14.0,12.0,14.0,12.0,0.909180,0.856934,0.856934,6,1,1,1,1,0.0
2,15553,90046,1,5348.0,,0.958496,0.958496,0.958496,0.500000,146809.609375,0.500000,,,0.494141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.125000,2.000000,6.0,0.0,48.0,39.0,48.0,39.0,48.0,39.0,48.0,39.0,0.0,0.0,0.812500,0.812500,0.812500,0.000000,0.013359,0.013542,1.013672,19.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.024277,2.000000,308.0,1.0,18.0,18.0,5.0,5.0,2.0,2.0,2.0,2.0,1.000000,1.000000,1.000000,1,1,1,1,1,0.0
3,1236614,89029,1,5348.0,,0.666504,0.666504,0.916504,0.083313,54385.339844,0.333252,,,0.745117,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.103455,2.000000,6.0,0.0,58.0,53.0,58.0,53.0,58.0,53.0,58.0,53.0,0.0,0.0,0.913574,0.913574,0.913574,0.000000,0.025085,0.024948,0.994629,31.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.027069,1.420898,726.0,5.0,88.0,80.0,29.0,27.0,14.0,12.0,14.0,12.0,0.909180,0.856934,0.856934,6,1,1,1,1,0.0
4,1141205,89029,1,5348.0,,0.750000,0.750000,0.916504,0.142822,78899.343750,0.333252,,,0.895020,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.103455,2.000000,6.0,0.0,58.0,53.0,58.0,53.0,58.0,53.0,58.0,53.0,0.0,0.0,0.913574,0.913574,0.913574,0.000000,0.025085,0.024948,0.994629,31.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.020935,1.395508,697.0,0.0,124.0,103.0,26.0,23.0,15.0,14.0,8.0,7.0,0.830566,0.933105,0.875000,4,1,1,1,1,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17465773,1305785,94233,7,,,,,,,,,,,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.720215,1.984375,180.0,5.0,250.0,216.0,86.0,76.0,15.0,12.0,2.0,2.0,0.0,0.0,0.863770,0.799805,1.000000,0.000000,0.059601,0.020325,0.341064,49.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.068115,1.911133,679.0,22.0,112.0,68.0,14.0,10.0,1.0,1.0,0.0,0.0,0.606934,1.000000,0.000000,-1,0,0,0,0,0.0
17465774,246834,92458,7,,,,,,,,,,,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.529297,1.676758,240.0,7.0,68.0,67.0,7.0,7.0,1.0,1.0,1.0,1.0,1.0,1.0,0.985352,1.000000,1.000000,1.000000,0.015137,0.006084,0.401855,21.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.026154,1.000000,80.0,30.0,16.0,16.0,16.0,16.0,1.0,1.0,0.0,0.0,1.000000,1.000000,0.000000,-1,0,0,0,0,0.0
17465775,62461,81866,7,,,,,,,,,,,0.000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,4.640625,2.000000,181.0,31.0,39.0,36.0,15.0,14.0,0.0,0.0,0.0,0.0,0.0,0.0,0.922852,0.000000,0.000000,0.000000,0.024536,0.025406,1.036133,41.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.024261,1.386719,575.0,37.0,62.0,51.0,10.0,9.0,0.0,0.0,0.0,0.0,0.822754,0.000000,0.000000,-1,0,0,0,0,0.0
17465776,686917,71931,7,,,,,,,,,,,0.000000,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,11.351562,2.000000,386.0,5.0,34.0,26.0,5.0,5.0,3.0,3.0,1.0,1.0,0.0,0.0,0.764648,1.000000,1.000000,0.000000,0.055847,0.030487,0.545898,29.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.034302,1.793945,417.0,22.0,34.0,29.0,10.0,10.0,1.0,1.0,0.0,0.0,0.853027,1.000000,0.000000,-1,0,0,0,0,0.0


In [7]:
# sample_weights = df_train['weeks_before_sub'].apply(lambda x: sample_weight_dict[x])

In [8]:
# df_train.fillna(-1000, inplace=True)

In [9]:
features = list(df_train.columns)
features.remove('article_id')
features.remove('customer_id')
features.remove('weeks_before_sub')
features.remove('label')

In [10]:
ranker = LGBMRanker(
    objective="lambdarank",
    metric="ndcg",
    eval_at=12,
    boosting_type="gbdt", #gbdt #goss
    
    force_col_wise=True,
    num_leaves=40, #31 #40
    max_depth=-1, #10 #-1
    learning_rate=0.1, #0.1
    
#     reg_lambda=0.1, #0 #0.3
    
    importance_type='split',
    
    min_split_gain=0, #0 #0.7
    
    colsample_bytree=1, #1
    
    n_estimators=200, #100 #150
    
    random_state=42,
    verbose=1
)

In [11]:
ranker = ranker.fit(
    X=df_train[features],
    y=df_train[['label']],
#     sample_weight=sample_weights,
    group=df_train.groupby(['customer_id'])['article_id'].count().values
)

[LightGBM] [Info] Total Bins 9676
[LightGBM] [Info] Number of data points in the train set: 17465778, number of used features: 110


In [12]:
del df_train

# Make predictions for submition data

In [13]:
path_to_sub = '/home/juravlik/PycharmProjects/kaggle_hnm_recsys/data/train_set_and_labels/test_4/week_0__part_{}.parquet'
path_to_dest = '/home/juravlik/PycharmProjects/kaggle_hnm_recsys/data/ranker_predicts/test_4'

for i in range(10):
    df_sub = pd.read_parquet(path_to_sub.format(i))
    df_sub = reduce_mem_usage(df_sub)
    
    df_sub['predict'] = ranker.predict(
        df_sub[features]
    )
    
    df_sub = df_sub.sort_values(['customer_id', 'predict'], ascending=False).groupby('customer_id').head(12)
    
    df_sub[['customer_id', 'article_id']].to_parquet(os.path.join(path_to_dest, 'week_0__part_{}.parquet'.format(i)),
                                                    index=False)

Memory usage of dataframe is 1251.91 MB
Memory usage after optimization is: 1093.28 MB
Decreased by 12.7%
Memory usage of dataframe is 1251.95 MB
Memory usage after optimization is: 1093.32 MB
Decreased by 12.7%
Memory usage of dataframe is 1251.47 MB
Memory usage after optimization is: 1097.18 MB
Decreased by 12.3%
Memory usage of dataframe is 1252.18 MB
Memory usage after optimization is: 1097.80 MB
Decreased by 12.3%
Memory usage of dataframe is 1251.85 MB
Memory usage after optimization is: 1097.51 MB
Decreased by 12.3%
Memory usage of dataframe is 1251.82 MB
Memory usage after optimization is: 1093.20 MB
Decreased by 12.7%
Memory usage of dataframe is 1251.99 MB
Memory usage after optimization is: 1093.35 MB
Decreased by 12.7%
Memory usage of dataframe is 1250.70 MB
Memory usage after optimization is: 1096.51 MB
Decreased by 12.3%
Memory usage of dataframe is 1252.06 MB
Memory usage after optimization is: 1093.41 MB
Decreased by 12.7%
Memory usage of dataframe is 1251.62 MB
Memory

## concat all predictions

In [14]:
int_article_id = pd.read_pickle('../../data/compressed_dataset/int_article_id.pickle')
int_customer_id = pd.read_pickle('../../data/compressed_dataset/int_customer_id.pickle')

In [15]:
def predict_to_submit(
        predict: pd.DataFrame,
        int_article_id: dict,
        int_customer_id: dict,
):
    predict['article_id'] = predict['article_id'].apply(lambda x: int_article_id[x])
    predict['customer_id'] = predict['customer_id'].apply(lambda x: int_customer_id[x])
    predict['article_id'] = predict['article_id'].astype(str)
    predict['prediction'] = predict.groupby(['customer_id'])['article_id'].transform(lambda x: ' '.join(x))
    predict.drop_duplicates(subset=['customer_id'], inplace=True)
    predict.reset_index(inplace=True, drop=True)
    predict['prediction'] = predict['prediction'].str[:131]
    predict = predict[['customer_id', 'prediction']]

    return predict

In [16]:
path_to_preds = '/home/juravlik/PycharmProjects/kaggle_hnm_recsys/data/ranker_predicts/test_4/week_0__part_{}.parquet'
path_to_dest = '/home/juravlik/PycharmProjects/kaggle_hnm_recsys/data/ranker_predicts/test_4/final_predictions.csv'

for i in range(10):
    print(i)
    df = pd.read_parquet(path_to_preds.format(i))

    df = predict_to_submit(df, int_article_id, int_customer_id)
    
    if i == 0:
        df_all = df
    else:
        df_all = pd.concat([df_all, df], ignore_index=True)

0
1
2
3
4
5
6
7
8
9


In [17]:
df_all

Unnamed: 0,customer_id,prediction
0,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,0706016002 0762846031 0781613006 0924243001 07...
1,ffffbbf78b6eaac697a8a5dfbfd2bfa8113ee5b403e474...,0924243001 0918522001 0822311009 0611415001 08...
2,fffed13f041cc91f74dc6ec4e3267d7bb71bc833d53b7d...,0924243001 0918522001 0909370001 0915529005 05...
3,fffec3dbcc87c78300f37f62cfca4274f1ea6ef59dba10...,0768912001 0372860001 0579541001 0372860002 07...
4,fffeb8c2b8b5541865aa6ecd68d65dbe3f17dc93e2776d...,0448509014 0706016001 0706016003 0706016002 05...
...,...,...
1371975,00048f2f68760664d2d0fa1e7fbfe083f05287f342484c...,0685814063 0685814001 0685813001 0685813003 07...
1371976,000362878a3904e1fe4927bbfcdb10c64a9d85b12a593a...,0685814003 0685813001 0685814001 0685814063 06...
1371977,000172a9c322560c849754ffbdfdb2180d408aa7176b94...,0685814001 0764488006 0685814003 0564309022 05...
1371978,0000d6c053fc8f9389d4565051f12402d5774aa4a9d2e5...,0448509014 0673677002 0751471001 0579541001 05...


In [18]:
df_all.to_csv(path_to_dest,
             index=False)

In [19]:
df = pd.read_csv(path_to_dest)
df

Unnamed: 0,customer_id,prediction
0,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,0706016002 0762846031 0781613006 0924243001 07...
1,ffffbbf78b6eaac697a8a5dfbfd2bfa8113ee5b403e474...,0924243001 0918522001 0822311009 0611415001 08...
2,fffed13f041cc91f74dc6ec4e3267d7bb71bc833d53b7d...,0924243001 0918522001 0909370001 0915529005 05...
3,fffec3dbcc87c78300f37f62cfca4274f1ea6ef59dba10...,0768912001 0372860001 0579541001 0372860002 07...
4,fffeb8c2b8b5541865aa6ecd68d65dbe3f17dc93e2776d...,0448509014 0706016001 0706016003 0706016002 05...
...,...,...
1371975,00048f2f68760664d2d0fa1e7fbfe083f05287f342484c...,0685814063 0685814001 0685813001 0685813003 07...
1371976,000362878a3904e1fe4927bbfcdb10c64a9d85b12a593a...,0685814003 0685813001 0685814001 0685814063 06...
1371977,000172a9c322560c849754ffbdfdb2180d408aa7176b94...,0685814001 0764488006 0685814003 0564309022 05...
1371978,0000d6c053fc8f9389d4565051f12402d5774aa4a9d2e5...,0448509014 0673677002 0751471001 0579541001 05...


In [20]:
df2 = pd.read_csv('/home/juravlik/PycharmProjects/kaggle_hnm_recsys/data/ranker_predicts/test_1/final_predictions.csv')
df2

Unnamed: 0,customer_id,prediction
0,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,0706016002 0762846031 0762846027 0884081001 06...
1,ffffbbf78b6eaac697a8a5dfbfd2bfa8113ee5b403e474...,0557599022 0611415001 0924243001 0918522001 07...
2,fffed13f041cc91f74dc6ec4e3267d7bb71bc833d53b7d...,0909370001 0923758001 0918522001 0924243002 09...
3,fffec3dbcc87c78300f37f62cfca4274f1ea6ef59dba10...,0782734005 0579541001 0866731001 0372860002 07...
4,fffeb8c2b8b5541865aa6ecd68d65dbe3f17dc93e2776d...,0715624001 0448509014 0751471001 0573085028 07...
...,...,...
1371975,00048f2f68760664d2d0fa1e7fbfe083f05287f342484c...,0685814001 0685814063 0685813003 0783346001 06...
1371976,000362878a3904e1fe4927bbfcdb10c64a9d85b12a593a...,0685814001 0685813003 0685814003 0715624001 06...
1371977,000172a9c322560c849754ffbdfdb2180d408aa7176b94...,0685814001 0764488006 0685814003 0564309022 06...
1371978,0000d6c053fc8f9389d4565051f12402d5774aa4a9d2e5...,0751471001 0762846006 0715624001 0706016001 09...
