In [1]:
!pip install recbole

Collecting recbole
  Downloading recbole-1.0.1-py3-none-any.whl (2.0 MB)
     |████████████████████████████████| 2.0 MB 218 kB/s            
[?25hCollecting scipy==1.6.0
  Downloading scipy-1.6.0-cp37-cp37m-manylinux1_x86_64.whl (27.4 MB)
     |████████████████████████████████| 27.4 MB 16.2 MB/s            
Collecting colorlog==4.7.2
  Downloading colorlog-4.7.2-py2.py3-none-any.whl (10 kB)
Installing collected packages: scipy, colorlog, recbole
  Attempting uninstall: scipy
    Found existing installation: scipy 1.7.3
    Uninstalling scipy-1.7.3:
      Successfully uninstalled scipy-1.7.3
  Attempting uninstall: colorlog
    Found existing installation: colorlog 6.6.0
    Uninstalling colorlog-6.6.0:
      Successfully uninstalled colorlog-6.6.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
pymc3 3.11.5 requires scipy<1.8.0,>=1.7.3, 

## Importing Libraries

In [2]:
import os
import gc
import numpy as np
import pandas as pd

In [3]:
# Locations of the input CSV files, relative to the notebook's working directory.
# Transactions, customers and articles come from the competition dataset folder.
data_path = r'../input/h-and-m-personalized-fashion-recommendations/transactions_train.csv'
customer_data_path = r'../input/h-and-m-personalized-fashion-recommendations/customers.csv'
article_data_path = r'../input/h-and-m-personalized-fashion-recommendations/articles.csv'
# Existing submission file; its predictions are later replaced for the customers
# that the RecBole model produces predictions for.
submission_data_path = r'../input/h-m-ensembling/submission.csv'

In [4]:
# Directory that receives the RecBole atomic files (.inter / .item) written below.
recbole_data_path = r'/kaggle/working/recbole_data'
# exist_ok=True keeps this cell re-runnable; a bare `mkdir` errors once the
# directory already exists.
os.makedirs(recbole_data_path, exist_ok=True)

In [5]:
# Data Extraction
def create_data(datapath, data_type=None):
    """Read one of the input CSV files into a DataFrame.

    Parameters
    ----------
    datapath : str or file-like
        Anything accepted by ``pd.read_csv``.
    data_type : {None, 'transaction', 'article'}, optional
        Selects the parsing rules:

        * ``None`` - plain ``pd.read_csv`` with default type inference.
        * ``'transaction'`` - ``article_id`` is read as ``str`` (keeps leading
          zeros) and ``t_dat`` is parsed as a date column.
        * ``'article'`` - ``article_id`` is read as ``str``.

    Returns
    -------
    pandas.DataFrame
        The parsed file.

    Raises
    ------
    ValueError
        If ``data_type`` is not one of the supported values. (Previously this
        fell through every branch and failed with ``UnboundLocalError``.)
    """
    if data_type is None:
        return pd.read_csv(datapath)
    if data_type == 'transaction':
        return pd.read_csv(datapath, dtype={'article_id': str}, parse_dates=['t_dat'])
    if data_type == 'article':
        return pd.read_csv(datapath, dtype={'article_id': str})
    raise ValueError(
        "Unsupported data_type {!r}; expected None, 'transaction' or 'article'".format(data_type)
    )

## Reading Transaction data

In [6]:
%%time

# Read the complete sales history (3 years, 2018 to 2020).
# article_id is read as a string column: parsed as a number it would
# lose the leading zeros of the ids.
transactions_data = create_data(data_path, data_type='transaction')
print(transactions_data.shape)

# Distinct-value count per column of interest, printed with its label
unique_count_labels = [
    ('t_dat', "-total No of unique transactions dates in data sheet"),
    ('customer_id', "-total No of unique customers ids in data sheet"),
    ('article_id', "-total No of unique article ids courses names in data sheet"),
    ('sales_channel_id', "-total No of unique sales channels in data sheet"),
]
for column_name, label in unique_count_labels:
    distinct_values = transactions_data[column_name].drop_duplicates()
    print(str(len(distinct_values)) + label)

transactions_data.head()

(31788324, 5)
734-total No of unique transactions dates in data sheet
1362281-total No of unique customers ids in data sheet
104547-total No of unique article ids courses names in data sheet
2-total No of unique sales channels in data sheet
CPU times: user 55.3 s, sys: 4.24 s, total: 59.5 s
Wall time: 1min 20s


Unnamed: 0,t_dat,customer_id,article_id,price,sales_channel_id
0,2018-09-20,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,663713001,0.050831,2
1,2018-09-20,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,541518023,0.030492,2
2,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,505221004,0.015237,2
3,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687003,0.016932,2
4,2018-09-20,00007d2de826758b65a93dd24ce629ed66842531df6699...,685687004,0.016932,2


## Postprocessing Transaction data  

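1. A `timestamp` column (Unix time in seconds) is created from the transaction date column `t_dat`  
2. Only transactions with a timestamp later than 1585620000 (i.e. from 2020-04-01 onwards) are kept  
3. The remaining columns are renamed to RecBole's `name:type` header format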

In [7]:
# Unix time in whole seconds, taken from the integer view of the parsed dates
# (assumes t_dat is stored with nanosecond resolution — TODO confirm on newer pandas).
epoch_seconds = transactions_data['t_dat'].values.astype(np.int64) // 10 ** 9
transactions_data['timestamp'] = epoch_seconds

# Rows strictly after the cut-off 1585620000 (2020-03-31 02:00:00 UTC) are kept.
is_recent = transactions_data['timestamp'] > 1585620000

# Source column -> RecBole `name:type` header.
recbole_inter_columns = {
    'customer_id': 'user_id:token',
    'article_id': 'item_id:token',
    'timestamp': 'timestamp:float',
}
transactions_data = (
    transactions_data
    .loc[is_recent, list(recbole_inter_columns)]
    .rename(columns=recbole_inter_columns)
)
transactions_data

Unnamed: 0,user_id:token,item_id:token,timestamp:float
23934157,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0727808001,1585699200
23934158,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0727808007,1585699200
23934159,000563485cbb7850b0a93c6606f89c5b961c6647d1bd48...,0567532015,1585699200
23934160,000563485cbb7850b0a93c6606f89c5b961c6647d1bd48...,0706104009,1585699200
23934161,00083cda041544b2fbb0e0d2905ad17da7cf1007526fb4...,0783504004,1585699200
...,...,...,...
31788319,fff2282977442e327b45d8c89afde25617d00124d0f999...,0929511001,1600732800
31788320,fff2282977442e327b45d8c89afde25617d00124d0f999...,0891322004,1600732800
31788321,fff380805474b287b05cb2a7507b9a013482f7dd0bce0e...,0918325001,1600732800
31788322,fff4d3a8b1f3b60af93e78c30a7cb4cf75edaf2590d3e5...,0833459002,1600732800


## Saving transaction data to kaggle based recbole output directory

In [8]:
# Write the interactions as a tab-separated RecBole `.inter` file.
inter_file = os.path.join(recbole_data_path, 'recbole_data.inter')
transactions_data.to_csv(inter_file, sep='\t', index=False)

# The frame is no longer needed: drop the name and let the collector reclaim it.
del transactions_data
gc.collect()

18

## Reading Article data

In [9]:
%%time

# Read the article (item) metadata; article_id is kept as a string.
article_data = create_data(article_data_path, data_type='article')
print(article_data.shape)

n_unique_articles = len(article_data['article_id'].drop_duplicates())
print(str(n_unique_articles) + "-total No of unique article ids in article data sheet")

article_data.head()

(105542, 25)
105542-total No of unique article ids in article data sheet
CPU times: user 716 ms, sys: 43.6 ms, total: 760 ms
Wall time: 1.07 s


Unnamed: 0,article_id,product_code,prod_name,product_type_no,product_type_name,product_group_name,graphical_appearance_no,graphical_appearance_name,colour_group_code,colour_group_name,...,department_name,index_code,index_name,index_group_no,index_group_name,section_no,section_name,garment_group_no,garment_group_name,detail_desc
0,108775015,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,9,Black,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
1,108775044,108775,Strap top,253,Vest top,Garment Upper body,1010016,Solid,10,White,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
2,108775051,108775,Strap top (1),253,Vest top,Garment Upper body,1010017,Stripe,11,Off White,...,Jersey Basic,A,Ladieswear,1,Ladieswear,16,Womens Everyday Basics,1002,Jersey Basic,Jersey top with narrow shoulder straps.
3,110065001,110065,OP T-shirt (Idro),306,Bra,Underwear,1010016,Solid,9,Black,...,Clean Lingerie,B,Lingeries/Tights,1,Ladieswear,61,Womens Lingerie,1017,"Under-, Nightwear","Microfibre T-shirt bra with underwired, moulde..."
4,110065002,110065,OP T-shirt (Idro),306,Bra,Underwear,1010016,Solid,10,White,...,Clean Lingerie,B,Lingeries/Tights,1,Ladieswear,61,Womens Lingerie,1017,"Under-, Nightwear","Microfibre T-shirt bra with underwired, moulde..."


## Postprocessing Article data  

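1. The descriptive name/text columns are dropped; their code/number counterparts are kept, which avoids duplicated (collinear) features
2. The remaining columns are renamed to RecBole's `name:type` header format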

In [10]:
# Human-readable name / description columns that are not written to the item file.
dropped_columns = [
    'product_type_name', 'graphical_appearance_name', 'colour_group_name',
    'perceived_colour_value_name', 'perceived_colour_master_name', 'index_name',
    'index_group_name', 'section_name', 'garment_group_name',
    'prod_name', 'department_name', 'detail_desc',
]

# Source column -> RecBole `name:type` header.
# NOTE(review): product_type_no is tagged `float` although it looks like a
# categorical code — confirm that this is intended.
recbole_item_columns = {
    'article_id': 'item_id:token',
    'product_code': 'product_code:token',
    'product_type_no': 'product_type_no:float',
    'product_group_name': 'product_group_name:token_seq',
    'graphical_appearance_no': 'graphical_appearance_no:token',
    'colour_group_code': 'colour_group_code:token',
    'perceived_colour_value_id': 'perceived_colour_value_id:token',
    'perceived_colour_master_id': 'perceived_colour_master_id:token',
    'department_no': 'department_no:token',
    'index_code': 'index_code:token',
    'index_group_no': 'index_group_no:token',
    'section_no': 'section_no:token',
    'garment_group_no': 'garment_group_no:token',
}

article_data = (
    article_data
    .drop(columns=dropped_columns)
    .rename(columns=recbole_item_columns)
)
article_data

Unnamed: 0,item_id:token,product_code:token,product_type_no:float,product_group_name:token_seq,graphical_appearance_no:token,colour_group_code:token,perceived_colour_value_id:token,perceived_colour_master_id:token,department_no:token,index_code:token,index_group_no:token,section_no:token,garment_group_no:token
0,0108775015,108775,253,Garment Upper body,1010016,9,4,5,1676,A,1,16,1002
1,0108775044,108775,253,Garment Upper body,1010016,10,3,9,1676,A,1,16,1002
2,0108775051,108775,253,Garment Upper body,1010017,11,1,9,1676,A,1,16,1002
3,0110065001,110065,306,Underwear,1010016,9,4,5,1339,B,1,61,1017
4,0110065002,110065,306,Underwear,1010016,10,3,9,1339,B,1,61,1017
...,...,...,...,...,...,...,...,...,...,...,...,...,...
105537,0953450001,953450,302,Socks & Tights,1010014,9,4,5,7188,F,3,26,1021
105538,0953763001,953763,253,Garment Upper body,1010016,9,4,5,1919,A,1,2,1005
105539,0956217002,956217,265,Garment Full body,1010016,9,4,5,1641,A,1,18,1005
105540,0957375001,957375,72,Accessories,1010016,9,4,5,3946,D,2,52,1019


## Saving article data to kaggle based recbole output directory

In [11]:
# Write the item features as a tab-separated RecBole `.item` file.
item_file = os.path.join(recbole_data_path, 'recbole_data.item')
article_data.to_csv(item_file, sep='\t', index=False)

# The frame is no longer needed: drop the name and let the collector reclaim it.
del article_data
gc.collect()

21

## Setting up Recbole based dataset and configurations

In [12]:
import logging
from logging import getLogger
from recbole.config import Config
from recbole.data import create_dataset, data_preparation
from recbole.model.sequential_recommender import GRU4RecF
from recbole.trainer import Trainer
from recbole.utils import init_seed, init_logger

In [13]:
# Item side-information columns, listed once and reused both for loading the
# `.item` file and as the features selected for the model.
item_feature_fields = [
    'product_code', 'product_type_no', 'product_group_name', 'graphical_appearance_no',
    'colour_group_code', 'perceived_colour_value_id', 'perceived_colour_master_id',
    'department_no', 'index_code', 'index_group_no', 'section_no', 'garment_group_no',
]

# Overrides handed to recbole.config.Config via config_dict.
parameter_dict = {
    # Parent folder; the logged config shows the dataset name appended to it.
    'data_path': '/kaggle/working',
    'USER_ID_FIELD': 'user_id',
    'ITEM_ID_FIELD': 'item_id',
    'TIME_FIELD': 'timestamp',
    # Interaction-count intervals for users and items (lower bound 40).
    'user_inter_num_interval': "[40,inf)",
    'item_inter_num_interval': "[40,inf)",
    # Columns to read from each atomic file.
    'load_col': {
        'inter': ['user_id', 'item_id', 'timestamp'],
        'item': ['item_id'] + item_feature_fields,
    },
    'selected_features': list(item_feature_fields),
    'neg_sampling': None,
    'epochs': 100,
    'eval_args': {
        # Split ratios 10:0:0 — NOTE(review): this appears to leave nothing
        # for validation/test; confirm that is intended.
        'split': {'RS': [10, 0, 0]},
        'group_by': 'user',
        'order': 'TO',
        'mode': 'full',
    },
    'topk': [12],
}

In [14]:
# Build the RecBole configuration for GRU4RecF on the 'recbole_data' dataset,
# applying the overrides collected in parameter_dict.
config = Config(model='GRU4RecF', dataset='recbole_data', config_dict=parameter_dict)

# Seed the random number generators from the config values
init_seed(config['seed'], config['reproducibility'])

# Let RecBole set up its logging, then fetch the root logger
init_logger(config)
logger = getLogger()
# Attach an extra stream handler at INFO level to the root logger
# (presumably so the log records are shown in the notebook output — verify)
c_handler = logging.StreamHandler()
c_handler.setLevel(logging.INFO)
logger.addHandler(c_handler)

# Log the full resolved configuration
logger.info(config)


General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /kaggle/working/recbole_data
checkpoint_dir = saved
show_progress = True
save_dataset = False
dataset_save_path = None
save_dataloaders = False
dataloaders_save_path = None
log_wandb = False

Training Hyper Parameters:
epochs = 100
train_batch_size = 2048
learner = adam
learning_rate = 0.001
neg_sampling = None
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [10, 0, 0]}, 'group_by': 'user', 'order': 'TO', 'mode': 'full'}
repeatable = True
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [12]
valid_metric = MRR@10
valid_metric_bigger = True
eval_batch_size = 4096
metric_decimal_place = 4

Dataset Hyper Parameters:
field_separator = 	
seq_separator =  
USER_ID_FIELD = user_id
ITEM_ID_FIELD = item_id
RATING_FIELD = rating
TIME_FIELD = timestamp
seq_len = Non

In [15]:
# Build the RecBole dataset described by `config` and log its summary.
dataset = create_dataset(config)
logger.info(dataset)

recbole_data
The number of users: 15459
Average actions of users: 59.21956268598784
The number of items: 7330
Average actions of items: 124.9032610178742
The number of inters: 915416
The sparsity of the dataset: 99.19214553975321%
Remain Fields: ['user_id', 'item_id', 'timestamp', 'product_code', 'product_type_no', 'product_group_name', 'graphical_appearance_no', 'colour_group_code', 'perceived_colour_value_id', 'perceived_colour_master_id', 'department_no', 'index_code', 'index_group_no', 'section_no', 'garment_group_no']


In [16]:
# Split the dataset into train / valid / test data objects as configured in `config`
train_data, valid_data, test_data = data_preparation(config, dataset)

[Training]: train_batch_size = [2048] negative sampling: [None]
[Evaluation]: eval_batch_size = [4096] eval_args: [{'split': {'RS': [10, 0, 0]}, 'group_by': 'user', 'order': 'TO', 'mode': 'full'}]


In [17]:
# Instantiate GRU4RecF on the training dataset and move it to the configured device
model = GRU4RecF(config, train_data.dataset).to(config['device'])
logger.info(model)

# Wrap the model in a RecBole Trainer
trainer = Trainer(config, model)

# Train on the training data only; no validation data is passed to fit().
# NOTE(review): without validation data the returned "best valid" values are
# presumably placeholders — confirm before relying on them.
best_valid_score, best_valid_result = trainer.fit(train_data)

GRU4RecF(
  (item_embedding): Embedding(7330, 64, padding_idx=0)
  (feature_embed_layer): FeatureSeqEmbLayer(
    (token_embedding_table): ModuleDict(
      (item): FMEmbedding(
        (embedding): Embedding(3935, 64)
      )
    )
    (float_embedding_table): ModuleDict(
      (item): Embedding(1, 64)
    )
    (token_seq_embedding_table): ModuleDict(
      (item): ModuleList(
        (0): Embedding(16, 64)
      )
    )
  )
  (item_gru_layers): GRU(64, 128, bias=False, batch_first=True)
  (feature_gru_layers): GRU(768, 128, bias=False, batch_first=True)
  (dense_layer): Linear(in_features=256, out_features=64, bias=True)
  (dropout): Dropout(p=0.3, inplace=False)
  (loss_fct): CrossEntropyLoss()
)
Trainable parameters: 1156288
epoch 0 training [time: 45.54s, train loss: 3642.3637]
Saving current: saved/GRU4RecF-Apr-29-2022_07-24-58.pth
epoch 1 training [time: 43.23s, train loss: 3390.6134]
Saving current: saved/GRU4RecF-Apr-29-2022_07-24-58.pth
epoch 2 training [time: 43.00s, train 

## Generate predictions with the trained recommender

In [18]:
from recbole.utils.case_study import full_sort_topk
# External (original) ids for every internal user index. The first element of
# the array is RecBole's default 'PAD' token, so it is sliced off with [1:].
external_user_ids = dataset.id2token(
    dataset.uid_field, list(range(dataset.user_num)))[1:]

In [19]:
import torch
from recbole.data.interaction import Interaction

def add_last_item(old_interaction, last_item_id, max_len=50):
    """Append ``last_item_id`` to the last item sequence of ``old_interaction``.

    Parameters
    ----------
    old_interaction : mapping
        Must provide ``'item_id_list'`` (tensor whose last row is the sequence
        to extend) and ``'item_length'`` (tensor whose last entry is the number
        of valid items in that sequence).
    last_item_id : int
        Item id to append.
    max_len : int, optional
        Maximum sequence length. When the sequence already holds ``max_len``
        items, the oldest item is dropped to make room.

    Returns
    -------
    torch.Tensor
        The extended sequence with shape ``(1, sequence_width)``.

    Notes
    -----
    The input tensors are left untouched. The previous implementation wrote
    into a view of ``old_interaction['item_id_list']`` whenever the sequence
    was not yet full, silently modifying the caller's data.
    """
    # Clone so the in-place write below cannot leak into the caller's tensor.
    new_seq_items = old_interaction['item_id_list'][-1].clone()
    seq_len = old_interaction['item_length'][-1].item()
    if seq_len < max_len:
        # Free slot available: write right after the last valid item.
        new_seq_items[seq_len] = last_item_id
    else:
        # Sequence is full: shift left by one (dropping the oldest item) and
        # overwrite the wrapped-around last position with the new item.
        new_seq_items = torch.roll(new_seq_items, -1)
        new_seq_items[-1] = last_item_id
    return new_seq_items.view(1, len(new_seq_items))

def predict_for_all_item(external_user_id, dataset, model):
    """Score every item for one user and return the 12 best.

    The user's last interaction row is taken from ``dataset``, its target item
    is appended to the item history, and the resulting sequence is scored with
    ``model.full_sort_predict``.

    Parameters
    ----------
    external_user_id : str
        Original (external) user id, as stored in the dataset.
    dataset : RecBole dataset
        Dataset that holds the user's interactions; also provides the number
        of items used to shape the score matrix.
    model : sequential recommender
        Trained model exposing ``max_seq_length`` and ``full_sort_predict``.

    Returns
    -------
    The result of ``torch.topk(scores, 12)``: the top-12 scores and the
    matching internal item indices, one row per scored sequence.

    Notes
    -----
    The function only depends on its arguments. Earlier it read the notebook
    globals ``config`` (for the device) and ``test_data`` (for the item count),
    so it broke as soon as those names were deleted or rebound.
    """
    model.eval()
    with torch.no_grad():
        # Boolean mask over all interaction rows that belong to this user.
        uid_series = dataset.token2id(dataset.uid_field, [external_user_id])
        index = np.isin(dataset.inter_feat[dataset.uid_field].numpy(), uid_series)
        input_interaction = dataset[index]

        last_length = input_interaction['item_length'][-1].item()
        test = {
            'item_id_list': add_last_item(input_interaction,
                                          input_interaction['item_id'][-1].item(), model.max_seq_length),
            # One more item than before, capped at the model's maximum length.
            'item_length': torch.tensor([min(last_length + 1, model.max_seq_length)])
        }
        new_inter = Interaction(test)
        # Run on whatever device the model's parameters live on.
        new_inter = new_inter.to(next(model.parameters()).device)
        new_scores = model.full_sort_predict(new_inter)
        new_scores = new_scores.view(-1, dataset.item_num)
        new_scores[:, 0] = -np.inf  # set scores of [pad] to -inf
    return torch.topk(new_scores, 12)

In [20]:
# For every user: score all items, take the last row of top-k item indices
# and translate those indices back to external article ids.
topk_items = []
for user_token in external_user_ids:
    topk_indices = predict_for_all_item(user_token, dataset, model)[1]
    article_tokens = dataset.id2token(dataset.iid_field, topk_indices[-1].cpu())
    topk_items.append(article_tokens.tolist())
print(len(topk_items))

15458


In [21]:
# One space-separated prediction string per user, in the order of external_user_ids
external_item_str = list(map(' '.join, topk_items))
result = pd.DataFrame(external_user_ids, columns=['customer_id']).assign(
    prediction=external_item_str)
result.head()

Unnamed: 0,customer_id,prediction
0,00d7ebd46f6a6d53630d41386b6ef6a505cdc4c80011ff...,0918522001 0898692006 0736049001 0923758001 07...
1,0109ad0b5a76924a1b58be677409bb601cc8bead9a87b8...,0901955001 0861477001 0913030001 0823168002 08...
2,013f00f9e218549246a3aa82b3f3a0c22a693bc25fa735...,0839402002 0857621001 0770336001 0839402001 08...
3,01bada2a453b09c70ea57bdda5a9ef0fb04062718d3a3d...,0914441001 0724906006 0867966009 0799365028 07...
4,01dd96059a11759518f10969d0a528f03c8501dc4c628b...,0891663002 0448509014 0817353008 0829145007 08...


In [22]:
# Release the objects used for training/inference before the submission frame is processed.
del external_item_str
del topk_items
del external_user_ids
del train_data
del valid_data
del test_data
del model
# Delete the Trainer *instance* (it was constructed from `model`). The earlier
# `del Trainer` removed the imported class instead, which left the instance
# alive and made the training cell fail on re-run.
del trainer
del logger
del dataset
gc.collect()

42

## Reading Submission data

In [23]:
# Load the existing submission that the RecBole predictions are merged into.
submission_data = pd.read_csv(submission_data_path)
submission_data.shape

(1371980, 2)

## Postprocessing submission data  

1. Merge the RecBole predictions into the existing submission on `customer_id`  
2. Fill the missing values with -1 (customers that were not part of the RecBole training have no RecBole prediction)  
3. Build the final `prediction` column: the RecBole prediction where one exists, otherwise the existing prediction  
4. Drop the redundant intermediate columns

In [24]:
# Join on customer_id. Both frames have a `prediction` column, so the merged
# frame carries prediction_x (existing submission) and prediction_y (RecBole).
# NOTE(review): how='outer' would also add rows for any RecBole customer that
# is missing from the submission file — confirm whether how='left' is wanted.
submission_data = pd.merge(submission_data, result, on='customer_id', how='outer')
submission_data

Unnamed: 0,customer_id,prediction_x,prediction_y
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,0568601043 0568601006 0751471001 0448509014 09...,
1,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,0826211002 0924243001 0739590027 0800436010 09...,
2,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0794321007 0852643001 0852643003 0858883002 09...,
3,00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2...,0448509014 0751471001 0924243001 0924243002 05...,
4,00006413d8573cd20ed7128e53b7b13819fe5cfc2d801f...,0730683050 0791587015 0924243001 0896152002 09...,
...,...,...,...
1371975,ffffbbf78b6eaac697a8a5dfbfd2bfa8113ee5b403e474...,0804992033 0713997002 0720125039 0740922009 05...,
1371976,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,0818754002 0762846031 0924243001 0759871030 06...,
1371977,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,0884081001 0762846027 0689365050 0794819001 06...,
1371978,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,0882810001 0714790020 0924243001 0448509014 09...,


In [25]:
# Customers without a RecBole prediction have NaN in prediction_y after the
# merge; mark every missing value with the sentinel -1.
submission_data = submission_data.fillna(-1)

# Vectorised selection (replaces a row-wise apply over the whole frame):
# keep prediction_y where it holds a real prediction, otherwise use prediction_x.
has_recbole_prediction = submission_data['prediction_y'] != -1
submission_data['prediction'] = submission_data['prediction_y'].where(
    has_recbole_prediction, submission_data['prediction_x'])
submission_data

Unnamed: 0,customer_id,prediction_x,prediction_y,prediction
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,0568601043 0568601006 0751471001 0448509014 09...,-1,0568601043 0568601006 0751471001 0448509014 09...
1,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,0826211002 0924243001 0739590027 0800436010 09...,-1,0826211002 0924243001 0739590027 0800436010 09...
2,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0794321007 0852643001 0852643003 0858883002 09...,-1,0794321007 0852643001 0852643003 0858883002 09...
3,00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2...,0448509014 0751471001 0924243001 0924243002 05...,-1,0448509014 0751471001 0924243001 0924243002 05...
4,00006413d8573cd20ed7128e53b7b13819fe5cfc2d801f...,0730683050 0791587015 0924243001 0896152002 09...,-1,0730683050 0791587015 0924243001 0896152002 09...
...,...,...,...,...
1371975,ffffbbf78b6eaac697a8a5dfbfd2bfa8113ee5b403e474...,0804992033 0713997002 0720125039 0740922009 05...,-1,0804992033 0713997002 0720125039 0740922009 05...
1371976,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,0818754002 0762846031 0924243001 0759871030 06...,-1,0818754002 0762846031 0924243001 0759871030 06...
1371977,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,0884081001 0762846027 0689365050 0794819001 06...,-1,0884081001 0762846027 0689365050 0794819001 06...
1371978,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,0882810001 0714790020 0924243001 0448509014 09...,-1,0882810001 0714790020 0924243001 0448509014 09...


In [26]:
# Only customer_id and the final prediction are kept; the two merge inputs are dropped.
submission_data = submission_data.drop(columns=['prediction_y', 'prediction_x'])
submission_data

Unnamed: 0,customer_id,prediction
0,00000dbacae5abe5e23885899a1fa44253a17956c6d1c3...,0568601043 0568601006 0751471001 0448509014 09...
1,0000423b00ade91418cceaf3b26c6af3dd342b51fd051e...,0826211002 0924243001 0739590027 0800436010 09...
2,000058a12d5b43e67d225668fa1f8d618c13dc232df0ca...,0794321007 0852643001 0852643003 0858883002 09...
3,00005ca1c9ed5f5146b52ac8639a40ca9d57aeff4d1bd2...,0448509014 0751471001 0924243001 0924243002 05...
4,00006413d8573cd20ed7128e53b7b13819fe5cfc2d801f...,0730683050 0791587015 0924243001 0896152002 09...
...,...,...
1371975,ffffbbf78b6eaac697a8a5dfbfd2bfa8113ee5b403e474...,0804992033 0713997002 0720125039 0740922009 05...
1371976,ffffcd5046a6143d29a04fb8c424ce494a76e5cdf4fab5...,0818754002 0762846031 0924243001 0759871030 06...
1371977,ffffcf35913a0bee60e8741cb2b4e78b8a98ee5ff2e6a1...,0884081001 0762846027 0689365050 0794819001 06...
1371978,ffffd7744cebcf3aca44ae7049d2a94b87074c3d4ffe38...,0882810001 0714790020 0924243001 0448509014 09...


## Writing final submission file to kaggle output disk

In [27]:
# Write the final submission to the current working directory, without the index column.
submission_data.to_csv('submission.csv', index=False)