# Fix all jupyter notebook problems

<sub> 0. Pray </sub> 
1. Connect to head machine via SSH
2. Open `/usr/bin/anaconda/lib/python2.7/site-packages/nbformat/_version.py` and change 5 to 4.
3. Fix anaconda installation via official fix script. 
```
curl https://gregorysfixes.blob.core.windows.net/public/fix-conda.sh | sudo sh
```
4. Install all necessary Python packages. At a minimum, install `kaggle`:
```
sudo /usr/bin/anaconda/bin/conda install -c conda-forge kaggle --yes
```
5. Open Ambari and restart jupyter service.
6. Open the Azure Jupyter notebook service and upload this notebook.
7. Check that the cells below execute correctly.

# Create Spark Context

In [1]:
import os
os.environ["JAVA_HOME"] = "/usr/lib/jvm/java-8-openjdk-amd64"
os.environ["SPARK_HOME"] = "/opt/spark/"

In [2]:
import findspark

# Point findspark at the Spark installation before importing pyspark.
findspark.init('/opt/spark')

from pyspark import SparkContext
from pyspark.conf import SparkConf

# Spark UI port plus driver/executor memory settings for this session.
conf = SparkConf()
conf.set("spark.ui.port", 5050)
conf.set("spark.driver.memory", "32g")
conf.set("spark.executor.memory", "32g")

# "local[8]" runs Spark in local mode with 8 worker threads.
# If a context is already running in this kernel, stop it first (sc.stop()).
sc = SparkContext(master="local[8]", appName="my app", conf=conf)

In [3]:
from pyspark.sql import SparkSession
spark = SparkSession.builder.master("local[*]").getOrCreate()

In [4]:
sc = spark.sparkContext

In [5]:
sc

In [6]:
import pandas as pd
from pyspark.sql import SparkSession

ss = SparkSession(sc)

In [7]:
hadoop = sc._jvm.org.apache.hadoop
fs = hadoop.fs.FileSystem
conf = hadoop.conf.Configuration() 
path = hadoop.fs.Path('/')
    
def hdfs_ls(path):
    """List the entries directly under ``path`` on the configured Hadoop filesystem.

    Relies on the notebook-level handles ``hadoop``, ``fs`` and ``conf`` that
    were obtained from the Spark JVM gateway.

    Returns:
        list[str]: the string form of each entry's path, in listing order.
    """
    statuses = fs.get(conf).listStatus(hadoop.fs.Path(path))
    return [str(status.getPath()) for status in statuses]

In [8]:
from pprint import pprint

pprint(hdfs_ls('/'))

['file:/etc',
 'file:/tmp',
 'file:/lib64',
 'file:/bin',
 'file:/boot',
 'file:/sys',
 'file:/vmlinuz.old',
 'file:/opt',
 'file:/mnt',
 'file:/.rnd',
 'file:/home',
 'file:/lib',
 'file:/srv',
 'file:/usr',
 'file:/datadrive',
 'file:/run',
 'file:/lost+found',
 'file:/snap',
 'file:/var',
 'file:/proc',
 'file:/root',
 'file:/initrd.img',
 'file:/vmlinuz',
 'file:/media',
 'file:/sbin',
 'file:/dev',
 'file:/initrd.img.old']


# Download task data

Download data directly from kaggle. Read this to understand how: https://github.com/Kaggle/kaggle-api

In [9]:
#%%local
! cat ~/.kaggle/kaggle.json

{"username":"<redacted>","key":"<redacted>"}


In [10]:
# %%local
! kaggle competitions files outbrain-click-prediction

name                           size  creationDate         
----------------------------  -----  -------------------  
documents_topics.csv.zip      121MB  2018-06-22 05:33:10  
clicks_train.csv.zip          390MB  2018-06-22 05:33:10  
documents_meta.csv.zip         16MB  2018-06-22 05:33:10  
clicks_test.csv.zip           135MB  2018-06-22 05:33:10  
sample_submission.csv.zip     100MB  2018-06-22 05:33:10  
promoted_content.csv.zip        3MB  2018-06-22 05:33:10  
documents_categories.csv.zip   32MB  2018-06-22 05:33:10  
events.csv.zip                478MB  2018-06-22 05:33:10  
documents_entities.csv.zip    126MB  2018-06-22 05:33:10  
page_views.csv.zip             35GB  2018-06-22 05:33:10  
page_views_sample.csv.zip     149MB  2018-06-22 05:33:10  


In [11]:
#%%local
! kaggle competitions download -c outbrain-click-prediction

404 - Not Found


In [12]:
! for f in $(kaggle competitions files outbrain-click-prediction | cut -d ' ' -f 1 | tail -n+3); do kaggle competitions download outbrain-click-prediction -f $f -p ./data/; done

documents_entities.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
events.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
promoted_content.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
sample_submission.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
page_views_sample.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
clicks_train.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
documents_topics.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
documents_categories.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
documents_meta.csv.zip: Skipping, found more recently modified local copy (use --force to force download)
page_views.csv.zip: Skipping, found 

# Load data to HDFS

https://www.kaggle.com/c/outbrain-click-prediction/data

%%local
! hdfs dfs -rm -r /task1
! hdfs dfs -mkdir /task1

%%local
! for i in `ls *.zip`; do unzip -p $i | tqdm | hadoop fs -put - /task1/${i//\.zip/}; done

%%local
! hadoop fs -du -s -h /task1/*.csv

In [13]:
! for i in `ls data/*.zip`; do unzip -n -d ./data/ $i; done

Archive:  data/clicks_test.csv.zip
Archive:  data/clicks_train.csv.zip
Archive:  data/documents_categories.csv.zip
Archive:  data/documents_entities.csv.zip
Archive:  data/documents_meta.csv.zip
Archive:  data/documents_topics.csv.zip
Archive:  data/events.csv.zip
Archive:  data/page_views.csv.zip
Archive:  data/page_views_sample.csv.zip
Archive:  data/promoted_content.csv.zip
Archive:  data/sample_submission.csv.zip


# Read example

In [14]:
data_path = '/datadrive/LSML2020/data/'

In [15]:
pvdf = ss.read.csv(data_path + "page_views.csv", header=True)

In [16]:
pvdf.dtypes

[('uuid', 'string'),
 ('document_id', 'string'),
 ('timestamp', 'string'),
 ('platform', 'string'),
 ('geo_location', 'string'),
 ('traffic_source', 'string')]

In [17]:
pvdf.show(5)

+--------------+-----------+---------+--------+------------+--------------+
|          uuid|document_id|timestamp|platform|geo_location|traffic_source|
+--------------+-----------+---------+--------+------------+--------------+
|1fd5f051fba643|        120| 31905835|       1|          RS|             2|
|8557aa9004be3b|        120| 32053104|       1|       VN>44|             2|
|c351b277a358f0|        120| 54013023|       1|       KR>12|             1|
|8205775c5387f9|        120| 44196592|       1|       IN>16|             2|
|9cb0ccd8458371|        120| 65817371|       1|   US>CA>807|             2|
+--------------+-----------+---------+--------+------------+--------------+
only showing top 5 rows



# Convert all to Parquet

%%time

def convert_all_to_parquet():
    """Write a Parquet copy of every ``.csv`` entry found under ``data_path``.

    A CSV whose ``.parquet`` counterpart already appears in the directory
    listing is not converted again. A "done" line is printed for every CSV
    either way.
    """
    listing = hdfs_ls(data_path)
    for csv_path in listing:
        if not csv_path.endswith(".csv"):
            continue
        parquet_path = csv_path.replace(".csv", ".parquet")
        if parquet_path not in listing:
            # Read with the header row as column names, then persist as Parquet.
            ss.read.csv(csv_path, header=True).write.parquet(parquet_path)
        print(parquet_path, "done")

convert_all_to_parquet()

Remove csv, we have parquet now

%%local
! hdfs dfs -rm /data/*.csv

%%local
! hadoop fs -du -s -h /data/*

# Preview all files

In [18]:
%%time
def preview_all_files():
    """Show the first three rows of every CSV entry found under ``data_path``.

    Entries whose path contains ``.zip`` or does not contain ``.csv`` are
    skipped. Each preview is preceded by a banner line with the file path.
    """
    banner = "#" * 15
    for fn in hdfs_ls(data_path):
        if '.zip' in fn or '.csv' not in fn:
            continue
        frame = ss.read.csv(fn, header=True)
        print(banner + " {0} ".format(fn) + banner)
        frame.show(3)
        
preview_all_files()

############### file:/datadrive/LSML2020/data/documents_topics.csv ###############
+-----------+--------+------------------+
|document_id|topic_id|  confidence_level|
+-----------+--------+------------------+
|    1595802|     140|0.0731131601068925|
|    1595802|      16|0.0594164867373976|
|    1595802|     143|0.0454207537554526|
+-----------+--------+------------------+
only showing top 3 rows

############### file:/datadrive/LSML2020/data/promoted_content.csv ###############
+-----+-----------+-----------+-------------+
|ad_id|document_id|campaign_id|advertiser_id|
+-----+-----------+-----------+-------------+
|    1|       6614|          1|            7|
|    2|     471467|          2|            7|
|    3|       7692|          3|            7|
+-----+-----------+-----------+-------------+
only showing top 3 rows

############### file:/datadrive/LSML2020/data/documents_meta.csv ###############
+-----------+---------+------------+-------------------+
|document_id|source_id|publish

# Register all tables to be usable in SQL queries

In [19]:
%%time
def register_all_tables():
    """Register every ``.csv`` file under ``data_path`` as a Spark SQL temp view.

    The view name is the file's base name with ``.csv`` removed, so
    ``.../events.csv`` becomes the view ``events``. Re-running the function
    replaces existing views of the same name.
    """
    for fn in hdfs_ls(data_path):
        if not fn.endswith(".csv"):
            continue
        table_name = os.path.basename(fn).replace(".csv", "")

        df = ss.read.csv(fn, header=True)
        # createOrReplaceTempView is the non-deprecated replacement for
        # registerTempTable (deprecated since Spark 2.0); same behavior.
        df.createOrReplaceTempView(table_name)
        print(table_name, "done")
        
register_all_tables()

documents_topics done
promoted_content done
documents_meta done
clicks_test done
page_views_sample done
page_views done
test_table done
documents_categories done
events done
documents_entities done
sample_submission done
clicks_train done
CPU times: user 65.7 ms, sys: 7.34 ms, total: 73 ms
Wall time: 7.15 s


# 1. Baseline

In [20]:
ss.sql("""
SELECT *
FROM events
""").show(10)

+----------+--------------+-----------+---------+--------+------------+
|display_id|          uuid|document_id|timestamp|platform|geo_location|
+----------+--------------+-----------+---------+--------+------------+
|         1|cb8c55702adb93|     379743|       61|       3|   US>SC>519|
|         2|79a85fa78311b9|    1794259|       81|       2|   US>CA>807|
|         3|822932ce3d8757|    1179111|      182|       2|   US>MI>505|
|         4|85281d0a49f7ac|    1777797|      234|       2|   US>WV>564|
|         5|8d0daef4bf5b56|     252458|      338|       2|       SG>00|
|         6|7765b4faae4ad4|    1773517|      395|       3|   US>OH>510|
|         7|2cc3f6457d16da|    1149661|      602|       2|   US>MT>762|
|         8|166fc654d73c98|    1330329|      638|       2|   US>PA>566|
|         9|9dddccf70f6067|    1772126|      667|       1|   US>FL>528|
|        10|b09a0e92aa4d17|     157455|      693|       1|          US|
+----------+--------------+-----------+---------+--------+------

Simple model using the following features:
- **clicked**
- geo_location features (country, state, dma)
- day_of_week (from timestamp, use *date.isoweekday()*)
- ad_id
- ad_document_id
- campaign_id
- advertiser_id
- display_document_id
- platform

In [20]:
from pyspark.sql import SQLContext

sqlContext = SQLContext(sc)

In [54]:
def get_country(row):
    """Return the country part of a ``geo_location`` string.

    The country is the text before the first ``>`` separator.
    ``None`` maps to the empty string.
    """
    return '' if row is None else row.split('>')[0]

def get_state(row):
    """Return the state/region part of a ``geo_location`` string.

    ``geo_location`` is ``>``-separated with the state/region in second
    position (e.g. ``US>CA>807`` or ``VN>44``). The second part is returned
    whenever it exists, so two-part locations keep their region instead of
    being collapsed to the empty string.

    ``None`` and country-only values map to the empty string.
    """
    if row is None:
        return ''
    parts = row.split('>')
    return parts[1] if len(parts) >= 2 else ''

def get_dma(row):
    """Return the DMA part of a ``geo_location`` string.

    The DMA is the third ``>``-separated part and is only returned when the
    value has exactly three parts. Every other input, including ``None``,
    maps to the empty string.
    """
    if row is None:
        return ''
    pieces = row.split('>')
    if len(pieces) != 3:
        return ''
    return pieces[2]

sqlContext.udf.register("get_country", get_country)
sqlContext.udf.register("get_state", get_state)
sqlContext.udf.register("get_dma", get_dma)

<function __main__.get_dma(row)>

In [55]:
from datetime import datetime, timezone

# The dataset's timestamps are relative: adding this offset (milliseconds)
# yields milliseconds since the Unix epoch.
# NOTE(review): value taken from the competition's data description - confirm.
_TIMESTAMP_OFFSET_MS = 1465876799998


def _event_datetime(timestamp):
    """Convert a relative event timestamp (ms, str or int) to a UTC datetime.

    UTC is used explicitly so the derived features do not depend on the
    timezone of the machine (or Spark executor) running the UDF.
    """
    epoch_seconds = (int(timestamp) + _TIMESTAMP_OFFSET_MS) / 1000
    return datetime.fromtimestamp(epoch_seconds, tz=timezone.utc)


def get_weekday(timestamp):
    """Return the weekday (``'0'`` = Monday ... ``'6'`` = Sunday) as a string.

    ``None`` maps to the empty string.
    """
    if timestamp is None:
        return ''
    return str(_event_datetime(timestamp).weekday())


def get_day(timestamp):
    """Return the day of the month as a string; ``None`` maps to ``''``."""
    if timestamp is None:
        return ''
    return str(_event_datetime(timestamp).day)


def get_month(timestamp):
    """Return the month number (1-12) as a string; ``None`` maps to ``''``."""
    if timestamp is None:
        return ''
    return str(_event_datetime(timestamp).month)


def get_hour(timestamp):
    """Return the hour of the day (0-23, UTC) as a string; ``None`` maps to ``''``."""
    if timestamp is None:
        return ''
    return str(_event_datetime(timestamp).hour)

sqlContext.udf.register("get_weekday", get_weekday)
sqlContext.udf.register("get_day", get_day)
sqlContext.udf.register("get_month", get_month)
sqlContext.udf.register("get_hour", get_hour)

<function __main__.get_hour(timestamp)>

In [23]:
def get_train_table():
    """Build the training feature DataFrame with Spark SQL.

    Starts from the ``clicks_train`` temp view and LEFT JOINs:
      * geo/time features derived from ``events`` through the registered UDFs
        (only events with non-null ``geo_location`` and ``timestamp``),
      * ``events`` itself for the display document and platform,
      * ``promoted_content`` for the ad's document, campaign and advertiser.

    ``display_id`` is selected from ``clicks_train`` rather than from the
    filtered subquery, so it is never NULL for rows whose event was filtered
    out of ``geo_info``.
    """
    return ss.sql("""
SELECT clicks_train.clicked,
       clicks_train.display_id,
       geo_info.country AS country,
       geo_info.state AS state,
       geo_info.dma AS dma,
       geo_info.month AS month,
       geo_info.day AS day,
       geo_info.weekday AS weekday,
       geo_info.hour AS hour,
       
       clicks_train.ad_id,
       promoted_content.document_id as ad_document_id,
       promoted_content.campaign_id,
       promoted_content.advertiser_id,
       events.document_id as display_document_id,
       events.platform

FROM clicks_train

LEFT JOIN 
(
    SELECT display_id, 
        get_country(geo_location) AS country,
        get_state(geo_location) AS state,
        get_dma(geo_location) AS dma,
        get_day(timestamp) AS day,
        get_month(timestamp) AS month,
        get_weekday(timestamp) AS weekday,
        get_hour(timestamp) AS hour
    FROM events
    WHERE geo_location IS NOT NULL AND timestamp IS NOT NULL
) AS geo_info
ON clicks_train.display_id = geo_info.display_id

left join events
on clicks_train.display_id = events.display_id

left join promoted_content
on clicks_train.ad_id = promoted_content.ad_id
""")

In [24]:
def make_vw_train_test(row, target=True):
    """Format one feature row as a Vowpal Wabbit example line.

    Args:
        row: object exposing the feature attributes read below (``display_id``,
            ``country``, ..., ``platform``) and, when ``target`` is true,
            ``clicked``.
        target: when true the line starts with the label (``1`` if
            ``row.clicked`` equals 1, otherwise ``-1``); when false the label
            is left empty.

    Returns:
        str: ``"<label> |f <features...>"``. Categorical features are emitted
        as ``name_value`` tokens and the date parts as ``name:value``. Missing
        (falsy) date parts are written as ``0``.
    """
    label = ''
    if target:
        # No trailing comma here: a tuple would be rendered as "(-1,)".
        label = 1 if int(row.clicked) == 1 else -1
    template = (
        "{clicked} |f"
        " display_id_{display_id}"
        " country_{country}"
        " state_{state}"
        " dma_{dma}"
        " month:{month}"
        " day:{day}"
        " weekday:{weekday}"
        " hour:{hour}"
        " ad_id_{ad_id}"
        " ad_document_id_{ad_document_id}"
        " campaign_id_{campaign_id}"
        " advertiser_id_{advertiser_id}"
        " display_document_id_{display_document_id}"
        " platform_{platform}"
    )
    return template.format(
        clicked=label,
        display_id=row.display_id,
        country=row.country,
        state=row.state,
        dma=row.dma,
        month=row.month if row.month else 0,
        day=row.day if row.day else 0,
        weekday=row.weekday if row.weekday else 0,
        hour=row.hour if row.hour else 0,
        ad_id=row.ad_id,
        ad_document_id=row.ad_document_id,
        campaign_id=row.campaign_id,
        advertiser_id=row.advertiser_id,
        display_document_id=row.display_document_id,
        platform=row.platform
    )
def make_vw_train(row):
    """Format ``row`` as a labelled VW example (label taken from ``row.clicked``)."""
    include_label = True
    return make_vw_train_test(row, target=include_label)

def make_vw_test(row):
    """Format ``row`` as an unlabelled VW example for prediction."""
    include_label = False
    return make_vw_train_test(row, target=include_label)

In [25]:
%%time
dataset_name = "data/dataset-v2.data"
if os.path.exists(dataset_name):
    # The export is expensive; reuse the existing output directory.
    print('already exists')
else:
    train_table = get_train_table()
    # make_vw_train is the labelled formatter defined in this notebook;
    # the name `make_vw` used previously is not defined.
    train_table.rdd.map(make_vw_train).saveAsTextFile(dataset_name)

already exists
CPU times: user 213 µs, sys: 43 µs, total: 256 µs
Wall time: 211 µs


In [26]:
def merge_parted_file(parted_file, full_file):
    """Concatenate the ``part-*`` files of a Spark output directory into one file.

    Args:
        parted_file: directory containing the ``part-*`` files (a trailing
            ``/`` is accepted).
        full_file: path of the merged file to create. If it already exists
            nothing is done and ``'Already exists'`` is printed.

    Raises:
        FileNotFoundError: if ``parted_file`` contains no ``part-*`` files.
            Previously an empty ``full_file`` was created in that case and
            later runs reported it as already existing.

    The merge is done in Python instead of a shell ``cat`` so that paths with
    spaces or shell metacharacters are safe and failures are not ignored. The
    data is written to a temporary file and renamed at the end, so an
    interrupted merge never leaves a partial ``full_file`` behind.
    """
    import glob
    import shutil

    if os.path.exists(full_file):
        print('Already exists')
        return

    pattern = os.path.join(glob.escape(parted_file), 'part-*')
    # Sorted to reproduce the lexicographic order a shell glob would use.
    part_paths = sorted(glob.glob(pattern))
    if not part_paths:
        raise FileNotFoundError(
            "no part-* files found in {0!r}".format(parted_file))

    tmp_path = full_file + '.tmp'
    try:
        with open(tmp_path, 'wb') as merged:
            for part_path in part_paths:
                with open(part_path, 'rb') as part:
                    shutil.copyfileobj(part, merged)
        os.replace(tmp_path, full_file)
    finally:
        # Only present if the merge failed before the rename.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print("done")

In [27]:
%%time
merge_parted_file('data/dataset-v2.data', 'data/dataset-v2.txt')

Already exists
CPU times: user 120 µs, sys: 24 µs, total: 144 µs
Wall time: 144 µs


## Train VW

https://github.com/JohnLangford/vowpal_wabbit/wiki/Command-line-arguments

In [28]:
! head -n2 data/dataset-v2.txt

-1 |f display_id_15514599 country_US state_ dma_ month:6 day:26 weekday:6 hour:1 ad_id_100010 ad_document_id_1132761 campaign_id_13049 advertiser_id_2848 display_document_id_1914050 platform_1
-1 |f display_id_15434742 country_US state_FL dma_539 month:6 day:26 weekday:6 hour:0 ad_id_100010 ad_document_id_1132761 campaign_id_13049 advertiser_id_2848 display_document_id_339764 platform_1


In [30]:
%%time
! vw -d data/dataset-v2.txt -b 24 -c -k --ftrl --passes 1 -f model --holdout_off --loss_function logistic --random_seed 42 --progress 8000000 

final_regressor = model
Enabling FTRL based optimization
Algorithm used: Proximal-FTRL
ftrl_alpha = 0.005
ftrl_beta = 0.1
Num weight bits = 24
learning rate = 0.5
initial_t = 0
power_t = 0.5
creating cache_file = data/dataset-v2.txt.cache
Reading datafile = data/dataset-v2.txt
num sources = 1
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
0.454215 0.454215      8000000      8000000.0  -1.0000  -1.6147       15
0.449374 0.444533     16000000     16000000.0  -1.0000  -1.3661       15
0.452394 0.458433     24000000     24000000.0   1.0000  -0.5013       15
0.451078 0.447131     32000000     32000000.0  -1.0000  -1.5127       15
0.451019 0.450786     40000000     40000000.0  -1.0000  -1.6190       15
0.449667 0.442907     48000000     48000000.0  -1.0000  -1.2818       15
0.448160 0.439119     56000000     56000000.0  -1.0000  -1.8741       15
0.447093 0.439619     64000000     64000000.0   1

## Check VW test performance

In [56]:
def get_test_table():
    """Build the test feature DataFrame with Spark SQL.

    Mirrors ``get_train_table`` without the ``clicked`` label: starts from the
    ``clicks_test`` temp view and LEFT JOINs the geo/time features derived
    from ``events``, ``events`` itself, and ``promoted_content``.

    ``display_id`` is selected from ``clicks_test`` rather than from the
    LEFT-JOINed subquery, so every test row keeps its id even if no matching
    event exists. Downstream code pairs these ids with predictions by
    position, so a NULL id would break that pairing.
    """
    return ss.sql("""
SELECT clicks_test.display_id,
       geo_info.country AS country,
       geo_info.state AS state,
       geo_info.dma AS dma,
       geo_info.month AS month,
       geo_info.day AS day,
       geo_info.weekday AS weekday,
       geo_info.hour AS hour,
       
       clicks_test.ad_id,
       promoted_content.document_id as ad_document_id,
       promoted_content.campaign_id,
       promoted_content.advertiser_id,
       events.document_id as display_document_id,
       events.platform

FROM clicks_test

LEFT JOIN 
(
    SELECT display_id, 
        get_country(geo_location) AS country,
        get_state(geo_location) AS state,
        get_dma(geo_location) AS dma,
        get_day(timestamp) AS day,
        get_month(timestamp) AS month,
        get_weekday(timestamp) AS weekday,
        get_hour(timestamp) AS hour
    FROM events
) AS geo_info
ON clicks_test.display_id = geo_info.display_id

left join events
on clicks_test.display_id = events.display_id

left join promoted_content
on clicks_test.ad_id = promoted_content.ad_id
""")

In [58]:
%%time
dataset_test_name = "data/dataset-test-v2.data"
if os.path.exists(dataset_test_name):
    # Output directory is already there; skip the expensive export.
    print('already exists')
else:
    test_table = get_test_table()
    # Unlabelled VW lines, one per test row, written as a Spark text output.
    test_table.rdd.map(make_vw_test).saveAsTextFile(dataset_test_name)

CPU times: user 229 ms, sys: 89.5 ms, total: 319 ms
Wall time: 22min 58s


In [59]:
%%time
merge_parted_file(dataset_test_name, 'data/dataset-test-v2.txt')

done
CPU times: user 10.9 ms, sys: 34.1 ms, total: 45 ms
Wall time: 1min 37s


In [60]:
! vw -d data/dataset-test-v2.txt -i model -t -k -p data/test_predictions.txt --progress 1000000 --link=logistic

only testing
predictions = data/test_predictions.txt
Num weight bits = 24
learning rate = 0.5
initial_t = 0
power_t = 0.5
using no cache
Reading datafile = data/dataset-test-v2.txt
num sources = 1
average  since         example        example  current  current  current
loss     last          counter         weight    label  predict features
0.000000 0.000000      1000000      1000000.0  unknown   0.0239       15
0.000000 0.000000      2000000      2000000.0  unknown   0.2434       14
0.000000 0.000000      3000000      3000000.0  unknown   0.1286       15
0.000000 0.000000      4000000      4000000.0  unknown   0.2648       15
0.000000 0.000000      5000000      5000000.0  unknown   0.0253       15
0.000000 0.000000      6000000      6000000.0  unknown   0.2028       15
0.000000 0.000000      7000000      7000000.0  unknown   0.1990       15
0.000000 0.000000      8000000      8000000.0  unknown   0.4490       15
0.000000 0.000000      9000000      9000000.0  unknown   0.0897       14


In [61]:
! cat data/test_predictions.txt | head -n2

0.237986
0.217157
cat: write error: Broken pipe


In [62]:
import numpy as np

def read_vw_predictions(p):
    """Load a VW predictions file into a NumPy array.

    Args:
        p: path to a text file with one prediction per line; only the first
            whitespace-separated token of each line is used.

    Returns:
        numpy.ndarray: the predictions as floats, in file order.
    """
    with open(p, "r") as handle:
        return np.array([float(row.split()[0]) for row in handle])

y_pred = read_vw_predictions("data/test_predictions.txt")

In [63]:
import pandas as pd

In [64]:
test_table = get_test_table()

In [65]:
test_table.registerTempTable('test_table')

In [66]:
pred_table = ss.sql("""
SELECT display_id, ad_id FROM test_table
""")

In [67]:
pred_table.write.csv('data/pred_data.data')

In [68]:
merge_parted_file('data/pred_data.data', 'data/pred_data.csv')

done


In [69]:
def read_pred_data(file_name):
    """Read ``display_id,ad_id`` pairs from a comma-separated text file.

    Args:
        file_name: path to a file whose lines hold integer fields separated
            by commas; the first two fields are the display id and the ad id.

    Returns:
        tuple[list[int], list[int]]: ``(display_ids, ad_ids)`` in file order.

    Lines that cannot be parsed (a non-integer field, or fewer than two
    fields) are printed and skipped. Each skipped line shortens the result,
    so callers that pair these ids with other per-row data by position
    should compare lengths.
    """
    display_ids = []
    ad_ids = []
    with open(file_name, 'r') as f:
        for line in f:
            try:
                elems = [int(field) for field in line.split(',')]
                display_id, ad_id = elems[0], elems[1]
            except (ValueError, IndexError):
                # Only parse problems are tolerated; anything else should
                # surface instead of being silently swallowed.
                print(line)
                continue
            display_ids.append(display_id)
            ad_ids.append(ad_id)
    return display_ids, ad_ids

In [70]:
display_ids, ad_ids = read_pred_data('data/pred_data.csv')

In [71]:
from tqdm import tqdm

In [74]:
unique_dis_ids = set(display_ids)
display_ids, ad_ids = np.array(display_ids), np.array(ad_ids)

# Group the ad ids by display id, keeping the order in which they appear.
display_to_set_of_ads = {}
for dis_id, ad_id in tqdm(zip(display_ids, ad_ids), total=len(display_ids)):
    display_to_set_of_ads.setdefault(dis_id, []).append(ad_id)



  0%|          | 0/32225162 [00:00<?, ?it/s][A[A

  0%|          | 2968/32225162 [00:00<1:42:07, 5258.36it/s][A[A

  0%|          | 24502/32225162 [00:01<1:16:05, 7053.63it/s][A[A

  0%|          | 66562/32225162 [00:01<53:34, 10004.71it/s] [A[A

  0%|          | 108138/32225162 [00:01<37:51, 14138.53it/s][A[A

  0%|          | 150627/32225162 [00:01<26:50, 19911.58it/s][A[A

  1%|          | 192817/32225162 [00:01<19:08, 27881.18it/s][A[A

  1%|          | 235258/32225162 [00:01<13:45, 38739.56it/s][A[A

  1%|          | 281264/32225162 [00:01<09:58, 53401.63it/s][A[A

  1%|          | 324016/32225162 [00:01<07:20, 72409.24it/s][A[A

  1%|          | 363609/32225162 [00:07<26:53, 19752.93it/s][A[A

  1%|▏         | 404703/32225162 [00:07<19:11, 27642.94it/s][A[A

  1%|▏         | 443482/32225162 [00:07<13:49, 38319.27it/s][A[A

  2%|▏         | 485739/32225162 [00:07<10:02, 52693.93it/s][A[A

  2%|▏         | 530747/32225162 [00:07<07:22, 71680.41it/s][A

 16%|█▌        | 5126065/32225162 [00:27<01:02, 431000.32it/s][A[A

 16%|█▌        | 5170532/32225162 [00:27<01:02, 435011.06it/s][A[A

 16%|█▌        | 5214623/32225162 [00:27<01:01, 436750.92it/s][A[A

 16%|█▋        | 5259725/32225162 [00:27<01:01, 440935.44it/s][A[A

 16%|█▋        | 5304118/32225162 [00:27<01:02, 429020.29it/s][A[A

 17%|█▋        | 5347411/32225162 [00:27<01:02, 430185.58it/s][A[A

 17%|█▋        | 5392504/32225162 [00:28<01:01, 436205.51it/s][A[A

 17%|█▋        | 5437341/32225162 [00:28<01:00, 439784.80it/s][A[A

 17%|█▋        | 5481661/32225162 [00:28<01:00, 440803.32it/s][A[A

 17%|█▋        | 5525825/32225162 [00:28<01:00, 440391.94it/s][A[A

 17%|█▋        | 5571542/32225162 [00:28<00:59, 445293.80it/s][A[A

 17%|█▋        | 5618046/32225162 [00:28<00:58, 451038.50it/s][A[A

 18%|█▊        | 5663206/32225162 [00:28<00:59, 447894.11it/s][A[A

 18%|█▊        | 5708040/32225162 [00:28<00:59, 443185.44it/s][A[A

 18%|█▊        | 575

 32%|███▏      | 10366492/32225162 [00:46<01:50, 198713.29it/s][A[A

 32%|███▏      | 10410318/32225162 [00:46<01:31, 237562.49it/s][A[A

 32%|███▏      | 10455165/32225162 [00:46<01:18, 276584.23it/s][A[A

 33%|███▎      | 10498768/32225162 [00:46<01:09, 310420.09it/s][A[A

 33%|███▎      | 10544136/32225162 [00:46<01:03, 342903.38it/s][A[A

 33%|███▎      | 10591328/32225162 [00:47<00:57, 373539.47it/s][A[A

 33%|███▎      | 10636407/32225162 [00:47<00:54, 393783.06it/s][A[A

 33%|███▎      | 10681859/32225162 [00:47<00:52, 410219.40it/s][A[A

 33%|███▎      | 10729455/32225162 [00:47<00:50, 427951.86it/s][A[A

 33%|███▎      | 10775253/32225162 [00:47<00:50, 424876.92it/s][A[A

 34%|███▎      | 10821691/32225162 [00:47<00:49, 430130.53it/s][A[A

 34%|███▎      | 10866907/32225162 [00:47<00:48, 436509.86it/s][A[A

 34%|███▍      | 10912535/32225162 [00:47<00:48, 442258.46it/s][A[A

 34%|███▍      | 10957526/32225162 [00:47<00:48, 438957.99it/s][A[A

 34%|█

 48%|████▊     | 15569250/32225162 [01:01<00:33, 493013.67it/s][A[A

 48%|████▊     | 15619213/32225162 [01:01<00:33, 494979.88it/s][A[A

 49%|████▊     | 15668739/32225162 [01:01<00:33, 494261.14it/s][A[A

 49%|████▉     | 15718185/32225162 [01:01<00:33, 487175.42it/s][A[A

 49%|████▉     | 15766940/32225162 [01:01<00:34, 476977.07it/s][A[A

 49%|████▉     | 15815270/32225162 [01:01<00:34, 478855.29it/s][A[A

 49%|████▉     | 15863632/32225162 [01:01<00:34, 480274.27it/s][A[A

 49%|████▉     | 15911815/32225162 [01:02<00:34, 475203.88it/s][A[A

 50%|████▉     | 15961760/32225162 [01:02<00:33, 482226.02it/s][A[A

 50%|████▉     | 16010672/32225162 [01:02<00:33, 478631.66it/s][A[A

 50%|████▉     | 16059756/32225162 [01:02<00:33, 482228.70it/s][A[A

 50%|████▉     | 16108474/32225162 [01:02<00:33, 483702.59it/s][A[A

 50%|█████     | 16156871/32225162 [01:02<00:33, 483048.67it/s][A[A

 50%|█████     | 16205195/32225162 [01:02<00:33, 474103.07it/s][A[A

 50%|█

 66%|██████▌   | 21204619/32225162 [01:16<00:22, 484211.24it/s][A[A

 66%|██████▌   | 21255713/32225162 [01:16<00:22, 491930.67it/s][A[A

 66%|██████▌   | 21305764/32225162 [01:16<00:22, 486539.96it/s][A[A

 66%|██████▋   | 21356078/32225162 [01:16<00:22, 491403.60it/s][A[A

 66%|██████▋   | 21405284/32225162 [01:19<04:08, 43600.86it/s] [A[A

 67%|██████▋   | 21450101/32225162 [01:20<03:00, 59790.73it/s][A[A

 67%|██████▋   | 21493643/32225162 [01:20<02:13, 80667.99it/s][A[A

 67%|██████▋   | 21541096/32225162 [01:20<01:39, 107414.23it/s][A[A

 67%|██████▋   | 21588735/32225162 [01:20<01:16, 139927.31it/s][A[A

 67%|██████▋   | 21638495/32225162 [01:20<00:59, 178396.38it/s][A[A

 67%|██████▋   | 21686652/32225162 [01:20<00:47, 219934.36it/s][A[A

 67%|██████▋   | 21733907/32225162 [01:20<00:40, 259773.77it/s][A[A

 68%|██████▊   | 21784902/32225162 [01:20<00:34, 304604.45it/s][A[A

 68%|██████▊   | 21832326/32225162 [01:20<00:30, 339647.58it/s][A[A

 68%|███

 83%|████████▎ | 26859533/32225162 [01:31<00:10, 494022.77it/s][A[A

 84%|████████▎ | 26908983/32225162 [01:31<00:10, 484536.53it/s][A[A

 84%|████████▎ | 26960529/32225162 [01:31<00:10, 493415.63it/s][A[A

 84%|████████▍ | 27009962/32225162 [01:31<00:10, 485882.16it/s][A[A

 84%|████████▍ | 27058640/32225162 [01:31<00:10, 484773.77it/s][A[A

 84%|████████▍ | 27107181/32225162 [01:35<02:12, 38739.51it/s] [A[A

 84%|████████▍ | 27152253/32225162 [01:35<01:35, 53310.15it/s][A[A

 84%|████████▍ | 27198874/32225162 [01:35<01:09, 72599.51it/s][A[A

 85%|████████▍ | 27247151/32225162 [01:35<00:51, 97319.73it/s][A[A

 85%|████████▍ | 27294686/32225162 [01:35<00:38, 127813.40it/s][A[A

 85%|████████▍ | 27343400/32225162 [01:36<00:29, 164134.12it/s][A[A

 85%|████████▍ | 27390761/32225162 [01:36<00:23, 204029.47it/s][A[A

 85%|████████▌ | 27438234/32225162 [01:36<00:19, 243968.31it/s][A[A

 85%|████████▌ | 27487400/32225162 [01:36<00:16, 287405.57it/s][A[A

 85%|████

In [81]:
display_to_set_of_ads[16874594]

[66758, 150083, 162754, 170392, 172888, 180797]

In [84]:
# The ids and the predictions are paired purely by position. zip() stops at
# the shortest input, so a length mismatch (for example from lines skipped
# while reading the id file) would silently shift scores onto the wrong ads.
if not (len(display_ids) == len(ad_ids) == len(y_pred)):
    raise ValueError(
        "length mismatch: {0} display ids, {1} ad ids, {2} predictions".format(
            len(display_ids), len(ad_ids), len(y_pred)))

# (display_id, ad_id) -> predicted click score
id2score = {(dis_id, ad_id): score for ad_id, dis_id, score in zip(ad_ids, display_ids, y_pred)}

In [85]:
# One line per display: its ad ids, space-separated, from the highest
# predicted score to the lowest.
with open('submission.csv', 'w') as f:
    f.write('display_id,ad_id' + '\n')
    for display, potential_ids in tqdm(display_to_set_of_ads.items()):
        ranked_ads = sorted(potential_ids, key=lambda x: id2score[(display, x)], reverse=True)
        ranked_text = ' '.join(str(ad) for ad in ranked_ads)
        f.write(str(display) + ',' + ranked_text + '\n')



  0%|          | 0/6245533 [00:00<?, ?it/s][A[A

  0%|          | 3994/6245533 [00:00<02:43, 38194.98it/s][A[A

  0%|          | 8146/6245533 [00:00<02:39, 39134.25it/s][A[A

  0%|          | 11899/6245533 [00:00<02:41, 38638.20it/s][A[A

  0%|          | 15654/6245533 [00:00<02:42, 38304.82it/s][A[A

  0%|          | 19584/6245533 [00:00<02:41, 38480.14it/s][A[A

  0%|          | 23574/6245533 [00:00<02:39, 38895.28it/s][A[A

  0%|          | 27620/6245533 [00:00<02:38, 39351.19it/s][A[A

  1%|          | 31692/6245533 [00:00<02:36, 39752.00it/s][A[A

  1%|          | 35656/6245533 [00:00<02:37, 39352.25it/s][A[A

  1%|          | 39575/6245533 [00:01<02:37, 39302.91it/s][A[A

  1%|          | 43400/6245533 [00:01<02:41, 38477.17it/s][A[A

  1%|          | 47492/6245533 [00:01<02:38, 39178.53it/s][A[A

  1%|          | 51486/6245533 [00:01<02:37, 39403.47it/s][A[A

  1%|          | 55395/6245533 [00:01<02:37, 39241.48it/s][A[A

  1%|          | 59390/62

  8%|▊         | 483202/6245533 [00:12<02:25, 39606.92it/s][A[A

  8%|▊         | 487294/6245533 [00:12<02:23, 39991.42it/s][A[A

  8%|▊         | 491296/6245533 [00:12<02:24, 39950.05it/s][A[A

  8%|▊         | 495365/6245533 [00:12<02:23, 40168.44it/s][A[A

  8%|▊         | 499447/6245533 [00:12<02:22, 40361.67it/s][A[A

  8%|▊         | 503485/6245533 [00:12<02:24, 39741.09it/s][A[A

  8%|▊         | 507463/6245533 [00:13<02:24, 39715.21it/s][A[A

  8%|▊         | 511437/6245533 [00:13<02:27, 38902.88it/s][A[A

  8%|▊         | 515333/6245533 [00:13<02:27, 38776.66it/s][A[A

  8%|▊         | 519426/6245533 [00:13<02:25, 39397.44it/s][A[A

  8%|▊         | 523371/6245533 [00:13<02:26, 39121.40it/s][A[A

  8%|▊         | 527288/6245533 [00:13<02:26, 38927.39it/s][A[A

  9%|▊         | 531213/6245533 [00:13<02:27, 38679.58it/s][A[A

  9%|▊         | 535307/6245533 [00:13<02:25, 39330.50it/s][A[A

  9%|▊         | 539357/6245533 [00:13<02:25, 39122.97it/s][A

 15%|█▌        | 963986/6245533 [00:24<02:19, 37834.11it/s][A[A

 15%|█▌        | 967919/6245533 [00:24<02:17, 38270.75it/s][A[A

 16%|█▌        | 971991/6245533 [00:25<02:17, 38464.97it/s][A[A

 16%|█▌        | 975891/6245533 [00:25<02:16, 38623.15it/s][A[A

 16%|█▌        | 979927/6245533 [00:25<02:14, 39126.49it/s][A[A

 16%|█▌        | 983843/6245533 [00:25<02:14, 39041.08it/s][A[A

 16%|█▌        | 987750/6245533 [00:25<02:14, 38964.64it/s][A[A

 16%|█▌        | 991649/6245533 [00:25<02:16, 38423.00it/s][A[A

 16%|█▌        | 995623/6245533 [00:25<02:16, 38428.87it/s][A[A

 16%|█▌        | 999623/6245533 [00:25<02:14, 38886.83it/s][A[A

 16%|█▌        | 1003532/6245533 [00:25<02:14, 38945.94it/s][A[A

 16%|█▌        | 1007445/6245533 [00:25<02:15, 38597.65it/s][A[A

 16%|█▌        | 1011388/6245533 [00:26<02:14, 38843.44it/s][A[A

 16%|█▋        | 1015275/6245533 [00:26<02:15, 38628.08it/s][A[A

 16%|█▋        | 1019140/6245533 [00:26<02:15, 38517.60it/

 23%|██▎       | 1431166/6245533 [00:37<02:08, 37353.18it/s][A[A

 23%|██▎       | 1434962/6245533 [00:37<02:08, 37532.90it/s][A[A

 23%|██▎       | 1438718/6245533 [00:37<02:10, 36713.34it/s][A[A

 23%|██▎       | 1442404/6245533 [00:37<02:11, 36639.07it/s][A[A

 23%|██▎       | 1446323/6245533 [00:37<02:08, 37366.10it/s][A[A

 23%|██▎       | 1450257/6245533 [00:37<02:06, 37936.01it/s][A[A

 23%|██▎       | 1454142/6245533 [00:37<02:05, 38205.27it/s][A[A

 23%|██▎       | 1458044/6245533 [00:37<02:04, 38444.99it/s][A[A

 23%|██▎       | 1461893/6245533 [00:37<02:07, 37591.65it/s][A[A

 23%|██▎       | 1465770/6245533 [00:38<02:05, 37937.41it/s][A[A

 24%|██▎       | 1469649/6245533 [00:38<02:05, 38188.92it/s][A[A

 24%|██▎       | 1473537/6245533 [00:38<02:04, 38392.57it/s][A[A

 24%|██▎       | 1477380/6245533 [00:38<02:05, 38069.14it/s][A[A

 24%|██▎       | 1481190/6245533 [00:38<02:05, 37924.50it/s][A[A

 24%|██▍       | 1485076/6245533 [00:38<02:04, 3

 30%|███       | 1894782/6245533 [00:49<01:54, 37873.11it/s][A[A

 30%|███       | 1898573/6245533 [00:49<01:55, 37547.05it/s][A[A

 30%|███       | 1902331/6245533 [00:49<01:56, 37351.25it/s][A[A

 31%|███       | 1906069/6245533 [00:49<01:56, 37343.78it/s][A[A

 31%|███       | 1909805/6245533 [00:49<01:57, 36829.39it/s][A[A

 31%|███       | 1913617/6245533 [00:49<01:56, 37206.80it/s][A[A

 31%|███       | 1917341/6245533 [00:49<01:57, 36876.75it/s][A[A

 31%|███       | 1921032/6245533 [00:50<01:57, 36802.02it/s][A[A

 31%|███       | 1924715/6245533 [00:50<01:57, 36775.32it/s][A[A

 31%|███       | 1928482/6245533 [00:50<01:56, 37038.96it/s][A[A

 31%|███       | 1932338/6245533 [00:50<01:55, 37481.93it/s][A[A

 31%|███       | 1936089/6245533 [00:50<01:55, 37431.67it/s][A[A

 31%|███       | 1939890/6245533 [00:50<01:54, 37602.93it/s][A[A

 31%|███       | 1943652/6245533 [00:50<01:58, 36409.59it/s][A[A

 31%|███       | 1947552/6245533 [00:50<01:55, 3

 38%|███▊      | 2349217/6245533 [01:01<01:47, 36183.27it/s][A[A

 38%|███▊      | 2353156/6245533 [01:01<01:44, 37088.33it/s][A[A

 38%|███▊      | 2356872/6245533 [01:01<01:45, 37020.81it/s][A[A

 38%|███▊      | 2360623/6245533 [01:01<01:46, 36608.37it/s][A[A

 38%|███▊      | 2364289/6245533 [01:01<01:46, 36482.82it/s][A[A

 38%|███▊      | 2368069/6245533 [01:02<01:45, 36669.33it/s][A[A

 38%|███▊      | 2371885/6245533 [01:02<01:44, 37103.55it/s][A[A

 38%|███▊      | 2375599/6245533 [01:02<01:45, 36830.65it/s][A[A

 38%|███▊      | 2379285/6245533 [01:02<01:45, 36591.35it/s][A[A

 38%|███▊      | 2383070/6245533 [01:02<01:44, 36960.05it/s][A[A

 38%|███▊      | 2386769/6245533 [01:02<01:44, 36951.53it/s][A[A

 38%|███▊      | 2390466/6245533 [01:02<01:45, 36619.71it/s][A[A

 38%|███▊      | 2394237/6245533 [01:02<01:45, 36370.28it/s][A[A

 38%|███▊      | 2397876/6245533 [01:02<01:45, 36335.08it/s][A[A

 38%|███▊      | 2401557/6245533 [01:02<01:47, 3

 45%|████▍     | 2800588/6245533 [01:13<01:32, 37164.32it/s][A[A

 45%|████▍     | 2804308/6245533 [01:13<01:33, 36621.15it/s][A[A

 45%|████▍     | 2807974/6245533 [01:13<01:34, 36565.82it/s][A[A

 45%|████▌     | 2811634/6245533 [01:14<01:34, 36413.48it/s][A[A

 45%|████▌     | 2815278/6245533 [01:14<01:34, 36175.17it/s][A[A

 45%|████▌     | 2818929/6245533 [01:14<01:34, 36122.11it/s][A[A

 45%|████▌     | 2822760/6245533 [01:14<01:34, 36254.32it/s][A[A

 45%|████▌     | 2826392/6245533 [01:14<01:34, 36160.94it/s][A[A

 45%|████▌     | 2830092/6245533 [01:14<01:33, 36408.11it/s][A[A

 45%|████▌     | 2833895/6245533 [01:14<01:32, 36879.20it/s][A[A

 45%|████▌     | 2837586/6245533 [01:14<01:33, 36542.72it/s][A[A

 45%|████▌     | 2841351/6245533 [01:14<01:33, 36416.10it/s][A[A

 46%|████▌     | 2844995/6245533 [01:15<01:33, 36281.06it/s][A[A

 46%|████▌     | 2848654/6245533 [01:15<01:33, 36373.15it/s][A[A

 46%|████▌     | 2852293/6245533 [01:15<01:33, 3

 52%|█████▏    | 3243617/6245533 [01:26<01:24, 35341.20it/s][A[A

 52%|█████▏    | 3247314/6245533 [01:26<01:24, 35520.29it/s][A[A

 52%|█████▏    | 3251039/6245533 [01:26<01:23, 36022.01it/s][A[A

 52%|█████▏    | 3254734/6245533 [01:26<01:22, 36294.41it/s][A[A

 52%|█████▏    | 3258367/6245533 [01:26<01:23, 35659.73it/s][A[A

 52%|█████▏    | 3261938/6245533 [01:26<01:25, 35003.12it/s][A[A

 52%|█████▏    | 3265569/6245533 [01:26<01:24, 35078.18it/s][A[A

 52%|█████▏    | 3269082/6245533 [01:26<01:24, 35093.34it/s][A[A

 52%|█████▏    | 3272595/6245533 [01:26<01:25, 34608.51it/s][A[A

 52%|█████▏    | 3276386/6245533 [01:26<01:24, 35156.96it/s][A[A

 53%|█████▎    | 3280189/6245533 [01:27<01:23, 35516.78it/s][A[A

 53%|█████▎    | 3283745/6245533 [01:27<01:23, 35474.30it/s][A[A

 53%|█████▎    | 3287393/6245533 [01:27<01:22, 35770.12it/s][A[A

 53%|█████▎    | 3291110/6245533 [01:27<01:21, 36178.77it/s][A[A

 53%|█████▎    | 3294765/6245533 [01:27<01:21, 3

 59%|█████▉    | 3681636/6245533 [01:38<01:11, 35691.94it/s][A[A

 59%|█████▉    | 3685208/6245533 [01:38<01:13, 35038.43it/s][A[A

 59%|█████▉    | 3688717/6245533 [01:38<01:13, 34968.09it/s][A[A

 59%|█████▉    | 3692268/6245533 [01:38<01:12, 35128.36it/s][A[A

 59%|█████▉    | 3696122/6245533 [01:38<01:11, 35606.32it/s][A[A

 59%|█████▉    | 3699686/6245533 [01:38<01:11, 35444.03it/s][A[A

 59%|█████▉    | 3703339/6245533 [01:38<01:11, 35762.29it/s][A[A

 59%|█████▉    | 3706958/6245533 [01:39<01:11, 35295.01it/s][A[A

 59%|█████▉    | 3710692/6245533 [01:39<01:10, 35883.74it/s][A[A

 59%|█████▉    | 3714493/6245533 [01:39<01:09, 36495.72it/s][A[A

 60%|█████▉    | 3718153/6245533 [01:39<01:10, 36047.54it/s][A[A

 60%|█████▉    | 3721771/6245533 [01:39<01:09, 36086.93it/s][A[A

 60%|█████▉    | 3725384/6245533 [01:39<01:09, 36048.30it/s][A[A

 60%|█████▉    | 3728992/6245533 [01:39<01:10, 35737.40it/s][A[A

 60%|█████▉    | 3732643/6245533 [01:39<01:10, 3

 66%|██████▌   | 4113159/6245533 [01:50<01:00, 35485.13it/s][A[A

 66%|██████▌   | 4116710/6245533 [01:50<01:01, 34897.31it/s][A[A

 66%|██████▌   | 4120204/6245533 [01:50<01:01, 34346.48it/s][A[A

 66%|██████▌   | 4123769/6245533 [01:50<01:01, 34290.73it/s][A[A

 66%|██████▌   | 4127324/6245533 [01:50<01:01, 34658.88it/s][A[A

 66%|██████▌   | 4130988/6245533 [01:51<01:00, 35230.12it/s][A[A

 66%|██████▌   | 4134516/6245533 [01:51<01:00, 35078.97it/s][A[A

 66%|██████▋   | 4138111/6245533 [01:51<00:59, 35329.44it/s][A[A

 66%|██████▋   | 4141647/6245533 [01:51<00:59, 35318.14it/s][A[A

 66%|██████▋   | 4145181/6245533 [01:51<01:00, 35000.28it/s][A[A

 66%|██████▋   | 4148699/6245533 [01:51<00:59, 35043.85it/s][A[A

 66%|██████▋   | 4152237/6245533 [01:51<00:59, 35143.45it/s][A[A

 67%|██████▋   | 4155785/6245533 [01:51<00:59, 34921.90it/s][A[A

 67%|██████▋   | 4159390/6245533 [01:51<00:59, 35252.51it/s][A[A

 67%|██████▋   | 4162917/6245533 [01:51<00:59, 3

 73%|███████▎  | 4540394/6245533 [02:02<00:48, 35022.88it/s][A[A

 73%|███████▎  | 4543954/6245533 [02:02<00:48, 35193.78it/s][A[A

 73%|███████▎  | 4547500/6245533 [02:03<00:48, 35272.88it/s][A[A

 73%|███████▎  | 4551031/6245533 [02:03<00:48, 35282.58it/s][A[A

 73%|███████▎  | 4554560/6245533 [02:03<00:48, 34915.76it/s][A[A

 73%|███████▎  | 4558144/6245533 [02:03<00:47, 35187.33it/s][A[A

 73%|███████▎  | 4561665/6245533 [02:03<00:48, 34837.40it/s][A[A

 73%|███████▎  | 4565205/6245533 [02:03<00:48, 34444.01it/s][A[A

 73%|███████▎  | 4568853/6245533 [02:03<00:47, 35029.82it/s][A[A

 73%|███████▎  | 4572360/6245533 [02:03<00:48, 34784.77it/s][A[A

 73%|███████▎  | 4575896/6245533 [02:03<00:48, 34706.71it/s][A[A

 73%|███████▎  | 4579468/6245533 [02:03<00:48, 34432.10it/s][A[A

 73%|███████▎  | 4583060/6245533 [02:04<00:47, 34743.90it/s][A[A

 73%|███████▎  | 4586537/6245533 [02:04<00:47, 34652.32it/s][A[A

 73%|███████▎  | 4590213/6245533 [02:04<00:46, 3

 79%|███████▉  | 4962256/6245533 [02:15<00:38, 33757.84it/s][A[A

 80%|███████▉  | 4965720/6245533 [02:15<00:37, 34017.10it/s][A[A

 80%|███████▉  | 4969159/6245533 [02:15<00:37, 34127.66it/s][A[A

 80%|███████▉  | 4972624/6245533 [02:15<00:37, 33744.76it/s][A[A

 80%|███████▉  | 4976002/6245533 [02:15<00:39, 32053.84it/s][A[A

 80%|███████▉  | 4979500/6245533 [02:15<00:38, 32513.58it/s][A[A

 80%|███████▉  | 4983088/6245533 [02:15<00:38, 33091.08it/s][A[A

 80%|███████▉  | 4986534/6245533 [02:15<00:37, 33488.69it/s][A[A

 80%|███████▉  | 4990047/6245533 [02:16<00:36, 33964.69it/s][A[A

 80%|███████▉  | 4993459/6245533 [02:16<00:36, 33954.23it/s][A[A

 80%|████████  | 4996861/6245533 [02:16<00:36, 33907.70it/s][A[A

 80%|████████  | 5000297/6245533 [02:16<00:36, 34041.28it/s][A[A

 80%|████████  | 5003705/6245533 [02:16<00:36, 34031.76it/s][A[A

 80%|████████  | 5007235/6245533 [02:16<00:35, 34401.49it/s][A[A

 80%|████████  | 5010751/6245533 [02:16<00:36, 3

 86%|████████▌ | 5380841/6245533 [02:27<00:25, 33917.14it/s][A[A

 86%|████████▌ | 5384350/6245533 [02:27<00:25, 34260.29it/s][A[A

 86%|████████▋ | 5387876/6245533 [02:27<00:24, 34554.06it/s][A[A

 86%|████████▋ | 5391363/6245533 [02:27<00:24, 34647.29it/s][A[A

 86%|████████▋ | 5394830/6245533 [02:27<00:24, 34515.97it/s][A[A

 86%|████████▋ | 5398343/6245533 [02:27<00:24, 34118.81it/s][A[A

 86%|████████▋ | 5401782/6245533 [02:27<00:24, 33833.85it/s][A[A

 87%|████████▋ | 5405186/6245533 [02:28<00:24, 33894.77it/s][A[A

 87%|████████▋ | 5408627/6245533 [02:28<00:24, 33618.05it/s][A[A

 87%|████████▋ | 5412119/6245533 [02:28<00:24, 33998.24it/s][A[A

 87%|████████▋ | 5415593/6245533 [02:28<00:24, 34217.02it/s][A[A

 87%|████████▋ | 5419045/6245533 [02:28<00:24, 34306.85it/s][A[A

 87%|████████▋ | 5422560/6245533 [02:28<00:24, 33996.35it/s][A[A

 87%|████████▋ | 5426140/6245533 [02:28<00:23, 34517.15it/s][A[A

 87%|████████▋ | 5429595/6245533 [02:28<00:24, 3

 93%|█████████▎| 5797088/6245533 [02:39<00:13, 33820.16it/s][A[A

 93%|█████████▎| 5800472/6245533 [02:39<00:13, 33106.46it/s][A[A

 93%|█████████▎| 5803835/6245533 [02:39<00:13, 33159.72it/s][A[A

 93%|█████████▎| 5807155/6245533 [02:39<00:13, 33101.71it/s][A[A

 93%|█████████▎| 5810654/6245533 [02:40<00:12, 33645.53it/s][A[A

 93%|█████████▎| 5814104/6245533 [02:40<00:12, 33896.97it/s][A[A

 93%|█████████▎| 5817549/6245533 [02:40<00:12, 34060.56it/s][A[A

 93%|█████████▎| 5820984/6245533 [02:40<00:12, 34146.23it/s][A[A

 93%|█████████▎| 5824499/6245533 [02:40<00:12, 34440.19it/s][A[A

 93%|█████████▎| 5827945/6245533 [02:40<00:12, 34185.74it/s][A[A

 93%|█████████▎| 5831466/6245533 [02:40<00:12, 34486.57it/s][A[A

 93%|█████████▎| 5834928/6245533 [02:40<00:11, 34526.35it/s][A[A

 93%|█████████▎| 5838390/6245533 [02:40<00:11, 34553.49it/s][A[A

 94%|█████████▎| 5841852/6245533 [02:40<00:11, 34030.90it/s][A[A

 94%|█████████▎| 5845258/6245533 [02:41<00:11, 3

100%|█████████▉| 6214819/6245533 [02:51<00:00, 33603.70it/s][A[A

100%|█████████▉| 6218181/6245533 [02:51<00:00, 33514.16it/s][A[A

100%|█████████▉| 6221534/6245533 [02:52<00:00, 30367.10it/s][A[A

100%|█████████▉| 6224904/6245533 [02:52<00:00, 31238.47it/s][A[A

100%|█████████▉| 6228333/6245533 [02:52<00:00, 32094.23it/s][A[A

100%|█████████▉| 6231820/6245533 [02:52<00:00, 32878.41it/s][A[A

100%|█████████▉| 6235140/6245533 [02:52<00:00, 32221.60it/s][A[A

100%|█████████▉| 6238443/6245533 [02:52<00:00, 32443.55it/s][A[A

100%|█████████▉| 6241766/6245533 [02:52<00:00, 32674.57it/s][A[A

100%|██████████| 6245533/6245533 [02:52<00:00, 36152.24it/s][A[A
