In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import re
import collections
from tqdm import tqdm
from xqdm import xqdm

In [2]:
def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    The maximum element is subtracted before exponentiating. This does not
    change the mathematical result, but it keeps ``np.exp`` from overflowing
    to ``inf`` (which would turn the quotient into ``nan``) on large inputs.

    Args:
        x: Array-like of scores.

    Returns:
        Array with the same shape as ``x`` whose elements sum to 1. An empty
        input yields an empty result.
    """
    if np.size(x) == 0:
        # Nothing to normalise, and np.max would raise on an empty input.
        return np.exp(x)
    ex = np.exp(np.subtract(x, np.max(x)))
    return ex / np.sum(ex)

def preprocessing(csv, score):
    """Convert one raw submission table into score-weighted class probabilities.

    Args:
        csv: DataFrame whose first column holds the sample id and whose
            remaining columns hold the raw per-class scores.
        score: Scalar weight. It is stored under ``'score'`` and multiplied
            into every row's softmax output.

    Returns:
        dict with the keys
        ``'id'``    - list of first-column values, one per row;
        ``'vals'``  - list of arrays, one per row: ``softmax(row) * score``
                      (row scores are cast to float32 before the softmax);
        ``'score'`` - the ``score`` argument;
        ``'0'``..``'9'`` - empty lists; nothing in this function fills them.
    """
    data = {'id': [], 'vals': [], 'score': score}
    # Placeholder per-digit lists, kept so the returned dict has the same keys
    # as before.
    data.update({str(i): [] for i in range(10)})
    for _, row in csv.iterrows():
        # `row` is a Series indexed by column label. Access it positionally
        # with .iloc: plain integer keys on a label-indexed Series are
        # deprecated as positional lookups in pandas.
        data['id'].append(row.iloc[0])
        vals = softmax(row.iloc[1:].to_numpy(np.float32)) * score
        data['vals'].append(vals)
    return data

def __preprocessing(args):
    """Single-argument adapter around ``preprocessing``.

    Unpacks ``args`` (here a ``(csv, score)`` pair) into positional arguments
    so that a mapper passing one item per call can drive ``preprocessing``.
    """
    result = preprocessing(*args)
    return result

def get_score(filename):
    """Derive an ensemble weight from the score embedded in a submission filename.

    The name must contain ``submission-raw-L<4 digits>-<0.xxx>.csv``. The
    ``0.xxx`` part is parsed as a float and its reciprocal is returned, so a
    smaller embedded value produces a larger weight.

    Args:
        filename: ``str`` or path-like; it is matched via ``str(filename)``.

    Returns:
        ``1 / score`` as a float.

    Raises:
        ValueError: If the name does not match the expected pattern, or the
            embedded score is zero (its reciprocal is undefined).
    """
    match = re.search(r'submission-raw-L\d{4}-(0\.\d+)\.csv', str(filename))
    if match is None:
        raise ValueError(
            f'cannot extract a score from {str(filename)!r}: expected '
            "a name like 'submission-raw-L0001-0.1234.csv'"
        )
    score = float(match.group(1))
    if score == 0:
        raise ValueError(f'score embedded in {str(filename)!r} is zero; cannot invert it')
    return 1 / score

In [3]:
# Sort the matches: Path.glob yields entries in arbitrary order, and the order
# decides which file ends up as datas[0] (whose ids seed the output table).
submission_files = sorted(Path('submissions/prep1').glob('submission-raw-*.csv'))

In [4]:
# Pair each submission's table with the weight parsed from its filename.
items = [
    (pd.read_csv(csv_path), get_score(csv_path))
    for csv_path in submission_files
]

In [5]:
# Apply __preprocessing to every (DataFrame, weight) pair in `items`.
# NOTE(review): xqdm is an external helper not defined in this notebook. From
# the recorded progress bar and the later datas[0] indexing it appears to map
# the function over `items` and return an indexable collection of results -
# confirm that it preserves input order.
datas = xqdm(__preprocessing, items)

100%|██████████| 7/7 [00:03<00:00,  2.31it/s]


In [6]:
# Spot-check: weighted probabilities of the first row of the first file,
# together with that file's weight.
datas[0]['vals'][0], datas[0]['score']

(array([4.7010086e-05, 2.6876526e-04, 1.3271886e-03, 1.5354828e-03,
        2.8078593e-04, 2.3965705e-02, 7.5105610e+00, 4.9809790e-07,
        1.1523025e-02, 1.5697771e-05], dtype=float32),
 7.54952585733877)

In [8]:
# Replace each file's list of per-row arrays with one 2-D array (rows x classes).
# This mutates the dicts inside `datas` in place.
for data in datas:
    data['vals'] = np.stack(data['vals'])

In [9]:
# Sanity check: one row per sample, one column per class.
datas[0]['vals'].shape

(20480, 10)

In [10]:
# Gather the per-file 2-D arrays, in the same order as `datas`.
vals = [data['vals'] for data in datas]

In [11]:
# Combine into a single 3-D array with the file axis first. np.stack requires
# every file's array to have the same shape.
vals = np.stack(vals)

In [12]:
# Sanity check: (files, samples, classes).
vals.shape

(7, 20480, 10)

In [13]:
# Sum the weighted probabilities over the file axis: one ensemble score per
# sample and class.
valsum = np.sum(vals, axis=0)

In [14]:
# Sanity check: the file axis has been reduced away.
valsum.shape

(20480, 10)

In [15]:
# For each sample, take the column index with the highest summed score.
valarg = np.argmax(valsum, axis=1)

In [16]:
# Sanity check: one predicted index per sample.
valarg.shape

(20480,)

In [17]:
# The output ids are taken from the first file only, while the predictions were
# summed element-wise across all files. Verify that every file lists the same
# ids in the same order; otherwise rows from different samples were mixed.
reference_ids = datas[0]['id']
assert all(d['id'] == reference_ids for d in datas), \
    'submission files do not share the same ids in the same order'
newcsv = {'id': list(map(int, reference_ids)), 'digit': list(valarg)}

In [18]:
# Turn the column dict into a DataFrame (rebinds `newcsv`).
newcsv = pd.DataFrame(newcsv)

In [19]:
# Display the ensemble predictions for a visual check.
newcsv

Unnamed: 0,id,digit
0,2049,6
1,2050,9
2,2051,8
3,2052,0
4,2053,3
...,...,...
20475,22524,4
20476,22525,1
20477,22526,6
20478,22527,8


In [20]:
# Write the ensemble predictions; index=False leaves out the DataFrame index
# so only the 'id' and 'digit' columns are saved.
newcsv.to_csv('./submissions/prep1/ensemble1.csv', index=False)