In [12]:
import csv
from collections import Counter
from contextlib import ExitStack
from itertools import zip_longest
from pathlib import Path
from zipfile import ZipFile

In [2]:
# Show the name of the machine this notebook is executing on.
!hostname

In [3]:
# Data split whose predictions are merged; it determines the name of the
# '<split>_submission' folders used further down.
split = 'dev'
# Root directory of the experiment; the run_* sub-directories are globbed from here.
experiment_path = Path('/home/huebner/research/semeval_2020_task_6/search/repeated_joint_bert')

In [4]:
# List the contents of the experiment directory.
!ls -l {experiment_path}

total 60
-rw-r--r-- 1 huebner deeplee  3071 19. Feb 18:04 config.jsonnet
-rw-r--r-- 1 huebner deeplee 45734 20. Feb 03:01 experiment_state-2020-02-19_18-04-30.json
-rw-r--r-- 1 huebner deeplee    92 19. Feb 18:04 params.json
drwx------ 3 huebner deeplee   123 19. Feb 18:04 run_0_2020-02-19_18-04-307kmx4424
drwx------ 3 huebner deeplee   123 19. Feb 18:04 run_1_2020-02-19_18-04-301dga1v3y
drwx------ 3 huebner deeplee   151 19. Feb 19:39 run_2_2020-02-19_19-39-06qfxvllgt
drwx------ 3 huebner deeplee   151 19. Feb 19:44 run_3_2020-02-19_19-44-16z4sbvupq
drwx------ 3 huebner deeplee   123 19. Feb 21:08 run_4_2020-02-19_21-08-55ruo1cxt7
drwx------ 3 huebner deeplee   123 19. Feb 22:05 run_5_2020-02-19_22-05-217gb2_2ai
drwx------ 3 huebner deeplee   151 19. Feb 22:49 run_6_2020-02-19_22-49-21esr752gm
drwx------ 3 huebner deeplee   151 19. Feb 23:49 run_7_2020-02-19_23-49-40vro8xjp4
drwx------ 3 huebner deeplee   123 20. Feb 01:00 run_8_2020-02-20_01-00-052aki2vrf
drwx------ 3 huebner deeplee

In [5]:
# Folder name for this split's submission files. The same name is used for the
# sub-folder inside each run's 'trial' directory and for the merged output.
submission_dir = f'{split}_submission'
# The merged files are written directly below the experiment directory.
submission_path = experiment_path / submission_dir
# exist_ok=True keeps this cell re-runnable once the folder has been created.
submission_path.mkdir(exist_ok=True)

In [6]:
# Collect the per-run output directories. The order in which glob yields
# entries is not guaranteed to be stable, so sort to make the run order
# reproducible: it decides which run is run_paths[0] and in which order the
# runs' labels are counted when they are merged. Non-directories that happen
# to match 'run_*' are skipped.
run_paths = sorted(path for path in experiment_path.glob('run_*') if path.is_dir())
if not run_paths:
    # Fail with a clear message instead of a bare IndexError on run_paths[0].
    raise FileNotFoundError(f'No run_* directories found in {experiment_path}')
# Submission folder of the first run; used below to discover which prediction
# files exist.
run_0_submission_path = run_paths[0].joinpath('trial', submission_dir)

In [7]:
# List the submission files of the first discovered run.
!ls -l {run_0_submission_path}

total 3928
-rw-r--r-- 1 huebner deeplee 2003843 26. Feb 08:53 submission.zip
-rw-r--r-- 1 huebner deeplee  252537 26. Feb 08:53 task_2_t1_biology_1_505.deft
-rw-r--r-- 1 huebner deeplee  238192 26. Feb 08:53 task_2_t1_biology_2_404.deft
-rw-r--r-- 1 huebner deeplee  240595 26. Feb 08:53 task_2_t2_history_1_101.deft
-rw-r--r-- 1 huebner deeplee  245501 26. Feb 08:53 task_2_t3_physics_2_101.deft
-rw-r--r-- 1 huebner deeplee   77157 26. Feb 08:53 task_2_t5_economic_1_303.deft
-rw-r--r-- 1 huebner deeplee  277842 26. Feb 08:53 task_2_t6_sociology_1_0.deft
-rw-r--r-- 1 huebner deeplee  330301 26. Feb 08:53 task_2_t7_government_0_101.deft
-rw-r--r-- 1 huebner deeplee  340626 26. Feb 08:53 task_2_t7_government_2_101.deft


In [8]:
# File names (not full paths) of every .deft prediction file in the first
# run's submission folder. Sorted so that the processing and log order is the
# same on every execution rather than following file-system order.
prediction_files = sorted(file_path.name for file_path in run_0_submission_path.glob('*.deft'))

In [9]:
def merge_prediction(prediction):
    """Merge one aligned row from every run into a single majority-voted row.

    Args:
        prediction: Sequence holding one parsed TSV row (a list of column
            strings) per run, all read from the same line of the same
            prediction file. An empty row represents a blank line.

    Returns:
        ``[]`` if the line is blank in every run (or no rows were passed).
        Otherwise the first four columns of the first run's row followed by
        the most frequent value of the fifth column across all runs. Ties are
        resolved by ``Counter.most_common``, which on Python 3.7+ favours the
        label encountered first, i.e. the earliest run in ``prediction``.

    Raises:
        ValueError: If blank and non-blank rows are mixed, or a non-blank row
            has fewer than five columns. Either means the runs' files are not
            line-aligned and voting would be meaningless.
    """
    rows = list(prediction)
    # A blank line must be blank in *every* run; checking only the first run
    # would silently drop a token whenever the runs disagree.
    if not rows or all(len(row) == 0 for row in rows):
        return []

    if any(len(row) < 5 for row in rows):
        raise ValueError(
            'Runs are not aligned: expected at least 5 columns in every row, '
            f'got row lengths {[len(row) for row in rows]}')

    # The first four columns are copied from the first run only; they are
    # not compared across runs.
    static_content = list(rows[0][:4])
    labels = [row[4] for row in rows]
    majority_vote_label = Counter(labels).most_common(1)[0][0]
    return static_content + [majority_vote_label]

In [11]:
# Ensemble the runs: for every prediction file, read that file from each run
# in lockstep and write the merged rows into the submission folder.
for prediction_file in prediction_files:
    print(f'Merging results of {prediction_file}...')
    # ExitStack closes every file opened below, even when opening a later
    # file or merging a row raises part-way through.
    with ExitStack() as stack:
        readers = []
        for run_path in run_paths:
            prediction_file_path = run_path.joinpath('trial', submission_dir, prediction_file)
            # newline='' leaves line-ending handling to the csv module, as its
            # documentation recommends.
            file_handler = stack.enter_context(prediction_file_path.open(newline=''))
            readers.append(csv.reader(file_handler,
                                      delimiter='\t',
                                      quotechar=None,
                                      quoting=csv.QUOTE_NONE))

        # Open the output only after every input was opened successfully, so a
        # missing run file does not leave a freshly truncated output behind.
        out = stack.enter_context(
            submission_path.joinpath(prediction_file).open(mode='w', newline=''))
        writer = csv.writer(out,
                            delimiter='\t',
                            quotechar=None,
                            quoting=csv.QUOTE_NONE)

        # Unlike zip, zip_longest continues until the longest input ends, so a
        # run with fewer lines shows up as the sentinel instead of silently
        # truncating the merged file.
        exhausted = object()
        joint_readers = zip_longest(*readers, fillvalue=exhausted)
        for line_number, predictions in enumerate(joint_readers, start=1):
            if any(row is exhausted for row in predictions):
                raise ValueError(
                    f'{prediction_file}: runs differ in length '
                    f'(at least one run ended before line {line_number})')
            writer.writerow(merge_prediction(predictions))

Merging results of task_2_t1_biology_1_505.deft ...
Merging results of task_2_t1_biology_2_404.deft ...
Merging results of task_2_t2_history_1_101.deft ...
Merging results of task_2_t3_physics_2_101.deft ...
Merging results of task_2_t5_economic_1_303.deft ...
Merging results of task_2_t6_sociology_1_0.deft ...
Merging results of task_2_t7_government_0_101.deft ...
Merging results of task_2_t7_government_2_101.deft ...
Done.


In [15]:
# Bundle every merged .deft file into submission.zip inside the submission
# folder. Each file is stored under its bare file name, so the archive has no
# directory structure.
archive_path = submission_path.joinpath('submission.zip')
with ZipFile(archive_path, 'w') as archive:
    for merged_file in submission_path.glob('*.deft'):
        archive.write(merged_file, merged_file.name)