In [1]:
# Mount Google Drive so the notebook can reach the project files stored there.

import os, sys 
from google.colab import drive

# The Drive contents become available under /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# go to the project directory
%cd /content/drive/MyDrive/hackathon_2-2
! ls

### check folders and files
## check train data
assert os.path.isdir('./data/train')
assert os.path.isfile('./data/train/MedQuAD_v1.0_train.json')

## check dev data
assert os.path.isdir('./data/dev')
assert os.path.isfile('./data/dev/dev_input.txt')
assert os.path.isfile('./data/dev/dev_answer.txt')

# check result folder and files
assert os.path.isdir('./result')
assert os.path.isfile('./result/dev_result.txt')

## check test data
# assert os.path.isdir('./data/test')
# assert os.path.isfile('./data/dev/test_input.txt')
# assert os.path.isfile('./data/dev/test_answer.txt')

# check test result
# assert os.path.isfile('./result/test_result.txt')

/content/drive/MyDrive/hackathon_2-2
data  evaluate.ipynb  predict.ipynb  result  src


In [5]:
# evaluate functions

import re
import string
from collections import Counter
def normalize_answer(s):
    """Normalize an answer string before EM / F1 comparison.

    Steps, applied in this order:
      1. replace quote and bracket characters
         (' " < > ( ) 《 》 〈 〉 ‘ ’) with a space,
      2. lowercase the text,
      3. delete every remaining character found in ``string.punctuation``,
      4. collapse whitespace runs into single spaces and strip both ends.

    Args:
        s: raw answer text (str).

    Returns:
        The normalized string; empty if nothing but symbols/whitespace remains.
    """
    # These symbols become spaces instead of being deleted, so the text on
    # either side of them stays separate tokens ("don't" -> "don t").
    # A translation table replaces the former chain of re.sub() calls, two of
    # which used the invalid escape sequences "\(" and "\)" in non-raw strings.
    symbols_to_space = str.maketrans({ch: " " for ch in "'\"《》<>〈〉()‘’"})
    punctuation = set(string.punctuation)

    text = s.translate(symbols_to_space).lower()
    text = ''.join(ch for ch in text if ch not in punctuation)
    return ' '.join(text.split())

def f1_score(prediction, ground_truth):
    """Character-level F1 between a prediction and its ground truth.

    Both strings are passed through normalize_answer() and stripped of all
    whitespace; the remaining characters are compared as multisets.

    Args:
        prediction: predicted answer text.
        ground_truth: reference answer text.

    Returns:
        The harmonic mean of character precision and recall, or 0 when the
        two strings share no character.
    """
    # Joining the whitespace-split tokens leaves only the non-space characters.
    predicted_chars = ''.join(normalize_answer(prediction).split())
    truth_chars = ''.join(normalize_answer(ground_truth).split())

    # Multiset intersection: each character counts min(pred, truth) times.
    overlap = Counter(predicted_chars) & Counter(truth_chars)
    shared = sum(overlap.values())
    if not shared:
        return 0

    precision = float(shared) / len(predicted_chars)
    recall = float(shared) / len(truth_chars)
    return (2 * precision * recall) / (precision + recall)

def exact_match_score(prediction, ground_truth):
    """Return True when both strings are equal after normalize_answer()."""
    normalized_prediction = normalize_answer(prediction)
    normalized_truth = normalize_answer(ground_truth)
    return normalized_prediction == normalized_truth

In [6]:
# evaluate EM and F1 score on dev data

# load dev_result.txt and dev_answer.txt
# The encoding is explicit so the result does not depend on the runtime's
# locale default (assumes the files are UTF-8 -- confirm if they were
# produced outside Colab).
with open('./data/dev/dev_answer.txt', 'r', encoding='utf-8') as fa:
  answers = fa.readlines()
with open('./result/dev_result.txt', 'r', encoding='utf-8') as fr:
  predictions = fr.readlines()

# zip() stops at the shorter list while the scores are divided by
# len(answers), so a line-count mismatch would silently skew both scores.
assert len(answers) == len(predictions), (
    f"line count mismatch: {len(answers)} answers vs {len(predictions)} predictions")

total = len(answers)
correct = 0
f1 = 0
for (answer, prediction) in zip(answers, predictions):
  # Both scorers take (prediction, ground_truth), in that order.
  correct += exact_match_score(prediction, answer)
  f1 += f1_score(prediction, answer)

acc = 100 * (correct / total)
# Store the percentage under its own name: assigning it to `f1_score` would
# replace the f1_score() function and break every later call to it,
# including a re-run of this cell.
f1_pct = 100 * (f1 / total)
print(f"EM score (dev) : {acc}")
print(f"F1 score (dev) : {f1_pct}")

EM score (dev) : 48.96551724137931
F1 score (dev) : 68.5630778417393


In [None]:
# evaluate EM and F1 score on test data

# load test_result.txt and test_answer.txt
# The encoding is explicit so the result does not depend on the runtime's
# locale default (assumes the files are UTF-8 -- confirm if they were
# produced outside Colab).
with open('./data/test/test_answer.txt', 'r', encoding='utf-8') as fa:
  answers = fa.readlines()
with open('./result/test_result.txt', 'r', encoding='utf-8') as fr:
  predictions = fr.readlines()

# zip() stops at the shorter list while the scores are divided by
# len(answers), so a line-count mismatch would silently skew both scores.
assert len(answers) == len(predictions), (
    f"line count mismatch: {len(answers)} answers vs {len(predictions)} predictions")

total = len(answers)
correct = 0
f1 = 0
for (answer, prediction) in zip(answers, predictions):
  # Both scorers take (prediction, ground_truth), in that order.
  correct += exact_match_score(prediction, answer)
  f1 += f1_score(prediction, answer)

acc = 100 * (correct / total)
# Store the percentage under its own name: assigning it to `f1_score` would
# replace the f1_score() function and break every later call to it,
# including a re-run of this cell.
f1_pct = 100 * (f1 / total)
# These are test-split scores, so the labels say "test".
print(f"EM score (test) : {acc}")
print(f"F1 score (test) : {f1_pct}")

FileNotFoundError: ignored