In [1]:
# 모델 평가하기
# 1. 모델 불러오기
import torch
from kobart import get_kobart_tokenizer
from transformers.models.bart import BartForConditionalGeneration

def load_model():
    model = BartForConditionalGeneration.from_pretrained('./kobart_translation')
    # tokenizer = get_kobart_tokenizer()
    return model

model = load_model()
tokenizer = get_kobart_tokenizer()

  from .autonotebook import tqdm as notebook_tqdm


using cached model. c:\Users\eunwo\OneDrive\바탕 화면\Dev\KoBART-translation\.cache\kobart_base_tokenizer_cased_cf74400bce.zip


In [2]:
# 2. 모델 함수 정의하기
def translate(text:str) -> str:
    input_ids = tokenizer.encode(text)
    input_ids = torch.tensor(input_ids)
    input_ids = input_ids.unsqueeze(0)
    output = model.generate(input_ids, eos_token_id=1, max_length=512, num_beams=5)
    output = tokenizer.decode(output[0], skip_special_tokens=True)
    return output

In [33]:
# 중복 데이터 제거하기.
import json
from tqdm import tqdm

input_file = 'ko_converted_data.json'
output_file = 'ko_converted_data_cleaned.json'

# ko_converted_data.json 파일 불러오기
with open(input_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# 중복된 데이터 제거하기
unique_data = []
seen_sentences = set()

with tqdm(total=len(data), desc='Processing') as pbar:
    for item in data:
        ko_original = item['ko_original']
        ko_converted = item['ko_converted']

        if ko_original != ko_converted and ko_original not in seen_sentences:
            unique_data.append(item)
            seen_sentences.add(ko_original)
        pbar.update(1)

# 중복 제거된 데이터 저장하기
with open(output_file, 'w', encoding='utf-8') as f:
    json.dump(unique_data, f, ensure_ascii=False, indent=4)

print("Cleaned data has been saved to", output_file)


Processing: 100%|██████████| 1200000/1200000 [00:00<00:00, 1200068.67it/s]


Cleaned data has been saved to ko_converted_data_cleaned.json


In [1]:
import csv
import random
import json
from nltk.translate.bleu_score import sentence_bleu
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor
from tqdm.contrib.concurrent import thread_map

#3-1. 테스트할 데이터 분리하기. (원본 문장과 노이즈가 적용된 문장 분리)
#I: 원본만 분리 (100 개)
tsv_file = 'data/validation.tsv'
json_file = './ko_converted_data_cleaned.json'
output_file = './data(test)/train_origin.tsv'
num_samples = 100  # 저장할 랜덤 샘플 개수


# 'ko_converted.json' 파일에서 필요한 값을 불러온다.
with open(json_file, 'r', encoding='utf-8') as f:
    data = json.load(f)
ko_origin_sentences = [item['ko_original'] for item in data]

matched_sentences = []

# 'validation.tsv' 파일을 읽어와서 'ko' 항목을 검사한다.
with open(tsv_file, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f, delimiter='\t')
    sentences = [row for row in reader]  # 모든 문장을 리스트로 저장

# 랜덤하게 100개의 문장을 선택하여 'ko' 항목과 'ko_converted' 값을 비교하고, 동일한 문장인 경우 저장한다.
# random.shuffle(sentences)
# count = 0
# with tqdm(total=num_samples, desc='Processing') as pbar:
#     for row in sentences:
#         ko_sentence = row['kr']
#         en_sentence = row['en']

#         if ko_sentence in ko_origin_sentences:
#             matched_sentences.append({'kr': ko_sentence, 'en': en_sentence})
#             count += 1
#             pbar.update(1)

#         if count == num_samples:
#             break

#이건 특수 상황
for row in tqdm(sentences):
    ko_sentence = row['kr']
    en_sentence = row['en']

    if ko_sentence in ko_origin_sentences:
        matched_sentences.append({'kr': ko_sentence, 'en': en_sentence})


# 동일한 문장들을 TSV 파일로 저장한다.
with open(output_file, 'w', encoding='utf-8', newline='') as f:
    fieldnames = ['kr', 'en']
    writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()
    writer.writerows(matched_sentences)

print("Matched sentences(type I) have been saved to", output_file)




  from .autonotebook import tqdm as notebook_tqdm
  0%|          | 73/111813 [00:01<50:30, 36.88it/s]


KeyboardInterrupt: 

In [35]:
import csv
import random
import json
from nltk.translate.bleu_score import sentence_bleu
from tqdm import tqdm

#3-1. 테스트할 데이터 분리하기. (원본 문장과 노이즈가 적용된 문장 분리)
#II: 노이즈만 분리 (100 개)
tsv_file = 'data/validation.tsv'
json_file = './ko_converted_data_cleaned.json'
output_file = './data(test)/data_type_II.tsv'
num_samples = 100  # 저장할 랜덤 샘플 개수


# 'ko_converted.json' 파일에서 필요한 값을 불러온다.
with open(json_file, 'r', encoding='utf-8') as f:
    data = json.load(f)
ko_converted_sentences = [item['ko_converted'] for item in data]

matched_sentences = []

# 'validation.tsv' 파일을 읽어와서 'ko' 항목을 검사한다.
with open(tsv_file, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f, delimiter='\t')
    sentences = [row for row in reader]  # 모든 문장을 리스트로 저장

# 랜덤하게 100개의 문장을 선택하여 'ko' 항목과 'ko_converted' 값을 비교하고, 동일한 문장인 경우 저장한다.
random.shuffle(sentences)
count = 0
with tqdm(total=num_samples, desc='Processing') as pbar:
    for row in sentences:
        ko_sentence = row['kr']
        en_sentence = row['en']

        if ko_sentence in ko_converted_sentences:
            matched_sentences.append({'kr': ko_sentence, 'en': en_sentence})
            count += 1
            pbar.update(1)

        if count == num_samples:
            break

# 동일한 문장들을 TSV 파일로 저장한다.
with open(output_file, 'w', encoding='utf-8', newline='') as f:
    fieldnames = ['kr', 'en']
    writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()
    writer.writerows(matched_sentences)

print("Matched sentences(type II) have been saved to", output_file)


Processing: 100%|██████████| 100/100 [00:07<00:00, 13.83it/s]

Matched sentences(type II) have been saved to ./data(test)/data_type_II.tsv





In [36]:
#III: 기존에서 I, II의 데이터를 제외해서 (100 개)
import json
import csv
import random

validation_file = './data/validation.tsv'
data_type_I_file = './data(test)/data_type_I.tsv'
data_type_II_file = './data(test)/data_type_II.tsv'
output_file = './data(test)/data_type_III.tsv'
num_samples = 100  # 저장할 랜덤 샘플 개수

# validation.tsv 파일에서 랜덤하게 num_samples 개의 샘플 추출
with open(validation_file, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f, delimiter='\t')
    validation_sentences = [row for row in reader]

random.shuffle(validation_sentences)

# data_type_I.tsv 파일에서 중복된 문장 제거
with open(data_type_I_file, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f, delimiter='\t')
    data_type_I_sentences = {row['kr'] for row in reader}

# data_type_II.tsv 파일에서 중복된 문장 제거
with open(data_type_II_file, 'r', encoding='utf-8') as f:
    reader = csv.DictReader(f, delimiter='\t')
    data_type_II_sentences = {row['kr'] for row in reader}

matched_sentences = []

count = 0
with tqdm(total=num_samples, desc='Processing') as pbar:
    for row in validation_sentences:
        ko_sentence = row['kr']
        en_sentence = row['en']

        if ko_sentence not in data_type_I_sentences and ko_sentence not in data_type_II_sentences:
            matched_sentences.append({'kr': ko_sentence, 'en': en_sentence})
            count += 1
            pbar.update(1)

        if count == num_samples:
            break

# 동일한 문장들을 TSV 파일로 저장한다.
with open(output_file, 'w', encoding='utf-8', newline='') as f:
    fieldnames = ['kr', 'en']
    writer = csv.DictWriter(f, fieldnames=fieldnames, delimiter='\t')
    writer.writeheader()
    writer.writerows(matched_sentences)

print("Matched sentences(type III) have been saved to", output_file)


Processing: 100%|██████████| 100/100 [00:00<?, ?it/s]

Matched sentences(type III) have been saved to ./data(test)/data_type_III.tsv





In [3]:
import csv
from tqdm import tqdm

def process_data(input_file):
    # 입력 파일을 불러오기
    with open(input_file, 'r', encoding='utf-8') as f:
        reader = csv.DictReader(f, delimiter='\t')
        sentences = [row for row in reader]

    model_outputs = {"ref": [], "model": []}

    for row in tqdm(sentences):
        model_outputs["model"].append(translate(row['kr']))
        model_outputs["ref"].append(row['en'])
        
    return model_outputs, sentences

# data_type_I.tsv 파일을 처리하여 모델 출력 구하기
print('data_type_I 번역 시작')
input_file_I = './data(test)/data_type_I.tsv'
output_data_I, input_set_I = process_data(input_file_I)

# data_type_II.tsv 파일을 처리하여 모델 출력 구하기
print('data_type_II 번역 시작')
input_file_II = './data(test)/data_type_II.tsv'
output_data_II, input_set_II = process_data(input_file_II)

print('data_type_III 번역 시작')
input_file_III = './data(test)/data_type_III.tsv'
output_data_III, input_set_III = process_data(input_file_III)

data_type_I 번역 시작


100%|██████████| 100/100 [01:50<00:00,  1.10s/it]


data_type_II 번역 시작


100%|██████████| 100/100 [01:49<00:00,  1.10s/it]


data_type_III 번역 시작


100%|██████████| 100/100 [01:54<00:00,  1.15s/it]


In [10]:
from nltk.translate.bleu_score import SmoothingFunction
#평가 함수 만들기

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from tqdm import tqdm

def evaluate_model_predictions(ref_data, model_output):
    print("Evaluating model predictions.")
    total_bleu_score = 0.0
    scores = []

    for i, item in enumerate(ref_data):
        en_text = item
        translation = model_output[i]

        # 두 항목간의 BLEU 평가를 진행한다. 경계 스무딩 사용해서 평가 진행
        reference = [en_text.split()]
        candidate = translation.split()
        bleu_score = sentence_bleu(reference, candidate, smoothing_function=SmoothingFunction().method3)
        scores.append(bleu_score)
        total_bleu_score += bleu_score

    average_bleu_score = total_bleu_score / len(ref_data)
    print("Average BLEU score:", average_bleu_score)

    return scores

In [18]:
#평가 진행
score_I = evaluate_model_predictions(output_data_I['ref'], output_data_I['model'])
score_II = evaluate_model_predictions(output_data_II['ref'], output_data_II['model'])
score_III = evaluate_model_predictions(output_data_III['ref'], output_data_III['model'])

Evaluating model predictions.
Average BLEU score: 0.45602219142455347
Evaluating model predictions.
Average BLEU score: 0.4244658708624342
Evaluating model predictions.
Average BLEU score: 0.34034082576848945
7, 1.0: Thank you for giving me this opportunity., Thank you for giving me this opportunity.
11, 1.0: The biggest feature is that it can be used as a treatment and hair essence., The biggest feature is that it can be used as a treatment and hair essence.
16, 1.0: We'll pick the final three first., We'll pick the final three first.
20, 1.0: Chronic periodontal disease reduces respiratory function., Chronic periodontal disease reduces respiratory function.
30, 1.0: For more information, please contact our call center., For more information, please contact our call center.
12, 1.0: We knew that the sales negotiations are ongoing, so we are looking for a new supplier, but we haven't found it yet., > What is this?
19, 1.0: Then you can use FFF2., I am sending you an e-mail to inform yo

In [19]:
import json

# JSON 파일 불러오기
with open("./data_google_papago/responses_google_data_i.json", "r", encoding="utf-8") as json_file:
    g_data_1 = json.load(json_file)
with open("./data_google_papago/responses_google_data_ii.json", "r", encoding="utf-8") as json_file:
    g_data_2 = json.load(json_file)
with open("./data_google_papago/responses_google_data_iii.json", "r", encoding="utf-8") as json_file:
    g_data_3 = json.load(json_file)
with open("./data_google_papago/responses_papago_data_i.json", "r", encoding="utf-8") as json_file:
    n_data_1 = json.load(json_file)
with open("./data_google_papago/responses_papago_data_ii.json", "r", encoding="utf-8") as json_file:
    n_data_2 = json.load(json_file)
with open("./data_google_papago/responses_papago_data_iii.json", "r", encoding="utf-8") as json_file:
    n_data_3 = json.load(json_file)


# key와 value 분리
g_values1 = list(g_data_1.values())
g_values2 = list(g_data_2.values())
g_values3 = list(g_data_3.values())
n_values1 = list(n_data_1.values())
n_values2 = list(n_data_2.values())
n_values3 = list(n_data_3.values())

evaluate_model_predictions(output_data_I['ref'], g_values1)
evaluate_model_predictions(output_data_II['ref'], g_values2)
evaluate_model_predictions(output_data_III['ref'], g_values3)
evaluate_model_predictions(output_data_I['ref'], n_values1)
evaluate_model_predictions(output_data_II['ref'], n_values2)
evaluate_model_predictions(output_data_III['ref'], n_values3)

Evaluating model predictions.
Average BLEU score: 0.3646768046296858
Evaluating model predictions.
Average BLEU score: 0.13391106056343174
Evaluating model predictions.
Average BLEU score: 0.24450656731965015
Evaluating model predictions.
Average BLEU score: 0.44699019034733084
Evaluating model predictions.
Average BLEU score: 0.12864758485808458
Evaluating model predictions.
Average BLEU score: 0.28168597388759037


[0.2636932645412712,
 0.3079300751569292,
 0.3827521065936582,
 0.43167001068522526,
 0.05795599612995367,
 0.031251907639724415,
 0.7292571723872933,
 0.16449759298465816,
 0.8091067115702212,
 0.5214865845309236,
 0.03716499092256817,
 0.6606328636027614,
 0.08116697886877472,
 0.025612540390806925,
 0.10937121222607606,
 0.5444460596606694,
 0.3128974322923816,
 0.6606328636027614,
 0.4989070972910272,
 0.1561969968460128,
 0.13065113298388567,
 0,
 0.04814971807094068,
 0.5169731539571706,
 0.28319415510892393,
 0.15415064977510756,
 0,
 0,
 0.06496183867338828,
 0.537284965911771,
 1.0,
 0.20235553926673694,
 0.43724109850912707,
 0.041961149062965476,
 0.4518010018049224,
 0.04932351569489709,
 0.08310415003234632,
 0.14323145079400493,
 0.2259005009024612,
 0.2767783451247154,
 0.2259005009024612,
 0.06722636787666482,
 0,
 0.5578002860768766,
 0.2557539057896621,
 0,
 0.3535533905932738,
 0.22637359354764466,
 0.31702331385234306,
 0.7910665071754358,
 0.21105340631872635,
 0.6

In [32]:
#평가 진행
score_I = evaluate_model_predictions(output_data_I['ref'], output_data_I['model'])
score_II = evaluate_model_predictions(output_data_II['ref'], output_data_II['model'])
score_III = evaluate_model_predictions(output_data_III['ref'], output_data_III['model'])

maximum = 3
count = 0
for i, d in enumerate(output_data_I['model']):
    if score_I[i] <= 0.5:
        print(f'{i}, {score_I[i]}: {d}/ {output_data_I["ref"][i]} / {input_set_I[i]}')
        print(f'Google: {list(g_data_1.values())[i]}')
        print(f'Papago: {list(n_data_1.values())[i]}')
        count += 1
    if not (count < maximum):
        break

count = 0
for i, d in enumerate(output_data_I['model']):
    if score_II[i] >= 0.9999:
        print(f'{i}, {score_II[i]}: {d}/ {output_data_II["ref"][i]} / {input_set_II[i]}')
        print(f'Google: {list(g_data_2.values())[i]}')
        print(f'Papago: {list(n_data_2.values())[i]}')
        count += 1
    if not (count < maximum):
        break

count = 0
for i, d in enumerate(output_data_I['model']):
    if score_III[i] >= 0.9999:
        print(f'{i}, {score_III[i]}: {d}/ {output_data_III["ref"][i]} / {input_set_III[i]}')
        print(f'Google: {list(g_data_3.values())[i]}')
        print(f'Papago: {list(n_data_3.values())[i]}')
        count += 1
    if not (count < maximum):
        break

Evaluating model predictions.
Average BLEU score: 0.45602219142455347
Evaluating model predictions.
Average BLEU score: 0.4244658708624342
Evaluating model predictions.
Average BLEU score: 0.34034082576848945


NameError: name 'input_set_I' is not defined