In [1]:
import nest_asyncio

# Patch asyncio so that an event loop can be re-entered from inside the
# notebook's already-running loop.
# NOTE(review): presumably needed by the AutoRAG evaluator run later on — confirm.
nest_asyncio.apply()

In [2]:
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment.
# NOTE(review): presumably this supplies the API key used by the LLM calls — confirm.
load_dotenv()

True

In [3]:
import pandas as pd
import os
import pathlib

# The project root sits two directory levels above the current working directory.
root_dir = pathlib.PurePath(os.getcwd()).parent.parent
# os.path.join accepts the PurePath and returns a plain string.
data_dir = os.path.join(root_dir, 'data')

In [18]:
%pip install bert_score

Collecting bert_score
  Using cached bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Using cached bert_score-0.3.13-py3-none-any.whl (61 kB)
Installing collected packages: bert_score
Successfully installed bert_score-0.3.13

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


# 번역 태스크 평가

## 데이터 제작

먼저, [opus_books](https://huggingface.co/datasets/Helsinki-NLP/opus_books) 데이터셋을 사용하여 번역 태스크 평가 데이터를 제작합니다.

In [7]:
from datasets import load_dataset

ds = load_dataset("Helsinki-NLP/opus_books", "de-en")  # German => English
# Draw 20 sentence pairs with a fixed seed so the evaluation set (and therefore
# every metric computed from it) is the same on each Restart & Run All.
original_df = ds["train"].to_pandas().sample(20, random_state=42).reset_index(drop=True)

In [9]:
# Split the nested translation dict into one flat column per language.
original_df['de'] = original_df['translation'].map(lambda pair: pair['de'])
original_df['en'] = original_df['translation'].map(lambda pair: pair['en'])

In [10]:
# Preview the sampled sentence pairs together with the extracted 'de' / 'en' columns.
original_df.head()

Unnamed: 0,id,translation,de,en
0,12831,{'de': '»Sie sind wohl aus Hinterpommern?« fra...,»Sie sind wohl aus Hinterpommern?« fragte die ...,"""Where were you brought up?"" asked the daughte..."
1,33046,{'de': 'Kann gesät werden?« fragte er nach kur...,Kann gesät werden?« fragte er nach kurzem Stil...,"'Well, can we begin sowing?' he asked after a ..."
2,50130,"{'de': ',,Na, ich sag' dir, Tom, wo du so 'n b...",",,Na, ich sag' dir, Tom, wo du so 'n blaues Li...","""Well, where you see one of them blue lights f..."
3,37699,{'de': 'Aber das war ein wahres Glück; denn St...,Aber das war ein wahres Glück; denn Stepan Ark...,"However, that was all for the best, for Oblons..."
4,32847,"{'de': '›Somit‹, sagte Alexei Alexandrowitsch ...","›Somit‹, sagte Alexei Alexandrowitsch zu sich ...","'Well then,' thought he, 'the question of her ..."


In [11]:
import pandas as pd
from uuid import uuid4

# Convert to AutoRAG QA records: every row gets the same translation instruction
# as its query, the German text as the only "retrieved" passage, and the English
# text as the generation ground truth.
autorag_dicts = [
    {
        'qid': str(uuid4()),
        'query': 'Translate the following text into English.',
        'retrieval_gt': [[]],
        'generation_gt': [english_text],
        'retrieved_contents': [german_text],
        'retrieved_ids': [],
        'retrieve_scores': [],
    }
    for german_text, english_text in zip(original_df['de'], original_df['en'])
]
autorag_df = pd.DataFrame(autorag_dicts)

In [12]:
# Preview the converted AutoRAG QA records.
autorag_df.head()

Unnamed: 0,qid,query,retrieval_gt,generation_gt,retrieved_contents,retrieved_ids,retrieve_scores
0,1f342c71-9097-4664-81eb-cc24ee1a58fa,Translate the following text into English.,[[]],"[""Where were you brought up?"" asked the daught...",[»Sie sind wohl aus Hinterpommern?« fragte die...,[],[]
1,dc7cf7f4-0716-415a-a8a2-25a7cb9c7b03,Translate the following text into English.,[[]],"['Well, can we begin sowing?' he asked after a...",[Kann gesät werden?« fragte er nach kurzem Sti...,[],[]
2,5be0b526-dbe2-4891-a531-9d66f4e4ecbc,Translate the following text into English.,[[]],"[""Well, where you see one of them blue lights ...","[,,Na, ich sag' dir, Tom, wo du so 'n blaues L...",[],[]
3,50a4eaa7-73c4-4636-8cf5-ecd1d005ea4b,Translate the following text into English.,[[]],"[However, that was all for the best, for Oblon...",[Aber das war ein wahres Glück; denn Stepan Ar...,[],[]
4,b9967527-973d-4da0-bafa-2b46c12a29a7,Translate the following text into English.,[[]],"['Well then,' thought he, 'the question of her...","[›Somit‹, sagte Alexei Alexandrowitsch zu sich...",[],[]


In [13]:
from datetime import datetime

# Single-row placeholder corpus holding one dummy document.
# NOTE(review): presumably exists only because the Evaluator requires a corpus file — confirm.
empty_corpus_df = pd.DataFrame(
    {
        'doc_id': ['empty'],
        'contents': ['empty'],
        'metadata': [{'last_modified_datetime': datetime.now()}],
    },
    columns=['doc_id', 'contents', 'metadata'],
)

In [16]:
# Path of the YAML config that is passed to Evaluator.start_trial for the translation evaluation.
yaml_path = os.path.join(root_dir, 'config', 'evaluation', 'translate', 'config.yaml')

In [None]:
from autorag.evaluator import Evaluator
import tempfile

# Write both parquet inputs into a throwaway directory. Reopening a still-open
# NamedTemporaryFile by name is not portable (it fails on Windows), whereas
# ordinary files inside a TemporaryDirectory work on every platform.
with tempfile.TemporaryDirectory() as tmp_dir:
    qa_path = os.path.join(tmp_dir, 'qa.parquet')
    corpus_path = os.path.join(tmp_dir, 'corpus.parquet')
    autorag_df.to_parquet(qa_path)
    empty_corpus_df.to_parquet(corpus_path)
    evaluator = Evaluator(
        qa_data_path=qa_path,
        corpus_data_path=corpus_path,
        project_dir=os.path.join(root_dir, 'autorag_project', 'evaluation', 'translate'),
    )
    # Run the trial described by the YAML config with validation skipped.
    evaluator.start_trial(yaml_path, skip_validation=True)

## 결과 확인

In [20]:
# Load trial 0's generator output (best_0.parquet) from the AutoRAG project directory.
result_df = pd.read_parquet(
    os.path.join(
        root_dir, 'autorag_project', 'evaluation', 'translate',
        '0', 'node_line', 'generator', 'best_0.parquet',
    )
)

In [21]:
# Preview the generated translations alongside their per-sample metric columns.
result_df.head()

Unnamed: 0,qid,query,retrieval_gt,generation_gt,retrieved_contents,retrieved_ids,retrieve_scores,prompts,generated_texts,generated_tokens,generated_log_probs,bleu,meteor,bert_score,sem_score
0,1f342c71-9097-4664-81eb-cc24ee1a58fa,Translate the following text into English.,[[]],"[""Where were you brought up?"" asked the daught...",[»Sie sind wohl aus Hinterpommern?« fragte die...,[],[],Translate the following text into English. \n ...,"""They are probably from Hinterpommern?"" asked ...","[139002, 553, 7038, 591, 59801, 88410, 79406, ...","[-0.21484652, -0.39739433, -0.3023416, -2.3392...",8.840994,0.264838,0.872279,0.875736
1,dc7cf7f4-0716-415a-a8a2-25a7cb9c7b03,Translate the following text into English.,[[]],"['Well, can we begin sowing?' he asked after a...",[Kann gesät werden?« fragte er nach kurzem Sti...,[],[],Translate the following text into English. \n ...,"""Can it be sown?"" he asked after a brief silence.","[1, 8475, 480, 413, 265, 940, 16842, 501, 7747...","[-0.021347981, -4.310693e-05, -0.030859487, 0....",19.67498,0.535714,0.945938,0.948707
2,5be0b526-dbe2-4891-a531-9d66f4e4ecbc,Translate the following text into English.,[[]],"[""Well, where you see one of them blue lights ...","[,,Na, ich sag' dir, Tom, wo du so 'n blaues L...",[],[],Translate the following text into English. \n ...,"""Well, I'll tell you, Tom, where you see such ...","[1, 16936, 11, 17291, 5485, 481, 11, 11838, 11...","[-0.02312109, -9.8536635e-05, 0.0, -0.8082196,...",19.059633,0.575534,0.917599,0.932155
3,50a4eaa7-73c4-4636-8cf5-ecd1d005ea4b,Translate the following text into English.,[[]],"[However, that was all for the best, for Oblon...",[Aber das war ein wahres Glück; denn Stepan Ar...,[],[],Translate the following text into English. \n ...,But that was a true stroke of luck; for Stepan...,"[7943, 484, 673, 261, 1343, 20112, 328, 12751,...","[-0.02039835, -0.025665293, -3.0545007e-06, -0...",14.535768,0.464085,0.911697,0.915011
4,b9967527-973d-4da0-bafa-2b46c12a29a7,Translate the following text into English.,[[]],"['Well then,' thought he, 'the question of her...","[›Somit‹, sagte Alexei Alexandrowitsch zu sich...",[],[],Translate the following text into English. \n ...,"""Thus,"" said Alexei Alexandrovich to himself, ...","[1, 84787, 3532, 2059, 12734, 17432, 52159, 23...","[-0.42983776, -0.08217332, -3.3451433e-05, -0....",12.180839,0.408726,0.921092,0.90236


In [23]:
def print_translate_result(df, idx: int):
    """Print one evaluated sample: source, reference, prediction and metrics.

    Args:
        df: Result frame with the columns 'retrieved_contents', 'generation_gt',
            'generated_texts', 'bleu', 'meteor', 'bert_score' and 'sem_score'.
            The first element of 'retrieved_contents' and 'generation_gt' is shown.
        idx: Positional (iloc) index of the row to print.
    """
    sample = df.iloc[idx]
    sections = [
        f"Original Content : {sample['retrieved_contents'][0]}",
        f"Ground Truth : {sample['generation_gt'][0]}",
        f"Predicted Translation: {sample['generated_texts']}",
        f"BLEU: {sample['bleu']}",
        f"METEOR: {sample['meteor']}",
        f"Bert Score: {sample['bert_score']}",
        f"Sem Score: {sample['sem_score']}",
    ]
    # Sections are separated by a blank line.
    print("\n\n".join(sections))

In [24]:
# Inspect the first evaluated sample (row 0).
print_translate_result(result_df, 0)

Original Content : »Sie sind wohl aus Hinterpommern?« fragte die junge Frau so impertinent, daß sich die alte Frau die Frage nicht verkneifen konnte, ob sie sich damit selber verteidigen wolle.

Ground Truth : "Where were you brought up?" asked the daughter-in-law, with so impertinent a look that Madame Bovary asked her if she were not perhaps defending her own case.

Predicted Translation: "They are probably from Hinterpommern?" asked the young woman so impertinently that the older woman couldn't help but wonder whether she was trying to defend herself with that question.

BLEU: 8.840994001530047

METEOR: 0.26483781918564525

Bert Score: 0.8722787499427795

Sem Score: 0.8757357058500955


In [25]:
# Inspect the second evaluated sample (row 1).
print_translate_result(result_df, 1)

Original Content : Kann gesät werden?« fragte er nach kurzem Stillschweigen.

Ground Truth : 'Well, can we begin sowing?' he asked after a pause.

Predicted Translation: "Can it be sown?" he asked after a brief silence.

BLEU: 19.67497981115564

METEOR: 0.5357142857142857

Bert Score: 0.9459381699562073

Sem Score: 0.948706626672618


In [26]:
# Inspect the third evaluated sample (row 2).
print_translate_result(result_df, 2)

Original Content : ,,Na, ich sag' dir, Tom, wo du so 'n blaues Licht siehst, kannst du sicher sein, daß da 'n Geist dahinter steckt. 's ist doch mal so bekannt.

Ground Truth : "Well, where you see one of them blue lights flickering around, Tom, you can bet there's a ghost mighty close behind it. It stands to reason.

Predicted Translation: "Well, I'll tell you, Tom, where you see such a blue light, you can be sure that there's a spirit behind it. It's well known."

BLEU: 19.05963299154425

METEOR: 0.5755342118978483

Bert Score: 0.9175985455513

Sem Score: 0.9321554958164447


In [27]:
# Load the generator node's summary.csv for trial 0.
summary_df = pd.read_csv(
    os.path.join(
        root_dir, 'autorag_project', 'evaluation', 'translate',
        '0', 'node_line', 'generator', 'summary.csv',
    )
)

In [28]:
# Show the summary table: one row per evaluated module with its averaged metrics.
summary_df

Unnamed: 0,filename,module_name,module_params,execution_time,average_output_token,bleu,meteor,bert_score,sem_score,is_best
0,0.parquet,OpenAILLM,"{'llm': 'gpt-4o-mini', 'temperature': 1.0, 'ba...",0.164339,23.1,15.548133,0.423367,0.903976,0.897078,True
