In [1]:
import tempfile
import uuid

from dotenv import load_dotenv

# Run python-dotenv's loader before anything else in the notebook.
# NOTE(review): presumably this supplies the API credentials the later LLM cells need — confirm.
load_dotenv()

True

In [2]:
import nest_asyncio

# Apply the nest_asyncio patch once for the whole kernel.
# NOTE(review): presumably required so async code in later cells can run inside Jupyter's already-running event loop — confirm.
nest_asyncio.apply()

In [10]:
import pandas as pd
import os
import pathlib

# The project root is two directory levels above the current working directory.
working_dir = pathlib.PurePath(os.getcwd())
root_dir = working_dir.parent.parent
# Kept as a plain string, exactly as os.path.join would return it.
data_dir = str(root_dir / 'data')

# 요약 태스크 평가

## 데이터 제작

먼저 [BillSum](https://huggingface.co/datasets/FiscalNote/billsum) 데이터셋을 사용하여 평가용 데이터를 준비합니다. 이후 AutoRAG를 통해 LLM 요약 생성은 물론, 생성된 요약의 성능 평가까지 진행합니다.

In [3]:
from datasets import load_dataset

# Sample size and seed are fixed so that "Restart Kernel -> Run All" always
# evaluates the same bills; an unseeded .sample() draws different rows each run.
SAMPLE_SIZE = 20
SAMPLE_SEED = 42

ds = load_dataset("FiscalNote/billsum")
# Only the "train" split is used; the sampled rows keep their original row labels.
original_df = ds["train"].to_pandas().sample(SAMPLE_SIZE, random_state=SAMPLE_SEED)

README.md:   0%|          | 0.00/7.27k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/91.8M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/15.8M [00:00<?, ?B/s]

ca_test-00000-of-00001.parquet:   0%|          | 0.00/6.12M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/18949 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3269 [00:00<?, ? examples/s]

Generating ca_test split:   0%|          | 0/1237 [00:00<?, ? examples/s]

In [5]:
# Discard the existing row labels in favour of a fresh 0..n-1 index,
# rebinding the name rather than mutating the frame in place.
original_df = original_df.reset_index(drop=True)
original_df.head()

Unnamed: 0,text,summary,title
0,SECTION 1. SHORT TITLE.\n\n This Act may be...,Deceptive Practices and Voter Intimidation Pre...,A bill to prohibit deceptive practices in Fede...
1,SECTION 1. SHORT TITLE.\n\n This Act may be...,Drill Responsibly in Leased Lands Act of 2008 ...,To amend the Naval Petroleum Reserves Producti...
2,SECTION 1. SHORT TITLE.\n\n This Act may be...,Determination of Russia-Iran Weapons Transfer ...,Determination of Russia-Iran Weapons Transfer ...
3,s on the Budget.--Section \n301(a) of the Cong...,Amends the Congressional Budget Act of 1974 to...,Strengthen Social Security and Medicare Act of...
4,SECTION 1. SHORT TITLE.\n\n This Act may be...,Disabled Veterans Tax Termination Act - Amends...,"To amend title 10, United States Code, to perm..."


In [7]:
import pandas as pd
from uuid import uuid4

# Convert the BillSum rows into AutoRAG-style records.
# Every record carries the same fixed query; the bill text is placed in
# 'retrieved_contents' and the reference summary in 'generation_gt', while the
# retrieval-related fields are left empty.
autorag_dicts = [
	{
		'qid': str(uuid4()),
		'query': 'Summarize the following document.',
		'retrieval_gt': [[]],
		'generation_gt': [reference_summary],
		'retrieved_contents': [bill_text],
		'retrieved_ids': [],
		'retrieve_scores': [],
	}
	for reference_summary, bill_text in zip(original_df['summary'], original_df['text'])
]
autorag_df = pd.DataFrame(autorag_dicts)

In [8]:
autorag_df.head()

Unnamed: 0,qid,query,retrieval_gt,generation_gt,retrieved_contents,retrieved_ids,retrieve_scores
0,ee7495f5-3149-4cd6-a12c-e81578387900,Summarize the following document.,[[]],[Deceptive Practices and Voter Intimidation Pr...,[SECTION 1. SHORT TITLE.\n\n This Act may b...,[],[]
1,2a11d3cd-b1e0-46f1-8fc1-408cab3d7247,Summarize the following document.,[[]],[Drill Responsibly in Leased Lands Act of 2008...,[SECTION 1. SHORT TITLE.\n\n This Act may b...,[],[]
2,57a1b948-e243-41da-8a74-b75c5ff08655,Summarize the following document.,[[]],[Determination of Russia-Iran Weapons Transfer...,[SECTION 1. SHORT TITLE.\n\n This Act may b...,[],[]
3,bd5d22c9-5a16-4b85-878a-6cef82b15b22,Summarize the following document.,[[]],[Amends the Congressional Budget Act of 1974 t...,[s on the Budget.--Section \n301(a) of the Con...,[],[]
4,f495fe57-9d32-46fc-a46a-c7cea6575f53,Summarize the following document.,[[]],[Disabled Veterans Tax Termination Act - Amend...,[SECTION 1. SHORT TITLE.\n\n This Act may b...,[],[]


In [19]:
from datetime import datetime

# One-row placeholder corpus: a single document whose id and contents are both
# the literal string 'empty', stamped with the current time in its metadata.
placeholder_metadata = {'last_modified_datetime': datetime.now()}
empty_corpus_df = pd.DataFrame(
	{
		'doc_id': ['empty'],
		'contents': ['empty'],
		'metadata': [placeholder_metadata],
	},
	columns=['doc_id', 'contents', 'metadata'],
)

In [20]:
# Location of the summary-evaluation config file, config/evaluation/summary/config.yaml under root_dir.
yaml_path = os.path.join(root_dir, 'config', 'evaluation', 'summary', 'config.yaml')

In [None]:
from autorag.evaluator import Evaluator
import tempfile

# Write the in-memory QA and corpus frames to parquet files and run the trial on them.
# A TemporaryDirectory is used instead of NamedTemporaryFile because an open
# NamedTemporaryFile cannot be reopened by name on every platform (notably
# Windows), and to_parquet(path) opens the path itself.
with tempfile.TemporaryDirectory() as tmp_dir:
	qa_path = os.path.join(tmp_dir, 'qa.parquet')
	corpus_path = os.path.join(tmp_dir, 'corpus.parquet')
	autorag_df.to_parquet(qa_path)
	empty_corpus_df.to_parquet(corpus_path)
	evaluator = Evaluator(qa_data_path=qa_path,
						  corpus_data_path=corpus_path,
						  project_dir=os.path.join(root_dir, 'autorag_project', 'evaluation', 'summary'))
	# The trial runs inside the block so both parquet files still exist while it executes.
	evaluator.start_trial(yaml_path, skip_validation=True)

## 결과 확인

In [27]:
# Read best_0.parquet from the '0/node_line/generator' folder of the summary project directory.
generator_result_path = os.path.join(
	root_dir, 'autorag_project', 'evaluation', 'summary', '0', 'node_line', 'generator', 'best_0.parquet'
)
result_df = pd.read_parquet(generator_result_path)

In [28]:
result_df.head()

Unnamed: 0,qid,query,retrieval_gt,generation_gt,retrieved_contents,retrieved_ids,retrieve_scores,prompts,generated_texts,generated_tokens,generated_log_probs,rouge,sem_score
0,ee7495f5-3149-4cd6-a12c-e81578387900,Summarize the following document.,[[]],[Deceptive Practices and Voter Intimidation Pr...,[SECTION 1. SHORT TITLE.\n\n This Act may b...,[],[],Summarize the following Context.\nSummarize th...,"The ""Deceptive Practices and Voter Intimidatio...","[976, 392, 1923, 953, 585, 93703, 326, 631, 87...","[-1.6643718e-05, -0.062359765, 0.0, 0.0, 0.0, ...",0.24356,0.961812
1,2a11d3cd-b1e0-46f1-8fc1-408cab3d7247,Summarize the following document.,[[]],[Drill Responsibly in Leased Lands Act of 2008...,[SECTION 1. SHORT TITLE.\n\n This Act may b...,[],[],Summarize the following Context.\nSummarize th...,"The ""Drill Responsibly in Leased Lands Act of ...","[976, 392, 5822, 492, 36777, 8800, 306, 2018, ...","[-1.8193366e-05, -0.023069482, 0.0, 0.0, 0.0, ...",0.321138,0.961793
2,57a1b948-e243-41da-8a74-b75c5ff08655,Summarize the following document.,[[]],[Determination of Russia-Iran Weapons Transfer...,[SECTION 1. SHORT TITLE.\n\n This Act may b...,[],[],Summarize the following Context.\nSummarize th...,"The ""Determination of Russia-Iran Weapons Tran...","[976, 392, 130588, 3273, 328, 19420, 31893, 87...","[-4.406056e-05, -0.07463003, -3.1281633e-07, 0...",0.256881,0.952403
3,bd5d22c9-5a16-4b85-878a-6cef82b15b22,Summarize the following document.,[[]],[Amends the Congressional Budget Act of 1974 t...,[s on the Budget.--Section \n301(a) of the Con...,[],[],Summarize the following Context.\nSummarize th...,The document outlines amendments to the Congre...,"[976, 3213, 74737, 84324, 316, 290, 101581, 39...","[-0.0052344752, -0.0027878843, -0.23834187, -0...",0.228571,0.922384
4,f495fe57-9d32-46fc-a46a-c7cea6575f53,Summarize the following document.,[[]],[Disabled Veterans Tax Termination Act - Amend...,[SECTION 1. SHORT TITLE.\n\n This Act may b...,[],[],Summarize the following Context.\nSummarize th...,"The ""Disabled Veterans Tax Termination Act"" co...","[976, 392, 33579, 56708, 18489, 12167, 3273, 5...","[-9.436451e-05, -0.5263896, -1.9361265e-07, -7...",0.294314,0.959486


In [29]:
def print_summary_result(df, idx: int):
	"""Print one evaluated sample: source text, reference summary, prediction and scores.

	Args:
		df: Frame providing the 'retrieved_contents', 'generation_gt',
			'generated_texts', 'rouge' and 'sem_score' columns. The first two
			hold sequences, of which only the first element is printed.
		idx: Positional (``iloc``) index of the row to print.
	"""
	sample = df.iloc[idx]
	source_text = sample['retrieved_contents'][0]
	reference_summary = sample['generation_gt'][0]
	print(
		f"Original Content : {source_text}\n\n"
		f"Ground Truth Summary: {reference_summary}\n\n"
		f"Predicted Summary: {sample['generated_texts']}\n\n"
		f"Rouge: {sample['rouge']}\n\n"
		f"Sem Score: {sample['sem_score']}"
	)

In [30]:
print_summary_result(result_df, 0)

Original Content : SECTION 1. SHORT TITLE.

    This Act may be cited as the ``Deceptive Practices and Voter 
Intimidation Prevention Act of 2006''.

SEC. 2. DECEPTIVE PRACTICES IN ELECTIONS.

    (a) Civil Action.--
            (1) In general.--Subsection (b) of section 2004 of the 
        Revised Statutes (42 U.S.C. 1971(b)) is amended--
                    (A) by striking ``No person'' and inserting the 
                following:
            ``(1) No person''; and
                    (B) by inserting at the end the following new 
                paragraph:
            ``(2) No person, whether acting under color of law or 
        otherwise, shall knowingly deceive any other person regarding--
                    ``(A) the time, place, or manner of conducting a 
                general, primary, run-off, or special election for the 
                office of President, Vice President, presidential 
                elector, Member of the Senate, Member of the House of 
             

In [31]:
print_summary_result(result_df, 1)

Original Content : SECTION 1. SHORT TITLE.

    This Act may be cited as the ``Drill Responsibly in Leased Lands 
Act of 2008''.

SEC. 2. NATIONAL PETROLEUM RESERVE IN ALASKA: LEASE SALES.

    Section 107(a) of the Naval Petroleum Reserves Production Act of 
1976 is amended to read as follows:
    ``(a) In General.--The Secretary shall conduct an expeditious 
environmentally responsible program of competitive leasing of oil and 
gas in the National Petroleum Reserve in Alaska in accordance with this 
Act. Such program shall include no fewer than one lease sale in the 
Reserve each year during the period 2009 through 2013.''.

SEC. 3. NATIONAL PETROLEUM RESERVE IN ALASKA: PIPELINE CONSTRUCTION.

    The Secretary of Transportation shall facilitate, in an 
environmentally responsible manner and in coordination with the 
Secretary of the Interior, the construction of pipelines necessary to 
transport oil and gas from or through the National Petroleum Reserve in 
Alaska to existing transp

In [32]:
# Read summary.csv from the '0/node_line/generator' folder of the summary project directory.
generator_summary_path = os.path.join(
	root_dir, 'autorag_project', 'evaluation', 'summary', '0', 'node_line', 'generator', 'summary.csv'
)
summary_df = pd.read_csv(generator_summary_path)

In [34]:
summary_df

Unnamed: 0,filename,module_name,module_params,execution_time,average_output_token,rouge,sem_score,is_best
0,0.parquet,OpenAILLM,"{'llm': 'gpt-4o-mini', 'temperature': 1.0, 'ba...",0.817463,249.2,0.249982,0.947394,True


## RAGAS 사용 (RAGAS Summarization Conciseness Score)

In [37]:
from ragas import EvaluationDataset, SingleTurnSample, evaluate


# First, convert the AutoRAG data into a RAGAS dataset.
def autorag_to_ragas(autorag_df: pd.DataFrame, corpus_df: pd.DataFrame) -> EvaluationDataset:
	"""Build a RAGAS ``EvaluationDataset`` with one ``SingleTurnSample`` per row.

	Args:
		autorag_df: Frame providing 'retrieved_contents', 'generated_texts' and
			'generation_gt' columns. Only the first element of 'generation_gt'
			is used as the reference.
		corpus_df: Accepted as part of the signature but not read by this function.

	Returns:
		An ``EvaluationDataset`` wrapping the samples in row order.
	"""
	def _row_to_sample(record) -> SingleTurnSample:
		return SingleTurnSample(
			reference_contexts=record['retrieved_contents'],
			response=record['generated_texts'],
			reference=record['generation_gt'][0],
		)

	return EvaluationDataset([_row_to_sample(record) for _, record in autorag_df.iterrows()])
# Build the RAGAS dataset from the generator results; empty_corpus_df only fills the corpus_df argument.
ragas_dataset = autorag_to_ragas(result_df, empty_corpus_df)

In [36]:
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI
# Wrap a LangChain ChatOpenAI model ("gpt-4o") so it can be passed to RAGAS as the `llm` argument.
judge_chat_model = ChatOpenAI(model="gpt-4o")
ragas_llm = LangchainLLMWrapper(judge_chat_model)

In [None]:
from ragas.metrics import SummarizationScore

# Score the dataset with the single SummarizationScore metric, using ragas_llm as the LLM.
summarization_metrics = [SummarizationScore()]
result = evaluate(ragas_dataset, metrics=summarization_metrics, llm=ragas_llm)

In [40]:
result

{'summary_score': 0.5652}