In [1]:
import numpy as np
import pandas as pd
from collections import Counter
import torch
from faiss_module_bw import make_db, make_fewshot_db
from model import setup_llm_pipeline

from save_module import save
from seed_module import seed_everything
from utils_module import make_dict, extract_answer, format_docs

seed_everything(52)
from sklearn.model_selection import KFold
def calculate_f1_score(true_sentence, predicted_sentence, sum_mode=True):

    #공백 제거
    true_sentence = ''.join(true_sentence.split())
    predicted_sentence = ''.join(predicted_sentence.split())
    
    true_counter = Counter(true_sentence)
    predicted_counter = Counter(predicted_sentence)

    #문자가 등장한 개수도 고려
    if sum_mode:
        true_positive = sum((true_counter & predicted_counter).values())
        predicted_positive = sum(predicted_counter.values())
        actual_positive = sum(true_counter.values())

    #문자 자체가 있는 것에 focus를 맞춤
    else:
        true_positive = len((true_counter & predicted_counter).values())
        predicted_positive = len(predicted_counter.values())
        actual_positive = len(true_counter.values())

    #f1 score 계산
    precision = true_positive / predicted_positive if predicted_positive > 0 else 0
    recall = true_positive / actual_positive if actual_positive > 0 else 0
    f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    return precision, recall, f1_score

def calculate_average_f1_score(true_sentences, predicted_sentences):
    
    total_precision = 0
    total_recall = 0
    total_f1_score = 0
    
    for true_sentence, predicted_sentence in zip(true_sentences, predicted_sentences):
        precision, recall, f1_score = calculate_f1_score(true_sentence, predicted_sentence)
        total_precision += precision
        total_recall += recall
        total_f1_score += f1_score
    
    avg_precision = total_precision / len(true_sentences)
    avg_recall = total_recall / len(true_sentences)
    avg_f1_score = total_f1_score / len(true_sentences)
    
    return {
        'average_precision': avg_precision,
        'average_recall': avg_recall,
        'average_f1_score': avg_f1_score
    }

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
k_folds = 4
kf = KFold(n_splits=k_folds, shuffle=True, random_state=52)
train_df = pd.read_csv('train.csv')
model_id = "meta-llama/Meta-Llama-3.1-8B-Instruct"
llm = setup_llm_pipeline(model_id)
fold_result = []

Loading checkpoint shards: 100%|██████████| 4/4 [00:22<00:00,  5.60s/it]


In [3]:

# from vectordb_module import FAISSDatabaseManager
# from run_blendrag import run
#     # EleutherAI/polyglot-ko-1.3b
#     #"meta-llama/Meta-Llama-3.1-8B-Instruct"
#     # maywell/TinyWand-kiqu
#     # yanolja/EEVE-Korean-Instruct-2.8B-v1.0
#     # MLP-KTLim/llama-3-Korean-Bllossom-8B
#     # train에도 RAG를 쓸 때 사용
# for fold, (train_index, val_index) in enumerate(kf.split(train_df)):
#     fold_results = []
#     print(f"\nFold {fold + 1}/{k_folds}")
#     # 수정된 부분: .iloc[] 사용
#     train_set = train_df.iloc[train_index]
#     val_set = train_df.iloc[val_index]
    
#     train_db = FAISSDatabaseManager(db_path=f'./serm_best_field_train_db', chunk_strategy="serm", search_strategy="serm_best_field")
#     train_db.make_db(train_set)
    
#     val_db = FAISSDatabaseManager(db_path=f'./serm_best_field_train_db', chunk_strategy="serm", search_strategy="serm_best_field")
#     val_db.make_db(train_set)
    
#     fewshot_db = FAISSDatabaseManager(db_path=f'./fewshot_db_{fold}',search_strategy="knn_best_field")
#     fewshot_db.make_db(val_set,fewshot=True)
    
#     pred = run(train_db= train_db,
#         test_db= val_db,
#         fewshot_db=fewshot_db, 
#         dataset= val_set.to_dict(orient='records') ,
#         llm=llm,
#         verbose=False)
#     result = pd.DataFrame()
#     result['pred'] = [result['Answer'] for result in pred]
#     val_set.index = range(len(val_set))
#     result['gt'] = val_set['Answer']
        
#     result = calculate_average_f1_score(result['gt'], result['pred'])
#     print(result)
#     fold_result.append(result)
#     break
#     print(f"Fold {fold + 1} ended")
# # 모든 fold의 결과를 집계하고 메트릭 계산
# all_results = [result for fold_result in fold_results for result in fold_result]
# print(f"Average F1 Score: {np.mean([result['average_f1_score'] for result in all_results])}")
# print(f"Average Precision: {np.mean([result['average_precision'] for result in all_results])}")
# print(f"Average Recall: {np.mean([result['average_recall'] for result in all_results])}")


0.5691677502 | 0.6880094825892175

|  0.703940744417009 temp 0.2

In [4]:
from run_bw2 import run
for fold, (train_index, val_index) in enumerate(kf.split(train_df)):
    fold_results = []
    print(f"\nFold {fold + 1}/{k_folds}")
    # 수정된 부분: .iloc[] 사용
    train_set = train_df.iloc[train_index]
    val_set = train_df.iloc[val_index]
    
    train_db = make_db(train_set,'./train_faiss_db')

    fewshot_db = make_fewshot_db(train_set,None)
    
    pred = results = run(train_db= train_db,
        test_db= train_db,
        fewshot_db=fewshot_db, 
        dataset= val_set.to_dict(orient='records') ,
        llm=llm,
        verbose=False)
    result = pd.DataFrame()
    result['pred'] = [result['Answer'] for result in pred]
    val_set.index = range(len(val_set))
    result['gt'] = val_set['Answer']
        
    result = calculate_average_f1_score(result['gt'], result['pred'])
    print(result)
    fold_result.append(result)
    break
    fewshot_db = None
    train_db = None
    val_db = None
    torch.cuda.empty_cache()
    
# 모든 fold의 결과를 집계하고 메트릭 계산
all_results = [result for fold_result in fold_results for result in fold_result]
print(f"Average F1 Score: {np.mean([result['average_f1_score'] for result in all_results])}")
print(f"Average Precision: {np.mean([result['average_precision'] for result in all_results])}")
print(f"Average Recall: {np.mean([result['average_recall'] for result in all_results])}")


Fold 1/4
Loading FAISS DB from: ./train_faiss_db


  return torch.load(io.BytesIO(b))


Loaded Fewshot Set: [{'Source': '1-1 2024 주요 재정통계 1권', 'Source_path': './train_source/1-1 2024 주요 재정통계 1권.pdf', 'Question': '2024년 중앙정부 재정체계는 어떻게 구성되어 있나요?', 'Answer': '2024년 중앙정부 재정체계는 예산(일반·특별회계)과 기금으로 구분되며, 2024년 기준으로 일반회계 1개, 특별회계 21개, 기금 68개로 구성되어 있습니다.'}]
Creating FAISS DB
Done.


  attn_output = torch.nn.functional.scaled_dot_product_attention(
  8%|▊         | 10/124 [01:12<14:49,  7.80s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
100%|██████████| 124/124 [18:20<00:00,  8.87s/it]

{'average_precision': 0.5956608636739278, 'average_recall': 0.7539088542231868, 'average_f1_score': 0.6230294326970625}
Average F1 Score: nan
Average Precision: nan
Average Recall: nan



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


## 정규화 전부 제거
0.5845866120343769

앙상블 리트리버
k=3 0.5/0.5
0.6033479893221689

k=3 0.3/0.7
0.6230294326970625

k=3 0.15/0.85


context위치변경

1트 

<|start_header_id|>system<|end_header_id|>
Now Do it for real.<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Question: {input}\n\nContext: {context}<|eot_id|>

k-fold 0.63

2트

0.6395752909263238

3트

0.6523232543460189

## new prompt 예제에서 context에서 answer를 도출했다고 말함 그리고 그런식으로 작성하라고 함

temp = 0.4, topp = 0.6 repetition_penalty = 1.1

fewshot num = 3, fewshot context = 1 , context = 2 청크 512, 오버랩 64, 

LB: 

fold 1:0.6356956690740023 (원래)


(NeW)
LB: 0.5846913577

fold 1 0.6375135913067492
 
 """<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Today Date: 8 Aug 2024

You are the financial expert who helps me with my financial information Q&As.
You earn 10 points when you answer me and follow the rules and lose 7 points when you don't.

12,500 백만원 = 125 억원 = 12,500,000,000 원
5,400 백만원 = 54 억원 = 5,400,000,000 원

Here are some rules you should follow.
- Please use contexts to answer the question.
- Please your answers should be concise.
- Please answers must be written in Korean.
- Please answer the question in 1-3 sentences.

Please learn the answering like examples below.<|eot_id|>
""" +f"{fewshot_str}" + """<|start_header_id|>system<|end_header_id|>
Now Do it for real.
Given the following contexts about Question:
{context}<|start_header_id|>user<|end_header_id|>
{input}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>\n\n
"""

## search_type="similarity_score_threshold"
temp = 0.4, topp = 0.6 repetition_penalty = 1.1

fewshot num = 3, fewshot context = 1 , context = 2 청크 512, 오버랩 64, 

LB: 0.6014109705	

fold 1: 0.6478771022993277

~~

fewshot num = 3, fewshot context = 1 , context = 5 청크 512, 오버랩 64, 

LB: 0.5791770154

fold 1: 0.661329586886551

~~

fewshot num = 3, fewshot context = 0 , context = 5 청크 512, 오버랩 64, 

LB: 0.5722539904

fold 1: 0.6533704753399003

"""<|begin_of_text|><|start_header_id|>system<|end_header_id|>
Today Date: 8 Aug 2024
1,000,000 원= 100 만원
10 백만원 = 10,000,000 원
100 백만원 = 100,000,000 원
100 백만원 = 1 억원
1,000 백만원 = 10 억원
You are the financial expert who helps me with my financial information Q&As.
You earn 10 points when you answer me and follow the rules and lose 7 points when you don't.

Here are some rules you should follow.
- Please use contexts to answer the question.
- Please your answers should be concise.
- Please answers must be written in Korean.
- Please answer the question in 1-3 sentences.

Please answer like the example below.<|eot_id|>
""" +f"{fewshot_str}" + """<|start_header_id|>system<|end_header_id|>
Now Do it for me.
Given the following contexts about Question:
{context}<|start_header_id|>user<|end_header_id|>
{input}<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>\n\n
"""

temp = 0.4, topp = 0.6 repetition_penalty = 1.1

fewshot num = 3, fewshot context = 1 , context = 3 청크 512, 오버랩 64, 

LB: 

fold 1: 0.7253077903895752

===

temp = 0.2, topp = 0.6 repetition_penalty = 1.1

fewshot num = 3, fewshot context = 1 , context = 3

LB: 

fold 1:  0.6544007302469212

temp = 0.1, topp = 0.7 repetition_penalty = 1.1

fewshot num = 3, fewshot context = 1 , context = 3

LB: 

fold 1:  0.6554498759729692

temp = 0.4, topp = 0.6 repetition_penalty = 1.1

fewshot num = 3, fewshot context = 3 , context = 3

LB: 0.5859080344

fold 1: 0.6586463841048915

temp = 0.4, topp = 0.6 repetition_penalty = 1.1

fewshot num = 3, fewshot context = 0 , context = 3

LB: 

fold 1: 0.6381098591323223

temp = 0.4, topp = 0.6 repetition_penalty = 1.1

fewshot num = 10, fewshot context = 0 , context = 3

LB: 

fold 1: 0.6327671925994144