In [None]:
import asyncio
import os
import time

import numpy as np
import pandas as pd
import torch
import tqdm
from sentence_transformers import SentenceTransformer
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig

In [None]:
# Root directory; the GraphRAG output folder and the dataset CSV are resolved relative to it.
# NOTE(review): the value looks like a placeholder -- set it before running.
BASE_DIR = " "
# Directory the result CSV files are written to (also looks like a placeholder).
OUTPUT_DIR =  " "

In [None]:
# Hugging Face model ID of the small language model (SLM) used for answer generation.
SLM_MODEL_ID = "microsoft/Phi-3.5-mini-instruct"
EMBED_MODEL_ID = "BAAI/bge-small-en-v1.5" # alternative: "sentence-transformers/all-MiniLM-L6-v2" # local embedding model (free, fast)

In [None]:
TOP_K_ENTITIES = 10     # number of entities retrieved by Local Search
TOP_K_REPORTS = 3       # number of community reports retrieved by Global Search
TOP_K_VECTORS = 5       # number of text chunks retrieved by Vector Search

# Use the GPU when one is available, otherwise fall back to the CPU.
DEVICE ="cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# 1-1. Local embedding model
embed_model = SentenceTransformer(EMBED_MODEL_ID, device=DEVICE)

# 1-2. SLM (Phi-3.5)
phi_config = AutoConfig.from_pretrained(SLM_MODEL_ID, trust_remote_code=True)
# Context window advertised by the model config; 4096 is the fallback when the
# config does not expose `max_position_embeddings`.
MAX_CONTEXT = getattr(phi_config, "max_position_embeddings", 4096)

phi_tokenizer = AutoTokenizer.from_pretrained(SLM_MODEL_ID, trust_remote_code=True)
phi_model = AutoModelForCausalLM.from_pretrained(
    SLM_MODEL_ID,
    # Follow DEVICE instead of a hard-coded "cuda" so CPU-only runtimes still load.
    device_map=DEVICE,
    # Newer transformers releases warn that `torch_dtype` is deprecated in favour
    # of `dtype`; the old name is kept so older releases keep working.
    torch_dtype="auto",
)
print(MAX_CONTEXT)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration_phi3.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/microsoft/Phi-3.5-mini-instruct:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/306 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.67G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/195 [00:00<?, ?B/s]

131072


## 2-1. 데이터 임베딩
- Community Reports, entities, relationships

In [None]:
# Folder holding the GraphRAG parquet outputs; the embedding .npy files are saved here too.
GRAPHRAG_OUTPUT_DIR = os.path.join(BASE_DIR,"graphrag_new/output/")

In [None]:
def load_and_embed(parquet_name, text_col, embedding_name, use_title=False, use_cache=False):
    """Load a GraphRAG parquet table and compute one embedding per row.

    Args:
        parquet_name: File name of the parquet table inside GRAPHRAG_OUTPUT_DIR.
        text_col: Column whose text is embedded (missing values become "").
        embedding_name: File name (inside GRAPHRAG_OUTPUT_DIR) the embedding
            matrix is written to with ``np.save``.
        use_title: When True and the table has a 'title' column, embed
            "<title>; <text>" instead of the text alone.
        use_cache: When True, reuse an existing ``embedding_name`` file whose
            row count matches the table instead of re-encoding. Defaults to
            False, i.e. always re-encode and overwrite the file.
            Only enable it when the file was produced with the same embedding
            model; the check compares row counts, not model identity.

    Returns:
        ``(df, embeddings)``: the loaded DataFrame and an array holding one
        normalized embedding per row, or ``(None, None)`` when the parquet
        file does not exist.
    """
    file_path = os.path.join(GRAPHRAG_OUTPUT_DIR, parquet_name)
    emb_path = os.path.join(GRAPHRAG_OUTPUT_DIR, embedding_name)

    if not os.path.exists(file_path):
        print(f"Warning: {file_path} not found.")
        return None, None

    df = pd.read_parquet(file_path)

    if use_cache and os.path.exists(emb_path):
        cached = np.load(emb_path)
        if cached.shape[0] == len(df):
            print("Embeddings loaded from cache.")
            return df, cached
        print("Cached embeddings do not match the table size; re-encoding.")

    body = df[text_col].fillna("")
    if use_title and 'title' in df.columns:
        print(" -> Combining 'title' and content for better accuracy.")
        # Build the combined strings first, then convert the whole result to a
        # plain list (previously only the right-hand operand was converted, so
        # a pandas Series was handed to the encoder).
        texts = (df['title'].fillna("") + "; " + body).tolist()
    else:
        texts = body.tolist()

    embeddings = embed_model.encode(texts, batch_size=64, show_progress_bar=True,
                                    convert_to_numpy=True, normalize_embeddings=True)
    np.save(emb_path, embeddings)
    print("Embeddings saved.")

    return df, embeddings

In [None]:
# [1] Community reports (file produced by GraphRAG); title and summary are embedded together.
reports_df, report_embs = load_and_embed(
    "community_reports.parquet",
    "full_content",
    "embeddings_reports.npy",
    use_title=True,
)

# [2] Text units (file produced by GraphRAG); the raw chunk text is embedded.
text_units_df, text_unit_embs = load_and_embed(
    "text_units.parquet",
    "text",
    "embeddings_text_units.npy",
)

# [3] Entities (file produced by GraphRAG); the entity description is embedded.
entities_df, entity_embs = load_and_embed(
    "entities.parquet",
    "description",
    "embeddings_entities.npy",
)

# (4) Relationships for Local Search: no embedding needed, only used for joins.
rel_path = os.path.join(GRAPHRAG_OUTPUT_DIR, "relationships.parquet")
if os.path.exists(rel_path):
    relationships_df = pd.read_parquet(rel_path)
else:
    relationships_df = None

 -> Combining 'title' and content for better accuracy.


Batches:   0%|          | 0/7 [00:00<?, ?it/s]

Embeddings saved.


Batches:   0%|          | 0/108 [00:00<?, ?it/s]

Embeddings saved.


Batches:   0%|          | 0/156 [00:00<?, ?it/s]

Embeddings saved.


In [None]:
# Number of entity rows loaded from entities.parquet.
len(entities_df)

9980

In [None]:
# Number of text-unit (chunk) rows loaded from text_units.parquet.
len(text_units_df)

6892

In [None]:
# Number of community-report rows loaded from community_reports.parquet.
len(reports_df)

430

In [None]:
def _as_text(value):
    """Return ``value`` as a string, mapping None / NaN / NA to an empty string."""
    if isinstance(value, str):
        return value
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    return str(value)


def retrieve_context(question, method = "graph"):
    """Retrieve prompt context for a question with one of three strategies.

    Args:
        question: Question text; it is embedded with ``embed_model`` and
            compared to the precomputed embeddings by dot product.
        method:
            - 'local'  (Graph): top entities, their strongest relationships,
              newly reached neighbour entities and the top evidence chunks.
            - 'global' (Summary): top community reports.
            - 'vector' (Base): top text chunks.
            The default 'graph' is accepted as an alias of 'local'; it
            previously matched no branch and crashed at the return statement.

    Returns:
        ``(context_text, references)``. ``references`` holds entity titles
        ('local', seeds first, then neighbours in discovery order), report
        titles ('global') or positional chunk indices ('vector'). Both are
        empty when the table needed by the method was not loaded.

    Raises:
        ValueError: if ``method`` is not one of the supported names.
    """
    if method == "graph":
        method = "local"
    if method not in ("local", "global", "vector"):
        raise ValueError(
            f"Unknown retrieval method {method!r}; expected 'local', 'global' or 'vector'."
        )

    max_rels_per_entity = 5      # strongest relationships listed per seed entity
    neighbor_desc_chars = 200    # neighbour descriptions are cut to this many characters
    n_evidence_chunks = 3        # raw text chunks appended to the local context

    q_emb = embed_model.encode([question],
                               convert_to_numpy=True,
                               normalize_embeddings=True).flatten()

    context_text = ""
    references = []

    # Local search
    if method == 'local' and entities_df is not None:
        pieces = []
        # 1. Find the entities most similar to the question.
        sims = np.dot(entity_embs, q_emb)
        top_idx = np.argsort(sims)[-TOP_K_ENTITIES:][::-1]
        seed_entities = entities_df.iloc[top_idx]

        # `visited_nodes` gives O(1) membership tests; `references` keeps the
        # same titles in a deterministic (discovery) order.
        visited_nodes = set()
        for title in seed_entities['title'].tolist():
            if title not in visited_nodes:
                visited_nodes.add(title)
                references.append(title)

        for _, row in seed_entities.iterrows():
            name = row['title']
            desc = _as_text(row.get('description', ''))
            pieces.append(f"### Entity (Seed): {name}\nDesc: {desc}")

            # 2. Look up the relationships this entity takes part in.
            if relationships_df is not None:
                my_rels = relationships_df[
                    (relationships_df['source'] == name) |
                    (relationships_df['target'] == name)
                ]

                if 'weight' in my_rels.columns:
                    my_rels = my_rels.sort_values(by='weight', ascending=False)

                top_rels = my_rels.head(max_rels_per_entity)

                rel_texts = []
                neighbors_to_fetch = []

                for _, r in top_rels.iterrows():
                    rel_desc = _as_text(r.get('description', ''))
                    rel_texts.append(f"- {r['source']} -> {r['target']}: {rel_desc}")

                    # The neighbour is whichever endpoint is not the seed itself.
                    neighbor = r['target'] if r['source'] == name else r['source']
                    if neighbor not in visited_nodes:
                        neighbors_to_fetch.append(neighbor)
                        visited_nodes.add(neighbor)
                        references.append(neighbor)

                if rel_texts:
                    pieces.append("Relations:\n" + "\n".join(rel_texts))

                if neighbors_to_fetch:
                    neighbor_info = entities_df[entities_df['title'].isin(neighbors_to_fetch)]
                    for _, n_row in neighbor_info.iterrows():
                        # _as_text guards the slice against missing (NaN) descriptions.
                        n_desc = _as_text(n_row.get('description', ''))[:neighbor_desc_chars]
                        pieces.append(f"### Entity (Neighbor of {name}): {n_row['title']}\nDesc: {n_desc}...")

        # 3. Add a few raw chunks as supporting evidence.
        if text_units_df is not None:
            sims_vec = np.dot(text_unit_embs, q_emb)
            top_vec_idx = np.argsort(sims_vec)[-n_evidence_chunks:][::-1]
            selected_units = text_units_df.iloc[top_vec_idx]
            for _, row in selected_units.iterrows():
                pieces.append(f"### Evidence Chunk:\n{row['text']}")

        context_text = "\n\n".join(pieces)

    # Global search
    elif method == 'global' and reports_df is not None:
        sims = np.dot(report_embs, q_emb)
        top_idx = np.argsort(sims)[-TOP_K_REPORTS:][::-1]
        selected = reports_df.iloc[top_idx]

        pieces = []
        for _, row in selected.iterrows():
            pieces.append(f"### Community Report: {row['title']}\n{row['full_content']}")
        context_text = "\n\n".join(pieces)
        references = selected['title'].tolist()

    # Vector search
    elif method == "vector" and text_units_df is not None:
        sims = np.dot(text_unit_embs, q_emb)
        top_idx = np.argsort(sims)[-TOP_K_VECTORS:][::-1]
        selected = text_units_df.iloc[top_idx]
        context_text = "\n\n".join(selected["text"].tolist())
        references = top_idx.tolist()

    return context_text, references

### generate answer ver 2

In [None]:
def generate_answer(question, context=None, mode="rag", max_new_tokens=256):
    """Generate a short answer with the local SLM (``phi_model``).

    Args:
        question: Question text.
        context: Retrieved context used when ``mode == "rag"``; ``None`` is
            treated as an empty context. Not used in the other mode.
        mode:
          - "rag": answer only from the context (otherwise "I don't know").
          - anything else ("internal"): answer from the model's own knowledge
            (SLM-only baseline).
        max_new_tokens: Maximum number of tokens to generate.

    Returns:
        ``(final_answer, reasoning, input_len, is_truncated)``:
        the parsed answer (normalised to "I don't know" when the model
        declines), the text before "Answer:" with "Thought:" removed, the
        number of prompt tokens fed to the model, and whether the prompt had
        to be shortened to fit the context window.
    """
    if mode == "rag":
        if context is None:
            # Avoid rendering the literal string "None" into the prompt.
            context = ""
        system_prompt = (
            "You are a truthful and concise QA assistant. "
            "Answer the question based ONLY on the provided context. "
            "First, think step-by-step to deduce the answer. "
            "Respond with the shortest possible answer (one sentence maximum).\n"
            "If the context does not contain enough information, respond exactly: I don't know.\n"
            "Do NOT repeat the question.\n"
            "Do NOT output anything except the following two lines:\n"
            "Thought: <brief explanation of how you arrived at the answer or why you replied 'I don't know'>\n"
            "Answer: <final answer or 'I don't know'>"
        )
        user_prompt = f"Context:\n{context}\n\nQuestion: {question}"

    else:  # mode == "internal"
        system_prompt = (
            "You are a concise and reliable QA assistant. "
            "Answer the question based on your internal knowledge. "
            "First, think step-by-step to deduce the answer. "
            "Respond with a short and direct answer (one sentence maximum). "
            "If you do not know or are not fully certain, respond exactly: I don't know.\n"
            "Do NOT output anything except the following two lines:\n"
            "Thought: <brief explanation of how you arrived at the answer or why you replied 'I don't know'>\n"
            "Answer: <final short answer>"
        )
        user_prompt = f"Question: {question}"

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt}
    ]

    def encode_messages(msgs):
        """Apply the chat template and move the tensors to the model's device."""
        return phi_tokenizer.apply_chat_template(
            msgs,
            add_generation_prompt=True,
            return_tensors="pt",
            return_dict=True,
        ).to(phi_model.device)

    # First tokenization pass.
    encoding = encode_messages(messages)
    input_len = encoding["input_ids"].shape[1]
    is_truncated = False

    # The prompt and the generated tokens share one context window, so the
    # prompt may use at most MAX_CONTEXT minus the generation budget.
    max_input_tokens = max(1, MAX_CONTEXT - max_new_tokens)

    # Too long: shorten the context only (RAG mode), keeping the instructions
    # and the question intact.
    if mode == "rag" and input_len > max_input_tokens:
        is_truncated = True

        # Cut the overshoot plus a 200-token margin, converted to characters
        # with a rough 3.5 characters-per-token estimate.
        excess_tokens = input_len - max_input_tokens + 200
        estimated_char_cut = int(excess_tokens * 3.5)

        if len(context) > estimated_char_cut:
            new_context = context[:-estimated_char_cut] + "...(truncated)"
        else:
            new_context = ""

        # Rebuild the prompt with the shortened context.
        messages[1]["content"] = f"Context:\n{new_context}\n\nQuestion: {question}"
        encoding = encode_messages(messages)
        input_len = encoding["input_ids"].shape[1]

    # Still too long: hard-truncate at token level, keeping the tail of the
    # prompt (applies to every mode).
    if input_len > max_input_tokens:
        is_truncated = True
        for k in list(encoding.keys()):
            encoding[k] = encoding[k][:, -max_input_tokens:]
        input_len = max_input_tokens

    # Greedy decoding. `temperature` is intentionally not passed: it only
    # applies to sampling and just triggers a warning with do_sample=False.
    with torch.no_grad():
        outputs = phi_model.generate(
            **encoding,
            max_new_tokens=max_new_tokens,
            do_sample=False,
            eos_token_id=phi_tokenizer.eos_token_id,
            pad_token_id=phi_tokenizer.eos_token_id,
        )

    # Keep only the newly generated tokens.
    generated_tokens = outputs[0][input_len:]
    full_output = phi_tokenizer.decode(generated_tokens, skip_special_tokens=True)

    final_answer = full_output
    reasoning = ""

    # Parse the "Thought: ... / Answer: ..." format.
    if "Answer:" in full_output:
        parts = full_output.split("Answer:")
        reasoning = parts[0].replace("Thought:", "").strip()
        final_answer = parts[-1].strip()

    # Normalise every "I don't know" variant (including a typographic apostrophe).
    clean_answer = final_answer.lower().replace("\u2019", "'").strip(" .")
    if "i don't know" in clean_answer or "do not know" in clean_answer:
        final_answer = "I don't know"

    return final_answer, reasoning, input_len, is_truncated


In [None]:
import time

In [None]:
def run_experiment_suite(eval_dataset,output_filename='second_slm_results.csv', save_every=10):
    """Answer every question with the SLM alone and with each RAG variant.

    For each item the SLM is queried without context (baseline) and then with
    context from the 'vector', 'local' and 'global' retrieval methods, in that
    order. Answers, reasoning, prompt token counts, truncation flags, wall
    times and the retrieved contexts are collected into one row per question.

    Args:
        eval_dataset: Sequence of dicts with 'id', 'question' and optionally
            'answer' keys.
        output_filename: CSV file name, created inside OUTPUT_DIR.
        save_every: Write an intermediate CSV after this many questions so a
            crash does not lose the whole run.

    Returns:
        DataFrame with one row per question (also written to the CSV).
    """
    results = []
    # "\n" was intended here; the original "\실" was an invalid escape that
    # printed a stray backslash.
    print(f"\n실험 시작! 총 {len(eval_dataset)}개 질문")

    save_path = os.path.join(OUTPUT_DIR, output_filename)
    # Fail early / create the folder now rather than at the first checkpoint.
    os.makedirs(os.path.dirname(save_path) or ".", exist_ok=True)

    # Retrieval methods double as the column prefixes of the result table.
    rag_methods = ("vector", "local", "global")

    for i, item in enumerate(tqdm.tqdm(eval_dataset)):
        question = item["question"]
        record = {
            "id": item['id'],
            "question": question,
            "ground_truth": item.get("answer", ""),
        }

        # SLM only (no RAG) - baseline
        start = time.perf_counter()
        s_ans, s_thought, _, _ = generate_answer(question, context=None, mode="internal")
        record["slm_only_answer"] = s_ans
        record["slm_only_thought"] = s_thought
        record["slm_time"] = time.perf_counter() - start

        # SLM + each retrieval method; the timing covers retrieval and generation.
        for method in rag_methods:
            start = time.perf_counter()
            ctx, _ = retrieve_context(question, method=method)
            ans, thought, n_tokens, truncated = generate_answer(question, ctx, mode="rag")
            elapsed = time.perf_counter() - start

            record[f"{method}_answer"] = ans
            record[f"{method}_thought"] = thought
            record[f"{method}_tokens"] = n_tokens
            record[f"{method}_truncated"] = truncated
            record[f"{method}_time_sec"] = elapsed
            record[f"{method}_context"] = ctx

        # The LLM-only baseline is produced separately.
        results.append(record)

        # Periodic checkpoint.
        if save_every and (i + 1) % save_every == 0:
            pd.DataFrame(results).to_csv(save_path, index=False)

    # Final save.
    final_df = pd.DataFrame(results)
    final_df.to_csv(save_path, index = False)
    print(f"\n[완료] 모든 결과가 하나로 통합되어 저장되었습니다: {save_path}")

    return final_df

## LLM ONLY

In [None]:
!pip install openai nest_asyncio



In [None]:
import os

from google.colab import userdata

# Copy the OpenAI key from the Colab secret store into the environment.
try:
    os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')
    print("API Key가 성공적으로 로드되었습니다.")

except Exception as e:
    # Best effort: keep the notebook running, but report why the lookup failed
    # instead of discarding the exception. The key itself is never printed.
    print(f"키 로드 실패: 키를 등록했는지 확인하세요. ({type(e).__name__}: {e})")

API Key가 성공적으로 로드되었습니다.


In [None]:
from openai import AsyncOpenAI
# Model used for the LLM-only baseline; the commented-out line is the smaller alternative.
# TARGET_MODEL = "gpt-4o-mini"
TARGET_MODEL = "gpt-4o"

In [None]:
# Async OpenAI client; raises KeyError here if OPENAI_API_KEY is not set in the environment.
client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"] )

In [None]:
# The module was installed above but never imported, which caused the NameError.
import nest_asyncio

# Patch asyncio so event loops can be nested inside the notebook's running loop.
nest_asyncio.apply()

NameError: name 'nest_asyncio' is not defined

In [None]:
async def get_llm_response(sample_id, question, model_name):
    """Ask the hosted LLM one question and parse its "Thought / Answer" reply.

    Args:
        sample_id: Identifier copied into the result so rows can be joined.
        question: Question text, sent as the user message.
        model_name: Chat-completion model name.

    Returns:
        Dict with keys 'id', 'llm_answer', 'llm_thought', 'llm_time_sec' and
        'llm_model'. On any failure 'llm_answer' is "Error", 'llm_thought'
        holds the error message and 'llm_time_sec' is 0.0, so one bad request
        does not abort the whole benchmark.
    """
    system_prompt = (
        "You are a concise and reliable QA assistant. "
        "Answer the question based on your internal knowledge. "
        "First, think step-by-step to deduce the answer. "
        "Respond with a short and direct answer (one sentence maximum). "
        "If you do not know or are not fully certain, respond exactly: I don't know.\n"
        "Do NOT output anything except the following two lines:\n"
        "Thought: <brief explanation of how you arrived at the answer or why you replied 'I don't know'>\n"
        "Answer: <final short answer>"
    )
    # Monotonic clock: unaffected by wall-clock adjustments.
    start_time = time.perf_counter()

    try:
        response = await client.chat.completions.create(
            model=model_name,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": question},
            ],
            temperature=0.0,
            max_tokens=512,
        )
        elapsed_sec = time.perf_counter() - start_time

        choice = response.choices[0]
        content = choice.message.content
        if content is None:
            # Report a readable reason instead of the opaque TypeError that
            # `"Answer:" in None` would raise.
            raise ValueError(f"empty completion (finish_reason={choice.finish_reason})")

        reasoning = ""
        final_answer = content

        # Everything before the first "Answer:" is the reasoning.
        if "Answer:" in content:
            parts = content.split("Answer:", 1)
            reasoning = parts[0].replace("Thought:", "").strip()
            final_answer = parts[1].strip()

        # Normalise every "I don't know" variant (including a typographic apostrophe).
        clean = final_answer.lower().replace("\u2019", "'").strip(" .,!?:;")
        if "i don't know" in clean or "do not know" in clean:
            final_answer = "I don't know"

        return {
            "id": sample_id,
            "llm_answer": final_answer,
            "llm_thought": reasoning,
            "llm_time_sec": elapsed_sec,
            "llm_model": model_name,
        }

    except Exception as e:
        # Deliberately broad: the failure is recorded as an "Error" row.
        print(f"Error on ID {sample_id}: {e}")
        return {
            "id": sample_id,
            "llm_answer": "Error",
            "llm_thought": str(e),
            "llm_time_sec": 0.0,
            "llm_model": model_name,
        }


In [None]:
async def run_llm_benchmark(dataset, max_concurrency=20):
    """Query TARGET_MODEL for every question concurrently and save the answers.

    Args:
        dataset: Sequence of dicts with 'id' and 'question' keys.
        max_concurrency: Maximum number of requests in flight at once.

    Returns:
        DataFrame of the per-question results sorted by 'id'; it is also
        written to ``second_llm_only_<model>_<n>.csv`` inside OUTPUT_DIR.
    """
    n = len(dataset)
    print(f"실험 시작: 총 {n}개 | 모델: {TARGET_MODEL}")

    semaphore = asyncio.Semaphore(max_concurrency)

    async def worker(item):
        # The semaphore caps how many requests run at the same time.
        async with semaphore:
            return await get_llm_response(item["id"], item["question"], TARGET_MODEL)

    tasks = [worker(item) for item in dataset]
    results = []

    # Collect results in completion order so the progress bar advances steadily.
    for f in tqdm.tqdm(asyncio.as_completed(tasks), total=len(tasks)):
        results.append(await f)

    df = pd.DataFrame(results)
    if not df.empty:
        # Completion order is arbitrary; sort for a stable output file.
        # An empty dataset has no 'id' column to sort on.
        df = df.sort_values("id")

    # A model name containing "/" would otherwise be read as a sub-directory.
    safe_model = TARGET_MODEL.replace("/", "_")
    df.to_csv(os.path.join(OUTPUT_DIR, f"second_llm_only_{safe_model}_{n}.csv"), index=False)
    return df

## 데이터셋

In [None]:
# CSV with the evaluation questions (per the file name: 700 HotpotQA validation questions).
ds_path = os.path.join(BASE_DIR, "datasets/hotpotqa_val_700_question.csv")

In [None]:
# Earlier variant that loaded a Hugging Face `datasets` directory (kept for reference):
# from datasets import load_from_disk

# dataset = load_from_disk(ds_path)
# dataset.keys()

# Load the evaluation questions as a DataFrame.
dataset = pd.read_csv(ds_path)

In [None]:
# Column names of the loaded question set.
dataset.columns

Index(['id', 'question', 'answer'], dtype='object')

In [None]:
# (rows, columns) of the loaded question set.
dataset.shape

(700, 3)

## 실행

In [None]:
# Disable pandas' row / column / cell-width truncation so results display in full.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 1000)

In [None]:
eval_dataset = dataset.to_dict(orient="records")  # each row becomes a dict

# Run the SLM-only, vector, local and global configurations over every question.
slm_result_df = run_experiment_suite(
    eval_dataset,
    output_filename="second_slm_result.csv"
)

In [None]:
# Column names produced by the SLM experiment suite.
slm_result_df.columns

Index(['id', 'question', 'ground_truth', 'slm_only_answer', 'slm_only_thought', 'slm_time', 'vector_answer', 'vector_thought', 'vector_tokens', 'vector_truncated', 'vector_time_sec', 'vector_context', 'local_answer', 'local_thought', 'local_tokens', 'local_truncated', 'local_time_sec', 'local_context', 'global_answer', 'global_thought', 'global_tokens', 'global_truncated', 'global_time_sec', 'global_context'], dtype='object')

In [None]:
# (rows, columns) of the SLM experiment results.
slm_result_df.shape

(700, 24)

In [None]:
import asyncio

async def main():
    """Run the LLM-only benchmark on ``eval_dataset``, preview the first rows and return the table."""
    llm_df = await run_llm_benchmark(eval_dataset)
    display(llm_df.head())
    return llm_df

In [None]:
# Run the LLM-only benchmark; relies on the notebook's support for top-level `await`.
llm_result_df =  await main()

실험 시작: 총 700개 | 모델: gpt-4o


100%|██████████| 700/700 [00:31<00:00, 22.32it/s]


Unnamed: 0,id,llm_answer,llm_thought,llm_time_sec,llm_model
281,5a70f0a75542994082a3e403,Scanian.,"The dialect spoken in the province of Scania that refers to Spettekaka as ""spiddekaga"" is Scanian.",0.580012,gpt-4o
638,5a70f39c5542994082a3e429,Eliot Cutler.,"In the 2010 Maine gubernatorial race, the independent candidate who finished ahead of Libby Mitchell was Eliot Cutler.",2.248319,gpt-4o
418,5a7100435542994082a3e4a3,"No, only Shikashika is a frozen drink.","""Finding Kraftland"" is a documentary film, while ""Shikashika"" refers to a type of frozen drink from Peru.",0.708228,gpt-4o
297,5a712beb5542994082a3e61c,I don't know,"""Little Fugitive"" is a 1953 film, and no Golden Globe Award actor is known for starring in it.",0.649361,gpt-4o
8,5a7137dc5542994082a3e68a,NBA Rookie of the Year.,"The award initiated following the 1952-53 NBA season is the NBA Rookie of the Year Award, but Kevin Durant was named NBA Rookie of the Year for the 2007-08 season.",0.799469,gpt-4o


In [None]:
# Preview the first rows of the LLM-only results.
llm_result_df.head()

Unnamed: 0,id,llm_answer,llm_thought,llm_time_sec,llm_model
281,5a70f0a75542994082a3e403,Scanian.,"The dialect spoken in the province of Scania that refers to Spettekaka as ""spiddekaga"" is Scanian.",0.580012,gpt-4o
638,5a70f39c5542994082a3e429,Eliot Cutler.,"In the 2010 Maine gubernatorial race, the independent candidate who finished ahead of Libby Mitchell was Eliot Cutler.",2.248319,gpt-4o
418,5a7100435542994082a3e4a3,"No, only Shikashika is a frozen drink.","""Finding Kraftland"" is a documentary film, while ""Shikashika"" refers to a type of frozen drink from Peru.",0.708228,gpt-4o
297,5a712beb5542994082a3e61c,I don't know,"""Little Fugitive"" is a 1953 film, and no Golden Globe Award actor is known for starring in it.",0.649361,gpt-4o
8,5a7137dc5542994082a3e68a,NBA Rookie of the Year.,"The award initiated following the 1952-53 NBA season is the NBA Rookie of the Year Award, but Kevin Durant was named NBA Rookie of the Year for the 2007-08 season.",0.799469,gpt-4o


In [None]:
# A cell only renders its last expression, so both results are displayed
# explicitly; the answer counts are limited to the ten most frequent values.
display(llm_result_df["llm_answer"].value_counts().head(10))
# Rows where the API call failed (recorded with llm_answer == "Error").
display(llm_result_df[llm_result_df["llm_answer"] == "Error"].head())


Unnamed: 0,id,llm_answer,llm_thought,llm_time_sec,llm_model


In [None]:
# Attach the LLM-only columns to the SLM results by question id, keeping every SLM row.
total_df = slm_result_df.merge(llm_result_df, how="left", on="id")

In [None]:
# Column names of the merged SLM + LLM result table.
total_df.columns

Index(['id', 'question', 'ground_truth', 'slm_only_answer', 'slm_only_thought', 'slm_time', 'vector_answer', 'vector_thought', 'vector_tokens', 'vector_truncated', 'vector_time_sec', 'vector_context', 'local_answer', 'local_thought', 'local_tokens', 'local_truncated', 'local_time_sec', 'local_context', 'global_answer', 'global_thought', 'global_tokens', 'global_truncated', 'global_time_sec', 'global_context', 'llm_answer', 'llm_thought', 'llm_time_sec', 'llm_model'], dtype='object')

In [None]:
# Save the merged table; the file name is fixed to "gpt-4o" and does not follow TARGET_MODEL.
total_df.to_csv(os.path.join(OUTPUT_DIR, 'second_total_result_gpt-4o.csv'), index = False)

In [None]:
# Same display options as the cell before the experiment run; re-applying them is harmless.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 1000)