In [None]:
import pandas as pd

# Location of the pretest workbook, relative to the notebook's working directory.
pretest_data_path = "./data/test.xlsx"

# Read the workbook into a DataFrame (pandas defaults: first sheet, first row as header).
pretest_data = pd.read_excel(io=pretest_data_path)

# Bare last expression so the notebook renders the frame.
pretest_data

In [None]:
from dotenv import load_dotenv
import os


# Load variables from a local .env file into the process environment;
# the model-setup cells below read their API keys from os.environ.
load_dotenv()

## OpenRouter Integration

In [None]:
# from langchain.chat_models import init_chat_model
# openai_api_key = os.getenv("OPENROUTER_API_KEY")
# openai_api_base = "https://openrouter.ai/api/v1"

# llm_model = "qwen/qwen3-235b-a22b:free"
# llm_model = "qwen/qwen3-32b:free"
# model_name = "qwen"
# llm_model = "deepseek/deepseek-v3-base:free"
# model_name = "deepseek"
# llm_model = "microsoft/phi-4-reasoning:free"
# model_name = 'phi4reason'
# llm_model = "nousresearch/deephermes-3-mistral-24b-preview:free"
# model_name = "deephermes"

# model = init_chat_model(
#     model=llm_model,
#     model_provider="openai",
#     openai_api_base=openai_api_base,
#     openai_api_key=openai_api_key,
# )

## Azure OpenAI Integration

In [None]:
# from langchain_openai import AzureChatOpenAI

# model = AzureChatOpenAI(
#     azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
#     azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
#     openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
#     api_key=os.environ["AZURE_API_KEY"],
# )

# model_name = "gpt"

## Gemini Integration

In [None]:
from langchain.chat_models import init_chat_model

# Active backend: Google Gemini, built through LangChain's provider-agnostic factory.
# The API key is read from the environment (a missing key raises KeyError here).
model = init_chat_model(
    model="gemini-2.5-flash-preview-04-17",  # alternative: "gemini-2.0-flash"
    model_provider="google_genai",
    api_key=os.environ["GEMINI_API_KEY"],
)

# Short tag for the active backend; it ends up in the results file name.
model_name = "gemini"

In [None]:
import time

def invoke_with_retry(model, messages, max_retries, retry_delay):
    """Call ``model.invoke(messages)``, retrying whenever it raises.

    Args:
        model: Object exposing an ``invoke(messages)`` method.
        messages: Passed unchanged to ``model.invoke`` on every attempt.
        max_retries: Total number of attempts to make.
        retry_delay: Seconds to sleep between two consecutive attempts.

    Returns:
        Whatever the first successful ``model.invoke(messages)`` call returns.

    Raises:
        Exception: With message ``"Max retries exceeded."`` once every attempt
            has failed (or immediately if ``max_retries`` is less than 1).
            The last underlying error, if any, is attached as ``__cause__``.
    """
    last_error = None
    for attempt in range(max_retries):
        try:
            return model.invoke(messages)
        except Exception as e:
            # Keep a reference: `e` is unbound once the except block ends.
            last_error = e
            if attempt + 1 >= max_retries:
                # Final attempt: nothing left to retry, so neither sleep nor
                # announce a retry that will never happen.
                print(f"Attempt {attempt + 1} failed: {e}. No retries left.")
                break
            print(
                f"Attempt {attempt + 1} failed: {e}. Retrying in {retry_delay} seconds..."
            )
            time.sleep(retry_delay)
    # Chain the last failure so the traceback shows why the attempts failed.
    raise Exception("Max retries exceeded.") from last_error

In [None]:
from model import blame_prompt, error_prompt
from langchain_core.messages import HumanMessage, SystemMessage

# Retry policy handed to invoke_with_retry for every model call.
max_retries = 20
retry_delay = 5  # seconds


def _response_to_flag(content):
    """Map a model reply to 1 when it says "True", else 0.

    The comparison ignores surrounding whitespace and letter case, so replies
    such as "True\\n" or "true" still count as True instead of silently
    becoming 0. LangChain message content may also be a list of parts
    (plain strings or dicts carrying a "text" entry); those are joined first.
    """
    if isinstance(content, list):
        text_parts = []
        for part in content:
            if isinstance(part, str):
                text_parts.append(part)
            elif isinstance(part, dict):
                text_parts.append(str(part.get("text", "")))
        content = "".join(text_parts)
    return 1 if str(content).strip().lower() == "true" else 0


# One entry per conversation; assumes each entry is text accepted by HumanMessage.
conversations = pretest_data["conversations"]
results = []

for conv in conversations:
    # The same conversation is judged twice, once per system prompt.
    blame_messages = [
        SystemMessage(blame_prompt),
        HumanMessage(conv),
    ]
    error_messages = [
        SystemMessage(error_prompt),
        HumanMessage(conv),
    ]

    blame_res = invoke_with_retry(model, blame_messages, max_retries, retry_delay)
    error_res = invoke_with_retry(model, error_messages, max_retries, retry_delay)

    blame_flag = _response_to_flag(blame_res.content)
    error_flag = _response_to_flag(error_res.content)
    print('----------------------------------')
    # print("Original Conversation:\n", conv, sep='')
    print("Blame:", blame_flag)
    print("Error:", error_flag)

    results.append({
        "Conversation": conv,
        "Blame": blame_flag,
        "Error": error_flag
    })

# Explicit columns keep the frame's schema stable even when there were no conversations.
df = pd.DataFrame(results, columns=["Conversation", "Blame", "Error"])
df

In [None]:
import datetime
# Minute-resolution timestamp so separate runs land in distinct files.
current_time = datetime.datetime.now().strftime("%Y_%m_%d_%H_%M")
# DataFrame.to_csv does not create missing parent directories, so make sure
# ./out exists first (`os` is imported in the dotenv cell near the top).
os.makedirs('./out', exist_ok=True)
df.to_csv(f'./out/llm_label_test_result_{current_time}_{model_name}.csv')