In [1]:
import os
import asyncio
import pandas as pd
from ragas.metrics import Faithfulness
from ragas.dataset_schema import SingleTurnSample
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI
from pathlib import Path
from dotenv import load_dotenv

# Load environment variables (including OPENAI_API_KEY) from the .env file
load_dotenv()
# Report only whether the key is present. Never print the secret itself:
# notebook outputs are saved and shared together with the code.
print("OPENAI_API_KEY:", "ditemukan" if os.getenv("OPENAI_API_KEY") else "tidak ditemukan")

# Root directory of the evaluation data. Set the EVAL_BASE_DIR environment
# variable (e.g. in .env) to run on another machine; the original location
# remains the default.
base_dir = Path(os.getenv("EVAL_BASE_DIR", r"D:\Perkuliahan\Sem 8\STBI\Project\information-retrieval\evaluasi"))

  from .autonotebook import tqdm as notebook_tqdm


OPENAI_API_KEY: [REDACTED]


In [2]:
# Helpers and entry point for scoring every prompt folder in one pass
def _read_prompt_inputs(folder):
    """Load the question, retrieved contexts and answer stored in one prompt folder.

    Parameters
    ----------
    folder : pathlib.Path
        Directory expected to contain ``user_input.txt``,
        ``retrieved_contexts.txt`` and ``response.txt`` (read as UTF-8).

    Returns
    -------
    tuple[str, list[str], str] | None
        ``(user_input, retrieved_contexts, response)``, or ``None`` (after
        printing the reason) when a file is missing or any part is empty.
    """
    user_input_file = folder / "user_input.txt"
    contexts_file = folder / "retrieved_contexts.txt"
    response_file = folder / "response.txt"

    if not all(path.exists() for path in (user_input_file, contexts_file, response_file)):
        print(f"File hilang di {folder}.")
        return None

    user_input = user_input_file.read_text(encoding="utf-8").strip()
    with open(contexts_file, "r", encoding="utf-8") as f:
        # One context per non-blank line; adapt here if a delimiter
        # (for example ---) is used instead.
        retrieved_contexts = [line.strip() for line in f if line.strip()]
    response = response_file.read_text(encoding="utf-8").strip()

    # Reject samples where any of the three parts is empty
    if not user_input or not retrieved_contexts or not response:
        print(f"Data kosong ditemukan di {folder}")
        return None

    return user_input, retrieved_contexts, response


def process_all_queries(root=None, prompt_ids=range(1, 11), model="gpt-3.5-turbo"):
    """Compute the Faithfulness score for every ``Prompt<i>`` folder and save a CSV.

    For each id, the folder ``root / f"Prompt{i}"`` is read, wrapped in a
    ``SingleTurnSample`` and scored with ``Faithfulness``. Folders that are
    missing, incomplete, empty, or that raise during processing are reported
    with ``print`` and skipped. When at least one folder was scored, the rows
    are written to ``root / "faithfulness_all_queries.csv"`` and the mean score
    is printed.

    Parameters
    ----------
    root : path-like, optional
        Directory holding the prompt folders; defaults to the notebook-level
        ``base_dir``.
    prompt_ids : iterable, optional
        Values substituted into ``Prompt{i}``; defaults to 1 through 10.
    model : str, optional
        Model name passed to ``ChatOpenAI``.

    Returns
    -------
    list[dict]
        One dict per scored folder with the keys ``folder``, ``question``,
        ``contexts``, ``answer`` and ``faithfulness``.
    """
    root = base_dir if root is None else Path(root)
    results = []

    # Iterate over the Prompt<i> folders
    for i in prompt_ids:
        folder = root / f"Prompt{i}"
        if not folder.exists():
            print(f"Folder {folder} tidak ditemukan.")
            continue

        try:
            inputs = _read_prompt_inputs(folder)
            if inputs is None:
                continue
            user_input, retrieved_contexts, response = inputs

            sample = SingleTurnSample(
                user_input=user_input,
                response=response,
                retrieved_contexts=retrieved_contexts
            )

            print(f"Processing {folder.name}...")

            # The LLM and scorer are built per folder on purpose: every
            # asyncio.run() call below starts a new event loop.
            # NOTE(review): an async client may not be reusable across event
            # loops; confirm before hoisting this out of the loop.
            llm = ChatOpenAI(model=model, api_key=os.getenv("OPENAI_API_KEY"))
            llm_wrapper = LangchainLLMWrapper(llm)
            scorer = Faithfulness(llm=llm_wrapper)

            # Evaluate faithfulness of the response against the contexts
            score = asyncio.run(scorer.single_turn_ascore(sample))

            if score is not None:
                print(f"Faithfulness Score untuk {folder.name}: {score:.4f}")
                results.append({
                    "folder": folder.name,
                    "question": user_input,
                    "contexts": str(retrieved_contexts),  # list stored as its string form for CSV
                    "answer": response,
                    "faithfulness": score
                })
            else:
                print(f"Evaluasi gagal untuk {folder.name}")

        except Exception as e:
            # Best effort: report the failing folder and keep going
            print(f"Error memproses {folder.name}: {str(e)}")
            continue

    if not results:
        print("Tidak ada hasil untuk disimpan.")
        return results

    # Save the results to CSV
    results_df = pd.DataFrame(results)
    output_file = root / "faithfulness_all_queries.csv"
    try:
        results_df.to_csv(output_file, index=False, encoding='utf-8')
        print(f"Hasil disimpan ke {output_file}")
    except Exception as e:
        print(f"Error menyimpan file CSV: {str(e)}")
    # Printed outside the try so a failed CSV write does not hide the average
    print(f"Rata-rata Faithfulness: {results_df['faithfulness'].mean():.4f}")

    return results

# Run the evaluation; `results` receives the list returned by
# process_all_queries() (one dict per folder that was scored).
results = process_all_queries()

Processing Prompt1...
Faithfulness Score untuk Prompt1: 1.0000
Processing Prompt2...
Faithfulness Score untuk Prompt2: 1.0000
Processing Prompt3...
Faithfulness Score untuk Prompt3: 1.0000
Processing Prompt4...
Faithfulness Score untuk Prompt4: 1.0000
Processing Prompt5...
Faithfulness Score untuk Prompt5: 1.0000
Processing Prompt6...
Faithfulness Score untuk Prompt6: 1.0000
Processing Prompt7...
Faithfulness Score untuk Prompt7: 0.6667
Processing Prompt8...
Faithfulness Score untuk Prompt8: 0.8000
Processing Prompt9...
Faithfulness Score untuk Prompt9: 1.0000
Processing Prompt10...
Faithfulness Score untuk Prompt10: 1.0000
Hasil disimpan ke D:\Perkuliahan\Sem 8\STBI\Project\information-retrieval\evaluasi\faithfulness_all_queries.csv
Rata-rata Faithfulness: 0.9467
