In [4]:
import os
import asyncio
import pandas as pd
from ragas.metrics import LLMContextRecall
from ragas.dataset_schema import SingleTurnSample
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI
from pathlib import Path
from dotenv import load_dotenv

# Load environment variables from a local .env file (the OpenAI key is read
# from the environment later via os.getenv("OPENAI_API_KEY")).
load_dotenv()

# Root folder that holds the per-prompt evaluation inputs. The original
# machine-specific location remains the default; set CONTEXT_EVAL_BASE_DIR
# (in the environment or in .env) to run against a different location.
# `or` is used so that an empty variable also falls back to the default.
base_dir = Path(
    os.getenv("CONTEXT_EVAL_BASE_DIR")
    or r"D:\Perkuliahan\Sem 8\STBI\Project\information-retrieval\evaluasi_relevan_context"
)

# Helpers and entry point for evaluating every prompt folder
def _build_context_recall_scorer():
    """Create the LLM-backed scorer that is shared by all samples."""
    llm = ChatOpenAI(model="gpt-3.5-turbo", api_key=os.getenv("OPENAI_API_KEY"))
    return LLMContextRecall(llm=LangchainLLMWrapper(llm))


def _read_nonblank_lines(path: Path) -> list:
    """Return the stripped, non-blank lines of a UTF-8 text file."""
    with path.open("r", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line]


def process_all_queries() -> list:
    """Score folders ``Prompt1`` .. ``Prompt10`` under ``base_dir``.

    Each folder must contain ``user_input.txt``, ``retrieved_contexts.txt``
    (one context per non-blank line) and ``ground_truth.txt``. Folders that
    are missing, incomplete, or contain empty data are reported and skipped;
    any exception raised while handling a folder is reported and that folder
    is skipped as well.

    The score is computed with ``LLMContextRecall``. It is stored and printed
    under the existing "context relevance" label so that the output format
    (CSV column and file name) stays unchanged.

    When at least one folder was scored, the results are written to
    ``context_relevance_all_queries.csv`` inside ``base_dir`` and the mean
    score is printed.

    Returns:
        A list with one dict per scored folder, with the keys ``folder``,
        ``question``, ``contexts`` and ``context_relevance``.
    """
    results = []
    # Built on first use and then reused: the scorer does not depend on the
    # folder, so there is no need to recreate the client for every sample.
    scorer = None

    for i in range(1, 11):
        folder = base_dir / f"Prompt{i}"
        if not folder.exists():
            print(f"Folder {folder} tidak ditemukan.")
            continue

        user_input_file = folder / "user_input.txt"
        contexts_file = folder / "retrieved_contexts.txt"
        ground_truth_file = folder / "ground_truth.txt"

        # All three input files are required
        required_files = (user_input_file, contexts_file, ground_truth_file)
        if not all(path.exists() for path in required_files):
            print(f"File hilang di {folder}.")
            continue

        try:
            user_input = user_input_file.read_text(encoding="utf-8").strip()
            retrieved_contexts = _read_nonblank_lines(contexts_file)
            ground_truth = ground_truth_file.read_text(encoding="utf-8").strip()

            # Skip folders whose inputs are empty after stripping
            if not user_input or not retrieved_contexts or not ground_truth:
                print(f"Data kosong ditemukan di {folder}")
                continue

            sample = SingleTurnSample(
                user_input=user_input,
                retrieved_contexts=retrieved_contexts,
                reference=ground_truth
            )

            print(f"Processing {folder.name}...")

            # Kept inside the try block so that a construction failure is
            # reported for this folder (and retried on the next one) instead
            # of aborting the whole run.
            if scorer is None:
                scorer = _build_context_recall_scorer()

            score = asyncio.run(scorer.single_turn_ascore(sample))

            if score is not None:
                print(f"Context Relevance Score untuk {folder.name}: {score:.4f}")
                results.append({
                    "folder": folder.name,
                    "question": user_input,
                    "contexts": str(retrieved_contexts),
                    "context_relevance": score
                })
            else:
                print(f"Evaluasi gagal untuk {folder.name}")

        except Exception as e:
            # Per-folder boundary: report the problem and move on
            print(f"Error memproses {folder.name}: {str(e)}")

    # Persist the collected scores as CSV
    if results:
        results_df = pd.DataFrame(results)
        output_file = base_dir / "context_relevance_all_queries.csv"
        try:
            results_df.to_csv(output_file, index=False, encoding='utf-8')
            print(f"Hasil disimpan ke {output_file}")
            print(f"Rata-rata Context Relevance: {results_df['context_relevance'].mean():.4f}")
        except Exception as e:
            print(f"Error menyimpan file CSV: {str(e)}")
    else:
        print("Tidak ada hasil untuk disimpan.")

    return results

# Run the evaluation over all prompt folders and keep the per-folder results
results = process_all_queries()

Processing Prompt1...
Context Relevance Score untuk Prompt1: 1.0000
Processing Prompt2...
Context Relevance Score untuk Prompt2: 1.0000
Processing Prompt3...
Context Relevance Score untuk Prompt3: 1.0000
Processing Prompt4...
Context Relevance Score untuk Prompt4: 1.0000
Processing Prompt5...
Context Relevance Score untuk Prompt5: 1.0000
Processing Prompt6...
Context Relevance Score untuk Prompt6: 1.0000
Processing Prompt7...
Context Relevance Score untuk Prompt7: 1.0000
Processing Prompt8...
Context Relevance Score untuk Prompt8: 1.0000
Processing Prompt9...
Context Relevance Score untuk Prompt9: 1.0000
Processing Prompt10...
Context Relevance Score untuk Prompt10: 1.0000
Hasil disimpan ke D:\Perkuliahan\Sem 8\STBI\Project\information-retrieval\evaluasi_relevan_context\context_relevance_all_queries.csv
Rata-rata Context Relevance: 1.0000
