In [1]:

# ========================== 📦 IMPORT SECTION ==========================
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import time
from ast import literal_eval
from typing import List, Dict

import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm

from ragas.dataset_schema import Sample, EvaluationDataset, SingleTurnSample
from src.tag.src.text2sql_pipeline import generate_sql  # path kamu
from src.tag.evaluation.run_text2sql import run_text2sql_workflow
#from src.tag.src.text2sqlchain2 import generate_sql
from src.tag.src.query_executor import execute_text2sql_response
from src.tag.evaluation.eval_metrics import evaluate_retriever  # gunakan retriever-style
from src.tag.database.db_connection import connect_db
from src.tag.database.schema_loader import load_schema

from langchain_community.utilities import SQLDatabase



  from .autonotebook import tqdm as notebook_tqdm
  embedding_model = OllamaEmbeddings(model="nomic-embed-text")


In [2]:
conn = connect_db()
schema = load_schema(conn)

In [3]:
DATASET_PATH = os.path.join("data", "Dataset Testing 2.xlsx")
df = pd.read_excel(DATASET_PATH)

dataset = []

for i, row in df.iterrows():
    if row["is_valid"]:
        dataset.append({
            "user_input": str(row["user_input"]),
            "reference_contexts": literal_eval(row["reference_contexts_2"])
        })
    if len(dataset) == 15:
        break
print(f"Jumlah soal: {len(dataset)}")


Jumlah soal: 15


In [4]:
def retrieve_contexts_from_text2sql_zero(question: str) -> list[str]:
    try:
        response = generate_sql(schema, question, top_k=100, shot_mode="zero-shot", llm_mode="gemini")
        rows, columns = execute_text2sql_response(conn, response)
        if not rows:
            return ["data tidak ditemukan"]
        return [" | ".join(map(str, row)) for row in rows]
    except Exception as e:
        print(f"[!] Error: {question} → {e}")
        return []


In [11]:
def retrieve_contexts_from_text2sql_few(question: str) -> list[str]:
    try:
        response = generate_sql(schema, question, top_k=100, shot_mode="few-shot", llm_mode="gemini")
        rows, columns = execute_text2sql_response(conn, response)
        if not rows:
            return ["data tidak ditemukan"]
        return [" | ".join(map(str, row)) for row in rows]
    except Exception as e:
        print(f"[!] Error: {question} → {e}")
        return []


In [5]:
samples = []

for item in tqdm(dataset, desc="Menjalankan Text2SQL dan Eksekusi"):
    q = item["user_input"]
    ref = item["reference_contexts"]
    ret = retrieve_contexts_from_text2sql_zero(q)

    sample = SingleTurnSample(
        question=q,
        reference_contexts=ref,
        retrieved_contexts=ret
    )
    samples.append(sample)

evaluation_dataset = EvaluationDataset(samples)


  chain = LLMChain(llm=llm, prompt=prompt)
  return chain.run(inputs).strip()
Menjalankan Text2SQL dan Eksekusi: 100%|██████████| 15/15 [00:44<00:00,  2.94s/it]


In [6]:
result = evaluate_retriever(evaluation_dataset, experiment_name="tag_retriever_v1")
df_result_zero = result.to_pandas()
df_result_zero


Evaluating: 100%|██████████| 30/30 [00:00<00:00, 89.78it/s]


Unnamed: 0,retrieved_contexts,reference_contexts,precision,recall
0,[data tidak ditemukan],[(1) Persentase TKDN untuk belanja modal (cape...,0.0,0.0
1,[200903042500200 | Peraturan Menteri Komunikas...,[Lembaga Penyiaran Asing dilarang didirikan di...,0.0,0.0
2,[201103002500100 | Peraturan Menteri Komunikas...,[(1) Pelaksanaan Diklat REOR sebagaimana dimak...,0.0,0.0
3,[Jaringan Dokumentasi dan Informasi Hukum Keme...,[Dalam Peraturan Menteri ini yang dimaksud den...,0.0,0.0
4,[22 | (1) Registrar Nama Domain Selain Instans...,[Registri Nama Domain dan Registrar Nama Domai...,0.0,0.0
5,[data tidak ditemukan],[Informasi tarif retail layanan jelajah (roami...,0.0,0.0
6,[201503017500100 | Peraturan Menteri Komunikas...,[Pembaca kartu cerdas nirkontak (Contactless S...,1.0,1.0
7,[201003001100000 | Peraturan Menteri Komunikas...,[(1) Laporan sebagaimana dimaksud dalam Pasal ...,0.0,0.0
8,[data tidak ditemukan],[(1) Permohonan Nomor PI dapat dilakukan oleh ...,0.0,0.0
9,[data tidak ditemukan],[Setiap alat dan perangkat telekomunikasi jara...,0.0,0.0


In [10]:
avg_precision = df_result_zero['precision'].mean()
avg_recall = df_result_zero['recall'].mean()

print(f"Rata-rata Precision: {avg_precision:.4f}")
print(f"Rata-rata Recall: {avg_recall:.4f}")

Rata-rata Precision: 0.1111
Rata-rata Recall: 0.1333


In [12]:
samples = []

for item in tqdm(dataset, desc="Menjalankan Text2SQL dan Eksekusi"):
    q = item["user_input"]
    ref = item["reference_contexts"]
    ret = retrieve_contexts_from_text2sql_few(q)

    sample = SingleTurnSample(
        question=q,
        reference_contexts=ref,
        retrieved_contexts=ret
    )
    samples.append(sample)

evaluation_dataset_few = EvaluationDataset(samples)


Menjalankan Text2SQL dan Eksekusi: 100%|██████████| 15/15 [00:58<00:00,  3.93s/it]


In [13]:
result = evaluate_retriever(evaluation_dataset_few, experiment_name="tag_retriever_v2")
df_result_few = result.to_pandas()
df_result_few


Evaluating: 100%|██████████| 30/30 [00:00<00:00, 161.63it/s]


Unnamed: 0,retrieved_contexts,reference_contexts,precision,recall
0,[18A | (1) Dalam penyediaan akses dan layanan ...,[(1) Persentase TKDN untuk belanja modal (cape...,0.0,0.0
1,[9 | Dalam membuka kantor penyiaran asing seba...,[Lembaga Penyiaran Asing dilarang didirikan di...,0.0,0.0
2,[2 | (1) Setiap pengoperasian alat dan perangk...,[(1) Pelaksanaan Diklat REOR sebagaimana dimak...,0.0,0.0
3,[Jaringan Dokumentasi dan Informasi Hukum Keme...,[Dalam Peraturan Menteri ini yang dimaksud den...,0.0,0.0
4,[36 | (1) Pendaftaran Nama Domain sebagaimana ...,[Registri Nama Domain dan Registrar Nama Domai...,1.0,1.0
5,[data tidak ditemukan],[Informasi tarif retail layanan jelajah (roami...,0.0,0.0
6,[1 | Pembaca kartu cerdas nirkontak (Contactle...,[Pembaca kartu cerdas nirkontak (Contactless S...,1.0,1.0
7,[data tidak ditemukan],[(1) Laporan sebagaimana dimaksud dalam Pasal ...,0.0,0.0
8,[1 | Dalam Peraturan Menteri ini yang dimaksud...,[(1) Permohonan Nomor PI dapat dilakukan oleh ...,0.0,0.0
9,[data tidak ditemukan],[Setiap alat dan perangkat telekomunikasi jara...,0.0,0.0


In [14]:
avg_precision = df_result_few['precision'].mean()
avg_recall = df_result_few['recall'].mean()

print(f"Rata-rata Precision: {avg_precision:.4f}")
print(f"Rata-rata Recall: {avg_recall:.4f}")

Rata-rata Precision: 0.1880
Rata-rata Recall: 0.2333


In [9]:
df_result.to_json("tag_retriever_v1.json", index=False)
print("Hasil evaluasi telah disimpan ke 'tag_retriever_v1.json'.")

Hasil evaluasi telah disimpan ke 'tag_retriever_v1.json'.
