In [1]:

# ========================== 📦 IMPORT SECTION ==========================
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.getcwd(), "..")))

import time
from ast import literal_eval
from typing import List, Dict

import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm

from ragas.dataset_schema import Sample, EvaluationDataset, SingleTurnSample
from src.tag.src.text2sql_pipeline import generate_sql  # path kamu
from src.tag.evaluation.run_text2sql import run_text2sql_workflow
#from src.tag.src.text2sqlchain2 import generate_sql
from src.tag.src.query_executor import execute_text2sql_response
from src.tag.evaluation.eval_metrics import evaluate_retriever  # gunakan retriever-style
from src.tag.database.db_connection import connect_db
from src.tag.database.schema_loader import load_schema

from langchain_community.utilities import SQLDatabase



  from .autonotebook import tqdm as notebook_tqdm
  embedding_model = OllamaEmbeddings(model="nomic-embed-text")


In [2]:
conn = connect_db()
schema = load_schema(conn)

In [3]:
DATASET_PATH = os.path.join("data", "Dataset Testing 2.xlsx")
df = pd.read_excel(DATASET_PATH)

dataset = []

for i, row in df.iterrows():
    if row["is_valid"]:
        dataset.append({
            "user_input": str(row["user_input"]),
            "reference_contexts": literal_eval(row["reference_contexts_2"])
        })
    if len(dataset) == 25:
        break
print(f"Jumlah soal: {len(dataset)}")


Jumlah soal: 25


In [None]:
def retrieve_contexts_from_text2sql_zero(question: str) -> list[str]:
    try:
        response = generate_sql(schema, question, top_k=100, shot_mode="zero-shot", llm_mode="gemini")
        rows, columns = execute_text2sql_response(conn, response)
        if not rows:
            return ["data tidak ditemukan"]
        return [" | ".join(map(str, row)) for row in rows]
    except Exception as e:
        print(f"[!] Error: {question} → {e}")
        return []


In [4]:
def retrieve_contexts_from_text2sql_few(question: str) -> list[str]:
    try:
        response = generate_sql(schema, question, top_k=100, shot_mode="few-shot", llm_mode="gemini")
        rows, columns = execute_text2sql_response(conn, response)
        if not rows:
            return ["data tidak ditemukan"]
        return [" | ".join(map(str, row)) for row in rows]
    except Exception as e:
        print(f"[!] Error: {question} → {e}")
        return []


In [None]:
samples = []

for item in tqdm(dataset, desc="Menjalankan Text2SQL dan Eksekusi"):
    q = item["user_input"]
    ref = item["reference_contexts"]
    ret = retrieve_contexts_from_text2sql_zero(q)

    sample = SingleTurnSample(
        question=q,
        reference_contexts=ref,
        retrieved_contexts=ret
    )
    samples.append(sample)

evaluation_dataset = EvaluationDataset(samples)


In [None]:
result = evaluate_retriever(evaluation_dataset, experiment_name="tag_retriever_v1")
df_result_zero = result.to_pandas()
df_result_zero


In [None]:
avg_precision = df_result_zero['precision'].mean()
avg_recall = df_result_zero['recall'].mean()

print(f"Rata-rata Precision: {avg_precision:.4f}")
print(f"Rata-rata Recall: {avg_recall:.4f}")

In [5]:
import time
from tqdm import tqdm

samples = []

for i, item in enumerate(tqdm(dataset, desc="Menjalankan Text2SQL dan Eksekusi")):
    q = item["user_input"]
    ref = item["reference_contexts"]

    try:
        ret = retrieve_contexts_from_text2sql_few(q)
    except Exception as e:
        print(f"[!] Error executing SQL for question: {q}")
        print(f"    → {e}")
        
        # Lakukan rollback untuk mengakhiri transaction yang gagal
        try:
            conn.rollback()
            print("[✓] Transaction rollback executed.")
        except Exception as rollback_err:
            print(f"[X] Failed to rollback transaction: {rollback_err}")
        
        ret = []  # Tetap buat list kosong agar tidak error saat membuat sample

    sample = SingleTurnSample(
        question=q,
        reference_contexts=ref,
        retrieved_contexts=ret
    )
    samples.append(sample)

    # Pause setiap 10 pertanyaan
    if (i + 1) % 10 == 0:
        print(f"[i] Processed {i+1} samples. Sleeping for 10 seconds...")
        time.sleep(10)

evaluation_dataset_few = EvaluationDataset(samples)


Menjalankan Text2SQL dan Eksekusi:   0%|          | 0/25 [00:00<?, ?it/s]

  chain = LLMChain(llm=llm, prompt=prompt)
  return chain.run(inputs).strip()
Menjalankan Text2SQL dan Eksekusi:  36%|███▌      | 9/25 [00:41<01:14,  4.66s/it]

[i] Processed 10 samples. Sleeping for 10 seconds...


Menjalankan Text2SQL dan Eksekusi:  76%|███████▌  | 19/25 [01:36<00:28,  4.77s/it]

[i] Processed 20 samples. Sleeping for 10 seconds...


Menjalankan Text2SQL dan Eksekusi: 100%|██████████| 25/25 [02:14<00:00,  5.37s/it]


In [None]:
evaluation_dataset_few = EvaluationDataset(samples)

In [6]:
result = evaluate_retriever(evaluation_dataset_few, experiment_name="tag_retriever_v2")
df_result_few = result.to_pandas()
df_result_few


Evaluating: 100%|██████████| 50/50 [00:00<00:00, 164.28it/s]


Unnamed: 0,retrieved_contexts,reference_contexts,precision,recall
0,[1 | Dalam Peraturan Menteri ini yang dimaksud...,[(1) Persentase TKDN untuk belanja modal (cape...,0.0,0.0
1,[data tidak ditemukan],[Lembaga Penyiaran Asing dilarang didirikan di...,0.0,0.0
2,[2 | (1) Setiap pengoperasian alat dan perangk...,[(1) Pelaksanaan Diklat REOR sebagaimana dimak...,0.142857,0.666667
3,[data tidak ditemukan],[Dalam Peraturan Menteri ini yang dimaksud den...,0.0,0.0
4,[36 | (1) Pendaftaran Nama Domain sebagaimana ...,[Registri Nama Domain dan Registrar Nama Domai...,0.333333,0.5
5,[data tidak ditemukan],[Informasi tarif retail layanan jelajah (roami...,0.0,0.0
6,[1 | Peraturan Menteri Komunikasi dan Informat...,[Pembaca kartu cerdas nirkontak (Contactless S...,1.0,1.0
7,[Pasal 2 PERMENKOMINFO Nomor 26 Tahun 2015 | 2...,[(1) Laporan sebagaimana dimaksud dalam Pasal ...,0.125,0.333333
8,[1 | Dalam Peraturan Menteri ini yang dimaksud...,[(1) Permohonan Nomor PI dapat dilakukan oleh ...,0.0,0.0
9,[data tidak ditemukan],[Setiap alat dan perangkat telekomunikasi jara...,0.0,0.0


In [8]:
avg_precision = df_result_few['precision'].mean()
avg_recall = df_result_few['recall'].mean()

print(f"Rata-rata Precision: {avg_precision:.4f}")
print(f"Rata-rata Recall: {avg_recall:.4f}")

Rata-rata Precision: 0.1240
Rata-rata Recall: 0.1733


In [None]:
df_result.to_json("tag_retriever_v1.json", index=False)
print("Hasil evaluasi telah disimpan ke 'tag_retriever_v1.json'.")