In [None]:
import os
import glob
import re
import uuid
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer, AutoModelForMaskedLM, BitsAndBytesConfig
from qdrant_client import QdrantClient, models
from qdrant_client.models import PointStruct, SparseVector, Distance, VectorParams, SparseVectorParams
from langchain_text_splitters import MarkdownHeaderTextSplitter

# ==========================================
# 1. CONFIGURATION AND SETUP
# ==========================================

# Folder holding the Markdown files to index (adjust as needed).
# The literal is a raw string that ALSO doubles every backslash, so its value
# contains "\\" separators. normpath collapses them on Windows and leaves the
# string unchanged on other platforms.
FOLDER_PATH = os.path.normpath(r"C:\\Users\\Ilmu Komputer\\OneDrive\\Desktop\\portofolio\\RAG\\defi-rag-agent\\src\\evaluation\\data_md\\real_md")

# Model IDs
DENSE_MODEL_ID = "Qwen/Qwen3-Embedding-4B"
SPARSE_MODEL_ID = "naver/splade-v3"
QDRANT_PATH = "./qdrant_custom_db"
COLLECTION_NAME = "hybrid_qwen_splade_optimized"

# Device selection: use CUDA when available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"üöÄ Running on: {device}")

# 4-bit quantization config (saves GPU VRAM). Stays None on CPU, in which
# case the dense model is loaded without a quantization config.
bnb_config = None
if device == "cuda":
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16
    )

# ==========================================
# 2. CLASS EMBEDDER (DENSE + SPARSE)
# ==========================================

def last_token_pool(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Pick, for every row, the hidden state of its last attended token.

    If every row has a 1 in the final mask column (the batch is left-padded),
    the final position is returned for all rows. Otherwise each row is read at
    index ``attention_mask.sum(dim=1) - 1``.
    """
    row_count = attention_mask.shape[0]
    if attention_mask[:, -1].sum() == row_count:
        return last_hidden_states[:, -1]

    # Right-padded case: index of the last attended token in each row.
    last_positions = attention_mask.sum(dim=1) - 1
    row_ids = torch.arange(last_hidden_states.shape[0], device=last_hidden_states.device)
    return last_hidden_states[row_ids, last_positions]

class CustomEmbedder:
    """Turn a text into a dense (Qwen) vector and a sparse (SPLADE) vector.

    Tokenizers and models are loaded once in ``__init__`` from the module-level
    ``DENSE_MODEL_ID`` and ``SPARSE_MODEL_ID``. The dense model receives
    ``bnb_config`` as its quantization config.
    """

    def __init__(self):
        print("‚è≥ Loading Dense Model (Qwen 4-bit)...")
        self.dense_tokenizer = AutoTokenizer.from_pretrained(DENSE_MODEL_ID, trust_remote_code=True)
        self.dense_model = AutoModel.from_pretrained(
            DENSE_MODEL_ID,
            trust_remote_code=True,
            quantization_config=bnb_config,
            device_map="auto" if device == "cuda" else None,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32
        )
        if device == "cpu":
            self.dense_model.to("cpu")

        print("‚è≥ Loading Sparse Model (Splade v3)...")
        self.sparse_tokenizer = AutoTokenizer.from_pretrained(SPARSE_MODEL_ID)
        self.sparse_model = AutoModelForMaskedLM.from_pretrained(SPARSE_MODEL_ID)
        self.sparse_model.to(device)

    def get_dense_vector(self, text):
        """Return the L2-normalized dense embedding of ``text`` as a list of floats.

        The input is truncated to 8192 tokens and pooled with
        ``last_token_pool``. Only the first row of the batch is returned.
        """
        inputs = self.dense_tokenizer(
            text, max_length=8192, padding=True, truncation=True, return_tensors='pt'
        )
        # Move every tokenizer tensor to the device the model lives on.
        inputs = {k: v.to(self.dense_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.dense_model(**inputs)
            embeddings = last_token_pool(outputs.last_hidden_state, inputs['attention_mask'])
            embeddings = F.normalize(embeddings, p=2, dim=1)

        return embeddings[0].cpu().tolist()

    def get_sparse_vector(self, text):
        """Return the SPLADE-style sparse vector of ``text`` as a ``SparseVector``.

        Only the first row of the batch is used. ``indices`` are the positions
        along the last logits axis (presumably vocabulary ids) whose pooled
        weight is non-zero, and ``values`` are the matching weights.
        """
        inputs = self.sparse_tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        inputs = {k: v.to(self.sparse_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.sparse_model(**inputs)

        # SPLADE weighting: log(1 + ReLU(logits)), then max over the token axis.
        logits = outputs.logits[0]
        relu_log = torch.log(1 + torch.relu(logits))
        # Zero out padded positions so they cannot win the max.
        attention_mask = inputs["attention_mask"][0].unsqueeze(-1)
        weighted_log = relu_log * attention_mask

        max_val, _ = torch.max(weighted_log, dim=0)

        # flatten() always yields a 1-D index tensor (0, 1 or many hits), so
        # tolist() always returns a list and no scalar special case is needed.
        nonzero_idx = torch.nonzero(max_val).flatten()
        indices = nonzero_idx.cpu().tolist()
        values = max_val[nonzero_idx].cpu().tolist()

        return SparseVector(indices=indices, values=values)

# ==========================================
# 3. SETUP DATABASE QDRANT
# ==========================================

embedder = CustomEmbedder()

print(f"\nüíΩ Membuka database Qdrant lokal di: {QDRANT_PATH}")
client = QdrantClient(path=QDRANT_PATH)

# Always rebuild from scratch: drop a collection left over from an earlier run.
if client.collection_exists(COLLECTION_NAME):
    client.delete_collection(COLLECTION_NAME)

print("‚öôÔ∏è Membuat Collection baru...")
# The dense size must equal the length of the vectors the embedder returns.
# The ingest loop prints the detected length for the first chunk; compare it
# with this value if an upsert fails with a size mismatch.
dense_params = VectorParams(size=2560, distance=Distance.COSINE)
sparse_params = SparseVectorParams()
client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config={"dense_vector": dense_params},
    sparse_vectors_config={"sparse_vector": sparse_params},
)

# ==========================================
# 4. FUNGSI UTILITY (CLEANING & SPLITTING)
# ==========================================

def clean_title(filename):
    """Turn a file name into a readable title.

    Drops the extension, cuts everything from the first ``-``/``_`` that is
    followed by six or more digits (e.g. a ``_250130_133808`` timestamp) to the
    end, then turns the remaining ``-``/``_`` separators into single spaces.
    """
    stem, _extension = os.path.splitext(filename)
    # Cut the timestamp-like tail: separator + at least 6 digits + the rest.
    without_stamp = re.sub(r'[-_]\d{6,}.*', '', stem)
    # Map both separators to spaces in one pass.
    spaced = without_stamp.translate(str.maketrans("-_", "  "))
    # split()/join collapses runs of whitespace and trims both ends.
    return " ".join(spaced.split())

headers_to_split_on = [("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")]
md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
# Metadata keys the splitter writes, in hierarchy order. Computed once here
# instead of being rebuilt for every chunk.
header_keys = [name for _, name in headers_to_split_on]

md_files = glob.glob(os.path.join(FOLDER_PATH, "*.md"))
print(f"üìÇ Ditemukan {len(md_files)} file Markdown.")

# ==========================================
# 5. EMBEDDING & UPSERT
# ==========================================

points = []
failed_chunks = 0  # chunks skipped because embedding / point creation raised
print("üöÄ Mulai proses embedding...")

# Outer loop: one Markdown file at a time.
for file_index, file_path in enumerate(md_files):
    file_name = os.path.basename(file_path)

    # Human-readable title derived from the file name.
    doc_title = clean_title(file_name)

    with open(file_path, "r", encoding="utf-8") as f:
        file_content = f.read()

    docs = md_splitter.split_text(file_content)
    total_chunks = len(docs)

    print(f"\nüìÑ File [{file_index+1}/{len(md_files)}]: {file_name}")
    print(f"   üßπ Clean Title: '{doc_title}'")

    # Inner loop: one header-delimited chunk at a time.
    for i, doc in enumerate(docs):
        raw_content = doc.page_content.strip()
        if not raw_content:
            continue

        # Header hierarchy present on this chunk, outermost first.
        found_headers = [doc.metadata[key] for key in header_keys if key in doc.metadata]

        # Breadcrumb: [file title, Header 1, Header 2, ...]
        breadcrumb_str = " > ".join([doc_title] + found_headers)

        # Text that gets embedded: breadcrumb, a spaced colon, then the body.
        final_text = f"{breadcrumb_str} : {raw_content}"

        print(f"   üëâ Chunk [{i+1}/{total_chunks}] | {breadcrumb_str}")

        # Best effort: a failing chunk is reported and skipped, not fatal.
        try:
            d_vec = embedder.get_dense_vector(final_text)
            s_vec = embedder.get_sparse_vector(final_text)

            # Show the dense size once so it can be checked against the
            # collection's configured vector size.
            if file_index == 0 and i == 0:
                print(f"   üìè Detected Dense Vector Size: {len(d_vec)}")

            # Copy instead of mutating the splitter's own metadata dict.
            metadata = {
                **doc.metadata,
                "source_file": file_name,
                "doc_title": doc_title,
                "chunk_index": i,
            }

            points.append(PointStruct(
                id=str(uuid.uuid4()),
                vector={
                    "dense_vector": d_vec,
                    "sparse_vector": s_vec
                },
                payload={
                    "text": final_text,
                    "original_content": raw_content,
                    "metadata": metadata
                }
            ))
        except Exception as e:
            failed_chunks += 1
            print(f"   ‚ùå Error embedding chunk: {e}")

# Make skipped chunks visible instead of leaving them buried in the log.
if failed_chunks:
    print(f"\n   {failed_chunks} chunk(s) failed and were skipped.")

# Upload to Qdrant
if points:
    print(f"\n‚¨ÜÔ∏è Mengupload {len(points)} points ke Qdrant...")
    # Upsert in fixed-size batches rather than in one call.
    BATCH_SIZE = 50
    for i in range(0, len(points), BATCH_SIZE):
        batch = points[i:i+BATCH_SIZE]
        client.upsert(collection_name=COLLECTION_NAME, points=batch)
        print(f"   Saved batch {i} - {i+len(batch)}")
    print("‚úÖ Semua data berhasil diupload!")
else:
    print("‚ö†Ô∏è Tidak ada data point yang dihasilkan.")

# ==========================================
# 6. FUNGSI PENCARIAN (TESTING)
# ==========================================

def search_hybrid(query_text, limit=3, prefetch_limit=20):
    """Run a dense + sparse hybrid search and print the fused top hits.

    Args:
        query_text: The query string; embedded as-is by both models.
        limit: Number of fused results to print (default 3, as before).
        prefetch_limit: Candidates fetched from each of the dense and sparse
            indexes before fusion (default 20, as before).

    Returns:
        None. Rank, score, document title and a 200-character snippet are
        printed for each hit.
    """
    print(f"\nüîç Searching for: '{query_text}'")

    q_dense = embedder.get_dense_vector(query_text)
    q_sparse = embedder.get_sparse_vector(query_text)

    results = client.query_points(
        collection_name=COLLECTION_NAME,
        # Each prefetch pulls candidates from one named vector; the outer
        # query fuses both candidate lists with reciprocal rank fusion.
        prefetch=[
            models.Prefetch(
                query=q_dense, using="dense_vector", limit=prefetch_limit
            ),
            models.Prefetch(
                query=q_sparse, using="sparse_vector", limit=prefetch_limit
            ),
        ],
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        limit=limit
    )

    for i, hit in enumerate(results.points):
        print(f"\nüèÜ Rank {i+1} (Score: {hit.score:.4f})")
        print(f"   üìÑ Source: {hit.payload['metadata'].get('doc_title', 'Unknown')}")
        print(f"   üìù Content Snippet: {hit.payload['text'][:200]}...")

search_hybrid("Bagaimana struktur kurikulum?")

üöÄ Running on: cuda
‚è≥ Loading Dense Model (Qwen 4-bit)...


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚è≥ Loading Sparse Model (Splade v3)...

üíΩ Membuka database Qdrant lokal di: ./qdrant_custom_db
‚öôÔ∏è Membuat Collection baru...
üìÇ Ditemukan 13 file Markdown.
üöÄ Mulai proses embedding...

üìÑ File [1/13]: Dukungan-Pelaksanaan-Kebijakan-Efisiensi-Belanja-APBN-Tahun-Anggaran-2025pdf.md
   üßπ Clean Title: 'Dukungan Pelaksanaan Kebijakan Efisiensi Belanja APBN Tahun Anggaran 2025pdf'
   üëâ Chunk [1/1] | Dukungan Pelaksanaan Kebijakan Efisiensi Belanja APBN Tahun Anggaran 2025pdf > KEMENTERIAN KEUANGAN REPUBLIK INDONESIA > DIREKTORAT JENDERAL PERBENDAHARAAN
   üìè Detected Dense Vector Size: 2560

üìÑ File [2/13]: Dukungan-Pelaksanaan-Kebijakan-Pemerintahpdf.md
   üßπ Clean Title: 'Dukungan Pelaksanaan Kebijakan Pemerintahpdf'
   üëâ Chunk [1/1] | Dukungan Pelaksanaan Kebijakan Pemerintahpdf > KEMENTERIAN KEUANGAN REPUBLIK INDONESIA > DIREKTORAT JENDERAL PERBENDAHARAAN

üìÑ File [3/13]: Efisiensi-KL-TA-2025_250130_133808.md
   üßπ Clean Title: 'Efisiensi KL TA 2025'
   

Error during conversion: ChunkedEncodingError(ProtocolError('Response ended prematurely'))


   üëâ Chunk [2/2] | Efisiensi KL TA 2025 > REPUBLIK INDONESIA > IDENTIFIKASI RENCANA EFISIENSI

üìÑ File [4/13]: Inpres-1-Tahun_2025.md
   üßπ Clean Title: 'Inpres 1 Tahun 2025'
   üëâ Chunk [1/3] | Inpres 1 Tahun 2025
   üëâ Chunk [2/3] | Inpres 1 Tahun 2025 > PRESIDEN REPUBLIK INDONESIA > EFISIENSI BELANJA DALAM PELAKSANAAN ANGGARAN PENDAPATAN DAN BELANJA NEGARA DAN ANGGARAN PENDAPATAN DAN BELANJA DAERAH > TAHUN ANGGARAN 2025
   üëâ Chunk [3/3] | Inpres 1 Tahun 2025 > PRESIDEN REPUBLIK INDONESIA > 2. Menteri Dalam Negeri untuk:

üìÑ File [5/13]: panduan-app-spkdriver.md
   üßπ Clean Title: 'panduan app spkdriver'
   üëâ Chunk [1/4] | panduan app spkdriver
   üëâ Chunk [2/4] | panduan app spkdriver > PANDUAN > PENGGUNAAN APLIKASI SPK-DRIVER USER DRIVER > Universitas Pendidikan Ganesha
   üëâ Chunk [3/4] | panduan app spkdriver > PANDUAN APP SPK-DRIVER
   üëâ Chunk [4/4] | panduan app spkdriver > PANDUAN APP SPK-DRIVER > 8. Data berhasil ditambahkan dan halama serta status

In [2]:
# Ad-hoc retrieval check; search_hybrid prints the ranked hits.
search_hybrid("Siapa Penanggung jawab pertama dalam struktur Duta KIP Alumni?")


üîç Searching for: 'Siapa Penanggung jawab pertama dalam struktur Duta KIP Alumni?'

üèÜ Rank 1 (Score: 1.0000)
   üìÑ Source: SK 815 DUTA KETERBUKAAN INFORMASI PUBLIK ALUMNI TAHUN 2025
   üìù Content Snippet: SK 815 DUTA KETERBUKAAN INFORMASI PUBLIK ALUMNI TAHUN 2025 > KEPUTUSAN REKTOR UNIVERSITAS PENDIDIKAN GANESHA > DUTA KETERBUKAAN INFORMASI PUBLIK (KIP) ALUMNI UNIVERSITAS PENDIDIKAN GANESHA MASA BHAKTI...

üèÜ Rank 2 (Score: 0.6667)
   üìÑ Source: SK 814 DUTA KETERBUKAAN INFORMASI PUBLIK MAHASISWA TAHUN 2025
   üìù Content Snippet: SK 814 DUTA KETERBUKAAN INFORMASI PUBLIK MAHASISWA TAHUN 2025 > KEPUTUSAN REKTOR UNIVERSITAS PENDIDIKAN GANESHA > DUTA KETERBUKAAN INFORMASI PUBLIK (KIP) MAHASISWA UNIVERSITAS PENDIDIKAN GANESHA TAHUN...

üèÜ Rank 3 (Score: 0.4000)
   üìÑ Source: SK 814 DUTA KETERBUKAAN INFORMASI PUBLIK MAHASISWA TAHUN 2025
   üìù Content Snippet: SK 814 DUTA KETERBUKAAN INFORMASI PUBLIK MAHASISWA TAHUN 2025 > KEPUTUSAN REKTOR UNIVERSITAS PENDIDIKAN GANESHA >

In [4]:
# Ad-hoc retrieval check; search_hybrid prints the ranked hits.
search_hybrid("Siapa saja Dosen Pendamping dalam SK ini?")


üîç Query: 'Siapa saja Dosen Pendamping dalam SK ini?'
   Score: 0.5333 | Text: LAMPIRAN > KEPUTUSAN REKTOR UNIVERSITAS PENDIDIKAN GANESHA: **NOMOR** 814/UN48/KM.05.04/2025
**TANGGAL** 17 MARET 2025
**TENTANG** DUTA KETERBUKAAN INFORMASI PUBLIK UNIVERSITAS PENDIDIKAN GANESHA TAHUN 2025  
**Pengarah** : Prof. Dr. I Wayan Lasmawan, M.Pd.  
**Penanggung jawab** :
1. Prof. Dr. Gede Rasben Dantes, S.T., M.T.I.
2. Prof.Dr. I Wayan Artanayasa, S.Pd., M.Pd., AIFO-FIT.
3. Prof. Dr. Drs. I Ketut Sudiana, M.Kes.
4. Drs. I Made Yasa, M.Pd.
5. Prof. Dr. Komang Setemen, S.Si., M.T.
6. I Made Karunia, S.T., M.Kom.  
**Dosen Pendamping** :
1. Dr. Kadek Wirahyuni, S.Pd., M.Pd.
2. I Kadek Edi Yudiana, M.Pd.  
**Ketua** : Komang Danda Widya Anugrah  
<table>
<thead>
<tr>
<th></th>
<th>Nama</th>
<th>NIM</th>
<th>Prodi</th>
<th>Fakultas</th>
</tr>
</thead>
<tbody>
<tr>
<td rowspan="9">Anggota</td>
<td>Kadek Ayu Pitriyani</td>
<td>2215011021</td>
<td>Pendidikan Kesejahteraan Keluarga</td>
<td>FTK</td>
</t

Embedding dengan multilingual-e5-base (dense) dan SPLADE v3 (sparse)


In [1]:
from langchain_text_splitters import MarkdownHeaderTextSplitter
from qdrant_client.http import models
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer, AutoModelForMaskedLM
from qdrant_client import QdrantClient, models
from qdrant_client.models import PointStruct, SparseVector
import os
import glob
import uuid

# ==========================================
# 1. CONFIGURATION AND UTILITIES
# ==========================================
# Hugging Face model identifiers for the sparse and dense encoders
SPARSE_MODEL_ID = "naver/splade-v3"
DENSE_MODEL_ID = "intfloat/multilingual-e5-base"

# Device selection: the GPU takes priority when CUDA is available
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"üöÄ Running on: {device}")

# --- Helper Function untuk E5-Base (Average Pooling) ---
def average_pool(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Mean-pool hidden states over the attended tokens of each row.

    Positions where ``attention_mask`` is 0 are zeroed before summing along
    dim 1, and the sum is divided by the number of attended tokens per row.
    Used for E5 in place of the last-token pooling of decoder-only models.
    """
    keep = attention_mask.unsqueeze(-1).bool()
    zeroed = last_hidden_states.masked_fill(~keep, 0.0)
    token_counts = attention_mask.sum(dim=1).unsqueeze(-1)
    return zeroed.sum(dim=1) / token_counts

class CustomEmbedder:
    """Turn a text into a dense (E5) vector and a sparse (SPLADE) vector.

    Tokenizers and models are loaded once in ``__init__`` from the module-level
    ``DENSE_MODEL_ID`` and ``SPARSE_MODEL_ID`` and moved to ``device``. No
    quantization config is used.
    """

    def __init__(self):
        print(f"‚è≥ Loading Dense Model ({DENSE_MODEL_ID}) without quantization...")
        self.dense_tokenizer = AutoTokenizer.from_pretrained(DENSE_MODEL_ID)

        # Loaded without quantization_config and without device_map="auto", so
        # placement is controlled explicitly by the .to(device) call below.
        # NOTE(review): trust_remote_code=True allows code shipped with the
        # model repo to run; presumably not needed for this model. Confirm
        # and drop.
        self.dense_model = AutoModel.from_pretrained(
            DENSE_MODEL_ID,
            trust_remote_code=True,
        )
        self.dense_model.to(device)

        print("‚è≥ Loading Sparse Model (Splade v3)...")
        self.sparse_tokenizer = AutoTokenizer.from_pretrained(SPARSE_MODEL_ID)
        self.sparse_model = AutoModelForMaskedLM.from_pretrained(SPARSE_MODEL_ID)
        self.sparse_model.to(device)

    def get_dense_vector(self, text):
        """Return the L2-normalized dense embedding of ``text`` as a list of floats.

        ``text`` is tokenized as given: callers add the E5 ``"query: "`` /
        ``"passage: "`` prefix themselves. The input is truncated to 512
        tokens and mean-pooled with ``average_pool``. Only the first row of
        the batch is returned.
        """
        inputs = self.dense_tokenizer(
            text,
            max_length=512,  # E5-base limit (the Qwen variant used 8192)
            padding=True,
            truncation=True,
            return_tensors='pt'
        )

        # Move every tokenizer tensor to the device the model lives on.
        inputs = {k: v.to(self.dense_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.dense_model(**inputs)
            embeddings = average_pool(outputs.last_hidden_state, inputs['attention_mask'])
            embeddings = F.normalize(embeddings, p=2, dim=1)

        return embeddings[0].cpu().tolist()

    def get_sparse_vector(self, text):
        """Return the SPLADE-style sparse vector of ``text`` as a ``SparseVector``.

        Only the first row of the batch is used. ``indices`` are the positions
        along the last logits axis (presumably vocabulary ids) whose pooled
        weight is non-zero, and ``values`` are the matching weights.
        """
        inputs = self.sparse_tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True
        )
        inputs = {k: v.to(self.sparse_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.sparse_model(**inputs)

        logits = outputs.logits[0]
        attention_mask = inputs["attention_mask"][0].unsqueeze(-1)

        # SPLADE weighting: log(1 + ReLU(logits)), padded positions zeroed,
        # then max over the token axis.
        relu_log = torch.log(1 + torch.relu(logits))
        weighted_log = relu_log * attention_mask
        max_val, _ = torch.max(weighted_log, dim=0)

        # flatten() always yields a 1-D index tensor (0, 1 or many hits), so
        # tolist() always returns a list and no scalar special case is needed.
        nonzero_idx = torch.nonzero(max_val).flatten()
        indices = nonzero_idx.cpu().tolist()
        values = max_val[nonzero_idx].cpu().tolist()

        return SparseVector(indices=indices, values=values)

# ==========================================
# 2. MAIN LOGIC
# ==========================================

# Build the embedder (loads both models)
embedder = CustomEmbedder()

# Open Qdrant in local mode
print("\nüíΩ Membuka database Qdrant lokal...")
client = QdrantClient(path="./qdrant_custom_db")
COLLECTION_NAME = "hybrid_e5_splade_no_quant"

# Always rebuild from scratch: drop a collection left over from an earlier run.
if client.collection_exists(COLLECTION_NAME):
    client.delete_collection(COLLECTION_NAME)

print("‚öôÔ∏è Membuat Collection baru...")
# 768 is the dense size used for E5-base here (the Qwen collection used 2560).
dense_vector_params = models.VectorParams(size=768, distance=models.Distance.COSINE)
sparse_vector_params = models.SparseVectorParams()
client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config={"dense_vector": dense_vector_params},
    sparse_vectors_config={"sparse_vector": sparse_vector_params},
)

# --- Splitter configuration ---
# Folder holding the Markdown files (adjust to your machine).
# The literal is a raw string that ALSO doubles every backslash, so its value
# contains "\\" separators. normpath collapses them on Windows and leaves the
# string unchanged on other platforms.
folder_path = os.path.normpath(r"C:\\Users\\Ilmu Komputer\\OneDrive\\Desktop\\portofolio\\RAG\\defi-rag-agent\\src\\evaluation\\data_md\\real_md")
md_files = glob.glob(os.path.join(folder_path, "*.md"))

headers_to_split_on = [("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")]
md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)
# Metadata keys the splitter writes, in hierarchy order. Computed once here
# instead of being rebuilt for every chunk.
header_keys = [name for _, name in headers_to_split_on]

print(f"üìÇ Ditemukan {len(md_files)} file Markdown.")
print("üöÄ Mulai proses embedding (Tanpa Quantization)...")

points = []

# Loop over files, then over the header-delimited chunks of each file.
for file_index, file_path in enumerate(md_files):
    file_name = os.path.basename(file_path)

    with open(file_path, "r", encoding="utf-8") as f:
        file_content = f.read()

    docs = md_splitter.split_text(file_content)
    total_chunks = len(docs)

    print(f"\nüìÑ File [{file_index+1}/{len(md_files)}]: {file_name} | Total Chunks: {total_chunks}")

    for i, doc in enumerate(docs):
        raw_content = doc.page_content
        if not raw_content.strip():
            continue

        # Header hierarchy present on this chunk, outermost first.
        found_headers = [doc.metadata[key] for key in header_keys if key in doc.metadata]
        header_breadcrumb = " > ".join(found_headers)

        # Unprefixed chunk text: breadcrumb (when present) plus the body.
        if header_breadcrumb:
            plain_text = f"{header_breadcrumb}: {raw_content}"
        else:
            plain_text = raw_content

        # E5 expects documents to carry the "passage: " prefix; queries are
        # embedded with "query: " on the search side.
        final_text = f"passage: {plain_text}"

        display_header = header_breadcrumb if header_breadcrumb else "(Tanpa Header)"
        print(f"   üëâ Chunk [{i+1}/{total_chunks}] | Header: {display_header}")

        # Dense vector: E5-prefixed text. Sparse vector: the UNPREFIXED text,
        # because the search side feeds SPLADE the raw query; indexing the
        # prefixed text would add "passage" tokens that no query contains.
        d_vec = embedder.get_dense_vector(final_text)
        s_vec = embedder.get_sparse_vector(plain_text)

        # Copy instead of mutating the splitter's own metadata dict.
        metadata = {
            **doc.metadata,
            "source_file": file_name,
            "chunk_index": i,
        }

        points.append(PointStruct(
            id=str(uuid.uuid4()),
            vector={
                "dense_vector": d_vec,
                "sparse_vector": s_vec
            },
            payload={
                "text": final_text,  # stored with the "passage: " prefix
                "metadata": metadata
            }
        ))

# Upload to Qdrant
if points:
    print(f"\n‚¨ÜÔ∏è Mengupload {len(points)} points ke Qdrant...")
    client.upsert(
        collection_name=COLLECTION_NAME,
        points=points
    )
    print("‚úÖ Semua data berhasil diupload!")
else:
    print("‚ö†Ô∏è Tidak ada data point yang dihasilkan.")

# ==========================================
# 3. FUNGSI PENCARIAN (UPDATED)
# ==========================================
def search_hybrid(query_text, limit=3, prefetch_limit=10):
    """Run a dense + sparse hybrid search and print the fused top hits.

    Args:
        query_text: The raw query string.
        limit: Number of fused results to print (default 3, as before).
        prefetch_limit: Candidates fetched from each of the dense and sparse
            indexes before fusion (default 10, as before).

    Returns:
        None. Score and the first 100 characters of the stored text are
        printed for each hit.
    """
    print(f"\nüîç Query: '{query_text}'")

    # E5 expects queries to carry the "query: " prefix.
    dense_query_text = f"query: {query_text}"

    # Embed the query: prefixed text for E5, raw text for SPLADE.
    q_dense = embedder.get_dense_vector(dense_query_text)
    q_sparse = embedder.get_sparse_vector(query_text)

    results = client.query_points(
        collection_name=COLLECTION_NAME,
        # Each prefetch pulls candidates from one named vector; the outer
        # query fuses both candidate lists with reciprocal rank fusion.
        prefetch=[
            models.Prefetch(
                query=q_dense,
                using="dense_vector",
                limit=prefetch_limit
            ),
            models.Prefetch(
                query=q_sparse,
                using="sparse_vector",
                limit=prefetch_limit
            ),
        ],
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        limit=limit
    )

    for hit in results.points:
        print(f"   Score: {hit.score:.4f} | Text Sample: {hit.payload['text'][:100]}...")

# Test case
search_hybrid("Bagaimana membuat nasi goreng dengan aroma smoky?")

üöÄ Running on: cuda
‚è≥ Loading Dense Model (intfloat/multilingual-e5-base) without quantization...
‚è≥ Loading Sparse Model (Splade v3)...

üíΩ Membuka database Qdrant lokal...
‚öôÔ∏è Membuat Collection baru...
üìÇ Ditemukan 13 file Markdown.
üöÄ Mulai proses embedding (Tanpa Quantization)...

üìÑ File [1/13]: Dukungan-Pelaksanaan-Kebijakan-Efisiensi-Belanja-APBN-Tahun-Anggaran-2025pdf.md | Total Chunks: 1
   üëâ Chunk [1/1] | Header: KEMENTERIAN KEUANGAN REPUBLIK INDONESIA > DIREKTORAT JENDERAL PERBENDAHARAAN

üìÑ File [2/13]: Dukungan-Pelaksanaan-Kebijakan-Pemerintahpdf.md | Total Chunks: 1
   üëâ Chunk [1/1] | Header: KEMENTERIAN KEUANGAN REPUBLIK INDONESIA > DIREKTORAT JENDERAL PERBENDAHARAAN

üìÑ File [3/13]: Efisiensi-KL-TA-2025_250130_133808.md | Total Chunks: 2
   üëâ Chunk [1/2] | Header: REPUBLIK INDONESIA
   üëâ Chunk [2/2] | Header: REPUBLIK INDONESIA > IDENTIFIKASI RENCANA EFISIENSI

üìÑ File [4/13]: Inpres-1-Tahun_2025.md | Total Chunks: 3
   üëâ Chunk [1/3