In [None]:
from langchain_text_splitters import MarkdownHeaderTextSplitter
from qdrant_client.http import models
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer, AutoModelForMaskedLM, BitsAndBytesConfig
from qdrant_client import QdrantClient, models
from qdrant_client.models import PointStruct, SparseVector

# ==========================================
# 1. CONFIGURATION AND UTILITIES
# ==========================================
# Hugging Face model identifiers for the dense and the sparse encoder.
DENSE_MODEL_ID = "Qwen/Qwen3-Embedding-4B"
SPARSE_MODEL_ID = "naver/splade-v3"

# Prefer the GPU whenever CUDA is available; otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"üöÄ Running on: {device}")

# 4-bit NF4 quantization settings. Only built when running on CUDA;
# on CPU the config stays None so no quantization is requested.
bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    if device == "cuda"
    else None
)

class CustomEmbedder:
    """Bundle the dense (Qwen) and sparse (SPLADE) encoders used for hybrid search.

    The constructor loads both tokenizers and models. ``get_dense_vector`` and
    ``get_sparse_vector`` each encode a single text into the representation
    stored under the corresponding named vector in Qdrant.
    """

    def __init__(self):
        """Load both tokenizers and both models onto the module-level ``device``."""
        print("‚è≥ Loading Dense Model (Qwen 4-bit)...")
        self.dense_tokenizer = AutoTokenizer.from_pretrained(DENSE_MODEL_ID, trust_remote_code=True)

        # ``bnb_config`` is None on CPU, so 4-bit quantization is only requested
        # on CUDA. The dtype needs an explicit CPU fallback: a conditional
        # expression without an ``else`` branch does not even parse.
        self.dense_model = AutoModel.from_pretrained(
            DENSE_MODEL_ID,
            trust_remote_code=True,
            quantization_config=bnb_config,
            device_map="auto" if device == "cuda" else None,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        )
        # Without a device_map the model is placed explicitly on the CPU.
        if device == "cpu":
            self.dense_model.to("cpu")

        print("‚è≥ Loading Sparse Model (Splade v3)...")
        self.sparse_tokenizer = AutoTokenizer.from_pretrained(SPARSE_MODEL_ID)
        self.sparse_model = AutoModelForMaskedLM.from_pretrained(SPARSE_MODEL_ID)
        self.sparse_model.to(device)

    def get_dense_vector(self, text):
        """Return the L2-normalized dense embedding of ``text`` as a list of floats.

        The text is tokenized (truncated at 8192 tokens), run through the dense
        model, pooled with ``last_token_pool`` and normalized. Only the first
        row of the batch is returned, so pass a single string.
        The vector length is whatever the dense model produces; the collection
        in this notebook is created with ``size=2560``.
        """
        inputs = self.dense_tokenizer(
            text,
            max_length=8192,
            padding=True,
            truncation=True,
            return_tensors='pt'
        )

        # Inputs must live on the same device as the model.
        inputs = {k: v.to(self.dense_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.dense_model(**inputs)
            # The embedding is the hidden state of the last non-padding token.
            embeddings = last_token_pool(outputs.last_hidden_state, inputs['attention_mask'])
            embeddings = F.normalize(embeddings, p=2, dim=1)

        return embeddings[0].cpu().tolist()

    @staticmethod
    def _splade_pool(logits, attention_mask):
        """Collapse per-token MLM logits into sparse ``(indices, values)`` lists.

        Computes ``log(1 + relu(logits))``, zeroes the positions masked out by
        ``attention_mask``, max-pools over the sequence axis and keeps only the
        non-zero vocabulary entries.

        Args:
            logits: tensor of shape (seq_len, vocab_size).
            attention_mask: tensor of shape (seq_len,), non-zero for real tokens.

        Returns:
            A tuple ``(indices, values)`` of plain Python lists of equal length;
            both are empty when no vocabulary entry is activated.
        """
        activations = torch.log(1 + torch.relu(logits)) * attention_mask.unsqueeze(-1)
        pooled, _ = torch.max(activations, dim=0)
        # squeeze(-1) only drops the trailing axis, so the result stays 1-D
        # for zero, one or many hits and always converts to a list.
        nonzero_ids = torch.nonzero(pooled).squeeze(-1)
        return nonzero_ids.cpu().tolist(), pooled[nonzero_ids].cpu().tolist()

    def get_sparse_vector(self, text):
        """Return the SPLADE sparse vector of ``text`` as a qdrant ``SparseVector``.

        Only the first sequence of the tokenized batch is used, so pass a
        single string. Indices are vocabulary ids of the sparse tokenizer.
        """
        inputs = self.sparse_tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True
        )

        # Inputs must live on the same device as the model.
        inputs = {k: v.to(self.sparse_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.sparse_model(**inputs)

        indices, values = self._splade_pool(outputs.logits[0], inputs["attention_mask"][0])
        return SparseVector(indices=indices, values=values)

def last_token_pool(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last real token.

    If every row of ``attention_mask`` ends with a 1 the batch is treated as
    left-padded and the final position is returned for all rows. Otherwise
    the position is derived per row from the number of attended tokens.
    """
    n_rows = attention_mask.shape[0]

    # All rows attend to the final position -> the last column is the answer.
    if attention_mask[:, -1].sum() == n_rows:
        return last_hidden_states[:, -1]

    # Otherwise the last real token of a row sits at (token count - 1).
    last_positions = attention_mask.sum(dim=1) - 1
    row_ids = torch.arange(last_hidden_states.shape[0], device=last_hidden_states.device)
    return last_hidden_states[row_ids, last_positions]
    








# ==========================================
# 2. MAIN LOGIC
# ==========================================

# Load both embedding models once; the instance is reused by the code below.
embedder = CustomEmbedder()

# Open Qdrant in local (on-disk) mode.
print("\nüíΩ Membuka database Qdrant lokal...")
client = QdrantClient(path="./qdrant_custom_db")
COLLECTION_NAME = "hybrid_qwen_splade"

# Start from a clean slate: drop the collection if it already exists.
if client.collection_exists(COLLECTION_NAME):
    client.delete_collection(COLLECTION_NAME)

print("‚öôÔ∏è Membuat Collection baru...")
# One named dense vector (cosine distance, 2560 dims) and one named sparse vector.
dense_params = models.VectorParams(size=2560, distance=models.Distance.COSINE)
sparse_params = models.SparseVectorParams()
client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config={"dense_vector": dense_params},
    sparse_vectors_config={"sparse_vector": sparse_params},
)


















# Split the markdown on its first three heading levels.
headers_to_split_on = [
    ("#", "Header 1"),
    ("##", "Header 2"),
    ("###", "Header 3"),
]
md_splitter = MarkdownHeaderTextSplitter(headers_to_split_on=headers_to_split_on)

md_path = "C:\\Users\\Ilmu Komputer\\OneDrive\\Desktop\\portofolio\\RAG\\defi-rag-agent\\src\\evaluation\\data_md\\parsed_document.md"

with open(md_path, "r", encoding="utf-8") as f:
    file_content = f.read()

# split_text yields Document objects (not plain strings); each carries
# page_content and the header metadata of its section.
docs = md_splitter.split_text(file_content)

print("üöÄ Mulai proses embedding dan upload...")
points = []
total_docs = len(docs)

for doc_idx, doc in enumerate(docs):
    print(f"   Processing doc {doc_idx+1}/{total_docs}...")

    # Only the section body is embedded; the header metadata is kept in the
    # payload so it can be displayed or filtered on later.
    chunk_text = doc.page_content
    dense_vec = embedder.get_dense_vector(chunk_text)
    sparse_vec = embedder.get_sparse_vector(chunk_text)

    points.append(
        PointStruct(
            id=doc_idx,
            vector={"dense_vector": dense_vec, "sparse_vector": sparse_vec},
            payload={"text": chunk_text, "metadata": doc.metadata},
        )
    )

# Upload every point in a single request.
client.upsert(collection_name=COLLECTION_NAME, points=points)
print("‚úÖ Data berhasil diupload!")






























# 3. FUNGSI PENCARIAN
# ==========================================
def search_hybrid(query_text):
    """Run a hybrid dense + sparse search for ``query_text`` and print the top 3 hits.

    The query is embedded with both encoders of the module-level ``embedder``.
    Each named vector prefetches its own 10 candidates, which Qdrant then
    fuses with Reciprocal Rank Fusion. Nothing is returned; results are printed.
    """
    print(f"\nüîç Query: '{query_text}'")

    # Encode the query with both models (dense first, then sparse).
    query_vectors = (
        ("dense_vector", embedder.get_dense_vector(query_text)),
        ("sparse_vector", embedder.get_sparse_vector(query_text)),
    )

    # One candidate list per named vector, fused server-side with RRF.
    candidate_queries = [
        models.Prefetch(query=vector, using=vector_name, limit=10)
        for vector_name, vector in query_vectors
    ]
    fused = client.query_points(
        collection_name=COLLECTION_NAME,
        prefetch=candidate_queries,
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        limit=3,
    )

    for hit in fused.points:
        print(f"   Score: {hit.score:.4f} | Text: {hit.payload['text']}")

# Test Case
search_hybrid("Bagaimana membuat nasi goreng dengan aroma smoky?")

In [None]:
search_hybrid("apa matakuliah yang harus diambil prodi Bimbingan konseling semester 1?")

In [None]:
search_hybrid("Apa tugas dosen PA?")

In [None]:

# 3. FUNGSI PENCARIAN
# ==========================================
def search_hybrid(query_text):
    """Run a hybrid dense + sparse search for ``query_text`` and print the top 3 hits.

    The query is embedded with both encoders of the module-level ``embedder``.
    Each named vector prefetches its own 10 candidates, which Qdrant then
    fuses with Reciprocal Rank Fusion. Each hit is printed as a numbered
    section with its score and text; nothing is returned.
    """
    print(f"\nüîç Query: '{query_text}'")

    # Encode the query with both models (dense first, then sparse).
    query_vectors = (
        ("dense_vector", embedder.get_dense_vector(query_text)),
        ("sparse_vector", embedder.get_sparse_vector(query_text)),
    )

    # One candidate list per named vector, fused server-side with RRF.
    candidate_queries = [
        models.Prefetch(query=vector, using=vector_name, limit=10)
        for vector_name, vector in query_vectors
    ]
    fused = client.query_points(
        collection_name=COLLECTION_NAME,
        prefetch=candidate_queries,
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        limit=3,
    )

    rank = 0
    for hit in fused.points:
        rank += 1
        print(f"--- RESULT {rank} ---")            # header separating the hits
        print(f"Score: {hit.score:.4f}")           # fused score
        print(f"Text:\n{hit.payload['text']}")     # stored section text
        print("-" * 40)                            # closing rule


# Test Case
search_hybrid("Apa makna Duduk bersila dewa ganesha di logo undiksha?")

In [None]:
search_hybrid("Apa warna bendera FMIPA?")

In [None]:
search_hybrid("Bagaimana Prosedur Permohonan Aktif Kembali Setelah Cuti Akademik ")

In [None]:
search_hybrid("Apa visi universitas pendidikan ganesha?")

In [None]:
search_hybrid("Apa visi misi universitas pendidikan ganesha?")

In [None]:
search_hybrid("Berapa jumlah sks yang dapat diambil jika IP diatas 3?")

In [None]:
search_hybrid("Berapa min kehadiran untuk mengikuti UAS?")

In [None]:
search_hybrid("Butuh berapa SKS agar mahasiswa dapat mengajukan Skripsi")

In [None]:
search_hybrid("Pakaian apa yang tidak diperkenankan untuk mengikuti perkuliahan?")

In [None]:
search_hybrid("saya adalah mahasiswa ilkom semester 7, bisa ga pindah ke prodi pendidikan bahasa inggris?")

In [None]:
search_hybrid("siapa aja dosen di prodi biologi?")

In [2]:
from langchain_text_splitters import MarkdownHeaderTextSplitter
from qdrant_client.http import models
import torch
import torch.nn.functional as F
from transformers import AutoModel, AutoTokenizer, AutoModelForMaskedLM, BitsAndBytesConfig
from qdrant_client import QdrantClient, models
from qdrant_client.models import PointStruct, SparseVector

# ==========================================
# 1. CONFIGURATION AND UTILITIES
# ==========================================
# Hugging Face model identifiers for the dense and the sparse encoder.
DENSE_MODEL_ID = "Qwen/Qwen3-Embedding-4B"
SPARSE_MODEL_ID = "naver/splade-v3"

# Prefer the GPU whenever CUDA is available; otherwise stay on the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"
print(f"üöÄ Running on: {device}")

# 4-bit NF4 quantization settings. Only built when running on CUDA;
# on CPU the config stays None so no quantization is requested.
bnb_config = (
    BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
    )
    if device == "cuda"
    else None
)

class CustomEmbedder:
    """Bundle the dense (Qwen) and sparse (SPLADE) encoders used for hybrid search.

    The constructor loads both tokenizers and models. ``get_dense_vector`` and
    ``get_sparse_vector`` each encode a single text into the representation
    stored under the corresponding named vector in Qdrant.
    """

    def __init__(self):
        """Load both tokenizers and both models onto the module-level ``device``."""
        print("‚è≥ Loading Dense Model (Qwen 4-bit)...")
        self.dense_tokenizer = AutoTokenizer.from_pretrained(DENSE_MODEL_ID, trust_remote_code=True)

        # ``bnb_config`` is None on CPU, so 4-bit quantization is only requested
        # on CUDA. The CPU fallback must be a real dtype: "cpu" is a device
        # name, not a dtype, so float32 is used instead.
        self.dense_model = AutoModel.from_pretrained(
            DENSE_MODEL_ID,
            trust_remote_code=True,
            quantization_config=bnb_config,
            device_map="auto" if device == "cuda" else None,
            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
        )
        # Without a device_map the model is placed explicitly on the CPU.
        if device == "cpu":
            self.dense_model.to("cpu")

        print("‚è≥ Loading Sparse Model (Splade v3)...")
        self.sparse_tokenizer = AutoTokenizer.from_pretrained(SPARSE_MODEL_ID)
        self.sparse_model = AutoModelForMaskedLM.from_pretrained(SPARSE_MODEL_ID)
        self.sparse_model.to(device)

    def get_dense_vector(self, text):
        """Return the L2-normalized dense embedding of ``text`` as a list of floats.

        The text is tokenized (truncated at 8192 tokens), run through the dense
        model, pooled with ``last_token_pool`` and normalized. Only the first
        row of the batch is returned, so pass a single string.
        The vector length is whatever the dense model produces.
        """
        inputs = self.dense_tokenizer(
            text,
            max_length=8192,
            padding=True,
            truncation=True,
            return_tensors='pt'
        )

        # Inputs must live on the same device as the model.
        inputs = {k: v.to(self.dense_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.dense_model(**inputs)
            # The embedding is the hidden state of the last non-padding token.
            embeddings = last_token_pool(outputs.last_hidden_state, inputs['attention_mask'])
            embeddings = F.normalize(embeddings, p=2, dim=1)

        return embeddings[0].cpu().tolist()

    @staticmethod
    def _splade_pool(logits, attention_mask):
        """Collapse per-token MLM logits into sparse ``(indices, values)`` lists.

        Computes ``log(1 + relu(logits))``, zeroes the positions masked out by
        ``attention_mask``, max-pools over the sequence axis and keeps only the
        non-zero vocabulary entries.

        Args:
            logits: tensor of shape (seq_len, vocab_size).
            attention_mask: tensor of shape (seq_len,), non-zero for real tokens.

        Returns:
            A tuple ``(indices, values)`` of plain Python lists of equal length;
            both are empty when no vocabulary entry is activated.
        """
        activations = torch.log(1 + torch.relu(logits)) * attention_mask.unsqueeze(-1)
        pooled, _ = torch.max(activations, dim=0)
        # squeeze(-1) only drops the trailing axis, so the result stays 1-D
        # for zero, one or many hits and always converts to a list.
        nonzero_ids = torch.nonzero(pooled).squeeze(-1)
        return nonzero_ids.cpu().tolist(), pooled[nonzero_ids].cpu().tolist()

    def get_sparse_vector(self, text):
        """Return the SPLADE sparse vector of ``text`` as a qdrant ``SparseVector``.

        Only the first sequence of the tokenized batch is used, so pass a
        single string. Indices are vocabulary ids of the sparse tokenizer.
        """
        inputs = self.sparse_tokenizer(
            text,
            return_tensors="pt",
            padding=True,
            truncation=True
        )

        # Inputs must live on the same device as the model.
        inputs = {k: v.to(self.sparse_model.device) for k, v in inputs.items()}

        with torch.no_grad():
            outputs = self.sparse_model(**inputs)

        indices, values = self._splade_pool(outputs.logits[0], inputs["attention_mask"][0])
        return SparseVector(indices=indices, values=values)

def last_token_pool(last_hidden_states: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Pick, for every sequence in the batch, the hidden state of its last real token.

    If every row of ``attention_mask`` ends with a 1 the batch is treated as
    left-padded and the final position is returned for all rows. Otherwise
    the position is derived per row from the number of attended tokens.
    """
    n_rows = attention_mask.shape[0]

    # All rows attend to the final position -> the last column is the answer.
    if attention_mask[:, -1].sum() == n_rows:
        return last_hidden_states[:, -1]

    # Otherwise the last real token of a row sits at (token count - 1).
    last_positions = attention_mask.sum(dim=1) - 1
    row_ids = torch.arange(last_hidden_states.shape[0], device=last_hidden_states.device)
    return last_hidden_states[row_ids, last_positions]
    








# ==========================================
# 2. MAIN LOGIC
# ==========================================

# Load both embedding models once; the instance is reused by the search code.
embedder = CustomEmbedder()

# Re-open the on-disk Qdrant database. The collection is not re-created in
# this cell, so whatever is stored under COLLECTION_NAME is queried as-is.
print("\nüíΩ Membuka database Qdrant lokal...")
client = QdrantClient(
    path="./qdrant_custom_db",
)
COLLECTION_NAME = "hybrid_qwen_splade"



# 3. FUNGSI PENCARIAN
# ==========================================
def search_hybrid(query_text, *, limit=3, prefetch_limit=10):
    """Run a hybrid dense + sparse search for ``query_text`` and print the hits.

    The query is embedded with both encoders of the module-level ``embedder``.
    Each named vector prefetches its own candidate list, and Qdrant fuses the
    two lists with Reciprocal Rank Fusion.

    Args:
        query_text: the natural-language query to search for.
        limit: number of fused results to print (default 3).
        prefetch_limit: number of candidates fetched per named vector before
            fusion (default 10).

    Returns:
        None. Each hit is printed with its rank, fused score, the stored
        ``metadata`` payload (section headers) and the stored ``text``.
    """
    print(f"\nüîç Query: '{query_text}'")

    # Encode the query with both models (dense first, then sparse).
    q_dense = embedder.get_dense_vector(query_text)
    q_sparse = embedder.get_sparse_vector(query_text)

    # One candidate list per named vector, fused server-side with RRF.
    results = client.query_points(
        collection_name=COLLECTION_NAME,
        prefetch=[
            models.Prefetch(
                query=q_dense,
                using="dense_vector",
                limit=prefetch_limit,
            ),
            models.Prefetch(
                query=q_sparse,
                using="sparse_vector",
                limit=prefetch_limit,
            ),
        ],
        query=models.FusionQuery(fusion=models.Fusion.RRF),
        limit=limit,
    )

    for i, hit in enumerate(results.points, start=1):
        print(f"--- RESULT {i} ---")                  # header separating the hits
        print(f"Score: {hit.score:.4f}")              # fused score
        print(f"Judul:\n{hit.payload['metadata']}")   # section header metadata
        print(f"Text:\n{hit.payload['text']}")        # stored section text
        print("-" * 40)

# Test Case
search_hybrid("Berapa sks agar bisa ambil skripsi?")

üöÄ Running on: cuda
‚è≥ Loading Dense Model (Qwen 4-bit)...


`torch_dtype` is deprecated! Use `dtype` instead!


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

‚è≥ Loading Sparse Model (Splade v3)...

üíΩ Membuka database Qdrant lokal...

üîç Query: 'Berapa sks agar bisa ambil skripsi?'
--- RESULT 1 ---
Score: 0.8333
Judul:
{'Header 1': 'Skripsi'}
Text:
| No  | Nama MK | Kode MK | Bobot SKS/JS | Semester | MK Prasyarat |
| --- | ------- | ------- | ------------ | -------- | ------------ |
| 72. | Skripsi | MNJ1877 | 6/6          | 8        |              |  
Jumlah SKS: 6/6  
TOTAL SKS: 152/152  
Keterangan: Untuk mata kuliah bersyarat dapat ditempuh jika mata kuliah yang diprasyaratkan telah memperoleh nilai minimal C.
----------------------------------------
--- RESULT 2 ---
Score: 0.5000
Judul:
{'Header 1': '7.1.1 Penjelasan Umum'}
Text:
1. Dalam keadaan terpaksa, seorang mahasiswa dapat mengambil cuti akademik, yaitu menghentikan studinya untuk sementara waktu atas ijin Dekan.
2. Cuti Akademik hanya dapat dilaksanakan sekali selama masa studi, dengan batas waktu maksimal 1 semester.
3. Pada akhir semester berjalan mahasiswa harus melapo

In [6]:
search_hybrid("Berapa sks yang setidaknya perlu dicapai agar bisa ambil topik skripsi?")


üîç Query: 'Berapa sks yang setidaknya perlu dicapai agar bisa ambil topik skripsi?'
--- RESULT 1 ---
Score: 0.7500
Judul:
{'Header 1': '8.1.3 Prosedur Pengajuan Rancangan Penelitian'}
Text:
1. Mahasiswa yang telah mencapai 120 sks boleh mengajukan usulan penelitian kepada Ketua Program Studi dengan mengikuti ketentuan yang diatur pada Pedoman Penulisan Skripsi.
2. Format dan cakupan usulan penelitian diatur dalam Pedoman Penulisan Karya Ilmiah yang ditetapkan oleh Undiksha.
3. Penilaian usulan penelitian dapat dilakukan oleh Ketua Program Studi, atau forum seminar, sesuai dengan kondisi dan kesepakatan di Program Studi yang bersangkutan.
----------------------------------------
--- RESULT 2 ---
Score: 0.5000
Judul:
{'Header 1': '8.1.5.2 Mekanisme Bimbingan Skripsi'}
Text:
1. Setiap langkah yang akan dikerjakan oleh mahasiswa, baik dalam proses penelitian maupun dalam penyusunan skripsi, harus lebih dahulu dikonsultasikan dengan dosen pembimbing. Mahasiswa yang belum berkonsultasi le