In [1]:
from qdrant_client import QdrantClient, models

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
client = QdrantClient(url="http://localhost:6333")

In [3]:
collection_name = "bank_compliance_v1"

client.create_collection(
    collection_name=collection_name,
    vectors_config={
        "dense": models.VectorParams(
            size=384,
            distance=models.Distance.COSINE,
            hnsw_config=models.HnswConfigDiff(
                m=8,
                ef_construct=100
            )
        )
    },
    sparse_vectors_config={
        "sparse": models.SparseVectorParams(
            modifier=models.Modifier.IDF
        )
    },
    optimizers_config=models.OptimizersConfigDiff(
        indexing_threshold=1000
    )
)

client.create_payload_index(collection_name, "file_name", models.PayloadSchemaType.KEYWORD)
client.create_payload_index(collection_name, "page_label", models.PayloadSchemaType.INTEGER)
client.create_payload_index(collection_name, "article_no", models.PayloadSchemaType.KEYWORD)

UpdateResult(operation_id=6, status=<UpdateStatus.COMPLETED: 'completed'>)

In [4]:
import os
from llama_index.core import SimpleDirectoryReader

def banking_metadata_extractor(file_path):
    """
    Her dosya okunduƒüunda dosya bazlƒ± sabit metadata'larƒ± ekler.
    LlamaIndex sayfa numaralarƒ±nƒ± (page_label) otomatik olarak d√∂k√ºmana ekleyecektir.
    """
    file_name = os.path.basename(file_path)
    
    # Dosya ismine g√∂re d√∂k√ºman ba≈ülƒ±ƒüƒ± atama
    doc_titles = {
        "BBSEBH.pdf": "Bankalarƒ±n Bilgi Sistemleri ve Elektronik Bankacƒ±lƒ±k Hizmetleri Hakkƒ±nda Y√∂netmelik",
        "sir_saklama_yukumlulugu.pdf": "Sƒ±r Niteliƒüindeki Bilgilerin Payla≈üƒ±lmasƒ± Hakkƒ±nda Y√∂netmelik"
    }
    
    return {
        "file_name": file_name,
        "document_title": doc_titles.get(file_name, "Bilinmeyen Mevzuat"),
        "file_path": file_path,
        "category": "Banking Regulation",
        "ingestion_date": "2024-05-22"
    }

# Veriyi Y√ºkleme
reader = SimpleDirectoryReader(
    input_dir="./data", 
    file_metadata=banking_metadata_extractor
)
documents = reader.load_data()

print(documents[0].metadata)

{'page_label': '1', 'file_name': 'BBSEBH.pdf', 'document_title': 'Bankalarƒ±n Bilgi Sistemleri ve Elektronik Bankacƒ±lƒ±k Hizmetleri Hakkƒ±nda Y√∂netmelik', 'file_path': '/home/berk/finreg-navigator/data/BBSEBH.pdf', 'category': 'Banking Regulation', 'ingestion_date': '2024-05-22'}


In [5]:
from llama_index.core.node_parser import HierarchicalNodeParser

node_parser = HierarchicalNodeParser.from_defaults(
    chunk_sizes=[2048,512,128],
    chunk_overlap=20
)

nodes = node_parser.get_nodes_from_documents(documents)

In [None]:
import re

def enrich_metadata_with_inheritance(nodes):
    """
    Her node'un ebeveyninden madde numarasƒ±nƒ± miras almasƒ±nƒ± saƒülar.
    """
    node_dict = {n.node_id: n for n in nodes}
    
    for node in nodes:
        match = re.search(r"MADDE\s+(\d+)", node.text)
        if match:
            node.metadata["article_no"] = f"Madde {match.group(1)}"
        else:
            parent_id = node.parent_node.node_id if node.parent_node else None
            while parent_id and "article_no" not in node.metadata:
                parent = node_dict.get(parent_id)
                if parent:
                    parent_match = re.search(r"MADDE\s+(\d+)", parent.text)
                    if parent_match:
                        node.metadata["article_no"] = f"Madde {parent_match.group(1)}"
                        break
                    parent_id = parent.parent_node.node_id if parent.parent_node else None
                else:
                    break
            
            if "article_no" not in node.metadata:
                node.metadata["article_no"] = "Genel H√ºk√ºmler"
                
    return nodes

enriched_nodes = enrich_metadata_with_inheritance(nodes)

In [8]:
import mlflow

mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Banking_RAG_Ingestion")

with mlflow.start_run(run_name="Initial_Ingestion_Hybrid"):
    mlflow.log_param("chunk_sizes", "[2048, 512, 128]")
    mlflow.log_param("vector_store", "Qdrant")
    mlflow.log_param("collection_name", "banking_compliance_v1")
    
    mlflow.log_metric("total_nodes", len(enriched_nodes))
    
    print(f"Toplam {len(enriched_nodes)} node olu≈üturuldu ve MLflow'a loglandƒ±.")

2026/02/20 23:18:21 INFO mlflow.tracking.fluent: Experiment with name 'Banking_RAG_Ingestion' does not exist. Creating a new experiment.


Toplam 1751 node olu≈üturuldu ve MLflow'a loglandƒ±.
üèÉ View run Initial_Ingestion_Hybrid at: http://localhost:5000/#/experiments/1/runs/4cb7a4b627444b88b5215c302bd82d05
üß™ View experiment at: http://localhost:5000/#/experiments/1


In [None]:
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.core import StorageContext, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# 1. Embedding Modelini Tanƒ±mla (Dense)
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

# 2. Qdrant Store Yapƒ±landƒ±rmasƒ±
vector_store = QdrantVectorStore(
    collection_name="bank_compliance_v1",
    client=client,
    enable_hybrid=True,
    batch_size=64,
    dense_vector_name="dense",
    sparse_vector_name="sparse"
)

# 3. Indexleme ƒ∞≈ülemi
#storage_context = StorageContext.from_defaults(vector_store=vector_store)
#index = VectorStoreIndex(
#    enriched_nodes, 
#    storage_context=storage_context,
#    embed_model=embed_model
#)

index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embed_model
)

print("ƒ∞ndeksleme ba≈üarƒ±yla tamamlandƒ±!")

2026-02-20 23:18:31,175 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
2026-02-20 23:18:31,479 - INFO - HTTP Request: HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/modules.json "HTTP/1.1 307 Temporary Redirect"
2026-02-20 23:18:31,524 - INFO - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/models/sentence-transformers/all-MiniLM-L6-v2/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/modules.json "HTTP/1.1 200 OK"
2026-02-20 23:18:31,671 - INFO - HTTP Request: HEAD https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2/resolve/main/config_sentence_transformers.json "HTTP/1.1 307 Temporary Redirect"
2026-02-20 23:18:31,715 - INFO - HTTP Request: HEAD https://huggingface.co/api/resolve-cache/models/sentence-transformers/all-MiniLM-L6-v2/c9745ed1d9f207416be6d2e6f8de32d1f16199bf/config_sentence_transformers.json "HTTP/1.1 200 OK"
2026-02-20 23:18:31,864 - INFO - HTTP Request: HEAD https://huggingface.co/sent

ƒ∞ndeksleme ba≈üarƒ±yla tamamlandƒ±!


In [10]:
retriever = index.as_retriever(similarity_top_k=3)

# Test Sorgusu
query = "A√ßƒ±k bankacƒ±lƒ±k servisleri nelerdir ve kimler eri≈üebilir?"
retrieved_nodes = retriever.retrieve(query)

for i, node in enumerate(retrieved_nodes):
    print(f"\n--- Sonu√ß {i+1} (Skor: {node.score:.4f}) ---")
    print(f"Kaynak: {node.metadata.get('file_name')} | Sayfa: {node.metadata.get('page_label')}")
    print(f"Madde: {node.metadata.get('article_no', 'Belirtilmemi≈ü')}")
    print(f"ƒ∞√ßerik √ñzeti: {node.text[:200]}...")

2026-02-20 23:22:57,545 - INFO - HTTP Request: POST http://localhost:6333/collections/bank_compliance_v1/points/query/batch "HTTP/1.1 200 OK"



--- Sonu√ß 1 (Skor: 0.7274) ---
Kaynak: BBSEBH.pdf | Sayfa: 1
Madde: Madde 1
ƒ∞√ßerik √ñzeti: n) G√ºvenlik duvarƒ±: Farklƒ± g√ºvenlik seviyelerine sahip aƒülar veya aƒüa baƒülƒ± cihazlar arasƒ±ndaki trafik akƒ±≈ü kontrol√ºn√º saƒülayan donanƒ±m ya da
yazƒ±lƒ±mlarƒ±,
o) Hassas veri: Kimlik doƒürulamada kullanƒ±lan...

--- Sonu√ß 2 (Skor: 0.7257) ---
Kaynak: BBSEBH.pdf | Sayfa: 2
Madde: Madde 4
ƒ∞√ßerik √ñzeti: bilgi sistemlerinden sorumlu √ºst d√ºzey y√∂netici ile bankanƒ±n ilgili i≈ü
birimlerinden √ºst d√ºzey y√∂neticilerin bu komiteye √ºye olmasƒ± esastƒ±r....

--- Sonu√ß 3 (Skor: 0.7224) ---
Kaynak: sir_saklama_yukumlulugu.pdf | Sayfa: 1
Madde: Madde 1
ƒ∞√ßerik √ñzeti: c) Ana sermaye: 5/9/2013 tarihli ve 28756 sayƒ±lƒ± Resm√Æ Gazete‚Äôde yayƒ±mlanan Bankalarƒ±n √ñzkaynaklarƒ±na
ƒ∞li≈ükin Y√∂netmelikte belirlenen usul ve esaslar √ßer√ßevesinde hesaplanacak ana sermayeyi,
√ß) Anonim...


In [25]:
from llama_index.llms.ollama import Ollama

llm = Ollama(model="llama3", request_timeout=120.0)

#response = llm.complete("Bankacƒ±lƒ±kta uyum (compliance) ne anlama gelir?")
#print(response)

In [29]:
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import LLMRerank

# 1. √ñnce veritabanƒ±ndaki t√ºm node ili≈ükilerini alƒ±yoruz
all_nodes = enriched_nodes

# 2. Recursive Retriever tanƒ±mlama
base_retriever = index.as_retriever(similarity_top_k=20)

reranker = LLMRerank(choice_batch_size=10, top_n=5, llm=llm)

retriever = RecursiveRetriever(
    "vector",
    retriever_dict={"vector": base_retriever},
    node_dict={n.node_id: n for n in all_nodes},
    verbose=True
)

# 3. Query Engine olu≈üturma (LLM + Retriever birle≈üimi)
query_engine = RetrieverQueryEngine.from_args(
    retriever, 
    llm=llm,
    node_postprocessors=[reranker]
)

In [30]:
from llama_index.core import PromptTemplate

qa_prompt_tmpl_str = (
    "SADECE SANA VERƒ∞LEN BAƒûLAMDAKƒ∞ Bƒ∞LGƒ∞LERƒ∞ KULLANARAK CEVAP VER.\n"
    "KESƒ∞NLƒ∞KLE T√úRK√áE DI≈ûINDA Bƒ∞R Dƒ∞L KULLANMA.\n"
    "Sen kƒ±demli bir Bankacƒ±lƒ±k Uyum (Compliance) Analistisin.\n"
    "Cevabƒ±nƒ± verirken mutlaka hangi maddeye dayandƒ±ƒüƒ±nƒ± belirt (√ñrn: Madde 3 uyarƒ±nca...).\n"
    "Eƒüer baƒülamda bilgi yoksa, 'Bu bilgi d√∂k√ºmanda yer almamaktadƒ±r' de.\n"
    "---------------------\n"
    "BAƒûLAM:\n{context_str}\n"
    "---------------------\n"
    "SORU: {query_str}\n"
    "YANIT (SADECE T√úRK√áE): "
)

qa_prompt_tmpl = PromptTemplate(qa_prompt_tmpl_str)

query_engine.update_prompts(
    {"response_synthesizer:text_qa_template": qa_prompt_tmpl}
)

In [31]:
response = query_engine.query("A√ßƒ±k bankacƒ±lƒ±k servislerine kimler eri≈üebilir ve ≈üartlarƒ± nelerdir?")

print(f"Cevap: \n{response}")

for source in response.source_nodes:
    print(f"\nKullanƒ±lan Madde: {source.node.metadata.get('article_no')}")
    print(f"Kaynak Dosya: {source.node.metadata.get('file_name')}")

2026-02-20 23:41:04,133 - INFO - HTTP Request: POST http://localhost:6333/collections/bank_compliance_v1/points/query/batch "HTTP/1.1 200 OK"


[1;3;34mRetrieving with query id None: A√ßƒ±k bankacƒ±lƒ±k servislerine kimler eri≈üebilir ve ≈üartlarƒ± nelerdir?
[0m[1;3;38;5;200mRetrieving text node: bilgi sistemlerinden sorumlu √ºst d√ºzey y√∂netici ile bankanƒ±n ilgili i≈ü
birimlerinden √ºst d√ºzey y√∂neticilerin bu komiteye √ºye olmasƒ± esastƒ±r.
[0m[1;3;38;5;200mRetrieving text node: n) G√ºvenlik duvarƒ±: Farklƒ± g√ºvenlik seviyelerine sahip aƒülar veya aƒüa baƒülƒ± cihazlar arasƒ±ndaki trafik akƒ±≈ü kontrol√ºn√º saƒülayan donanƒ±m ya da
yazƒ±lƒ±mlarƒ±,
o) Hassas veri: Kimlik doƒürulamada kullanƒ±lan veriler ba≈üta olmak √ºzere; m√º≈üteriye ait olan, √ße≈üitli sebeplerle bankaca muhafaza edilen ve
√º√ß√ºnc√º ki≈üilerce ele ge√ßirilmesi halinde, bu ki≈üilerin m√º≈üteri olan ki≈üilerle ayƒ±rt edilebilme mekanizmalarƒ±nƒ±n zarar g√∂receƒüi ve dolandƒ±rƒ±cƒ±lƒ±k ya da
m√º≈üteriler adƒ±na sahte i≈ülem yapƒ±lmasƒ±na imk√¢n verebilecek nitelikteki verileri,
√∂) ƒ∞kincil merkez: ƒ∞kincil sistemlerin kullanƒ±ma hazƒ±r olacak ≈üeki

2026-02-20 23:41:27,306 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2026-02-20 23:41:51,117 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"
2026-02-20 23:41:55,926 - INFO - HTTP Request: POST http://localhost:11434/api/chat "HTTP/1.1 200 OK"


Cevap: 
Madde 17 uyarƒ±nca, Bilgi Sistemleri S√ºreklilik Planƒ±nƒ±n ya da ikincil merkezin devreye alƒ±nmasƒ± gibi hallerde derhal Sekt√∂rel SOME'yi bilgilendirir. Bu bilgi d√∂k√ºmanda yer almamaktadƒ±r.

Kullanƒ±lan Madde: Madde 29
Kaynak Dosya: BBSEBH.pdf

Kullanƒ±lan Madde: Madde 17
Kaynak Dosya: BBSEBH.pdf
