In [1]:
import os
from llama_index.core import (
    SimpleDirectoryReader,
    Settings,
    load_index_from_storage
)
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.core.indices.knowledge_graph import KnowledgeGraphIndex
from llama_index.llms.openai import OpenAI
import logging

In [None]:
# Set up logging so the processing steps are visible; at INFO level the cell
# output also includes each HTTP request logged by the client libraries (e.g. httpx).
logging.basicConfig(level=logging.INFO)

In [None]:
"""
Build a graph-based RAG system using LlamaIndex

Args:
    data_dir: Directory containing the text files
    persist_dir: Directory to save the index
    rebuild: Whether to rebuild the index or load from storage
    
Returns:
    kg_index: Knowledge graph index
"""

In [None]:
def build_graph_rag(data_dir="data", persist_dir="storage", rebuild=False):
    """
    Build a knowledge graph index for graph-based RAG, or load a persisted one.

    The index is persisted under ``<persist_dir>/kg``. A new index is built
    when ``rebuild`` is true or when that sub-directory does not exist;
    otherwise the index is loaded from it.

    Side effects: sets ``Settings.llm`` and ``Settings.chunk_size`` globally
    (only after the inputs have been validated).

    Args:
        data_dir: Directory containing the text files
        persist_dir: Parent directory for persisted storage; the index itself
            is saved in its ``kg`` sub-directory
        rebuild: Whether to rebuild the index even if a persisted one exists

    Returns:
        kg_index: Knowledge graph index, newly built or loaded from storage

    Raises:
        FileNotFoundError: If a build is required and ``data_dir`` does not exist
    """
    # The index lives in the "kg" sub-directory, so that is what must exist
    # for loading to work; checking only persist_dir would send a partially
    # created or unrelated directory down the load path.
    kg_dir = os.path.join(persist_dir, "kg")
    needs_build = rebuild or not os.path.isdir(kg_dir)

    if needs_build:
        print(f"Building new index from {data_dir}... / {data_dir} dizininden yeni indeks oluşturuluyor...")

        # Fail fast, before creating the LLM client or touching global settings
        if not os.path.exists(data_dir):
            raise FileNotFoundError(f"Data directory {data_dir} not found / Veri dizini {data_dir} bulunamadı")

    # Initialize the LLM and update the global settings
    Settings.llm = OpenAI(model="gpt-4")
    Settings.chunk_size = 1024

    if needs_build:
        # Load documents from the directory
        documents = SimpleDirectoryReader(data_dir).load_data()
        print(f"Loaded {len(documents)} documents from {data_dir} / {data_dir} dizininden {len(documents)} belge yüklendi")

        # Create a graph store and a storage context backed by it
        graph_store = SimpleGraphStore()
        storage_context = StorageContext.from_defaults(graph_store=graph_store)

        # Build the knowledge graph index
        kg_index = KnowledgeGraphIndex.from_documents(
            documents,
            storage_context=storage_context,
            max_triplets_per_chunk=10,
            include_embeddings=True,
        )

        # Persist the index into the "kg" sub-directory
        os.makedirs(persist_dir, exist_ok=True)
        kg_index.storage_context.persist(persist_dir=kg_dir)

        print(f"Index built and saved to {persist_dir} / İndeks oluşturuldu ve {persist_dir} dizinine kaydedildi")

        return kg_index

    print(f"Loading existing index from {persist_dir}... / {persist_dir} dizininden mevcut indeks yükleniyor...")

    # Load the index from the persisted "kg" sub-directory
    kg_storage_context = StorageContext.from_defaults(persist_dir=kg_dir)
    kg_index = load_index_from_storage(storage_context=kg_storage_context)

    print("Index loaded successfully / İndeks başarıyla yüklendi")

    return kg_index

In [None]:
def create_query_engine(kg_index):
    """
    Wrap a knowledge graph index in a query engine.

    The engine is requested with the "compact" response mode and with
    verbose output enabled.

    NOTE(review): this was described as a hybrid (graph + vector) engine, but
    no retriever mode is passed here, so that depends on the index's own
    defaults -- confirm.

    Args:
        kg_index: Knowledge graph index exposing ``as_query_engine``

    Returns:
        query_engine: The engine returned by ``kg_index.as_query_engine``
    """
    # Options forwarded unchanged to the index's query-engine factory
    engine_options = {"response_mode": "compact", "verbose": True}
    return kg_index.as_query_engine(**engine_options)

In [None]:
# Build or load the indices / indeksleri oluştur veya yükle
kg_index = build_graph_rag(data_dir="data", rebuild=False)

# Create a hybrid query engine / Hibrit bir sorgu motoru oluştur
query_engine = create_query_engine(kg_index)

Building new index from data...
Loaded 13 documents from data


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:openai._base_client:Retrying request to /chat/completions in 20.000000 seconds
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP

Indice built and saved to storage


In [6]:
# Sample question to ask the knowledge-graph query engine
query = "What is AI?"
    
# Run the query and print the engine's response
response = query_engine.query(query)
print(f"\nResponse:\n{response}")

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


[1;3;32mExtracted keywords: ['AI', 'is', 'What']
[0m

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:llama_index.core.indices.knowledge_graph.retrievers:> Querying with idx: 03508ead-34f9-4a25-9bc7-6acc8ec4cc82: AI continues to evolve rapidly, with advancements in general AI, explainable ...
INFO:llama_index.core.indices.knowledge_graph.retrievers:> Querying with idx: 56a0507c-7ae5-43b1-83d3-ed05dcd47b84: Autonomous systems, such as self-driving cars and drones, rely on AI to navig...
INFO:llama_index.core.indices.knowledge_graph.retrievers:> Querying with idx: 5ca239b4-f2ba-4946-ad73-b5f057a12753: Machine learning (ML) is a subset of AI that enables systems to learn from da...
INFO:llama_index.core.indices.knowledge_graph.retrievers:> Querying with idx: 78f465a4-7967-4e9d-ae77-e56e89290117: AI is transforming healthcare by improving diagnostics, automating administra...
INFO:llama_index.core.indices.knowledge_graph.retrievers:> Querying with idx: 2649ae2a-a793-49a8-8fe6-6524325999cc: The concept

[1;3;34mKG context:
The following are knowledge sequence in max depth 2 in the form of directed graph like:
`subject -[predicate]->, object, <-[predicate_next_hop]-, object_next_hop ...`
('Ai', 'Has advancements in', 'Explainable ai')
('Machine learning', 'Is a subset of', 'Ai')
[0m

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"



Response:
AI, or Artificial Intelligence, is a field of study that dates back to ancient times but became formalized in the 1950s. It involves creating systems that can perform tasks that would normally require human intelligence. This includes tasks like understanding natural language, recognizing patterns, and making decisions. Modern AI leverages deep learning and neural networks to achieve impressive levels of intelligence. It has a wide range of applications, from powering autonomous systems like self-driving cars and drones to transforming healthcare by improving diagnostics and personalizing treatment. AI is also used in businesses to optimize operations and enhance customer service. However, it also raises ethical concerns such as bias, data privacy, and the impact on employment. The future of AI is rapidly evolving, with advancements in areas like general AI, explainable AI, and AI-human collaboration.
