In [1]:
# Standard-library modules used throughout the notebook
import logging
import os
import sys
from pathlib import Path

# Make the project's `src` package importable.
# Assumes the notebook lives in a direct sub-folder of the project root.
project_root = Path.cwd().parent
# Guarded so that re-running this cell does not append the same entry again.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('rag-recipe-tutorial')

# Read the credentials from the environment. They are only read here, never
# set, so report missing ones early instead of failing later in the notebook.
api_key = os.getenv('OPENAI_API_KEY')
db_pwd = os.getenv('POSTGRES_PWD')
missing_env_vars = [
    name
    for name, value in (('OPENAI_API_KEY', api_key), ('POSTGRES_PWD', db_pwd))
    if not value
]
if missing_env_vars:
    logger.warning("Environment variables not set: %s", ", ".join(missing_env_vars))

In [None]:
# Codice per il building

def build_recipe_registry(self):
    """
    Build and initialize the RAG recipe registry.

    Creates ``self.recipe_registry`` and registers in it every recipe that
    ``RAGRecipeLoader`` loads from ``<project_root>/configs/rag_recipes``,
    where ``<project_root>`` is the third ancestor directory of this file.
    The recipe named ``basic_rag`` is registered as the default; when no
    loaded recipe has that name, the first loaded recipe becomes the default.

    Falls back to ``self._create_fallback_recipe()`` when the directory does
    not exist, when the loader returns no recipes, or when loading or
    registration raises.

    Returns:
        self: Builder instance for method chaining

    Raises:
        RuntimeError: If the LLM handler, the vector store searcher or the
            database connector is missing (falsy)
    """
    # Checked one at a time so the error names the first missing dependency.
    if not self.sql_llm:
        raise RuntimeError("Cannot build recipe registry without LLM handler")
    if not self.vector_store_searcher:
        raise RuntimeError("Cannot build recipe registry without vector store searcher")
    if not self.db:
        raise RuntimeError("Cannot build recipe registry without database connector")

    logger.info("Initializing RAG recipe registry")

    self.recipe_registry = RAGRecipeRegistry()

    # Dependencies handed to the recipe loader.
    dependencies = {
        "db": self.db,
        "db_connector": self.db,  # Allow both naming conventions
        "llm_handler": self.sql_llm,
        "vector_store": self.vector_store,
        "vector_store_search": self.vector_store_searcher
    }
    recipe_loader = RAGRecipeLoader(dependencies=dependencies)

    # Locate the recipe configurations relative to this file.
    project_root = Path(__file__).resolve().parent.parent.parent
    recipe_dir = os.path.join(project_root, "configs", "rag_recipes")

    if not os.path.exists(recipe_dir):
        logger.warning(f"RAG recipes directory not found: {recipe_dir}")
        self._create_fallback_recipe()
        return self

    try:
        logger.info(f"Loading RAG recipes from {recipe_dir}")
        recipes = recipe_loader.load_recipes_from_directory(recipe_dir)

        if not recipes:
            logger.warning("No RAG recipes found in configuration directory")
            self._create_fallback_recipe()
            return self

        # Register every recipe, remembering whether basic_rag was among them
        # so the list does not have to be scanned a second time.
        basic_rag_found = False
        for recipe in recipes:
            is_default = recipe.name == "basic_rag"
            basic_rag_found = basic_rag_found or is_default
            self.recipe_registry.register_recipe(recipe, set_as_default=is_default)
            logger.info(f"Registered RAG recipe: {recipe.name}")

        # Without basic_rag, the first loaded recipe becomes the default.
        if not basic_rag_found:
            self.recipe_registry.set_default_recipe(recipes[0].name)
            logger.info(f"Set default RAG recipe to: {recipes[0].name}")

    except Exception as e:
        # logger.exception keeps the traceback; the error is otherwise
        # swallowed here, which would leave only the message to debug with.
        logger.exception(f"Failed to load RAG recipes: {e}")
        self._create_fallback_recipe()

    return self

def _create_fallback_recipe(self):
    """
    Register a hard-coded ``basic_rag`` recipe as the registry default.

    Used when no recipe configuration could be loaded. The recipe is wired
    from ``self.vector_store_searcher``, ``self.sql_llm`` and ``self.db`` and
    registered in ``self.recipe_registry``.
    """
    logger.warning("Creating fallback RAG recipe")

    # Assemble the recipe one stage at a time, in pipeline order.
    builder = RAGRecipeBuilder("basic_rag", "Fallback basic RAG recipe")
    builder = builder.with_query_understanding(PassthroughQueryUnderstandingStrategy())

    retrieval = CosineSimRetrievalStrategy(
        vector_store_search=self.vector_store_searcher,
        tables_limit=3,
        columns_limit=5,
        queries_limit=2,
    )
    builder = builder.with_retrieval(retrieval)

    context_processor = SimpleContextProcessor(
        include_table_descriptions=True,
        include_column_descriptions=True,
        include_sample_queries=True,
    )
    builder = builder.with_context_processing(context_processor)

    builder = builder.with_prompt_building(StandardPromptBuilder())

    llm_interaction = DirectLLMInteractionStrategy(
        llm_handler=self.sql_llm,
        system_prompt="You are a SQL expert. Generate valid, executable SQL queries.",
    )
    builder = builder.with_llm_interaction(llm_interaction)

    response_processor = SQLResponseProcessor(
        db=self.db,
        max_preview_rows=10,
        execute_query=True,
    )
    builder = builder.with_response_processing(response_processor)

    fallback_recipe = builder.build()

    self.recipe_registry.register_recipe(fallback_recipe, set_as_default=True)
    logger.info(f"Registered fallback RAG recipe: {fallback_recipe.name}")

In [2]:
# Load the configuration
from src.config.config_loader import ConfigLoader

# Keyword argument of ConfigLoader.load_config -> path parts below <project_root>/configs
config_files = {
    "db_config_path": ("DB_connections", "northwind_postgres.yaml"),
    "cache_config_path": ("cache", "northwind_cache.yaml"),
    "sql_llm_config_path": ("sql_llm", "openai_4o_mini.yaml"),
    "vector_store_config_path": ("vector_store", "qdrant_northwind.yaml"),
    "prompt_config_path": ("prompt.yaml",),
    "metadata_config_path": ("metadata_.yaml",),
    "base_config_path": ("base_config.yaml",),
}

config = ConfigLoader.load_config(
    **{
        keyword: os.path.join(project_root, "configs", *parts)
        for keyword, parts in config_files.items()
    }
)

In [3]:
# Initialize the base components
from src.factories.database import DatabaseFactory
from src.factories.llm import LLMFactory
from src.factories.vector_store import VectorStoreFactory

# Create the essential components from the matching sections of `config`.
# `db`, `llm` and `vector_components` are reused by the cells below.
db = DatabaseFactory.create_connector(config.database)
llm = LLMFactory.create_handler(config.sql_llm)
vector_components = VectorStoreFactory.create(config.vector_store)

# NOTE: the vector store is expected to be already populated; nothing in this
# cell checks it.

  functions.register_function("flatten", flatten)
INFO:hey-database:Connected to PostgreSQL database


In [4]:
# Importare le classi del sistema RAG
from src.rag.recipe_registry import RAGRecipeRegistry
from src.rag.recipe_builder import RAGRecipeBuilder
from src.rag.recipe import RAGRecipe
from src.rag.recipe_loader import RAGRecipeLoader
from src.rag.models import RAGContext, RAGResponse

# Importare le strategie
from src.rag.strategies.context_processing.simple import SimpleContextProcessor
from src.rag.strategies.query_understanding.passthrough import PassthroughQueryUnderstandingStrategy
from src.rag.strategies.retrieval.cosine_sim import CosineSimRetrievalStrategy
from src.rag.strategies.prompt_building.standard import StandardPromptBuilder
from src.rag.strategies.llm_interaction.direct import DirectLLMInteractionStrategy
from src.rag.strategies.response_processing.sql_processor import SQLResponseProcessor 

In [5]:
# 1. Create the recipe registry used by the following cells
registry = RAGRecipeRegistry()
print("Registry inizializzato")

Registry inizializzato


In [6]:
# 2. Build a basic recipe with the builder.
# Each pipeline stage is created up front, then wired into the recipe.
basic_builder = RAGRecipeBuilder("basic_rag", "Ricetta RAG di base con cosine similarity")

basic_query_understanding = PassthroughQueryUnderstandingStrategy()
basic_retrieval = CosineSimRetrievalStrategy(
    vector_store_search=vector_components.search,
    tables_limit=3,
    columns_limit=5,
    queries_limit=2,
)
basic_context_processor = SimpleContextProcessor(
    include_table_descriptions=True,
    include_column_descriptions=True,
    include_sample_queries=True,
)
basic_prompt_builder = StandardPromptBuilder()
basic_llm_interaction = DirectLLMInteractionStrategy(
    llm_handler=llm,
    system_prompt="Sei un esperto SQL. Genera query SQL valide ed eseguibili in base alla domanda dell'utente e alle informazioni sullo schema del database.",
)
basic_response_processor = SQLResponseProcessor(
    db=db,
    max_preview_rows=10,
    execute_query=True,
)

basic_recipe = (
    basic_builder
    .with_query_understanding(basic_query_understanding)
    .with_retrieval(basic_retrieval)
    .with_context_processing(basic_context_processor)
    .with_prompt_building(basic_prompt_builder)
    .with_llm_interaction(basic_llm_interaction)
    .with_response_processing(basic_response_processor)
    .build()
)

print(f"Ricetta creata: {basic_recipe.name}")

Ricetta creata: basic_rag


In [7]:
# 3. Register the recipe as the default one, then list what the registry holds
registry.register_recipe(basic_recipe, set_as_default=True)
print("Ricette disponibili:")
for entry in registry.list_recipes():
    default_marker = '(default)' if entry['is_default'] else ''
    print(f"- {entry['name']}: {entry['description']} {default_marker}")

INFO:hey-database:Registered RAG recipe: basic_rag
INFO:hey-database:Set default RAG recipe to: basic_rag


Ricette disponibili:
- basic_rag: Ricetta RAG di base con cosine similarity (default)


In [8]:
# 4. Build a custom recipe with different parameters.
# Each pipeline stage is created up front, then wired into the recipe.
precise_builder = RAGRecipeBuilder("precise_rag", "Ricetta RAG con più contesto per query complesse")

precise_query_understanding = PassthroughQueryUnderstandingStrategy()
precise_retrieval = CosineSimRetrievalStrategy(
    vector_store_search=vector_components.search,
    tables_limit=5,   # more tables than basic_rag
    columns_limit=8,  # more columns
    queries_limit=3,  # more sample queries
)
precise_context_processor = SimpleContextProcessor(
    include_table_descriptions=True,
    include_column_descriptions=True,
    include_sample_queries=True,
    max_tables=5,
    max_columns=8,
)
precise_prompt_builder = StandardPromptBuilder()
precise_llm_interaction = DirectLLMInteractionStrategy(
    llm_handler=llm,
    system_prompt="Sei un esperto SQL per analisi dati. Genera query SQL ottimizzate e complesse.",
    temperature=0.05,  # lower temperature for more deterministic answers
)
precise_response_processor = SQLResponseProcessor(
    db=db,
    max_preview_rows=15,  # more preview rows
    execute_query=True,
)

custom_recipe = (
    precise_builder
    .with_query_understanding(precise_query_understanding)
    .with_retrieval(precise_retrieval)
    .with_context_processing(precise_context_processor)
    .with_prompt_building(precise_prompt_builder)
    .with_llm_interaction(precise_llm_interaction)
    .with_response_processing(precise_response_processor)
    .build()
)

# Register the custom recipe (not as default) and list the registry again
registry.register_recipe(custom_recipe)
print("\nRicette dopo l'aggiunta della ricetta personalizzata:")
for entry in registry.list_recipes():
    default_marker = '(default)' if entry['is_default'] else ''
    print(f"- {entry['name']}: {entry['description']} {default_marker}")

INFO:hey-database:Registered RAG recipe: precise_rag



Ricette dopo l'aggiunta della ricetta personalizzata:
- basic_rag: Ricetta RAG di base con cosine similarity (default)
- precise_rag: Ricetta RAG con più contesto per query complesse 


In [9]:
# 5. Creare ed eseguire una funzione per il test delle ricette
def test_rag_recipe(recipe_name=None, query="Elenca 5 clienti", recipe_registry=None):
    """
    Run one RAG recipe on a query and print the outcome.

    Args:
        recipe_name: Name of the recipe to run. ``None`` is passed through to
            ``get_recipe``; the notebook uses it to select the default recipe.
        query: Question passed to ``recipe.execute``.
        recipe_registry: Registry the recipe is looked up in. ``None`` (the
            default) uses the notebook-level ``registry``, so existing calls
            behave as before; pass another registry to test the recipes it holds.

    Returns:
        The response returned by ``recipe.execute``, or ``None`` when any
        exception was raised along the way (the message is printed).
    """
    try:
        print(f"\nTestando ricetta: {recipe_name or 'default'}")
        print(f"Query: {query}")

        # Resolve the registry inside the try block so that a missing global
        # `registry` is reported like any other failure.
        source_registry = registry if recipe_registry is None else recipe_registry
        recipe = source_registry.get_recipe(recipe_name)
        print(f"Usando ricetta: {recipe.name}")

        response = recipe.execute(query)

        print(f"\nSuccesso: {response.success}")

        if response.success:
            print(f"\nSQL generato:\n{response.query}")
            print(f"\nSpiegazione:\n{response.explanation}")

            if response.results:
                # pandas is only needed to render the preview table.
                import pandas as pd
                df = pd.DataFrame(response.preview)
                print("\nAnteprima risultati:")
                display(df)
        else:
            print(f"\nErrore: {response.error}")

        # Hand the response back for further inspection.
        return response

    except Exception as e:
        # Best effort on purpose: the tutorial keeps running after a failure.
        print(f"Errore durante l'esecuzione della ricetta: {str(e)}")
        return None

In [10]:
# Run the default recipe (no recipe_name is given) on a free-form request.
default_response = test_rag_recipe(query="Mostrami 5 record a tuo piacimento")


Testando ricetta: default
Query: Mostrami 5 record a tuo piacimento
Usando ricetta: basic_rag


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:hey-database:Table search results count: 3
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:hey-database:Column search results count: 5
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"
INFO:hey-database:Query search results count: 0
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
ERROR:hey-database:Query execution failed


Errore nell'esecuzione della query: (psycopg2.errors.UndefinedTable) relation "schema_name.order_details" does not exist
LINE 1: SELECT * FROM schema_name.order_details LIMIT 5
                      ^

[SQL: SELECT * FROM schema_name.order_details LIMIT 5]
(Background on this error at: https://sqlalche.me/e/20/f405)

Successo: False

Errore: Failed to execute the SQL query


In [None]:
# 6. Test the custom recipe on a multi-table aggregation question
revenue_question = (
    "Calcola il fatturato totale per categoria di prodotto e paese del cliente, "
    "mostrando solo le combinazioni con fatturato superiore a 5000"
)
custom_response = test_rag_recipe(recipe_name="precise_rag", query=revenue_question)

In [None]:
# 7. Change the default recipe
registry.set_default_recipe("precise_rag")
current_default = registry.get_default_recipe_name()
print(f"Ricetta predefinita cambiata a: {current_default}")

# Check the change: no recipe_name is given, so the new default is used
new_default_response = test_rag_recipe(
    query="Quali sono i clienti che non hanno fatto ordini nell'ultimo trimestre?"
)

In [None]:
# 8. Save a recipe as YAML (optional)
import yaml
import os

# Create the recipes directory if it does not exist yet
recipes_dir = os.path.join(project_root, "configs", "rag_recipes")
os.makedirs(recipes_dir, exist_ok=True)

# Configuration of the custom recipe: one entry per pipeline stage, each with
# the strategy class name and its constructor parameters. The values mirror
# the `precise_rag` recipe built in code earlier in the notebook.
recipe_config = {
    "name": "precise_rag",
    "description": "Ricetta RAG con più contesto per query complesse",
    "query_understanding": {
        "type": "PassthroughQueryUnderstandingStrategy",
        "params": {}
    },
    "retrieval": {
        "type": "CosineSimRetrievalStrategy",
        "params": {
            "tables_limit": 5,
            "columns_limit": 8,
            "queries_limit": 3
        }
    },
    "context_processing": {
        "type": "SimpleContextProcessor",
        "params": {
            "include_table_descriptions": True,
            "include_column_descriptions": True,
            "include_sample_queries": True,
            "max_tables": 5,
            "max_columns": 8
        }
    },
    "prompt_building": {
        "type": "StandardPromptBuilder",
        "params": {
            "include_original_query": True
        }
    },
    "llm_interaction": {
        "type": "DirectLLMInteractionStrategy",
        "params": {
            "system_prompt": "Sei un esperto SQL per analisi dati. Genera query SQL ottimizzate e complesse.",
            "temperature": 0.05
        }
    },
    "response_processing": {
        "type": "SQLResponseProcessor",
        "params": {
            "max_preview_rows": 15,
            "execute_query": True
        }
    }
}

# Write the configuration to disk.
# - explicit encoding: the result no longer depends on the platform default;
# - sort_keys=False: keeps `name` first and the stages in pipeline order
#   instead of sorting the keys alphabetically;
# - safe_dump: emits plain YAML tags only (the data is built-in types).
recipe_file_path = os.path.join(recipes_dir, "precise_rag.yaml")
with open(recipe_file_path, 'w', encoding='utf-8') as f:
    yaml.safe_dump(recipe_config, f, default_flow_style=False, sort_keys=False)

print(f"Ricetta salvata in: {recipe_file_path}")

In [None]:
# 9. Load a recipe from a YAML file
# Start from a new registry
new_registry = RAGRecipeRegistry()

# Dependencies handed to the loader
dependencies = {
    "db": db,
    "llm_handler": llm,
    "vector_store": vector_components.store,
    "vector_store_search": vector_components.search,
    "db_connector": db
}

# Create the loader with those dependencies
loader = RAGRecipeLoader(dependencies=dependencies)

# Load the recipe from the file written by the previous cell
loaded_recipe = loader.load_recipe(recipe_file_path)
print(f"Ricetta caricata: {loaded_recipe.name}")

# Register the loaded recipe
new_registry.register_recipe(loaded_recipe, set_as_default=True)

# Check that the recipe is in the new registry
print("\nRicette nel nuovo registry:")
for recipe_info in new_registry.list_recipes():
    print(f"- {recipe_info['name']}: {recipe_info['description']} {'(default)' if recipe_info['is_default'] else ''}")

# Test the loaded recipe.
# Calling test_rag_recipe(recipe_name=loaded_recipe.name) would look the name
# up in the notebook-level `registry`, which holds the recipe built in code
# under the same name, so the YAML-loaded instance would never run. Execute
# the instance registered in `new_registry` instead.
loaded_query = "Trova i primi 10 prodotti per fatturato"
print(f"\nTestando ricetta: {loaded_recipe.name}")
print(f"Query: {loaded_query}")
loaded_response = new_registry.get_recipe(loaded_recipe.name).execute(loaded_query)

print(f"\nSuccesso: {loaded_response.success}")
if loaded_response.success:
    print(f"\nSQL generato:\n{loaded_response.query}")
else:
    print(f"\nErrore: {loaded_response.error}")

In [None]:
# 10. Load every recipe found in a directory
# Several recipes can be saved in the "rag_recipes" directory
# and then loaded in a single call.

# Yet another registry, filled from the directory
dir_registry = RAGRecipeRegistry()

# Load all the recipes from the directory
recipes = loader.load_recipes_from_directory(recipes_dir)
print(f"Caricate {len(recipes)} ricette dalla directory {recipes_dir}")

# Register each of them
for recipe in recipes:
    dir_registry.register_recipe(recipe)

# The first loaded recipe, if any, becomes the default
if recipes:
    first_recipe_name = recipes[0].name
    dir_registry.set_default_recipe(first_recipe_name)

print("\nRicette caricate dalla directory:")
for entry in dir_registry.list_recipes():
    default_marker = '(default)' if entry['is_default'] else ''
    print(f"- {entry['name']}: {entry['description']} {default_marker}")

In [None]:
# 11. Resource cleanup
# Close the database connection created at the start of the notebook
db.close()
print("Risorse rilasciate")