In [2]:
import sys
import os
from pathlib import Path
project_root = Path.cwd().parent
sys.path.append(str(project_root))
from typing import Dict, List
import logging
from src.embedding.openai_embedding import OpenAIEmbedding
from src.store.qdrant_vectorstore import QdrantStore
from src.config.models.vector_store import TablePayload, QueryPayload
from src.schema_metadata.enhancer import MetadataEnhancer
from src.llm_handler.openai_handler import OpenAIHandler
from src.connettori.postgres import PostgresManager
from src.schema_metadata.postgres_metadata_retriever import PostgresMetadataRetriever

# Logging setup: root logger at INFO level. At this level third-party request
# logs (e.g. the `INFO:httpx:` lines in the recorded outputs) are also shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration: the OpenAI API key is read from the environment.
# NOTE: os.getenv returns None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

def initialize_store_with_metadata(
    schema: str = "test_schema",
    store_path: str = "./my_vector_store",
    collection_name: str = "my_collection",
) -> "QdrantStore":
    """Build the vector store and initialize it with enhanced table metadata.

    Steps, in order:
      1. connect to PostgreSQL through ``PostgresManager``;
      2. read the metadata of every table in ``schema``;
      3. pass that metadata through ``MetadataEnhancer`` (backed by the
         ``gpt-4o`` chat model);
      4. create a ``QdrantStore`` using ``text-embedding-3-small`` embeddings
         and call ``store.initialize`` with the enhanced metadata.

    Connection settings are taken from the environment variables
    ``POSTGRES_HOST``, ``POSTGRES_PORT``, ``POSTGRES_DB``, ``POSTGRES_USER``
    and ``POSTGRES_PASSWORD``. When a variable is unset, the previous
    local-development value is used so existing setups keep working.

    Args:
        schema: Database schema whose tables are indexed.
        store_path: Path handed to ``QdrantStore``.
        collection_name: Collection name handed to ``QdrantStore``.

    Returns:
        The initialized ``QdrantStore`` instance.

    Raises:
        RuntimeError: If no OpenAI API key is available, or if the database
            connection cannot be established.
    """
    # Fail fast, before opening any connection, when the API key is missing.
    # The environment is re-read in case the variable was exported after the
    # module-level constant had been evaluated.
    api_key = OPENAI_API_KEY or os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY is not set")

    # Database connection. Credentials come from the environment; the
    # fallbacks are local-development defaults only and should be overridden
    # anywhere else.
    db_manager = PostgresManager(
        host=os.getenv("POSTGRES_HOST", "localhost"),
        port=os.getenv("POSTGRES_PORT", "5432"),
        database=os.getenv("POSTGRES_DB", "postgres"),
        user=os.getenv("POSTGRES_USER", "postgres"),
        password=os.getenv("POSTGRES_PASSWORD", "admin"),
    )
    if not db_manager.connect():
        raise RuntimeError("Failed to connect to database")
    # NOTE(review): the connection is never released here; whether
    # PostgresManager exposes a close/dispose method is not visible from this
    # notebook -- confirm and release it once the metadata has been read.

    # Metadata retriever bound to the requested schema.
    metadata_retriever = PostgresMetadataRetriever(
        db_manager.engine,
        schema=schema,
    )

    # LLM handler and the enhancer that uses it.
    llm = OpenAIHandler(
        api_key=api_key,
        chat_model="gpt-4o",
    )
    metadata_enhancer = MetadataEnhancer(llm)

    # Extract the raw metadata, then enrich it.
    base_metadata = metadata_retriever.get_all_tables_metadata()
    enhanced_metadata = metadata_enhancer.enhance_all_metadata(base_metadata)

    # Embedding model and vector store.
    embedding_model = OpenAIEmbedding(
        api_key=api_key,
        model="text-embedding-3-small",
    )
    store = QdrantStore(
        path=store_path,
        collection_name=collection_name,
        embedding_model=embedding_model,
    )

    # Initialize the store with the enhanced metadata.
    store.initialize(enhanced_metadata)

    return store

In [6]:
def print_table_results(results):
    """Print table search hits to stdout.

    For each item in ``results`` the table name, relevance score and the
    metadata description are printed, followed by a 50-dash divider.
    When ``results`` is falsy a "nothing found" message is printed instead.
    """
    if results:
        divider = "-" * 50
        print("\nTabelle trovate:")
        for hit in results:
            print(f"\nTabella: {hit.table_name}")
            print(f"Score: {hit.relevance_score}")
            print(f"Descrizione: {hit.metadata.description}")
            print(divider)
    else:
        print("Nessuna tabella trovata")

def print_query_results(results):
    """Print similar-query search hits to stdout.

    For each item in ``results`` the question, score, SQL text, explanation
    and positive-vote count are printed, followed by a 50-dash divider.
    When ``results`` is falsy a "nothing found" message is printed instead.
    """
    if results:
        divider = "-" * 50
        print("\nQuery simili trovate:")
        for match in results:
            print(f"\nDomanda: {match.question}")
            print(f"Score: {match.score}")
            print(f"SQL: {match.sql_query}")
            print(f"Spiegazione: {match.explanation}")
            print(f"Voti positivi: {match.positive_votes}")
            print(divider)
    else:
        print("Nessuna query trovata")

In [4]:
# Build the vector store once; the resulting `store` object is used by every
# cell below, so this cell must run before them.
print("Inizializzazione store con metadati...")
store = initialize_store_with_metadata()

Inizializzazione store con metadati...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [7]:
# Table search test: for each natural-language query, ask the store for up to
# three similar tables and print the hits.
print("\nTest ricerca tabelle...")
queries = [
    "vendite per regione",
    "dettagli prodotti",
    "catalogo giochi"
]

for query in queries:
    print(f"\nCerca tabelle per: '{query}'")
    results = store.search_similar_tables(query, limit=3)
    print_table_results(results)


Test ricerca tabelle...

Cerca tabelle per: 'vendite per regione'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: products
Score: 0.16775369866767337
Descrizione: The "products" table stores information about various products, each uniquely identified by the "id" column, which serves as the primary key. It includes details such as the product name, price, and a reference to the product's category through the "category_id" column, which is a foreign key linking to the "categories" table. This structure allows for the organization and categorization of products within a broader database system.
--------------------------------------------------

Tabella: categories
Score: 0.08898442249234045
Descrizione: The "categories" table is designed to store information about different categories, with each category uniquely identified by an "id" column. It includes a "name" column for the category's name and an optional "description" column for additional details. The table does not reference any other tables, as it contains no foreign keys.
----------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: products
Score: 0.40813496947066785
Descrizione: The "products" table stores information about various products, each uniquely identified by the "id" column, which serves as the primary key. It includes details such as the product name, price, and a reference to the product's category through the "category_id" column, which is a foreign key linking to the "categories" table. This structure allows for the organization and categorization of products within a broader database system.
--------------------------------------------------

Tabella: categories
Score: 0.22879795515511997
Descrizione: The "categories" table is designed to store information about different categories, with each category uniquely identified by an "id" column. It includes a "name" column for the category's name and an optional "description" column for additional details. The table does not reference any other tables, as it contains no foreign keys.
----------------------------------------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: products
Score: 0.2344156504940525
Descrizione: The "products" table stores information about various products, each uniquely identified by the "id" column, which serves as the primary key. It includes details such as the product name, price, and a reference to the product's category through the "category_id" column, which is a foreign key linking to the "categories" table. This structure allows for the organization and categorization of products within a broader database system.
--------------------------------------------------

Tabella: categories
Score: 0.208913035719358
Descrizione: The "categories" table is designed to store information about different categories, with each category uniquely identified by an "id" column. It includes a "name" column for the category's name and an optional "description" column for additional details. The table does not reference any other tables, as it contains no foreign keys.
-------------------------------------------

In [8]:
# Similar-query search test: for each question, ask the store for up to two
# previously stored queries and print them.
# In the recorded run no stored query was found for any of these questions.
print("\nTest ricerca query simili...")
example_queries = [
    "mostra le vendite totali",
    "quali sono i prodotti più costosi",
    "trova i giochi più venduti"
]

for query in example_queries:
    print(f"\nCerca query simili a: '{query}'")
    results = store.search_similar_queries(query, limit=2)
    print_query_results(results)


Test ricerca query simili...

Cerca query simili a: 'mostra le vendite totali'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Nessuna query trovata

Cerca query simili a: 'quali sono i prodotti più costosi'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Nessuna query trovata

Cerca query simili a: 'trova i giochi più venduti'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Nessuna query trovata


In [8]:
# Positive-feedback test: submit one question / SQL / explanation triple to
# the store and print the value returned by handle_positive_feedback
# (True in the recorded run).
print("\nTest feedback positivo...")
feedback_query = {
    "question": "Qual è il gioco più venduto?",
    "sql_query": "SELECT g.game_name, SUM(rs.num_sales) as total_sales FROM games g JOIN region_sales rs ON g.id = rs.game_id GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1",
    "explanation": "Questa query trova il gioco con il maggior numero di vendite totali sommando le vendite in tutte le regioni"
}

success = store.handle_positive_feedback(
    question=feedback_query["question"],
    sql_query=feedback_query["sql_query"],
    explanation=feedback_query["explanation"]
)
print(f"Feedback salvato: {success}")


Test feedback positivo...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Feedback salvato: True


In [9]:
# Check that the query was saved: search with a paraphrase of the question
# submitted in the feedback cell and print the single best match.
print("\nVerifica query salvata...")
results = store.search_similar_queries("gioco più venduto", limit=1)
print_query_results(results)


Verifica query salvata...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: Qual è il gioco più venduto?
Score: 0.8299179812225611
SQL: SELECT g.game_name, SUM(rs.num_sales) as total_sales FROM games g JOIN region_sales rs ON g.id = rs.game_id GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1
Spiegazione: Questa query trova il gioco con il maggior numero di vendite totali sommando le vendite in tutte le regioni
Voti positivi: 1
--------------------------------------------------


In [8]:
from src.config.models.vector_store import TablePayload, QueryPayload

# Hand-built payload describing a `products` table, used to exercise
# store.add_table directly.
test_table_metadata = TablePayload(
    type='table',
    table_name='products',
    description='Table storing product information including name, price and category',
    keywords=['products', 'price', 'category'],
    columns=[
        {'name': 'id', 'type': 'INTEGER', 'nullable': False},
        {'name': 'name', 'type': 'VARCHAR(100)', 'nullable': False},
        {'name': 'price', 'type': 'DECIMAL(10,2)', 'nullable': True},
        {'name': 'category_id', 'type': 'INTEGER', 'nullable': True}
    ],
    primary_keys=['id'],
    foreign_keys=[{'constrained_columns': ['category_id'], 'referred_table': 'categories', 'referred_columns': ['id']}],
    row_count=100,
    importance_score=0.8
)

# FIXME(review): in the recorded run this call logs
# "'TablePayload' object has no attribute 'base_metadata'" and returns False.
# add_table presumably expects a different metadata object (one exposing
# `base_metadata`) rather than a TablePayload -- confirm against QdrantStore.
store.add_table(test_table_metadata)

ERROR:hey-database:Error adding table metadata: 'TablePayload' object has no attribute 'base_metadata'


False