In [1]:
import sys
import os
from pathlib import Path
project_root = Path.cwd().parent
sys.path.append(str(project_root))
from typing import Dict, List
import logging
from src.embedding.openai_embedding import OpenAIEmbedding
from src.store.qdrant_vectorstore import QdrantStore
from src.config.models.vector_store import TablePayload, QueryPayload
from src.schema_metadata.enhancer import MetadataEnhancer
from src.llm_handler.openai_handler import OpenAIHandler
from src.connettori.postgres import PostgresManager
from src.schema_metadata.postgres_metadata_retriever import PostgresMetadataRetriever

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configurazione 
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

def initialize_store_with_metadata():
    """Inizializza lo store con i metadati delle tabelle"""
    
    # Connessione al DB
    db_manager = PostgresManager(
        host="localhost",
        port="5432",
        database="postgres",
        user="postgres",
        password="admin"
    )
    if not db_manager.connect():
        raise RuntimeError("Failed to connect to database")
    
    # Crea metadata retriever
    metadata_retriever = PostgresMetadataRetriever(
        db_manager.engine, 
        schema="video_games"  # il tuo schema
    )
    
    # Crea LLM e enhancer
    llm = OpenAIHandler(
        api_key=OPENAI_API_KEY,
        chat_model="gpt-4o"
    )
    metadata_enhancer = MetadataEnhancer(llm)
    
    # Estrai e arricchisci i metadati
    base_metadata = metadata_retriever.get_all_tables_metadata()
    enhanced_metadata = metadata_enhancer.enhance_all_metadata(base_metadata)
    
    # Crea e inizializza vector store
    embedding_model = OpenAIEmbedding(
        api_key=OPENAI_API_KEY,
        model="text-embedding-3-small"
    )
    
    store = QdrantStore(
        path="./video_games_store",
        collection_name="video_games",
        embedding_model=embedding_model
    )
    
    # Inizializza con i metadati
    store.initialize(enhanced_metadata)
    
    return store


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/giacomo/miniconda3/envs/heydatabase/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/giacomo/miniconda3/envs/heydatabase/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/giacomo/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/giacomo/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1053, in launch_

AttributeError: _ARRAY_API not found


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.1.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/home/giacomo/miniconda3/envs/heydatabase/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/home/giacomo/miniconda3/envs/heydatabase/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/home/giacomo/.local/lib/python3.10/site-packages/ipykernel_launcher.py", line 17, in <module>
    app.launch_new_instance()
  File "/home/giacomo/.local/lib/python3.10/site-packages/traitlets/config/application.py", line 1053, in launch_

AttributeError: _ARRAY_API not found

In [2]:
def print_table_results(results):
    if not results:
        print("Nessuna tabella trovata")
        return
        
    print("\nTabelle trovate:")
    for result in results:
        print(f"\nTabella: {result.table_name}")
        print(f"Score: {result.relevance_score}")
        print(f"Descrizione: {result.metadata.description}")
        print("-" * 50)

def print_query_results(results):
    if not results:
        print("Nessuna query trovata")
        return
        
    print("\nQuery simili trovate:")
    for result in results:
        print(f"\nDomanda: {result.question}")
        print(f"Score: {result.score}")
        print(f"SQL: {result.sql_query}")
        print(f"Spiegazione: {result.explanation}")
        print(f"Voti positivi: {result.positive_votes}")
        print("-" * 50)

In [3]:
print("Inizializzazione store con metadati...")
store = initialize_store_with_metadata()

Inizializzazione store con metadati...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [4]:
# Test ricerca tabelle
print("\nTest ricerca tabelle...")
queries = [
    "vendite per regione",
    "dettagli prodotti",
    "catalogo giochi"
]

for query in queries:
    print(f"\nCerca tabelle per: '{query}'")
    results = store.search_similar_tables(query, limit=3)
    print_table_results(results)


Test ricerca tabelle...

Cerca tabelle per: 'vendite per regione'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: region_sales
Score: 0.3595001913016593
Descrizione: The `region_sales` table records sales data for various game platforms across different regions, containing 130,640 entries. Each record includes a `region_id` and a `game_platform_id`, which are linked to the `region` and `game_platform` tables, respectively, along with `num_sales`, which represents the number of sales in millions with two decimal precision. This table is essential for analyzing and comparing game platform sales performance across different geographical areas.
--------------------------------------------------

Tabella: region
Score: 0.3240515896635337
Descrizione: The "region" table is designed to store information about different geographical or administrative regions. It contains two columns: "id," which is a unique identifier for each region and serves as the primary key, and "region_name," which holds the name of the region with a maximum length of 50 characters. The table currently c

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: publisher
Score: 0.18031684062067593
Descrizione: The "publisher" table is designed to store information about publishers, with each record representing a unique publisher. It contains 577 entries and includes two columns: "id," which is an integer serving as the primary key, and "publisher_name," a variable character field up to 100 characters long that holds the name of the publisher. The table does not have any foreign key relationships, indicating it functions independently within the database.
--------------------------------------------------

Tabella: region
Score: 0.16662991448977937
Descrizione: The "region" table is designed to store information about different geographical or administrative regions. It contains two columns: "id," which is a unique identifier for each region and serves as the primary key, and "region_name," which holds the name of the region with a maximum length of 50 characters. The table currently contains four records, indicati

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: game
Score: 0.3600812360338388
Descrizione: The "game" table stores information about various games, with each record uniquely identified by the "id" column. It includes the game's name ("game_name") and associates each game with a genre through the "genre_id" column, which references the "id" column in the "genre" table. This structure allows for categorization and easy retrieval of games based on their genres.
--------------------------------------------------

Tabella: game_publisher
Score: 0.34901523517051314
Descrizione: The `game_publisher` table serves as a junction table that establishes a many-to-many relationship between games and publishers. It contains 11,732 records, with each record linking a game (via `game_id`) to a publisher (via `publisher_id`). The `id` column uniquely identifies each record, while the foreign keys ensure referential integrity with the `game` and `publisher` tables.
--------------------------------------------------

Tabel

In [5]:
# Test ricerca query simili
print("\nTest ricerca query simili...")
example_queries = [
    "mostra le vendite totali",
    "quali sono i prodotti più costosi",
    "trova i giochi più venduti"
]

for query in example_queries:
    print(f"\nCerca query simili a: '{query}'")
    results = store.search_similar_queries(query, limit=2)
    print_query_results(results)


Test ricerca query simili...

Cerca query simili a: 'mostra le vendite totali'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: quale è il gioco action più venduto?
Score: 0.41406251107241776
SQL: SELECT g.game_name, SUM(rs.num_sales) AS total_sales FROM video_games.game g JOIN video_games.genre ge ON g.genre_id = ge.id JOIN video_games.game_publisher gp ON g.id = gp.game_id JOIN video_games.game_platform gpl ON gp.id = gpl.game_publisher_id JOIN video_games.region_sales rs ON gpl.id = rs.game_platform_id WHERE ge.genre_name = 'Action' GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1
Spiegazione: This query retrieves the name of the most sold 'Action' genre game by summing up the sales across all regions. It joins the necessary tables to filter games by the 'Action' genre and calculates the total sales for each game. The result is ordered by total sales in descending order, and the top result is returned, which represents the most sold 'Action' game.
Voti positivi: 1
--------------------------------------------------

Cerca query simili a: 'quali sono i prodotti più costos

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: quale è il gioco action più venduto?
Score: 0.3743629825305107
SQL: SELECT g.game_name, SUM(rs.num_sales) AS total_sales FROM video_games.game g JOIN video_games.genre ge ON g.genre_id = ge.id JOIN video_games.game_publisher gp ON g.id = gp.game_id JOIN video_games.game_platform gpl ON gp.id = gpl.game_publisher_id JOIN video_games.region_sales rs ON gpl.id = rs.game_platform_id WHERE ge.genre_name = 'Action' GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1
Spiegazione: This query retrieves the name of the most sold 'Action' genre game by summing up the sales across all regions. It joins the necessary tables to filter games by the 'Action' genre and calculates the total sales for each game. The result is ordered by total sales in descending order, and the top result is returned, which represents the most sold 'Action' game.
Voti positivi: 1
--------------------------------------------------

Cerca query simili a: 'trova i giochi più venduti'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: quale è il gioco action più venduto?
Score: 0.6721288065764581
SQL: SELECT g.game_name, SUM(rs.num_sales) AS total_sales FROM video_games.game g JOIN video_games.genre ge ON g.genre_id = ge.id JOIN video_games.game_publisher gp ON g.id = gp.game_id JOIN video_games.game_platform gpl ON gp.id = gpl.game_publisher_id JOIN video_games.region_sales rs ON gpl.id = rs.game_platform_id WHERE ge.genre_name = 'Action' GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1
Spiegazione: This query retrieves the name of the most sold 'Action' genre game by summing up the sales across all regions. It joins the necessary tables to filter games by the 'Action' genre and calculates the total sales for each game. The result is ordered by total sales in descending order, and the top result is returned, which represents the most sold 'Action' game.
Voti positivi: 1
--------------------------------------------------


In [8]:
print("\nTest feedback positivo...")
feedback_query = {
    "question": "Qual è il gioco più venduto?",
    "sql_query": "SELECT g.game_name, SUM(rs.num_sales) as total_sales FROM games g JOIN region_sales rs ON g.id = rs.game_id GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1",
    "explanation": "Questa query trova il gioco con il maggior numero di vendite totali sommando le vendite in tutte le regioni"
}

success = store.handle_positive_feedback(
    question=feedback_query["question"],
    sql_query=feedback_query["sql_query"],
    explanation=feedback_query["explanation"]
)
print(f"Feedback salvato: {success}")


Test feedback positivo...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Feedback salvato: True


In [9]:
# Verifica che la query sia stata salvata
print("\nVerifica query salvata...")
results = store.search_similar_queries("gioco più venduto", limit=1)
print_query_results(results)


Verifica query salvata...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: Qual è il gioco più venduto?
Score: 0.8299179812225611
SQL: SELECT g.game_name, SUM(rs.num_sales) as total_sales FROM games g JOIN region_sales rs ON g.id = rs.game_id GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1
Spiegazione: Questa query trova il gioco con il maggior numero di vendite totali sommando le vendite in tutte le regioni
Voti positivi: 1
--------------------------------------------------


In [8]:
from src.config.models.vector_store import TablePayload, QueryPayload

test_table_metadata = TablePayload(
    type='table',
    table_name='products',
    description='Table storing product information including name, price and category',
    keywords=['products', 'price', 'category'],
    columns=[
        {'name': 'id', 'type': 'INTEGER', 'nullable': False},
        {'name': 'name', 'type': 'VARCHAR(100)', 'nullable': False},
        {'name': 'price', 'type': 'DECIMAL(10,2)', 'nullable': True},
        {'name': 'category_id', 'type': 'INTEGER', 'nullable': True}
    ],
    primary_keys=['id'],
    foreign_keys=[{'constrained_columns': ['category_id'], 'referred_table': 'categories', 'referred_columns': ['id']}],
    row_count=100,
    importance_score=0.8
)

store.add_table(test_table_metadata)

ERROR:hey-database:Error adding table metadata: 'TablePayload' object has no attribute 'base_metadata'


False