In [1]:
import sys
import os
from pathlib import Path
project_root = Path.cwd().parent
sys.path.append(str(project_root))
from typing import Dict, List
import logging
from src.embedding.openai_embedding import OpenAIEmbedding
from src.store.qdrant_vectorstore import QdrantStore
from src.config.models.vector_store import TablePayload, QueryPayload
from src.schema_metadata.enhancer import MetadataEnhancer
from src.llm_handler.openai_handler import OpenAIHandler
from src.connettori.postgres import PostgresManager
from src.schema_metadata.postgres_metadata_retriever import PostgresMetadataRetriever

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configurazione 
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

def initialize_store_with_metadata():
    """Inizializza lo store con i metadati delle tabelle"""
    
    # Connessione al DB
    db_manager = PostgresManager(
        host="localhost",
        port="5432",
        database="postgres",
        user="postgres",
        password="admin"
    )
    if not db_manager.connect():
        raise RuntimeError("Failed to connect to database")
    
    # Crea metadata retriever
    metadata_retriever = PostgresMetadataRetriever(
        db_manager.engine, 
        schema="video_games"  # il tuo schema
    )
    
    # Crea LLM e enhancer
    llm = OpenAIHandler(
        api_key=OPENAI_API_KEY,
        chat_model="gpt-4o"
    )
    metadata_enhancer = MetadataEnhancer(llm)
    
    # Estrai e arricchisci i metadati
    base_metadata = metadata_retriever.get_all_tables_metadata()
    enhanced_metadata = metadata_enhancer.enhance_all_metadata(base_metadata)
    
    # Crea e inizializza vector store
    embedding_model = OpenAIEmbedding(
        api_key=OPENAI_API_KEY,
        model="text-embedding-3-small"
    )
    
    store = QdrantStore(
        path="./video_games_store",
        collection_name="video_games_store",
        embedding_model=embedding_model
    )
    
    # Inizializza con i metadati
    store.initialize(enhanced_metadata)
    
    return store

In [2]:
def print_table_results(results):
    if not results:
        print("Nessuna tabella trovata")
        return
        
    print("\nTabelle trovate:")
    for result in results:
        print(f"\nTabella: {result.table_name}")
        print(f"Score: {result.relevance_score}")
        print(f"Descrizione: {result.metadata.description}")
        print("-" * 50)

def print_query_results(results):
    if not results:
        print("Nessuna query trovata")
        return
        
    print("\nQuery simili trovate:")
    for result in results:
        print(f"\nDomanda: {result.question}")
        print(f"Score: {result.score}")
        print(f"SQL: {result.sql_query}")
        print(f"Spiegazione: {result.explanation}")
        print(f"Voti positivi: {result.positive_votes}")
        print("-" * 50)

In [3]:
print("Inizializzazione store con metadati...")
store = initialize_store_with_metadata()

Inizializzazione store con metadati...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [4]:
# Test ricerca tabelle
print("\nTest ricerca tabelle...")
queries = [
    "vendite per regione",
    "dettagli prodotti",
    "catalogo giochi"
]

for query in queries:
    print(f"\nCerca tabelle per: '{query}'")
    results = store.search_similar_tables(query, limit=3)
    print_table_results(results)


Test ricerca tabelle...

Cerca tabelle per: 'vendite per regione'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: region_sales
Score: 0.38547605299358856
Descrizione: The `region_sales` table tracks the sales figures of games across different regions and platforms. Each record associates a specific region and game platform with the number of sales, represented as a numeric value with two decimal places. The table uses foreign keys to link to the `region` and `game_platform` tables, ensuring data consistency and relational integrity.
--------------------------------------------------

Tabella: region
Score: 0.31783389301488907
Descrizione: The "region" table is designed to store information about different geographical regions. It contains two columns: "id," which serves as the primary key and uniquely identifies each region, and "region_name," which holds the name of the region, allowing for up to 50 characters. With four records, this table likely represents a small set of predefined regions used for categorization or reference within a larger database system.
--------

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: publisher
Score: 0.18335946419050658
Descrizione: The "publisher" table is designed to store information about publishers, with each record representing a unique publisher. It contains 577 entries and includes two columns: "id," which serves as the primary key to uniquely identify each publisher, and "publisher_name," which holds the name of the publisher up to 100 characters. The table does not have any foreign key relationships, indicating it is likely a standalone entity within the database.
--------------------------------------------------

Tabella: region_sales
Score: 0.17710558376457272
Descrizione: The `region_sales` table tracks the sales figures of games across different regions and platforms. Each record associates a specific region and game platform with the number of sales, represented as a numeric value with two decimal places. The table uses foreign keys to link to the `region` and `game_platform` tables, ensuring data consistency and relation

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Tabelle trovate:

Tabella: game
Score: 0.35903726759430166
Descrizione: The "game" table stores information about various games, with each record representing a unique game identified by the "id" column. It includes the game's name ("game_name") and associates each game with a genre through the "genre_id" column, which references the "id" in the "genre" table. This structure facilitates the organization and categorization of games by genre.
--------------------------------------------------

Tabella: game_publisher
Score: 0.3417370336519434
Descrizione: The `game_publisher` table serves as a junction table that establishes a many-to-many relationship between games and publishers. It contains 11,732 records, each uniquely identified by the `id` primary key, and links a game (`game_id`) to a publisher (`publisher_id`) through foreign keys referencing the `game` and `publisher` tables, respectively. This structure allows for the association of multiple publishers with a single game and v

In [5]:
# Test ricerca query simili
print("\nTest ricerca query simili...")
example_queries = [
    "mostra le vendite totali",
    "quali sono i prodotti più costosi",
    "trova i giochi più venduti"
]

for query in example_queries:
    print(f"\nCerca query simili a: '{query}'")
    results = store.search_similar_queries(query, limit=2)
    print_query_results(results)


Test ricerca query simili...

Cerca query simili a: 'mostra le vendite totali'


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: quale gioco ha venduto di più in giappone?
Score: 0.4356128182817005
SQL: SELECT g.game_name, SUM(rs.num_sales) AS total_sales FROM video_games.region_sales rs JOIN video_games.game_platform gp ON rs.game_platform_id = gp.id JOIN video_games.game_publisher gpbl ON gp.game_publisher_id = gpbl.id JOIN video_games.game g ON gpbl.game_id = g.id WHERE rs.region_id = 3 GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1
Spiegazione: This query retrieves the game with the highest total sales in Japan. It joins the necessary tables to link games with their sales data in the Japanese region, aggregates the sales for each game, and orders the results to find the game with the highest sales. The result will show the name of the game and its total sales in Japan.
Voti positivi: 1
--------------------------------------------------

Domanda: quali sono i generi più popolari?
Score: 0.3641239519767405
SQL: SELECT g.genre_name, SUM(rs.num_sales) AS total_sales FROM 

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: quali sono i generi più popolari?
Score: 0.4118532752061207
SQL: SELECT g.genre_name, SUM(rs.num_sales) AS total_sales FROM video_games.genre g JOIN video_games.game ga ON g.id = ga.genre_id JOIN video_games.game_publisher gp ON ga.id = gp.game_id JOIN video_games.game_platform gpl ON gp.id = gpl.game_publisher_id JOIN video_games.region_sales rs ON gpl.id = rs.game_platform_id GROUP BY g.genre_name ORDER BY total_sales DESC LIMIT 5;
Spiegazione: This query calculates the total sales for each game genre by joining the relevant tables and summing up the sales across all regions. It groups the results by genre name and orders them in descending order of total sales, returning the top 5 genres. This will show the most popular genres based on sales data.
Voti positivi: 1
--------------------------------------------------

Domanda: quale gioco ha venduto di più in giappone?
Score: 0.3250764508488201
SQL: SELECT g.game_name, SUM(rs.num_sales) AS total_sales F

INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: quale gioco ha venduto di più in giappone?
Score: 0.5731292708724081
SQL: SELECT g.game_name, SUM(rs.num_sales) AS total_sales FROM video_games.region_sales rs JOIN video_games.game_platform gp ON rs.game_platform_id = gp.id JOIN video_games.game_publisher gpbl ON gp.game_publisher_id = gpbl.id JOIN video_games.game g ON gpbl.game_id = g.id WHERE rs.region_id = 3 GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1
Spiegazione: This query retrieves the game with the highest total sales in Japan. It joins the necessary tables to link games with their sales data in the Japanese region, aggregates the sales for each game, and orders the results to find the game with the highest sales. The result will show the name of the game and its total sales in Japan.
Voti positivi: 1
--------------------------------------------------

Domanda: quali sono i generi più popolari?
Score: 0.46843466728538
SQL: SELECT g.genre_name, SUM(rs.num_sales) AS total_sales FROM vi

In [8]:
print("\nTest feedback positivo...")
feedback_query = {
    "question": "Qual è il gioco più venduto?",
    "sql_query": "SELECT g.game_name, SUM(rs.num_sales) as total_sales FROM games g JOIN region_sales rs ON g.id = rs.game_id GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1",
    "explanation": "Questa query trova il gioco con il maggior numero di vendite totali sommando le vendite in tutte le regioni"
}

success = store.handle_positive_feedback(
    question=feedback_query["question"],
    sql_query=feedback_query["sql_query"],
    explanation=feedback_query["explanation"]
)
print(f"Feedback salvato: {success}")


Test feedback positivo...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"


Feedback salvato: True


In [9]:
# Verifica che la query sia stata salvata
print("\nVerifica query salvata...")
results = store.search_similar_queries("gioco più venduto", limit=1)
print_query_results(results)


Verifica query salvata...


INFO:httpx:HTTP Request: POST https://api.openai.com/v1/embeddings "HTTP/1.1 200 OK"



Query simili trovate:

Domanda: Qual è il gioco più venduto?
Score: 0.8299179812225611
SQL: SELECT g.game_name, SUM(rs.num_sales) as total_sales FROM games g JOIN region_sales rs ON g.id = rs.game_id GROUP BY g.game_name ORDER BY total_sales DESC LIMIT 1
Spiegazione: Questa query trova il gioco con il maggior numero di vendite totali sommando le vendite in tutte le regioni
Voti positivi: 1
--------------------------------------------------


In [8]:
from src.config.models.vector_store import TablePayload, QueryPayload

test_table_metadata = TablePayload(
    type='table',
    table_name='products',
    description='Table storing product information including name, price and category',
    keywords=['products', 'price', 'category'],
    columns=[
        {'name': 'id', 'type': 'INTEGER', 'nullable': False},
        {'name': 'name', 'type': 'VARCHAR(100)', 'nullable': False},
        {'name': 'price', 'type': 'DECIMAL(10,2)', 'nullable': True},
        {'name': 'category_id', 'type': 'INTEGER', 'nullable': True}
    ],
    primary_keys=['id'],
    foreign_keys=[{'constrained_columns': ['category_id'], 'referred_table': 'categories', 'referred_columns': ['id']}],
    row_count=100,
    importance_score=0.8
)

store.add_table(test_table_metadata)

ERROR:hey-database:Error adding table metadata: 'TablePayload' object has no attribute 'base_metadata'


False