In [1]:
import logging
import os
import sys
from pathlib import Path

# Configure logging first so the messages emitted below, and the log lines
# produced in later cells, share one timestamped format.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# The `src` package is imported from the parent of the working directory, so
# that directory has to be on the import path.
project_root = Path.cwd().parent
if str(project_root) not in sys.path:
    # Guarded so that re-running this cell does not append duplicate entries.
    sys.path.append(str(project_root))

# The OpenAI key is read from the environment instead of being written here.
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    # os.getenv returns None when the variable is unset; report it here rather
    # than leaving the problem to surface at the first LLM request.
    logging.warning('OPENAI_API_KEY is not set; requests to the OpenAI API are expected to fail')

In [2]:
import os
from getpass import getpass

from src.connectors.postgres import PostgresManager

# Connection to the local PostgreSQL instance used by the rest of the notebook.
# The password is deliberately not stored in the notebook: it is taken from the
# PGPASSWORD environment variable, and asked for interactively when that
# variable is unset or empty.
db = PostgresManager(
    host='localhost',
    port='5432',
    user='postgres',
    database='giacomo',
    password=os.getenv('PGPASSWORD') or getpass('PostgreSQL password: '),
)

2025-01-25 15:48:49,157 - INFO - Connected to PostgreSQL database


In [3]:
# Display the engine as the cell's result to confirm which database the
# notebook is connected to (the recorded output shows the password masked).
db.engine

Engine(postgresql://postgres:***@localhost:5432/giacomo)


In [4]:
from src.llm_handler.openai_handler import OpenAIHandler

# LLM handler passed to the metadata enhancers in later cells. The key is the
# value read from OPENAI_API_KEY in the first cell.
llm = OpenAIHandler(api_key=api_key, chat_model="gpt-4o-mini")

## Tables

In [5]:
from src.metadata.extractors.table.postgres_table_metadata_extractor import PostgresTableMetadataExtractor

# Table-level metadata extractor built on the `db` connection created earlier.
# 'northwind' is presumably the schema to inspect — TODO confirm against the
# PostgresTableMetadataExtractor constructor.
metadata_extractor = PostgresTableMetadataExtractor(db, 'northwind')

In [6]:
# Extract the metadata of the `employees` table; the recorded output lists its
# columns, primary keys, foreign keys and row count.
# NOTE(review): the variable name is misspelled ("employess"). It is kept
# because the table-enhancement cell further down refers to it by this name.
employess_metadata = metadata_extractor.extract_metadata('employees')
employess_metadata

TableMetadata(name='employees', columns=['employee_id', 'last_name', 'first_name', 'title', 'title_of_courtesy', 'birth_date', 'hire_date', 'address', 'city', 'region', 'postal_code', 'country', 'home_phone', 'extension', 'photo', 'notes', 'reports_to', 'photo_path'], primary_keys=['employee_id'], foreign_keys=[{'constrained_columns': ['reports_to'], 'referred_table': 'employees', 'referred_columns': ['employee_id']}], row_count=9)

In [7]:
from src.metadata.enhancers.table_metadata_enhancer import TableMetadataEnhancer

# Table metadata enhancer, constructed with the `llm` handler created earlier.
table_enhancer = TableMetadataEnhancer(llm)

In [8]:
# Enhance the extracted `employees` table metadata. In the recorded run this
# logs one chat-completions request and yields the base metadata together with
# a description, keywords and an importance score.
# NOTE(review): in the recorded output the description starts with a
# 'Table: employees -> "..."' prefix and the keywords include generic terms
# such as 'Table' and 'data' — check whether the enhancer should clean these.
employees_metadata_enhanced = table_enhancer.enhance(employess_metadata)
employees_metadata_enhanced

2025-01-25 15:48:51,193 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


EnhancedTableMetadata(base_metadata=TableMetadata(name='employees', columns=['employee_id', 'last_name', 'first_name', 'title', 'title_of_courtesy', 'birth_date', 'hire_date', 'address', 'city', 'region', 'postal_code', 'country', 'home_phone', 'extension', 'photo', 'notes', 'reports_to', 'photo_path'], primary_keys=['employee_id'], foreign_keys=[{'constrained_columns': ['reports_to'], 'referred_table': 'employees', 'referred_columns': ['employee_id']}], row_count=9), description='Table: employees -> "Contiene i dati anagrafici e professionali dei dipendenti, inclusi nome, cognome, titolo lavorativo, data di nascita e data di assunzione. Include anche informazioni di contatto e riferimenti gerarchici tra i dipendenti."', keywords=['inclusi nome', 'titolo lavorativo', 'dei dipendenti', 'Table', 'data'], importance_score=1.0)

## Columns

In [9]:
from src.metadata.extractors.columns.postgres_column_metadata_extractor import PostgresColumnMetadataExtractor

# Column-level metadata extractor built on the same `db` connection.
# 'northwind' is presumably the schema to inspect — TODO confirm against the
# PostgresColumnMetadataExtractor constructor.
column_metadata_extractor = PostgresColumnMetadataExtractor(db, 'northwind')

In [10]:
# Extract per-column metadata for the `employees` table; the recorded output is
# a list of ColumnMetadata entries, one per column.
# NOTE(review): the recorded output reports is_primary_key=False for
# `employee_id`, although the table-level metadata shown earlier lists it under
# primary_keys — verify the column extractor's key detection.
# NOTE(review): the variable name is misspelled ("employess"). It is kept
# because the column-enhancement cell further down refers to it by this name.
employess_columns_metadata = column_metadata_extractor.extract_metadata('employees')
employess_columns_metadata

[ColumnMetadata(name='employee_id', table='employees', data_type='SMALLINT', nullable=False, is_primary_key=False, is_foreign_key=False, distinct_values=['1', '9', '4', '8', '6', '3', '2', '7', '5'], relationships=[]),
 ColumnMetadata(name='last_name', table='employees', data_type='VARCHAR(20)', nullable=False, is_primary_key=False, is_foreign_key=False, distinct_values=['Callahan', 'Fuller', 'Leverling', 'Peacock', 'Suyama', 'Buchanan', 'Dodsworth', 'Davolio', 'King'], relationships=[]),
 ColumnMetadata(name='first_name', table='employees', data_type='VARCHAR(10)', nullable=False, is_primary_key=False, is_foreign_key=False, distinct_values=['Laura', 'Janet', 'Andrew', 'Nancy', 'Anne', 'Robert', 'Michael', 'Steven', 'Margaret'], relationships=[]),
 ColumnMetadata(name='title', table='employees', data_type='VARCHAR(30)', nullable=True, is_primary_key=False, is_foreign_key=False, distinct_values=['Vice President, Sales', 'Inside Sales Coordinator', 'Sales Representative', 'Sales Manager'

In [11]:
from src.metadata.enhancers.column_metadata_enhancer import ColumnMetadataEnhancer

# Column metadata enhancer, constructed with the `llm` handler created earlier.
column_enhancer = ColumnMetadataEnhancer(llm)

In [12]:
# Enhance the `employee_id` column. The column is selected by name rather than
# by list position, so this cell keeps targeting the same column even if the
# order of the extracted columns changes.
employee_id_metadata = next(
    column for column in employess_columns_metadata if column.name == 'employee_id'
)
# NOTE(review): the recorded output shows ai_name='placeholder' and a
# description wrapped in an extra pair of double quotes — confirm whether the
# enhancer is expected to fill in / strip these.
employee_id_enhanced = column_enhancer.enhance(employee_id_metadata)
employee_id_enhanced

2025-01-25 15:48:52,119 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


EnhancedColumnMetadata(base_metadata=ColumnMetadata(name='employee_id', table='employees', data_type='SMALLINT', nullable=False, is_primary_key=False, is_foreign_key=False, distinct_values=['1', '9', '4', '8', '6', '3', '2', '7', '5'], relationships=[]), ai_name='placeholder', description='"Identificatore univoco dell\'impiegato, rappresentato da un numero intero compreso tra 1 e 9."', keywords=['Identificatore univoco', "univoco dell'impiegato", 'compreso tra'])