In [None]:
from pathlib import Path
import sys

# The project root is taken to be the parent of the current working
# directory (resolved to an absolute path).
PROJECT_ROOT = Path.cwd().resolve().parent

# Register the project root on the import path exactly once, so re-running
# this cell does not keep appending duplicate entries.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.append(str(PROJECT_ROOT))

In [None]:
import pandas as pd
from sqlalchemy import create_engine

from database.config.database import get_connection_string
from spark_jobs.config import GOLD_DIR

In [None]:
# Load every parquet file found directly under GOLD_DIR into PostgreSQL,
# one table per file (table name = file name without the extension), then
# print a summary of what was loaded and what failed.
print('Starting GOLD load into PostgreSQL...')
engine = create_engine(get_connection_string())
gold_dir = Path(GOLD_DIR)

# Sort the matches: glob order depends on the filesystem, and a stable order
# keeps the load sequence and the log output reproducible between runs.
parquet_files = sorted(gold_dir.glob('*.parquet'))
if not parquet_files:
    print(f'No parquet files found in {gold_dir}')

files_loaded = 0
errors = []

try:
    for file_path in parquet_files:
        table_name = file_path.stem
        try:
            df = pd.read_parquet(file_path)
            print(f'Loading {file_path.name} into table {table_name}...')
            # NOTE: 'append' inserts into an existing table, so re-running
            # this cell against the same files adds the same rows again.
            df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
        except Exception as exc:  # noqa: BLE001
            # Best-effort load: record the failure and keep going so one bad
            # file does not block the remaining tables.
            print(f'Error loading {file_path.name}: {exc}')
            errors.append(file_path.name)
        else:
            files_loaded += 1
finally:
    # Return pooled connections to the database. The engine object remains
    # usable afterwards (it lazily opens a new pool on next use).
    engine.dispose()

print('--- Load summary ---')
print(f'Tables loaded: {files_loaded}')
if errors:
    print('Failures:')
    for name in errors:
        print(f' - {name}')
elif parquet_files:
    print('All parquet files loaded successfully.')
else:
    # Do not claim success when there was nothing to load; an empty GOLD
    # directory usually means an upstream job did not run.
    print('Nothing was loaded.')