In [1]:
import argparse
import ast
import csv
import json
import os
import re
import sys
from datetime import datetime

import pandas as pd
import psycopg2
from dotenv import load_dotenv
from psycopg2 import sql
from psycopg2.extras import RealDictCursor, execute_batch
load_dotenv()

True

# Connect database

In [2]:
# Database connection settings. Each value is read from the process
# environment and falls back to the default shown when the variable is unset.
DB_HOST = os.environ.get("DB_HOST", "localhost")
DB_PORT = int(os.environ.get("DB_PORT", "5432"))
DB_NAME = os.environ.get("DB_NAME", "postgres")
DB_USER = os.environ.get("DB_USER", "")
DB_PASSWORD = os.environ.get("DB_PASSWORD", "")
# Seconds; passed to psycopg2 as connect_timeout.
CONNECT_TIMEOUT = int(os.environ.get("DB_CONNECT_TIMEOUT", "10"))

In [3]:
def get_db_connection():
    """
    Open and return a new psycopg2 connection built from the DB_* settings.

    The connection uses RealDictCursor as its cursor factory, so cursors
    created from it return rows as dict-like objects keyed by column name.
    """
    return psycopg2.connect(
        host=DB_HOST,
        port=DB_PORT,
        dbname=DB_NAME,
        user=DB_USER,
        password=DB_PASSWORD,
        connect_timeout=CONNECT_TIMEOUT,
        cursor_factory=RealDictCursor,
    )

def test_connection(sql_query: str = "SELECT now() AS now"):
    """
    Run one SQL statement on a fresh connection and print every row returned.

    Any exception is caught and printed as a "DB error" line rather than
    propagated. The connection is closed whether or not the query succeeded.
    """
    connection = None
    try:
        connection = get_db_connection()
        with connection.cursor() as cursor:
            cursor.execute(sql_query)
            fetched = cursor.fetchall()
            print(f"‚úì Query executed: {sql_query}")
            for record in fetched:
                print(record)
    except Exception as exc:
        print(f"DB error: {exc}")
    finally:
        if connection is not None:
            connection.close()

def list_tables():
    """Print schema and name of every base table outside the system schemas."""
    # The query text is echoed verbatim by test_connection, so it is kept as-is.
    tables_query = """
    SELECT table_schema, table_name
    FROM information_schema.tables
    WHERE table_type='BASE TABLE'
      AND table_schema NOT IN ('pg_catalog','information_schema')
    ORDER BY table_schema, table_name;
    """
    test_connection(tables_query)

def show_table_info(table_name: str, schema: str = "public", do_count: bool = False):
    """
    Print a summary of one table:
      - columns + types
      - primary key
      - indexes
      - approx size on disk
      - optional exact row count (can be slow)

    Args:
        table_name: Table name, matched case-sensitively against the catalogs.
        schema: Schema that holds the table.
        do_count: When True, also run an exact COUNT(*) over the table.

    Any exception is caught and printed as a "DB error" line. The connection
    opened for the inspection is always closed before returning.
    """
    conn = None
    try:
        conn = get_db_connection()
        with conn.cursor() as cur:
            # Columns + types
            cur.execute(
                """
                SELECT column_name, data_type, is_nullable, character_maximum_length
                FROM information_schema.columns
                WHERE table_schema = %s AND table_name = %s
                ORDER BY ordinal_position;
                """,
                (schema, table_name),
            )
            cols = cur.fetchall()
            if not cols:
                print(f"Table '{schema}.{table_name}' not found or has no columns.")
                return
            print(f"\nColumns for {schema}.{table_name}:")
            for c in cols:
                print(f"  - {c['column_name']}: {c['data_type']} nullable={c['is_nullable']} max_len={c['character_maximum_length']}")

            # Primary key
            cur.execute(
                """
                SELECT kcu.column_name
                FROM information_schema.table_constraints tc
                JOIN information_schema.key_column_usage kcu
                  ON tc.constraint_name = kcu.constraint_name
                 AND tc.table_schema = kcu.table_schema
                WHERE tc.table_schema = %s
                  AND tc.table_name = %s
                  AND tc.constraint_type = 'PRIMARY KEY'
                ORDER BY kcu.ordinal_position;
                """,
                (schema, table_name),
            )
            pk = [r["column_name"] for r in cur.fetchall()]
            print(f"\nPrimary key: {pk or 'NONE'}")

            # Indexes (pg_indexes)
            cur.execute(
                """
                SELECT indexname, indexdef
                FROM pg_indexes
                WHERE schemaname = %s AND tablename = %s
                ORDER BY indexname;
                """,
                (schema, table_name),
            )
            idxs = cur.fetchall()
            print(f"\nIndexes ({len(idxs)}):")
            for i in idxs:
                print(f"  - {i['indexname']}: {i['indexdef']}")

            # Size on disk (human readable); quote_ident keeps mixed-case names intact
            cur.execute(
                """
                SELECT
                  pg_size_pretty(pg_total_relation_size(quote_ident(%s) || '.' || quote_ident(%s))) AS total_size,
                  pg_size_pretty(pg_relation_size(quote_ident(%s) || '.' || quote_ident(%s))) AS table_size
                """,
                (schema, table_name, schema, table_name),
            )
            size_info = cur.fetchone()
            print(f"\nSize: total={size_info['total_size']} table={size_info['table_size']}")

            # Approx row estimate from pg_class
            cur.execute(
                """
                SELECT reltuples::BIGINT AS estimate_rows
                FROM pg_class c
                JOIN pg_namespace n ON n.oid = c.relnamespace
                WHERE n.nspname = %s AND c.relname = %s;
                """,
                (schema, table_name),
            )
            est = cur.fetchone()
            print(f"Estimated rows (pg_class.reltuples): {est['estimate_rows'] if est else 'N/A'}")

            # Optional exact count (use sql module to safely format identifiers)
            if do_count:
                print("\nComputing exact COUNT(*) (may be slow)...")
                q = sql.SQL("SELECT count(*) AS exact_count FROM {}.{}").format(
                    sql.Identifier(schema), sql.Identifier(table_name)
                )
                cur.execute(q)
                cnt = cur.fetchone()
                print(f"Exact rows: {cnt['exact_count']}")
    except Exception as e:
        print(f"DB error: {e}")
    finally:
        # Previously the connection was never closed, leaking one per call
        # (including on the early "table not found" return).
        if conn is not None:
            conn.close()

In [4]:
# Print the schema and name of every non-system base table.
list_tables()

‚úì Query executed: 
    SELECT table_schema, table_name
    FROM information_schema.tables
    WHERE table_type='BASE TABLE'
      AND table_schema NOT IN ('pg_catalog','information_schema')
    ORDER BY table_schema, table_name;
    
RealDictRow([('table_schema', 'public'), ('table_name', 'Action')])
RealDictRow([('table_schema', 'public'), ('table_name', 'Admin')])
RealDictRow([('table_schema', 'public'), ('table_name', 'Answer')])
RealDictRow([('table_schema', 'public'), ('table_name', 'AnswerTranslate')])
RealDictRow([('table_schema', 'public'), ('table_name', 'Attachment')])
RealDictRow([('table_schema', 'public'), ('table_name', 'AttachmentReference')])
RealDictRow([('table_schema', 'public'), ('table_name', 'BiometricDevice')])
RealDictRow([('table_schema', 'public'), ('table_name', 'Category')])
RealDictRow([('table_schema', 'public'), ('table_name', 'CategoryOpenTime')])
RealDictRow([('table_schema', 'public'), ('table_name', 'CategoryOpenTimeTranslate')])
RealDictRow([('tabl

In [5]:
# Inspect "PoiClean" in the default schema; the exact row count is not
# computed because do_count is left at its default of False.
show_table_info("PoiClean")


Columns for public.PoiClean:
  - id: uuid nullable=NO max_len=None
  - created_at: timestamp without time zone nullable=NO max_len=None
  - updatedAt: timestamp without time zone nullable=NO max_len=None
  - deletedAt: timestamp without time zone nullable=YES max_len=None
  - name: text nullable=NO max_len=None
  - lat: double precision nullable=NO max_len=None
  - address: text nullable=YES max_len=None
  - poi_type: text nullable=YES max_len=None
  - total_reviews: integer nullable=YES max_len=None
  - lon: double precision nullable=NO max_len=None
  - geom: USER-DEFINED nullable=NO max_len=None
  - stay_time: double precision nullable=YES max_len=None
  - avg_stars: double precision nullable=YES max_len=None
  - normalize_stars_reviews: double precision nullable=YES max_len=None
  - open_hours: json nullable=YES max_len=None
  - poi_type_clean: text nullable=YES max_len=None
  - main_subcategory: text nullable=YES max_len=None
  - specialization: text nullable=YES max_len=None
  - 

In [9]:
# CSV location, resolved relative to the directory the kernel was started in.
file_path = os.path.join(
    os.getcwd(),
    "../data_csv/data_clean_normalize.csv",
)
print("File path:", file_path)

File path: c:\Users\nguye\Desktop\vinamo\Main_Branch\Kyanon-support-localtion\scripts\ingest_db\../data_csv/data_clean_normalize.csv


In [13]:
# Load the CSV at file_path and report its dimensions.
df = pd.read_csv(file_path)
print(f"DataFrame loaded with {df.shape[0]} rows and {df.shape[1]} columns.")

DataFrame loaded with 1454 rows and 19 columns.


In [14]:
# Preview the first two rows.
df.head(2)

Unnamed: 0,id,name,address,lat,lon,poi_type,avg_stars,total_reviews,crowd,offerings,atmosphere,highlights,dining_options,children,accessibility,popular_for,opening_hours,stay_time,normalize_stars_reviews
0,0f9d2009-9436-46a4-b354-b0261898a39e,The Pub Coffee - Beer & Cocktail,"18A17 TƒÉng Nh∆°n Ph√∫, Ph∆∞·ªõc Long B, Qu·∫≠n 9, Th√†...",10.829481,106.773785,"Cafe,Bar",4.9,181,Groups,"Alcohol, Beer, Cocktails, Coffee, Hard liquor","Casual, Cozy","Great beer selection, Great coffee, Live music...",Table service,,,,"[{'day': 'Monday', 'hours': [{'start': '00:00'...",30,0.755
1,02887955-963a-43ac-b0f7-355d7d7cfacf,Julieta,"C. Sta. Luc√≠a, 9, Distrito Centro, 29008 M√°lag...",36.722011,-4.42178,Cafe,4.3,2053,"College students, Groups, Tourists","Alcohol, Beer, Coffee, Healthy options, Organi...","Casual, Cozy, Trendy","Great coffee, Great dessert, Great tea selection","Breakfast, Brunch, Lunch, Dessert, Seating, Ta...",Good for kids,"Wheelchair accessible entrance, Wheelchair acc...","Breakfast, Good for working on laptop","[{'day': 'Monday', 'hours': [{'start': '08:00'...",30,0.661


# Ingest data vào table PoiClean

In [16]:
# Upsert statement for public."PoiClean", keyed on id.
# Takes 12 positional parameters, in this order:
#   id, name, address, lat, lon,
#   x and y for ST_MakePoint (the point is tagged with SRID 4326),
#   poi_type, avg_stars, total_reviews, stay_time, normalize_stars_reviews.
# On insert, created_at and "updatedAt" are set to NOW() and "deletedAt" to NULL.
# On an id conflict, the data columns are overwritten and "updatedAt" is
# refreshed; created_at and "deletedAt" are left as they were.
UPSERT_SQL = """
INSERT INTO public."PoiClean" (
    id,
    name,
    address,
    lat,
    lon,
    geom,
    poi_type,
    avg_stars,
    total_reviews,
    stay_time,
    normalize_stars_reviews,
    created_at,
    "updatedAt",
    "deletedAt"
)
VALUES (
    %s, %s, %s, %s, %s,
    ST_SetSRID(ST_MakePoint(%s, %s), 4326),
    %s, %s, %s, %s, %s,
    NOW(),
    NOW(),
    NULL
)
ON CONFLICT (id) DO UPDATE SET
    name = EXCLUDED.name,
    address = EXCLUDED.address,
    lat = EXCLUDED.lat,
    lon = EXCLUDED.lon,
    geom = EXCLUDED.geom,
    poi_type = EXCLUDED.poi_type,
    avg_stars = EXCLUDED.avg_stars,
    total_reviews = EXCLUDED.total_reviews,
    stay_time = EXCLUDED.stay_time,
    normalize_stars_reviews = EXCLUDED.normalize_stars_reviews,
    "updatedAt" = NOW();
"""

In [None]:
# Open a connection and a cursor on it
conn = get_db_connection()
cur = conn.cursor()

In [None]:
def _parse_poi_csv_row(row: dict):
    """Turn one CSV row into the UPSERT_SQL parameter tuple, or None if it must be skipped.

    Returns None when id, lat or lon is missing/empty or the coordinates fall
    outside [-90, 90] x [-180, 180]. Raises ValueError/TypeError when a numeric
    field cannot be converted.
    """
    if not row.get("id") or not row.get("lat") or not row.get("lon"):
        return None

    lat = float(row["lat"])
    lon = float(row["lon"])
    if not (-90 <= lat <= 90 and -180 <= lon <= 180):
        return None

    avg_stars = float(row["avg_stars"]) if row.get("avg_stars") else None
    total_reviews = int(row["total_reviews"]) if row.get("total_reviews") else None
    stay_time = float(row["stay_time"]) if row.get("stay_time") else None
    normalize_score = (
        float(row["normalize_stars_reviews"])
        if row.get("normalize_stars_reviews")
        else None
    )

    return (
        row["id"],
        row.get("name"),
        row.get("address"),
        lat,
        lon,
        lon,   # x for ST_MakePoint
        lat,   # y for ST_MakePoint
        row.get("poi_type"),
        avg_stars,
        total_reviews,
        stay_time,
        normalize_score,
    )


def import_csv_to_poi_clean(csv_file_path: str, batch_size: int = 500):
    """
    Upsert the rows of a CSV file into public."PoiClean" in committed batches.

    Args:
        csv_file_path: Path to the CSV file (read as utf-8-sig). Columns read:
            id, name, address, lat, lon, poi_type, avg_stars, total_reviews,
            stay_time, normalize_stars_reviews.
        batch_size: Number of rows sent per execute_batch call / commit.

    Rows with a missing id/lat/lon, out-of-range coordinates, or an
    unparseable numeric field are counted as skipped and do not stop the
    import. A database or file error rolls back the uncommitted batch, is
    reported, and is re-raised; batches committed earlier stay committed.

    The function opens its own connection and closes it before returning, so
    it neither depends on nor closes a connection owned by the caller.
    """
    total_rows = 0
    skipped_rows = 0
    batch_data = []

    connection = get_db_connection()
    try:
        with connection.cursor() as cursor, \
                open(csv_file_path, "r", encoding="utf-8-sig") as file:
            for row in csv.DictReader(file):
                # Only row conversion is treated as a per-row, skippable failure.
                # A failing batch write must not land here: it leaves the
                # transaction aborted, so every following row would fail too.
                try:
                    params = _parse_poi_csv_row(row)
                except (TypeError, ValueError) as e:
                    skipped_rows += 1
                    print(f"‚ö† Skip row: {e}")
                    continue

                if params is None:
                    skipped_rows += 1
                    continue

                batch_data.append(params)

                if len(batch_data) >= batch_size:
                    execute_batch(cursor, UPSERT_SQL, batch_data)
                    connection.commit()
                    total_rows += len(batch_data)
                    batch_data.clear()
                    print(f"  ‚úì Imported {total_rows} records...")

            # Flush the final, partially filled batch.
            if batch_data:
                execute_batch(cursor, UPSERT_SQL, batch_data)
                connection.commit()
                total_rows += len(batch_data)

        print("\nüéâ IMPORT HO√ÄN T·∫§T")
        print(f"  - Th√†nh c√¥ng: {total_rows}")
        print(f"  - B·ªè qua: {skipped_rows}")

    except Exception as e:
        connection.rollback()
        print(f"‚ùå IMPORT FAILED: {e}")
        raise
    finally:
        connection.close()

def verify_data(limit: int = 5):
    """
    Print a few rows of public."PoiClean" as a sanity check after an import.

    Args:
        limit: Maximum number of rows to fetch and print.

    The function opens its own connection and closes it before printing, so
    it still works when a previously shared connection has been closed.
    Rows are read by column name because get_db_connection() yields
    dict-style rows (RealDictCursor), which cannot be indexed by position.
    """
    connection = get_db_connection()
    try:
        with connection.cursor() as cursor:
            # LIMIT is passed as a bound parameter instead of being formatted
            # into the SQL text.
            cursor.execute(
                """
                SELECT
                    id,
                    name,
                    lat,
                    lon,
                    poi_type,
                    avg_stars,
                    total_reviews,
                    stay_time,
                    normalize_stars_reviews,
                    ST_AsText(geom) AS geom_wkt
                FROM public."PoiClean"
                LIMIT %s;
                """,
                (limit,),
            )
            sample_rows = cursor.fetchall()
    finally:
        connection.close()

    print(f"\nüß™ SAMPLE DATA ({limit} rows):")
    for r in sample_rows:
        print(
            f"- {r['name']} | ({r['lat']}, {r['lon']}) | "
            f"‚≠ê {r['avg_stars']} | reviews={r['total_reviews']} | stay={r['stay_time']} | geom={r['geom_wkt']}"
        )

In [None]:
# Announce the source file and target database, then run the CSV import
# followed by a printed sample of 5 rows.
print("üöÄ START IMPORT PoiClean")
print(f"üìÅ CSV: {file_path}")
print(f"üóÑÔ∏è DB: {DB_NAME}@{DB_HOST}\n")

import_csv_to_poi_clean(file_path, batch_size=500)
verify_data(limit=5)

# Ingest thêm open_hours

In [11]:
# Open a connection and a cursor on it
conn = get_db_connection()
cur = conn.cursor()

In [19]:
# Preview five rows. LIMIT keeps the server from sending the whole table
# just to print a handful of records.
cur.execute('SELECT * FROM "PoiClean" LIMIT 5')
rows = cur.fetchall()
for poi in rows:
    print(poi)

RealDictRow([('id', '96451ce8-09ec-4b42-89ea-82534456e189'), ('created_at', datetime.datetime(2026, 1, 7, 9, 28, 3, 360928)), ('updatedAt', datetime.datetime(2026, 1, 7, 9, 51, 40, 111841)), ('deletedAt', None), ('name', 'Bar tocata'), ('lat', 36.721288), ('address', 'C. Duque de la Victoria, 6, Distrito Centro, 29015 M√°laga, Spain'), ('poi_type', 'Disco club,Bar,Night club'), ('total_reviews', 1321), ('lon', -4.4198423), ('geom', '0101000020E6100000E1E8CF23EBAD11C0A67F492A535C4240'), ('stay_time', 30.0), ('avg_stars', 4.0), ('normalize_stars_reviews', 0.556), ('open_hours', [{'day': 'Tuesday', 'hours': [{'start': '22:00', 'end': '03:00'}]}, {'day': 'Wednesday', 'hours': [{'start': '22:00', 'end': '03:00'}]}, {'day': 'Thursday', 'hours': [{'start': '22:00', 'end': '03:00'}]}, {'day': 'Friday', 'hours': [{'start': '20:00', 'end': '04:00'}]}, {'day': 'Saturday', 'hours': [{'start': '18:00', 'end': '04:00'}]}, {'day': 'Sunday', 'hours': [{'start': '22:00', 'end': '03:00'}]}]), ('poi_type

In [None]:
# Load only the id and opening_hours columns from the CSV at file_path.
df_hours = pd.read_csv(
    file_path,
    usecols=['id', 'opening_hours'],
    encoding="utf-8",
)

# Show the row count and the first rows as a quick check.
print(f"Total rows in CSV: {df_hours.shape[0]}")
print(df_hours.head())

Total rows in CSV: 1454
                                     id  \
0  0f9d2009-9436-46a4-b354-b0261898a39e   
1  02887955-963a-43ac-b0f7-355d7d7cfacf   
2  622c7643-30e8-4402-9b6c-b8407ff063e2   
3  4f06908d-e9fa-4f6a-b1ae-c7d8882e2edf   
4  279dfce3-c227-4b58-b4ed-09197327a32a   

                                       opening_hours  
0  [{'day': 'Monday', 'hours': [{'start': '00:00'...  
1  [{'day': 'Monday', 'hours': [{'start': '08:00'...  
2  [{'day': 'Tuesday', 'hours': [{'start': '11:00...  
3  [{'day': 'Tuesday', 'hours': [{'start': '17:30...  
4  [{'day': 'Monday', 'hours': [{'start': '10:00'...  


In [None]:
# Update open_hours in the database from the CSV data.
# Counters: updated = a row was changed, skipped = nothing to write or no row
# with that id, error = the value could not be parsed or the UPDATE failed.
updated_count = 0
skipped_count = 0
error_count = 0

for poi_id, opening_hours in zip(df_hours['id'], df_hours['opening_hours']):
    # Skip if opening_hours is null or empty
    if pd.isna(opening_hours) or opening_hours == '':
        skipped_count += 1
        continue

    # The CSV stores a Python-literal string (single quotes), which is not
    # valid JSON; literal_eval parses it safely and json.dumps re-serialises it.
    try:
        opening_hours_json = json.dumps(ast.literal_eval(opening_hours))
    except (ValueError, SyntaxError, TypeError) as e:
        error_count += 1
        print(f"Error updating POI {poi_id}: {e}")
        continue

    # A failed statement aborts the whole transaction in PostgreSQL. The
    # savepoint confines the failure to this row so later rows still apply.
    cur.execute("SAVEPOINT poi_open_hours")
    try:
        cur.execute(
            'UPDATE "PoiClean" SET open_hours = %s WHERE id = %s',
            (opening_hours_json, poi_id)
        )
        matched_rows = cur.rowcount  # read before RELEASE overwrites it
        cur.execute("RELEASE SAVEPOINT poi_open_hours")
    except psycopg2.Error as e:
        cur.execute("ROLLBACK TO SAVEPOINT poi_open_hours")
        error_count += 1
        print(f"Error updating POI {poi_id}: {e}")
        continue

    if matched_rows:
        updated_count += 1
    else:
        # No row has this id, so nothing was written.
        skipped_count += 1

    # Print progress every 100 records
    if (updated_count + skipped_count) % 100 == 0:
        print(f"Processed: {updated_count + skipped_count} records (Updated: {updated_count}, Skipped: {skipped_count})")

# Commit the changes
conn.commit()
print(f"\n‚úì Update complete!")
print(f"Total Updated: {updated_count}")
print(f"Total Skipped: {skipped_count}")
print(f"Total Errors: {error_count}")

In [None]:
# Verify the update by checking a few POIs
verify_query = 'SELECT id, name, open_hours FROM "PoiClean" WHERE open_hours IS NOT NULL LIMIT 5'
cur.execute(verify_query)
results = cur.fetchall()

print("Sample of updated records:")
for poi in results:
    print(f"\nPOI: {poi['name']} (ID: {poi['id']})")
    # psycopg2 decodes json columns into Python objects, so slicing the value
    # directly would take list items rather than characters. Serialise first.
    hours_value = poi['open_hours']
    hours_text = hours_value if isinstance(hours_value, str) else json.dumps(hours_value, ensure_ascii=False)
    print(f"Opening Hours: {hours_text[:100]}...")  # Print first 100 chars

# Ingest thêm từ json

In [None]:
# Rebinds file_path: from here on it points at the JSON results file,
# no longer at the CSV used by the cells above.
file_path = os.path.join(os.getcwd(), "../data/results.json")

In [None]:
# Read the JSON file
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)
print(type(data))
print(f"Total POI in JSON: {len(data)}")
print(data[3].get("main_subcategory"))  # Spot check: main_subcategory of the record at index 3

In [None]:
# Update the classification fields in the database from the JSON records.
# Counters: updated = a row was changed, skipped = record has no id or no row
# with that id exists, error = the UPDATE failed.
updated_count = 0
skipped_count = 0
error_count = 0

for poi in data:
    poi_id = poi.get('id')
    poi_type_clean = poi.get('poi_type_new')        # may be None
    main_subcategory = poi.get('main_subcategory')  # may be None
    specialization = poi.get('specialization')      # may be None

    # Without an id the WHERE clause can never match; skip the no-op UPDATE.
    if not poi_id:
        skipped_count += 1
        continue

    # A failed statement aborts the whole transaction in PostgreSQL. The
    # savepoint confines the failure to this record so later ones still apply.
    cur.execute("SAVEPOINT poi_classification")
    try:
        cur.execute(
            '''UPDATE "PoiClean"
               SET poi_type_clean = %s,
                   main_subcategory = %s,
                   specialization = %s
               WHERE id = %s''',
            (poi_type_clean, main_subcategory, specialization, poi_id)
        )
        matched_rows = cur.rowcount  # read before RELEASE overwrites it
        cur.execute("RELEASE SAVEPOINT poi_classification")
    except psycopg2.Error as e:
        cur.execute("ROLLBACK TO SAVEPOINT poi_classification")
        error_count += 1
        print(f"Error updating POI {poi_id}: {e}")
        continue

    if matched_rows:
        updated_count += 1
    else:
        skipped_count += 1

    # Print progress every 100 records
    if (updated_count + skipped_count) % 100 == 0:
        print(f"Processed: {updated_count + skipped_count} records "
              f"(Updated: {updated_count}, Skipped: {skipped_count})")

# Commit
conn.commit()

print("\n‚úì Update complete!")
print(f"Total Updated: {updated_count}")
print(f"Total Skipped: {skipped_count}")
print(f"Total Errors: {error_count}")

# Rename a poi_type_clean value (originally "Cafe & Bakery" → "Cafe"; the cell below currently maps "Local history museum" → "History museum")

In [None]:
# Rename one poi_type_clean value: rows whose poi_type_clean equals the second
# parameter ("Local history museum") are set to the first ("History museum").
# The change is committed on success and rolled back on any error.

try:
    cur.execute(
        '''
        UPDATE "PoiClean"
        SET poi_type_clean = %s
        WHERE poi_type_clean = %s
        ''',
        ("History museum", "Local history museum")
    )

    affected_rows = cur.rowcount
    conn.commit()

    print(f"‚úì Updated {affected_rows} records")

except Exception as e:
    conn.rollback()
    print(f"‚úó Error updating poi_type_clean: {e}")


# Ingest thêm stay_time và travel_type

In [6]:
# JSON results location, resolved relative to the kernel's working directory.
file_path = os.path.join(
    os.getcwd(),
    "../generate_description/test_stay_time/result.json",
)
print("File path:", file_path)

File path: c:\Users\nguye\Desktop\vinamo\Main_Branch\Kyanon-support-localtion\scripts\ingest_db\../generate_description/test_stay_time/result.json


In [None]:
# Read the JSON file
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)
print(type(data))
print(f"Total POI in JSON: {len(data)}")
# Spot check: main_subcategory of the record at index 3. A stray character in
# front of print() previously made this line raise NameError.
print(data[3].get("main_subcategory"))

<class 'list'>
Total POI in JSON: 1454
None


In [9]:
# Show id, stay_time and suitability (any of which may be None) together with
# the value types for the first three records.
for poi in data[:3]:
    poi_id, stay_time, suitability = (
        poi.get(field) for field in ('id', 'stay_time', 'suitability')
    )
    print(f"POI ID: {poi_id}, Stay Time: {stay_time}, Suitability: {suitability}")
    print(type(stay_time), type(suitability))

POI ID: 0f9d2009-9436-46a4-b354-b0261898a39e, Stay Time: 120, Suitability: {'Solo': 25, 'Couple': 40, 'Friends': 80, 'Family with kids': 10, 'Business traveler / free time': 30}
<class 'int'> <class 'dict'>
POI ID: 02887955-963a-43ac-b0f7-355d7d7cfacf, Stay Time: 90, Suitability: {'Solo': 70, 'Couple': 60, 'Friends': 50, 'Family with kids': 60, 'Business traveler / free time': 80}
<class 'int'> <class 'dict'>
POI ID: 622c7643-30e8-4402-9b6c-b8407ff063e2, Stay Time: 75, Suitability: {'Solo': 60, 'Couple': 70, 'Friends': 75, 'Family with kids': 80, 'Business traveler / free time': 50}
<class 'int'> <class 'dict'>


In [13]:
import json
from psycopg2.extras import Json

In [14]:
# Update stay_time and travel_type in the database from the JSON records.
# Counters: updated = a row was changed, skipped = record has no id or no row
# with that id exists, error = the UPDATE failed.
updated_count = 0
skipped_count = 0
error_count = 0

for poi in data:
    poi_id = poi.get('id')
    stay_time = poi.get('stay_time')  # may be None
    travel_type = poi.get('suitability')  # may be None

    # Without an id the WHERE clause can never match; skip the no-op UPDATE.
    if not poi_id:
        skipped_count += 1
        continue

    # Json(None) would be sent as the JSON literal null; pass a plain None so
    # a missing suitability is stored as SQL NULL instead.
    travel_type_param = Json(travel_type) if travel_type is not None else None

    # A failed statement aborts the whole transaction in PostgreSQL. The
    # savepoint confines the failure to this record so later ones still apply.
    cur.execute("SAVEPOINT poi_travel_type")
    try:
        cur.execute(
            '''UPDATE "PoiClean"
               SET stay_time = %s,
                   travel_type = %s
               WHERE id = %s''',
            (stay_time, travel_type_param, poi_id)
        )
        matched_rows = cur.rowcount  # read before RELEASE overwrites it
        cur.execute("RELEASE SAVEPOINT poi_travel_type")
    except psycopg2.Error as e:
        cur.execute("ROLLBACK TO SAVEPOINT poi_travel_type")
        error_count += 1
        print(f"Error updating POI {poi_id}: {e}")
        continue

    if matched_rows:
        updated_count += 1
    else:
        skipped_count += 1

    # Print progress every 100 records
    if (updated_count + skipped_count) % 100 == 0:
        print(f"Processed: {updated_count + skipped_count} records "
              f"(Updated: {updated_count}, Skipped: {skipped_count})")

# Commit
conn.commit()

print("\n‚úì Update complete!")
print(f"Total Updated: {updated_count}")
print(f"Total Skipped: {skipped_count}")
print(f"Total Errors: {error_count}")

Processed: 100 records (Updated: 100, Skipped: 0)
Processed: 200 records (Updated: 200, Skipped: 0)
Processed: 300 records (Updated: 300, Skipped: 0)
Processed: 400 records (Updated: 400, Skipped: 0)
Processed: 500 records (Updated: 500, Skipped: 0)
Processed: 600 records (Updated: 600, Skipped: 0)
Processed: 700 records (Updated: 700, Skipped: 0)
Processed: 800 records (Updated: 800, Skipped: 0)
Processed: 900 records (Updated: 900, Skipped: 0)
Processed: 1000 records (Updated: 1000, Skipped: 0)
Processed: 1100 records (Updated: 1100, Skipped: 0)
Processed: 1200 records (Updated: 1200, Skipped: 0)
Processed: 1300 records (Updated: 1300, Skipped: 0)
Processed: 1400 records (Updated: 1400, Skipped: 0)

‚úì Update complete!
Total Updated: 1454
Total Skipped: 0
Total Errors: 0


In [15]:
# Preview five rows. LIMIT keeps the server from sending the whole table
# just to print a handful of records.
cur.execute('SELECT * FROM "PoiClean" LIMIT 5')
rows = cur.fetchall()
for poi in rows:
    print(poi)

RealDictRow([('id', '96451ce8-09ec-4b42-89ea-82534456e189'), ('created_at', datetime.datetime(2026, 1, 7, 9, 28, 3, 360928)), ('updatedAt', datetime.datetime(2026, 1, 7, 9, 51, 40, 111841)), ('deletedAt', None), ('name', 'Bar tocata'), ('lat', 36.721288), ('address', 'C. Duque de la Victoria, 6, Distrito Centro, 29015 M√°laga, Spain'), ('poi_type', 'Disco club,Bar,Night club'), ('total_reviews', 1321), ('lon', -4.4198423), ('geom', '0101000020E6100000E1E8CF23EBAD11C0A67F492A535C4240'), ('stay_time', 120.0), ('avg_stars', 4.0), ('normalize_stars_reviews', 0.556), ('open_hours', [{'day': 'Tuesday', 'hours': [{'start': '22:00', 'end': '03:00'}]}, {'day': 'Wednesday', 'hours': [{'start': '22:00', 'end': '03:00'}]}, {'day': 'Thursday', 'hours': [{'start': '22:00', 'end': '03:00'}]}, {'day': 'Friday', 'hours': [{'start': '20:00', 'end': '04:00'}]}, {'day': 'Saturday', 'hours': [{'start': '18:00', 'end': '04:00'}]}, {'day': 'Sunday', 'hours': [{'start': '22:00', 'end': '03:00'}]}]), ('poi_typ