In [1]:
import re
import os
import sys
import csv
import json
import argparse
import psycopg2
import pandas as pd
from psycopg2 import sql
from datetime import datetime
from dotenv import load_dotenv
from psycopg2.extras import RealDictCursor
load_dotenv()

True

# Connect database

In [6]:
DB_HOST = os.getenv("DB_HOST", "localhost")
DB_PORT = int(os.getenv("DB_PORT", "5432"))
DB_NAME = os.getenv("DB_NAME", "postgres")
DB_USER = os.getenv("DB_USER", "")
DB_PASSWORD = os.getenv("DB_PASSWORD", "")
CONNECT_TIMEOUT = int(os.getenv("DB_CONNECT_TIMEOUT", "10"))

In [7]:
def get_db_connection():
    """
    Trả về psycopg2 connection sử dụng biến môi trường từ .env.
    """
    conn_kwargs = {
        "host": DB_HOST,
        "port": DB_PORT,
        "dbname": DB_NAME,
        "user": DB_USER,
        "password": DB_PASSWORD,
        "connect_timeout": CONNECT_TIMEOUT,
    }
    return psycopg2.connect(cursor_factory=RealDictCursor, **conn_kwargs)

def test_connection(sql_query: str = "SELECT now() AS now"):
    """
    Thực thi 1 câu SQL test và in kết quả.
    """
    conn = None
    try:
        conn = get_db_connection()
        with conn.cursor() as cur:
            cur.execute(sql_query)
            rows = cur.fetchall()
            print(f"✓ Query executed: {sql_query}")
            for row in rows:
                print(row)
    except Exception as e:
        print(f"DB error: {e}")
    finally:
        if conn:
            conn.close()

def list_tables():
    """Liệt kê các bảng hiện có (bỏ schema hệ thống)."""
    sql_query = """
    SELECT table_schema, table_name
    FROM information_schema.tables
    WHERE table_type='BASE TABLE'
      AND table_schema NOT IN ('pg_catalog','information_schema')
    ORDER BY table_schema, table_name;
    """
    test_connection(sql_query)

def show_table_info(table_name: str, schema: str = "public", do_count: bool = False):
    """
    In thông tin bảng:
      - columns + types
      - primary key
      - indexes
      - approx size on disk
      - optional exact row count (can be slow)
    """
    conn = None
    try:
        conn = get_db_connection()
        with conn.cursor() as cur:
            # Columns + types
            cur.execute(
                """
                SELECT column_name, data_type, is_nullable, character_maximum_length
                FROM information_schema.columns
                WHERE table_schema = %s AND table_name = %s
                ORDER BY ordinal_position;
                """,
                (schema, table_name),
            )
            cols = cur.fetchall()
            if not cols:
                print(f"Table '{schema}.{table_name}' not found or has no columns.")
                return
            print(f"\nColumns for {schema}.{table_name}:")
            for c in cols:
                print(f"  - {c['column_name']}: {c['data_type']} nullable={c['is_nullable']} max_len={c['character_maximum_length']}")

            # Primary key
            cur.execute(
                """
                SELECT kcu.column_name
                FROM information_schema.table_constraints tc
                JOIN information_schema.key_column_usage kcu
                  ON tc.constraint_name = kcu.constraint_name
                 AND tc.table_schema = kcu.table_schema
                WHERE tc.table_schema = %s
                  AND tc.table_name = %s
                  AND tc.constraint_type = 'PRIMARY KEY'
                ORDER BY kcu.ordinal_position;
                """,
                (schema, table_name),
            )
            pk = [r["column_name"] for r in cur.fetchall()]
            print(f"\nPrimary key: {pk or 'NONE'}")

            # Indexes (pg_indexes)
            cur.execute(
                """
                SELECT indexname, indexdef
                FROM pg_indexes
                WHERE schemaname = %s AND tablename = %s
                ORDER BY indexname;
                """,
                (schema, table_name),
            )
            idxs = cur.fetchall()
            print(f"\nIndexes ({len(idxs)}):")
            for i in idxs:
                print(f"  - {i['indexname']}: {i['indexdef']}")

            # Size on disk (human)
            cur.execute(
                """
                SELECT
                  pg_size_pretty(pg_total_relation_size(quote_ident(%s) || '.' || quote_ident(%s))) AS total_size,
                  pg_size_pretty(pg_relation_size(quote_ident(%s) || '.' || quote_ident(%s))) AS table_size
                """,
                (schema, table_name, schema, table_name),
            )
            size_info = cur.fetchone()
            print(f"\nSize: total={size_info['total_size']} table={size_info['table_size']}")

            # Approx row estimate from pg_class
            cur.execute(
                """
                SELECT reltuples::BIGINT AS estimate_rows
                FROM pg_class c
                JOIN pg_namespace n ON n.oid = c.relnamespace
                WHERE n.nspname = %s AND c.relname = %s;
                """,
                (schema, table_name),
            )
            est = cur.fetchone()
            print(f"Estimated rows (pg_class.reltuples): {est['estimate_rows'] if est else 'N/A'}")

            # Optional exact count (use sql module to safely format identifiers)
            if do_count:
                print("\nComputing exact COUNT(*) (may be slow)...")
                q = sql.SQL("SELECT count(*) AS exact_count FROM {}.{}").format(
                    sql.Identifier(schema), sql.Identifier(table_name)
                )
                cur.execute(q)
                cnt = cur.fetchone()
                print(f"Exact rows: {cnt['exact_count']}")
    except Exception as e:
        print(f"DB error: {e}")

In [8]:
list_tables()

✓ Query executed: 
    SELECT table_schema, table_name
    FROM information_schema.tables
    WHERE table_type='BASE TABLE'
      AND table_schema NOT IN ('pg_catalog','information_schema')
    ORDER BY table_schema, table_name;
    
RealDictRow([('table_schema', 'public'), ('table_name', 'Action')])
RealDictRow([('table_schema', 'public'), ('table_name', 'Admin')])
RealDictRow([('table_schema', 'public'), ('table_name', 'Answer')])
RealDictRow([('table_schema', 'public'), ('table_name', 'AnswerTranslate')])
RealDictRow([('table_schema', 'public'), ('table_name', 'Attachment')])
RealDictRow([('table_schema', 'public'), ('table_name', 'AttachmentReference')])
RealDictRow([('table_schema', 'public'), ('table_name', 'BiometricDevice')])
RealDictRow([('table_schema', 'public'), ('table_name', 'Category')])
RealDictRow([('table_schema', 'public'), ('table_name', 'CategoryOpenTime')])
RealDictRow([('table_schema', 'public'), ('table_name', 'CategoryOpenTimeTranslate')])
RealDictRow([('table_

In [9]:
show_table_info("Poi")


Columns for public.Poi:
  - id: uuid nullable=NO max_len=None
  - created_at: timestamp without time zone nullable=NO max_len=None
  - updatedAt: timestamp without time zone nullable=NO max_len=None
  - deletedAt: timestamp without time zone nullable=YES max_len=None
  - cityId: uuid nullable=NO max_len=None
  - source: character varying nullable=NO max_len=None
  - content: text nullable=NO max_len=None
  - raw_data: jsonb nullable=YES max_len=None
  - metadata: jsonb nullable=NO max_len=None
  - placeId: character varying nullable=YES max_len=None
  - audioUrl: character varying nullable=YES max_len=None
  - location: USER-DEFINED nullable=YES max_len=None

Primary key: NONE

Indexes (1):
  - PK_101b759c3c5200a5e040a6874a0: CREATE UNIQUE INDEX "PK_101b759c3c5200a5e040a6874a0" ON public."Poi" USING btree (id)

Size: total=18 MB table=2424 kB
Estimated rows (pg_class.reltuples): 1430


In [10]:
show_table_info("UserItinerary")



Columns for public.UserItinerary:
  - id: uuid nullable=NO max_len=None
  - created_at: timestamp without time zone nullable=NO max_len=None
  - updatedAt: timestamp without time zone nullable=NO max_len=None
  - deletedAt: timestamp without time zone nullable=YES max_len=None
  - userId: uuid nullable=NO max_len=None
  - title: character varying nullable=YES max_len=None
  - hour: integer nullable=NO max_len=None
  - cityId: uuid nullable=NO max_len=None
  - status: character varying nullable=NO max_len=None
  - finish_at: timestamp without time zone nullable=YES max_len=None
  - review_id: uuid nullable=YES max_len=None

Primary key: NONE

Indexes (2):
  - PK_ca70f5430a6f26bcdc08c2f4d10: CREATE UNIQUE INDEX "PK_ca70f5430a6f26bcdc08c2f4d10" ON public."UserItinerary" USING btree (id)
  - UQ_31dd318df1ba4c3cce74fdb9fdc: CREATE UNIQUE INDEX "UQ_31dd318df1ba4c3cce74fdb9fdc" ON public."UserItinerary" USING btree (review_id)

Size: total=560 kB table=216 kB
Estimated rows (pg_class.reltupl

In [11]:
show_table_info("UserItineraryPoi")


Columns for public.UserItineraryPoi:
  - id: uuid nullable=NO max_len=None
  - created_at: timestamp without time zone nullable=NO max_len=None
  - updatedAt: timestamp without time zone nullable=NO max_len=None
  - deletedAt: timestamp without time zone nullable=YES max_len=None
  - user_itinerary_id: uuid nullable=NO max_len=None
  - poi_id: uuid nullable=NO max_len=None
  - status: character varying nullable=NO max_len=None
  - start_time: character varying nullable=YES max_len=20
  - end_time: character varying nullable=YES max_len=20
  - duration: integer nullable=YES max_len=None

Primary key: NONE

Indexes (1):
  - PK_1599883d5bc895ed21840c8a5fb: CREATE UNIQUE INDEX "PK_1599883d5bc895ed21840c8a5fb" ON public."UserItineraryPoi" USING btree (id)

Size: total=736 kB table=528 kB
Estimated rows (pg_class.reltuples): 4155


In [14]:
# Mở connection
conn = get_db_connection()
cur = conn.cursor()

In [15]:
# 7ee50a22-cd1a-4d5a-bb06-fcc7ced43eb7
cur.execute('SELECT * FROM "UserItinerary"')
rows = cur.fetchall()
a = []
for poi in rows:
    if poi.get("review_id") is not None:
        # print(poi.get("review_id"))
        a.append(poi.get("review_id"))
        # print(poi.get("review_id"))
print(len(a))

245


In [17]:
itinerary_id = "816d05bf-5b65-49d2-9087-77c4c83be655"

cur.execute(
    'SELECT id FROM "UserItinerary" WHERE "userId" = %s',
    (itinerary_id,)
)

row = cur.fetchone()
print(row)


RealDictRow([('id', '1916e657-ed24-4fe8-a9b6-b780225842cf')])


In [11]:
# 7ee50a22-cd1a-4d5a-bb06-fcc7ced43eb7
cur.execute('SELECT * FROM "UserItineraryPoi"')
rows = cur.fetchall()
# poi = []
for poi in rows[:5]:
    print(poi)

RealDictRow([('id', 'b7d2dd5e-a271-4a8e-879c-5850d263d044'), ('created_at', datetime.datetime(2025, 10, 28, 9, 31, 45, 957429)), ('updatedAt', datetime.datetime(2025, 10, 28, 9, 31, 45, 957429)), ('deletedAt', None), ('user_itinerary_id', '6cfa9f0c-1fd8-44f0-ab43-994bb1ff0826'), ('poi_id', '11702469-0605-40ea-9089-7502d88ec25d'), ('status', 'pending'), ('start_time', None), ('end_time', None), ('duration', None)])
RealDictRow([('id', 'c50318f8-99a5-4bc4-9771-dcac3707efba'), ('created_at', datetime.datetime(2025, 10, 28, 9, 31, 45, 957429)), ('updatedAt', datetime.datetime(2025, 10, 28, 9, 31, 45, 957429)), ('deletedAt', None), ('user_itinerary_id', '6cfa9f0c-1fd8-44f0-ab43-994bb1ff0826'), ('poi_id', '06b5946e-1574-4a05-8cad-516a13855855'), ('status', 'pending'), ('start_time', None), ('end_time', None), ('duration', None)])
RealDictRow([('id', 'e99e582c-8e83-4be2-a35c-3c2c96719465'), ('created_at', datetime.datetime(2025, 10, 28, 10, 23, 11, 499855)), ('updatedAt', datetime.datetime(20

nhập userid để truy xuất trong UserItinerary -> lấy id trong UserItinerary -> truy vấn tới UserItineraryPoi -> lấy ra poi_id