 ## SQLModel Notebook

In [None]:
# Standard library
import os
import uuid
from datetime import datetime
from uuid import uuid4

# Third-party
from dotenv import load_dotenv
from sqlalchemy.dialects.postgresql import JSONB, UUID
from sqlalchemy.schema import CreateSchema
from sqlmodel import SQLModel, Field, create_engine, Session, select, text, Column, DateTime

# Load variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# The connection string comes from the environment so credentials stay out of the notebook.
DATABASE_URL = os.getenv("DATABASE_URL")
if not DATABASE_URL:
    # Fail here with an actionable message instead of letting create_engine() reject None.
    raise RuntimeError(
        "DATABASE_URL is not set; define it in the environment or in a .env file."
    )

# echo=True makes the engine log every SQL statement it emits (see the cell outputs below).
engine = create_engine(DATABASE_URL, echo=True)

 ## getting all schemas

In [None]:
# Show every schema the connected database reports, one result row per line.
schema_listing_query = text("SELECT schema_name FROM information_schema.schemata;")
with Session(engine) as session:
    for schema_row in session.exec(schema_listing_query):
        print(schema_row)

2025-11-06 09:46:26,631 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2025-11-06 09:46:26,632 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-11-06 09:46:26,632 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-11-06 09:46:26,820 INFO sqlalchemy.engine.Engine select current_schema()
2025-11-06 09:46:26,822 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-11-06 09:46:26,997 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2025-11-06 09:46:26,999 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-11-06 09:46:27,439 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:27,441 INFO sqlalchemy.engine.Engine SELECT schema_name FROM information_schema.schemata;
2025-11-06 09:46:27,444 INFO sqlalchemy.engine.Engine [generated in 0.00280s] {}
('information_schema',)
('pg_catalog',)
('pg_toast',)
('public',)
('poc',)
2025-11-06 09:46:27,620 INFO sqlalchemy.engine.Engine ROLLBACK


 ## getting all tables

In [None]:
# Lists the tables registered in SQLModel's in-memory metadata. This never queries
# the database, so no Session is needed (the original opened one and left it unused).
for table_key in SQLModel.metadata.tables:
    print(table_key)

 ## create schema

In [None]:
# Name of the database schema that the models in this notebook are placed in.
schema_name = "poc"

# CREATE SCHEMA IF NOT EXISTS <schema_name>, committed explicitly on this connection.
create_schema_ddl = CreateSchema(schema_name, if_not_exists=True)
with engine.connect() as connection:
    connection.execute(create_schema_ddl)
    connection.commit()

# Create the tables of any models already registered on the shared metadata.
SQLModel.metadata.create_all(bind=engine)

2025-11-06 09:46:27,765 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:27,767 INFO sqlalchemy.engine.Engine CREATE SCHEMA IF NOT EXISTS poc
2025-11-06 09:46:27,768 INFO sqlalchemy.engine.Engine [no key 0.00316s] {}
2025-11-06 09:46:27,767 INFO sqlalchemy.engine.Engine CREATE SCHEMA IF NOT EXISTS poc
2025-11-06 09:46:27,768 INFO sqlalchemy.engine.Engine [no key 0.00316s] {}
2025-11-06 09:46:27,956 INFO sqlalchemy.engine.Engine COMMIT
2025-11-06 09:46:28,055 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:28,057 INFO sqlalchemy.engine.Engine COMMIT


 ## getting all schemas

In [None]:
# Re-list the database schemas after the CREATE SCHEMA step.
schema_listing_query = text("SELECT schema_name FROM information_schema.schemata;")
with Session(engine) as session:
    for schema_row in session.exec(schema_listing_query):
        print(schema_row)

2025-11-06 09:46:28,092 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:28,094 INFO sqlalchemy.engine.Engine SELECT schema_name FROM information_schema.schemata;
2025-11-06 09:46:28,095 INFO sqlalchemy.engine.Engine [cached since 0.6547s ago] {}
2025-11-06 09:46:28,094 INFO sqlalchemy.engine.Engine SELECT schema_name FROM information_schema.schemata;
2025-11-06 09:46:28,095 INFO sqlalchemy.engine.Engine [cached since 0.6547s ago] {}
('information_schema',)
('pg_catalog',)
('pg_toast',)
('public',)
('poc',)
2025-11-06 09:46:28,271 INFO sqlalchemy.engine.Engine ROLLBACK


In [None]:
# DDL (Data Definition Language): model for the "states" table.
class State(SQLModel, table=True):
    """One row of the ``states`` table, placed in the schema named by ``schema_name``.

    Attributes:
        state_id: UUID primary key; generated with ``uuid4`` when not supplied.
        canonical_schema: Required JSONB document.
        created_at: Timezone-aware timestamp; the database fills it with ``now()``
            on insert when no value is given.
        updated_at: Timezone-aware timestamp; the database fills it with ``now()``
            on insert, and ORM-issued UPDATE statements set it to ``now()`` again.
    """

    __tablename__ = "states"
    __table_args__ = (
        {"schema": schema_name},
    )
    state_id: uuid.UUID = Field(
        default_factory=uuid4,
        sa_column=Column(
            "state_id",
            UUID(as_uuid=True),
            primary_key=True,
            nullable=False,
            comment="Primary key for the states table"
        )
    )
    canonical_schema: dict = Field(
        sa_column=Column(
            "canonical_schema",
            JSONB,
            nullable=False,
            comment="A JSONB column to store the canonical schema"
        )
    )
    created_at: datetime | None = Field(
        default=None,
        sa_column=Column(
            "created_at",
            DateTime(timezone=True),
            nullable=False,
            server_default=text("now()"),
            comment="Insertion timestamp."
        )
    )
    updated_at: datetime | None = Field(
        default=None,
        sa_column=Column(
            "updated_at",
            DateTime(timezone=True),
            nullable=False,
            server_default=text("now()"),
            # Without onupdate the column keeps its insert-time value forever.
            # onupdate is applied by SQLAlchemy in UPDATE statements and does not
            # change the generated CREATE TABLE DDL.
            onupdate=text("now()"),
            comment="Update timestamp."
        )
    )

 ## create tables

In [None]:
# Create the table for every model registered on SQLModel.metadata. The echoed log
# shows an existence lookup in pg_catalog for each table before any DDL is issued.
SQLModel.metadata.create_all(bind=engine)

2025-11-06 09:46:28,414 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:28,417 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s::VARCHAR AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s::VARCHAR, %(param_2)s::VARCHAR, %(param_3)s::VARCHAR, %(param_4)s::VARCHAR, %(param_5)s::VARCHAR]) AND pg_catalog.pg_namespace.nspname = %(nspname_1)s::VARCHAR
2025-11-06 09:46:28,417 INFO sqlalchemy.engine.Engine [generated in 0.00051s] {'table_name': 'states', 'param_1': 'r', 'param_2': 'p', 'param_3': 'f', 'param_4': 'v', 'param_5': 'm', 'nspname_1': 'poc'}
2025-11-06 09:46:28,417 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.r

 ## getting all schemas

In [None]:
# List the database schemas again now that the tables have been created.
schema_listing_query = text("SELECT schema_name FROM information_schema.schemata;")
with Session(engine) as session:
    for schema_row in session.exec(schema_listing_query):
        print(schema_row)

2025-11-06 09:46:29,166 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:29,167 INFO sqlalchemy.engine.Engine SELECT schema_name FROM information_schema.schemata;
2025-11-06 09:46:29,167 INFO sqlalchemy.engine.Engine [cached since 1.727s ago] {}
2025-11-06 09:46:29,167 INFO sqlalchemy.engine.Engine SELECT schema_name FROM information_schema.schemata;
2025-11-06 09:46:29,167 INFO sqlalchemy.engine.Engine [cached since 1.727s ago] {}
('information_schema',)
('pg_catalog',)
('pg_toast',)
('public',)
('poc',)
2025-11-06 09:46:29,341 INFO sqlalchemy.engine.Engine ROLLBACK


 ## getting all tables

In [None]:
# Lists the tables registered in SQLModel's in-memory metadata. This never queries
# the database, so no Session is needed (the original opened one and left it unused).
for table_key in SQLModel.metadata.tables:
    print(table_key)

poc.states


 ## rollback - drop tables

In [None]:
# Drop only the states table in the database; the schema itself is left in place.
states_table = State.__table__
SQLModel.metadata.drop_all(engine, tables=[states_table])

# Also remove the table from SQLModel's in-memory metadata (this does not touch the
# database). The lookup key comes from the table object rather than a hard-coded
# "poc.states", so it stays correct if schema_name changes.
if states_table.key in SQLModel.metadata.tables:
    SQLModel.metadata.remove(states_table)
print(list(SQLModel.metadata.tables.keys()))

2025-11-06 09:46:29,465 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:29,467 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s::VARCHAR AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s::VARCHAR, %(param_2)s::VARCHAR, %(param_3)s::VARCHAR, %(param_4)s::VARCHAR, %(param_5)s::VARCHAR]) AND pg_catalog.pg_namespace.nspname = %(nspname_1)s::VARCHAR
2025-11-06 09:46:29,467 INFO sqlalchemy.engine.Engine [cached since 1.05s ago] {'table_name': 'states', 'param_1': 'r', 'param_2': 'p', 'param_3': 'f', 'param_4': 'v', 'param_5': 'm', 'nspname_1': 'poc'}
2025-11-06 09:46:29,467 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.

 ## verify drop tables

In [None]:
# Probe the dropped table. The SELECT is expected to fail, and the database error
# is printed rather than raised so the notebook keeps running.
dropped_table_probe = text("SELECT * FROM poc.states;")
with Session(engine) as session:
    try:
        session.exec(dropped_table_probe)
    except Exception as error:
        print(error)

2025-11-06 09:46:29,849 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:29,850 INFO sqlalchemy.engine.Engine SELECT * FROM poc.states;
2025-11-06 09:46:29,852 INFO sqlalchemy.engine.Engine [generated in 0.00212s] {}
2025-11-06 09:46:29,850 INFO sqlalchemy.engine.Engine SELECT * FROM poc.states;
2025-11-06 09:46:29,852 INFO sqlalchemy.engine.Engine [generated in 0.00212s] {}
(psycopg.errors.UndefinedTable) relation "poc.states" does not exist
LINE 1: SELECT * FROM poc.states;
                      ^
[SQL: SELECT * FROM poc.states;]
(Background on this error at: https://sqlalche.me/e/20/f405)
2025-11-06 09:46:30,026 INFO sqlalchemy.engine.Engine ROLLBACK


 ## getting all schemas

In [None]:
# List the database schemas after the table drop (only the table was dropped).
schema_listing_query = text("SELECT schema_name FROM information_schema.schemata;")
with Session(engine) as session:
    for schema_row in session.exec(schema_listing_query):
        print(schema_row)

2025-11-06 09:46:30,129 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:30,131 INFO sqlalchemy.engine.Engine SELECT schema_name FROM information_schema.schemata;
2025-11-06 09:46:30,133 INFO sqlalchemy.engine.Engine [cached since 2.692s ago] {}
2025-11-06 09:46:30,131 INFO sqlalchemy.engine.Engine SELECT schema_name FROM information_schema.schemata;
2025-11-06 09:46:30,133 INFO sqlalchemy.engine.Engine [cached since 2.692s ago] {}
('information_schema',)
('pg_catalog',)
('pg_toast',)
('public',)
('poc',)
2025-11-06 09:46:30,312 INFO sqlalchemy.engine.Engine ROLLBACK


 ## getting all tables

In [None]:
# Lists the tables registered in SQLModel's in-memory metadata. This never queries
# the database, so no Session is needed (the original opened one and left it unused).
for table_key in SQLModel.metadata.tables:
    print(table_key)

 # adding records
 ## create models

In [None]:
class State(SQLModel, table=True):
    """One row of the ``states`` table, placed in the schema named by ``schema_name``.

    Attributes:
        state_id: UUID primary key; generated with ``uuid4`` when not supplied.
        canonical_schema: Required JSONB document.
        created_at: Timezone-aware timestamp; the database fills it with ``now()``
            on insert when no value is given.
        updated_at: Timezone-aware timestamp; the database fills it with ``now()``
            on insert, and ORM-issued UPDATE statements set it to ``now()`` again.
    """

    __tablename__ = "states"
    __table_args__ = (
        {"schema": schema_name},
    )
    state_id: uuid.UUID = Field(
        default_factory=uuid4,
        sa_column=Column(
            "state_id",
            UUID(as_uuid=True),
            primary_key=True,
            nullable=False,
            comment="Primary key for the states table"
        )
    )
    canonical_schema: dict = Field(
        sa_column=Column(
            "canonical_schema",
            JSONB,
            nullable=False,
            comment="A JSONB column to store the canonical schema"
        )
    )
    created_at: datetime | None = Field(
        default=None,
        sa_column=Column(
            "created_at",
            DateTime(timezone=True),
            nullable=False,
            server_default=text("now()"),
            comment="Insertion timestamp."
        )
    )
    updated_at: datetime | None = Field(
        default=None,
        sa_column=Column(
            "updated_at",
            DateTime(timezone=True),
            nullable=False,
            server_default=text("now()"),
            # Without onupdate the column keeps its insert-time value forever.
            # onupdate is applied by SQLAlchemy in UPDATE statements and does not
            # change the generated CREATE TABLE DDL.
            onupdate=text("now()"),
            comment="Update timestamp."
        )
    )

  DeclarativeMeta.__init__(cls, classname, bases, dict_, **kw)


 ## recreate table

In [None]:
# Recreate the table for the State model that was just declared again. The echoed
# log shows an existence lookup in pg_catalog for each table before any DDL is issued.
SQLModel.metadata.create_all(bind=engine)

2025-11-06 09:46:30,474 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:30,475 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class.relname = %(table_name)s::VARCHAR AND pg_catalog.pg_class.relkind = ANY (ARRAY[%(param_1)s::VARCHAR, %(param_2)s::VARCHAR, %(param_3)s::VARCHAR, %(param_4)s::VARCHAR, %(param_5)s::VARCHAR]) AND pg_catalog.pg_namespace.nspname = %(nspname_1)s::VARCHAR
2025-11-06 09:46:30,476 INFO sqlalchemy.engine.Engine [cached since 2.059s ago] {'table_name': 'states', 'param_1': 'r', 'param_2': 'p', 'param_3': 'f', 'param_4': 'v', 'param_5': 'm', 'nspname_1': 'poc'}
2025-11-06 09:46:30,475 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace.oid = pg_catalog.pg_class.relnamespace 
WHERE pg_catalog.pg_class

 ## getting all tables

In [None]:
# Lists the tables registered in SQLModel's in-memory metadata. This never queries
# the database, so no Session is needed (the original opened one and left it unused).
for table_key in SQLModel.metadata.tables:
    print(table_key)

poc.states


 ## inserting records

In [None]:
# Sample payload for an invoice document.
invoice_payload = {
    "trace_id": "fabpqz0l-7g2h-11ee-be56-0242ac120002",
    "doc_id": "afsds-dsafs-fsdf-fs",
    "workflow_id": "afsds-dsafs-fsdf-fs_wf_sdsf",
    "tenant_id": "tt",
    "step_id": "S01",
    "extracted_data": {
        "schema_name": "invoice_canonical_schema",
        "document_type": "invoice",
        "identifiers": {"bol_number": "", "pro_number": "", "scac": ""},
        "parties": {"shipper": {}, "consignee": {}, "bill_to": {}},
        "line_items": [
            {
                "description": "",
                "quantity": 0,
                "weight": 0.0,
                "class": "",
                "nmfc": "",
            },
        ],
    },
}

# Sample payload for a bill-of-lading document; it also carries a "shipment" section.
bill_of_lading_payload = {
    "trace_id": "fabpqz0l-7g2h-11ee-be56-0242ac120002",
    "doc_id": "afsds-dsafs-fsdf-fssdf",
    "workflow_id": "afsds-dsafs-fsdfdf-fs_wf_sdsf",
    "tenant_id": "tt",
    "step_id": "S03",
    "extracted_data": {
        "schema_name": "",
        "document_type": "bill_of_lading",
        "identifiers": {"bol_number": "", "pro_number": "", "scac": ""},
        "parties": {"shipper": {}, "consignee": {}, "bill_to": {}},
        "shipment": {
            "origin": {},
            "destination": {},
            "pickup_date": None,
            "delivery_date": None,
            "freight_terms": "",
        },
        "line_items": [
            {
                "description": "",
                "quantity": 0,
                "weight": 0.0,
                "class": "",
                "nmfc": "",
            },
        ],
    },
}

# Only canonical_schema is supplied; the other fields fall back to the model's defaults.
state_1 = State(canonical_schema=invoice_payload)
state_2 = State(canonical_schema=bill_of_lading_payload)

# Persist both rows in a single transaction.
with Session(engine) as session:
    session.add_all([state_1, state_2])
    session.commit()

2025-11-06 09:46:31,261 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:31,263 INFO sqlalchemy.engine.Engine INSERT INTO poc.states (state_id, canonical_schema) VALUES (%(state_id__0)s::UUID, %(canonical_schema__0)s::JSONB), (%(state_id__1)s::UUID, %(canonical_schema__1)s::JSONB) RETURNING poc.states.created_at, poc.states.updated_at, poc.states.state_id
2025-11-06 09:46:31,264 INFO sqlalchemy.engine.Engine [generated in 0.00009s (insertmanyvalues) 1/1 (ordered)] {'canonical_schema__0': Jsonb({'trace_id': 'fabpqz0l-7g2h-11ee-be ... (476 chars)), 'state_id__0': UUID('72c5c9a9-ec78-4799-a6fa-1433357cce0b'), 'canonical_schema__1': Jsonb({'trace_id': 'fabpqz0l-7g2h-11ee-be ... (576 chars)), 'state_id__1': UUID('31a1dcb9-b6bf-4a62-9997-c134fe8c9d11')}
2025-11-06 09:46:31,263 INFO sqlalchemy.engine.Engine INSERT INTO poc.states (state_id, canonical_schema) VALUES (%(state_id__0)s::UUID, %(canonical_schema__0)s::JSONB), (%(state_id__1)s::UUID, %(canonical_schema__1)s::JSONB) R

 ## querying records

In [None]:
# Fetch every row of the states table as State objects and print each one.
# `states` and `state` remain bound at notebook scope after this cell runs.
with Session(engine) as session:
    all_states_query = select(State)
    states = session.exec(all_states_query).all()
    for state in states:
        print(state)

2025-11-06 09:46:31,565 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-11-06 09:46:31,567 INFO sqlalchemy.engine.Engine SELECT poc.states.state_id, poc.states.canonical_schema, poc.states.created_at, poc.states.updated_at 
FROM poc.states
2025-11-06 09:46:31,568 INFO sqlalchemy.engine.Engine [generated in 0.00100s] {}
2025-11-06 09:46:31,567 INFO sqlalchemy.engine.Engine SELECT poc.states.state_id, poc.states.canonical_schema, poc.states.created_at, poc.states.updated_at 
FROM poc.states
2025-11-06 09:46:31,568 INFO sqlalchemy.engine.Engine [generated in 0.00100s] {}
updated_at=datetime.datetime(2025, 11, 6, 14, 46, 31, 267801, tzinfo=datetime.timezone.utc) state_id=UUID('72c5c9a9-ec78-4799-a6fa-1433357cce0b') canonical_schema={'doc_id': 'afsds-dsafs-fsdf-fs', 'step_id': 'S01', 'trace_id': 'fabpqz0l-7g2h-11ee-be56-0242ac120002', 'tenant_id': 'tt', 'workflow_id': 'afsds-dsafs-fsdf-fs_wf_sdsf', 'extracted_data': {'parties': {'bill_to': {}, 'shipper': {}, 'consignee': {}}, 'line_item

In [None]:
# Show the JSONB payload of the last row fetched by the query cell above.
# Indexing `states` explicitly avoids depending on the leftover loop variable `state`.
states[-1].canonical_schema

{'doc_id': 'afsds-dsafs-fsdf-fssdf',
 'step_id': 'S03',
 'trace_id': 'fabpqz0l-7g2h-11ee-be56-0242ac120002',
 'tenant_id': 'tt',
 'workflow_id': 'afsds-dsafs-fsdfdf-fs_wf_sdsf',
 'extracted_data': {'parties': {'bill_to': {}, 'shipper': {}, 'consignee': {}},
  'shipment': {'origin': {},
   'destination': {},
   'pickup_date': None,
   'delivery_date': None,
   'freight_terms': ''},
  'line_items': [{'nmfc': '',
    'class': '',
    'weight': 0.0,
    'quantity': 0,
    'description': ''}],
  'identifiers': {'scac': '', 'bol_number': '', 'pro_number': ''},
  'schema_name': '',
  'document_type': 'bill_of_lading'}}