In [5]:
from sqlalchemy import (
    create_engine,
    Column,
    Integer,
    String,
    Float,
    ForeignKey,
    TIMESTAMP,
    text,
    func,
    DateTime,
    TEXT,
    Text
)
from sqlalchemy.orm import declarative_base, relationship, sessionmaker
from sqlalchemy.orm import relationship, declarative_base
from sqlalchemy.schema import CreateSchema
from sqlalchemy import inspect
from sqlalchemy.dialects.postgresql import ARRAY

In [6]:
# Update with your database credentials
DATABASE_URL = "postgresql+psycopg2://postgres:postgrespassword@localhost:5432/ai_db"
engine = create_engine(DATABASE_URL, echo=True)
Base = declarative_base()

In [7]:
# Schema for multi-modal embeddings
schema_name = "multi_modal_rag"

# Source Dataset Table
class Source(Base):
    __tablename__ = "sources"
    __table_args__ = {"schema": schema_name}

    id = Column(Integer, primary_key=True, autoincrement=True)
    source_name = Column(String, nullable=False)
    source_type = Column(String, nullable=False)  # e.g., "text", "image", "audio", "video"
    file_path = Column(String, nullable=True)
    description = Column(Text, nullable=True)
    created_at = Column(TIMESTAMP, server_default=func.now(), nullable=False)

    embeddings = relationship(
        "Embedding",
        back_populates="source",
        cascade="all, delete-orphan",
        passive_deletes=True
    )

# Embedding Table
class Embedding(Base):
    __tablename__ = "embeddings"
    __table_args__ = {"schema": schema_name}

    id = Column(Integer, primary_key=True, autoincrement=True)
    source_id = Column(
        Integer,
        ForeignKey(f"{schema_name}.sources.id", ondelete="CASCADE"),
        nullable=False
    )
    modality = Column(String, nullable=False)  # "text", "image", "audio"
    embedding_vector = Column(ARRAY(Float), nullable=False)  # vector embeddings
    embedding_metadata = Column(Text, nullable=True)  # renamed from 'metadata'
    created_at = Column(TIMESTAMP, server_default=func.now(), nullable=False)

    source = relationship("Source", back_populates="embeddings")

# Optional: Text-specific metadata
class TextMetadata(Base):
    __tablename__ = "text_metadata"
    __table_args__ = {"schema": schema_name}

    id = Column(Integer, primary_key=True, autoincrement=True)
    source_id = Column(
        Integer,
        ForeignKey(f"{schema_name}.sources.id", ondelete="CASCADE"),
        nullable=False
    )
    language = Column(String, nullable=True)
    word_count = Column(Integer, nullable=True)
    summary = Column(Text, nullable=True)
    created_at = Column(TIMESTAMP, server_default=func.now(), nullable=False)

# Optional: Image-specific metadata
class ImageMetadata(Base):
    __tablename__ = "image_metadata"
    __table_args__ = {"schema": schema_name}

    id = Column(Integer, primary_key=True, autoincrement=True)
    source_id = Column(
        Integer,
        ForeignKey(f"{schema_name}.sources.id", ondelete="CASCADE"),
        nullable=False
    )
    width = Column(Integer, nullable=True)
    height = Column(Integer, nullable=True)
    format = Column(String, nullable=True)
    created_at = Column(TIMESTAMP, server_default=func.now(), nullable=False)

# Optional: Audio-specific metadata
class AudioMetadata(Base):
    __tablename__ = "audio_metadata"
    __table_args__ = {"schema": schema_name}

    id = Column(Integer, primary_key=True, autoincrement=True)
    source_id = Column(
        Integer,
        ForeignKey(f"{schema_name}.sources.id", ondelete="CASCADE"),
        nullable=False
    )
    duration_seconds = Column(Float, nullable=True)
    sample_rate = Column(Integer, nullable=True)
    channels = Column(Integer, nullable=True)
    format = Column(String, nullable=True)
    created_at = Column(TIMESTAMP, server_default=func.now(), nullable=False)

# Create schema and tables
with engine.connect() as conn:
    conn.execute(text(f"CREATE SCHEMA IF NOT EXISTS {schema_name}"))
    conn.commit()

Base.metadata.create_all(engine)

2026-02-18 15:44:24,479 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2026-02-18 15:44:24,479 INFO sqlalchemy.engine.Engine [raw sql] {}
2026-02-18 15:44:24,483 INFO sqlalchemy.engine.Engine select current_schema()
2026-02-18 15:44:24,484 INFO sqlalchemy.engine.Engine [raw sql] {}
2026-02-18 15:44:24,487 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2026-02-18 15:44:24,488 INFO sqlalchemy.engine.Engine [raw sql] {}
2026-02-18 15:44:24,490 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2026-02-18 15:44:24,491 INFO sqlalchemy.engine.Engine CREATE SCHEMA IF NOT EXISTS multi_modal_rag
2026-02-18 15:44:24,492 INFO sqlalchemy.engine.Engine [generated in 0.00117s] {}
2026-02-18 15:44:24,495 INFO sqlalchemy.engine.Engine COMMIT
2026-02-18 15:44:24,519 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2026-02-18 15:44:24,522 INFO sqlalchemy.engine.Engine SELECT pg_catalog.pg_class.relname 
FROM pg_catalog.pg_class JOIN pg_catalog.pg_namespace ON pg_catalog.pg_namespace

In [8]:
Session = sessionmaker(bind=engine, expire_on_commit=False)

with Session() as session:
    # Example: adding a text source and embedding
    text_source = Source(source_name="Sample Text", source_type="text", file_path=None)
    session.add(text_source)
    session.commit()

    # Add embedding
    text_embedding = Embedding(
        source_id=text_source.id,
        modality="text",
        embedding_vector=[0.12, 0.34, 0.56],
        embedding_metadata='{"token_count": 10, "language": "en"}'  # updated name
    )
    session.add(text_embedding)
    session.commit()

2026-02-18 15:44:47,613 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2026-02-18 15:44:47,617 INFO sqlalchemy.engine.Engine INSERT INTO multi_modal_rag.sources (source_name, source_type, file_path, description) VALUES (%(source_name)s, %(source_type)s, %(file_path)s, %(description)s) RETURNING multi_modal_rag.sources.id, multi_modal_rag.sources.created_at
2026-02-18 15:44:47,619 INFO sqlalchemy.engine.Engine [generated in 0.00160s] {'source_name': 'Sample Text', 'source_type': 'text', 'file_path': None, 'description': None}
2026-02-18 15:44:47,632 INFO sqlalchemy.engine.Engine COMMIT
2026-02-18 15:44:47,637 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2026-02-18 15:44:47,639 INFO sqlalchemy.engine.Engine INSERT INTO multi_modal_rag.embeddings (source_id, modality, embedding_vector, embedding_metadata) VALUES (%(source_id)s, %(modality)s, %(embedding_vector)s::FLOAT[], %(embedding_metadata)s) RETURNING multi_modal_rag.embeddings.id, multi_modal_rag.embeddings.created_at
2026-02-18 15