In [30]:
from sqlalchemy import (
    Column, Integer, String, Text, ForeignKey, DateTime, Boolean, Enum, Index, Table, UniqueConstraint, JSON
)
from sqlalchemy.orm import relationship, declarative_base, Session
from sqlalchemy import create_engine

from sqlalchemy.orm import sessionmaker
from sqlalchemy.sql import func
from datetime import datetime
from sqlalchemy.dialects.postgresql import JSONB
from dotenv import load_dotenv
import os
import boto3
import enum

# Step 0. Imports & Base

In [None]:
# from sqlalchemy import create_engine, text

# # load the .env file
# load_dotenv()

# # get database url
# DATABASE_URL = os.getenv("DATABASE_URL")

# engine = create_engine(DATABASE_URL, echo=True)

# with engine.connect() as conn:
#     result = conn.execute(text("SELECT version();"))
#     print("‚úÖ Connected to:", result.scalar())

2025-09-21 14:00:55,055 INFO sqlalchemy.engine.Engine select pg_catalog.version()
2025-09-21 14:00:55,055 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-09-21 14:00:55,056 INFO sqlalchemy.engine.Engine select current_schema()
2025-09-21 14:00:55,056 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-09-21 14:00:55,057 INFO sqlalchemy.engine.Engine show standard_conforming_strings
2025-09-21 14:00:55,057 INFO sqlalchemy.engine.Engine [raw sql] {}
2025-09-21 14:00:55,058 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-09-21 14:00:55,058 INFO sqlalchemy.engine.Engine SELECT version();
2025-09-21 14:00:55,058 INFO sqlalchemy.engine.Engine [generated in 0.00052s] {}
‚úÖ Connected to: PostgreSQL 14.18 (Homebrew) on aarch64-apple-darwin23.6.0, compiled by Apple clang version 16.0.0 (clang-1600.0.26.6), 64-bit
2025-09-21 14:00:55,059 INFO sqlalchemy.engine.Engine ROLLBACK


### CAUTION: SQLAlchemy‚Äôs in-memory registry.
When you re-run your model definitions (class Project(Base) etc.), SQLAlchemy thinks you‚Äôre trying to define the same table again in the same Python session.

Fixes:

* Restart the kernel (clean slate).

* Or run Base.metadata.clear() before re-defining models.

* Or (not recommended for production, but useful in notebooks) add:    

`__table_args__ = {"extend_existing": True}`    

inside each model.    

**Potential problems of 3rd option-- Silent overwrites**    

If you redefine a model with different column definitions, SQLAlchemy will happily overwrite the in-memory Python mapping.    

But the database table itself is unchanged ‚Äî unless you drop/recreate or run a migration.    

This can cause a mismatch: your Python code thinks a column exists (or has a new type), but the real Postgres table does not.    

### My own suggestion:    
#### During prototyping stage:         
* use `__table_args__ = {"extend_existing": True}` for each model;
    
#### When your schema stabilizes and you‚Äôre preparing for AWS deployment:    
1. Move your models into models.py (or a models/ package).

        Delete __table_args__ = {"extend_existing": True} from each model.
        Define Base = declarative_base() once at the top.

2. Add Alembic to manage schema evolution:

    `pip install alembic`    

    `alembic init migrations`    

* Configure alembic.ini with your DATABASE_URL.   

* In env.py, set target_metadata = Base.metadata.

3. Whenever you change a model:

    `alembic revision --autogenerate -m "describe change"`
   
    `alembic upgrade head`

This will safely apply only the changes needed, without dropping your tables.

In [3]:
# create base class
Base = declarative_base() 

#---
# It creates a registry (Base.metadata) that will hold all the tables you define.
# Every time you define a model (class Project(Base): ...), that model‚Äôs table gets registered into Base.metadata.tables.
#---

In [None]:
# Base.metadata.clear()

# Step 1. Enums

In [None]:
# --------------------------
# Enums
# --------------------------

## 1.1 ProjectStatus
class ProjectStatus(enum.Enum):
    draft = "draft"                  # project created, requirements being defined
    ready_for_annotation = "ready_for_annotation"  # files uploaded, jobs not started
    in_progress = "in_progress"      # annotation jobs are running
    completed = "completed"          # all jobs done
    archived = "archived"            # project closed, read-only

## 1.2 FileStatus (file lifecycle)
class FileStatus(enum.Enum):
    pending = "pending"
    ready_for_annotation = "ready_for_annotation"
    in_progress = "in_progress"
    completed = "completed"
    archived = "archived"

## 1.3 FileType
class FileType(enum.Enum):
    dataset = "dataset"
    requirement = "requirement"
    report = "annotation_results"
    llm_output = "llm_output"
# Does our PM also needs to upload sliced file results? (NO currently)

## 1.4 UserRole
class UserRole(enum.Enum):
    org_admin = "org_admin"       # customer admin
    org_pm = "org_pm"             # customer project manager
    our_pm = "our_pm"             # our company PM that manages annotation jobs & assigns annotators
    annotator = "annotator"       # our company annotator
    qc = "qc"                     # our company QC for annotation results review

## 1.5 AnnotationJobStatus (job lifecycle)
class AnnotationJobStatus(enum.Enum):
    not_started = "not_started"
    in_progress = "in_progress"
    submitted = "submitted"
    reviewed = "reviewed"

## 1.6 ReviewStatus
class ReviewStatus(enum.Enum):
    pending = "pending"
    approved = "approved"
    rejected = "rejected"

## 1.7 EntityType
class EntityType(enum.Enum):
    project = "project"
    file = "file"
    file_version = "file_version"
    annotation_job = "annotation_job"

## 1.8 EventType
class EventType(enum.Enum):
    uploaded = "uploaded"
    reuploaded = "reuploaded"
    annotation_started = "annotation_started"
    annotation_completed = "annotation_completed"
    reviewed = "reviewed"
    deleted = "deleted"
    status_changed = "status_changed"

## 1.9 AssignmentRole
class AssignmentRole(enum.Enum):
    annotator = "annotator"
    reviewer = "reviewer"
    qc = "qc"   # quality control / audit

## 1.10 Language (for AnnotationJob)
class Language(enum.Enum):
    en = "en"   # English
    zh = "zh"   # Chinese
    fr = "fr"   # French
    de = "de"   # German
    es = "es"   # Spanish
    ar = "ar"   # Arabic

## 1.11 Priority (for AnnotationJob)
class JobPriority(enum.Enum):
    low = "low"
    medium = "medium"
    high = "high"

# Step 2. Association Tables 

In [None]:
# --------------------------
# Association Tables
# --------------------------

# 2.1 User <-> Role
user_roles = Table(
    "user_roles",
    Base.metadata,
    Column("user_id", Integer, ForeignKey("user.user_id", ondelete="CASCADE"), primary_key=True),
    Column("role_id", Integer, ForeignKey("role.role_id", ondelete="CASCADE"), primary_key=True)
)

# 2.2 Role <-> Permission
role_permissions = Table(
    "role_permissions",
    Base.metadata,
    Column("role_id", Integer, ForeignKey("role.role_id", ondelete="CASCADE"), primary_key=True),
    Column("permission_id", Integer, ForeignKey("permission.permission_id", ondelete="CASCADE"), primary_key=True)
)


# One ExportLog may include multiple versions.
# One FileVersion may appear in multiple exports
class ExportedFile(Base):
    __tablename__ = "exported_file"
    __table_args__ = {"extend_existing": True}

    export_id = Column(Integer, ForeignKey("export_log.export_id", ondelete="CASCADE"), primary_key=True)
    file_version_id = Column(Integer, ForeignKey("file_version.version_id", ondelete="CASCADE"), primary_key=True)
    included_at = Column(DateTime, default=func.now())

    # Relationships
    export = relationship("ExportLog", back_populates="exported_files")
    file_version = relationship("FileVersion", back_populates="exported_files")

# This table records which annotators have worked on this job before 
# (for feedback loops / reassignment tracking)
job_previous_annotators = Table(
    "job_previous_annotators",
    Base.metadata,
    Column("job_id", Integer, ForeignKey("annotation_job.job_id", ondelete="CASCADE"), primary_key=True),
    Column("user_id", Integer, ForeignKey("user.user_id", ondelete="CASCADE"), primary_key=True),
    Column("assigned_at", DateTime, default=func.now())
)



InvalidRequestError: Table 'user_roles' is already defined for this MetaData instance.  Specify 'extend_existing=True' to redefine options and columns on an existing Table object.

# Step 3: CORE TABLES

## 3.1 Project Table

In [34]:
# -----------------------------
# Core Tables
# -----------------------------
    
# Project Table
class Project(Base):
    __tablename__ = "project"
    __table_args__ = (
    UniqueConstraint("org_id", "name", name="uq_org_project_name"), # no two projects can share same name in one comp
    Index("ix_project_status", "status"), # speeds up dashboards like ‚Äúshow me all in-progress projects‚Äù.
    Index("ix_project_is_active", "is_active"), # speeds up ‚Äúonly show active projects‚Äù.
    Index("ix_project_client_pm_id", "client_pm_id"), # useful if query ‚Äúall projects started by this PM‚Äù.
    Index("ix_project_org_id", "org_id"), # useful if query ‚Äúall projects for this org‚Äù.
    {"extend_existing": True} # delete
    )

    project_id = Column(Integer, primary_key=True, autoincrement=True)
    org_id = Column(Integer, ForeignKey("organization.org_id"), nullable=False)
    name = Column(String, nullable=False)
    description = Column(Text, nullable=True) # longer desp than name

    # plain text instructions
    requirements_text = Column(Text, nullable=True)
    # optional uploaded doc (PDF, Word, PPT, etc.)
    # requirements_file_id = Column(Integer, ForeignKey("file.file_id"), nullable=True)

    # project status enum
    status = Column(Enum(ProjectStatus, name="project_status_enum"), default=ProjectStatus.draft)
    
    is_active = Column(Boolean, default=True, nullable=False)

    date_created = Column(DateTime, default=func.now(), nullable=False)
    date_updated = Column(DateTime, default=func.now(), onupdate=func.now(), nullable=False)
    completed_at = Column(DateTime, nullable=True)
    deleted_at = Column(DateTime, nullable=True) # when customer delete the project

    # --- PM links ---
    client_pm_id = Column(Integer, ForeignKey("user.user_id"), nullable=False) # client PM
    our_pm_id = Column(Integer, ForeignKey("user.user_id"), nullable=True) # our PM

    # --- Relationships ---
    files = relationship("File", back_populates="project")             # all files
    # convenience: only requirement files
    requirement_files = relationship(
        "File",
        primaryjoin="and_(Project.project_id==File.project_id, File.file_type=='requirement')",
        viewonly=True
    ) # only get files that accords with reqs
    jobs = relationship("AnnotationJob", back_populates="project")     # all jobs
    events = relationship("EventLog", back_populates="project")        # all events
    organization = relationship("Organization", back_populates="projects")
    client_pm = relationship("User", foreign_keys=[client_pm_id], back_populates="client_projects")
    our_pm = relationship("User", foreign_keys=[our_pm_id], back_populates="managed_projects")
    exports = relationship("ExportLog", back_populates="project")


  class Project(Base):


## 3.2 File Table 
(currently only create one for all kinds of files' storage)

check the logic:
1. files that clients uploaded, our PM needs to be able to view it so that they can further assign tasks;     
2. files that clients uploaded, needs to be able to transport to LLM model(maybe it's sth in backend?)     
3. annotator also needs relevant access, because for each task they needs the correspond raw file for annotation   
4. quality checker also need them to check the work done by the annotator.     
5. clients needs to be able to view the files they had uploaded and see the progress     
6. organizations also needs to be able to view all the projects as well as all the files their company uploaded and created.    
7. for the llm generated file, the annotator need to have access to it for corresponding task;     
8. for annotated file uploaded by annotators, they needs to be able to send to quality checker;     
9. the files that's approved by quality check needs to be able to back to customers     
10. the organization needs to have access to all the final annotated files     
11. the client PM needs to have access to the final annotated files that they uploaded     
12. during uploading, each project allow customer to upload multiple files with different versions, only by clicking sth like "confirmation" will one project be created, and when project created, all files will be in pending status 
13. raw files can be deleted by customers before the task is processing (can be deleted during pending status)     
14. one project only related to one client, but one client can relate to many projects     
15. one file only related to one projects, but only projects can have multiple files,     
16. when deleting a project, all the files under this project will be inactive,    
17. both project manager in client company and the corresponding organization can have the access to delete the project(but client can only delete the project they created)    
18. further link with file size and format

In [None]:
# --------------------------
# File Table
# --------------------------
class File(Base):
    __tablename__ = "file"
    __table_args__ = (
    UniqueConstraint("project_id", "name", name="uq_project_file_name"),
    Index("ix_file_project_id", "project_id"), # speeds up ‚Äúall files in project.‚Äù
    Index("ix_file_status", "status"), # speeds up ‚Äúall files ready for annotation.‚Äù
    Index("ix_file_type", "file_type"), # speeds up filtering datasets vs. requirements.
    {"extend_existing": True},
)


    file_id = Column(Integer, primary_key=True, autoincrement=True)
    # descriptive file name (user-facing)
    name = Column(String, nullable=False)
    description = Column(Text, nullable=True)
    uploaded_by = Column(Integer, ForeignKey("user.user_id"), nullable=False)


    # workflow state
    status = Column(
        Enum(FileStatus, name="file_status_enum"),
        default=FileStatus.pending,
        nullable=False
    )
    # what kind of file this is (dataset, requirement, annotation_results, llm_nl)
    file_type = Column(Enum(FileType, name="file_type_enum"), nullable=False, default=FileType.dataset)

    # audit timestamps
    date_created = Column(DateTime, default=func.now(), nullable=False)
    date_updated = Column(DateTime, default=func.now(), onupdate=func.now(), nullable=False)
    is_active = Column(Boolean, default=True, nullable=False)
    deleted_at = Column(DateTime, nullable=True)

    # validate uploads by storage
    size_bytes = Column(Integer, nullable=True)
    mime_type = Column(String, nullable=True) # technical format

    
    # --- PM links ---
    project_id = Column(Integer, ForeignKey("project.project_id"), nullable=False)
    # active version pointer
    active_version_id = Column(Integer, ForeignKey("file_version.version_id"), nullable=True)


    # --- Relationships ---
    uploader = relationship("User", back_populates="uploaded_files")
    project = relationship("Project", back_populates="files")
    versions = relationship("FileVersion", back_populates="file", cascade="all, delete-orphan")
    annotation_jobs = relationship("AnnotationJob", back_populates="file") 
    events = relationship("EventLog", back_populates="file")
    active_version = relationship("FileVersion", foreign_keys=[active_version_id], uselist=False)

  class File(Base):


## 3.3 File Version Table

In [None]:
# --------------------------
# File Version Table
# --------------------------
class FileVersion(Base):
    __tablename__ = "file_version"
    __table_args__ = (
        Index("ix_fileversion_file_id", "file_id"),
        {"extend_existing": True},
    )

    version_id = Column(Integer, primary_key=True, autoincrement=True)

    # --- Parent link ---
    file_id = Column(Integer, ForeignKey("file.file_id", ondelete="CASCADE"), nullable=False)
    version_number = Column(Integer, nullable=False)  # 1, 2, 3‚Ä¶

    # --- Storage info ---
    storage_path = Column(String, nullable=False)   # MinIO/S3 key or path
    checksum = Column(String, nullable=True)        # for integrity validation
    size_bytes = Column(Integer, nullable=True)     # optional: store size at version-level
    mime_type = Column(String, nullable=True)       # optional: file format at version-level

    # --- Upload & provenance ---
    uploaded_by = Column(Integer, ForeignKey("user.user_id"), nullable=True)
    uploaded_at = Column(DateTime(timezone=True), server_default=func.now(), nullable=False)
    # --- Lifecycle flags ---
    is_active = Column(Boolean, default=True, nullable=False)

    source_file_version_id = Column(Integer, ForeignKey("file_version.version_id"), nullable=True)
    generation_method = Column(
        Enum("upload", "ocr", "llm", name="generation_method_enum"),
        default="upload",
        nullable=False
    )
    llm_model = Column(String, nullable=True)       # e.g., "gpt-4", "llama-3"
    llm_params = Column(JSON, nullable=True)        # parameters if generated by LLM


    # --- Relationships ---
    file = relationship("File", back_populates="versions")
    source_version = relationship("FileVersion", remote_side=[version_id])  # self-ref
    events = relationship("EventLog", back_populates="file_version")        # version-level logs
    exports = relationship(
    "ExportLog",
    secondary="exported_file",
    back_populates="file_versions"
    )


    # export_id = Column(Integer, ForeignKey("export_log.export_id"), nullable=True)
    exported_files = relationship(
        "ExportedFile",
        back_populates="file_version",
        cascade="all, delete-orphan"
    )


  class FileVersion(Base):


## 3.4 User Table

"user" is a generic account table that represents any actor in the system:

* Organization Admin (client company, oversees all projects).

* Organization PM (client company, uploads datasets + requirements).

* Our PM (your company, manages annotation jobs & assigns annotators).

* Annotators (our company, upload results).

* QC / Reviewers (our company, upload corrections).

therefore, "user" here basically means everyone who logs in and interacts with the system.

product manager(our company):

1. able to check the availability of the annotators and quality checkers
2. able to check for each file, who and how many annotators have annotated that
3. able to check the files uploaded by the client company‚Äôs PM or uploaded by the organization
4. have access to projects and further assign annotators to each project to conduct annotation job 
5. have access to reassign annotators for same project
6. have access to assign quality checker (one people can both be quality checker and annotator the same time, but for each project, if one person can either be quality checker or annotator)

client company‚Äôs PM:

1. able to upload files and create project
2. able to view the files/projects they uploaded/created before
3. not able to see the files/projects uploaded by others
4. able to view the status of the projects/files
5. able to resend files if the original project is pending(not in process)

organization:

1. able to upload files and create project
2. able to view all the files/projects their company‚Äôs PM uploaded/created before

LLM:

1. get the files uploaded by client company‚Äôs PM
2. generate natural language file according to the files

our annotators:

1. able to view the raw files and projects assigned to them that are uploaded by organization/ client company‚Äôs PM
2. able to upload their finished annotated files
3. able to get the natural language files generated by LLM

our quality checker:

1. able to view the raw files 
2. able to view the finished annotated files 
3. able to approve/ need modification of the annotated files and able to send back to annotators
4. able to write down feedbacks

In [None]:
# --------------------------
# File User Table
# --------------------------
class User(Base):
    __tablename__ = "user"
    __table_args__ = {"extend_existing": True}

    # --- Core fields ---
    user_id = Column(Integer, primary_key=True, autoincrement=True)
    email = Column(String, unique=True, nullable=False, index=True)   # üîπ added index for fast lookups
    role = Column(Enum(UserRole, name="user_role_enum"), nullable=False)
    
    org_id = Column(
        Integer,
        ForeignKey("organization.org_id", ondelete="SET NULL"),  # better: keep user even if org deleted
        nullable=True
    )

    availability = Column(JSON, nullable=True)

    # --- Relationships ---
    uploaded_files = relationship("File", back_populates="uploader")

    events = relationship("EventLog", back_populates="user")

    assignments = relationship(
        "Assignment",
        back_populates="user",
        cascade="all, delete-orphan"
    )

    roles = relationship(
        "Role",
        secondary=user_roles,
        back_populates="users"
    )

    # project links
    # Client-side PM <-> Project
    projects = relationship("Project", back_populates="client_pm")
    # make sure our PM has access to projects and further assign annotators
    managed_projects = relationship("Project", back_populates="our_pm")

    # Records which annotators have worked on this job before
    # (for feedback loops / reassignment tracking)
    previous_jobs = relationship(
        "AnnotationJob",
        secondary="job_previous_annotators",
        back_populates="previous_annotators"
    )

  class User(Base):


In [None]:
class User(Base):
    __tablename__ = "user"
    __table_args__ = {"extend_existing": True}

    # --- Core fields ---
    user_id = Column(Integer, primary_key=True, autoincrement=True)
    email = Column(String, unique=True, nullable=False, index=True)
    role = Column(Enum(UserRole, name="user_role_enum"), nullable=False)

    org_id = Column(
        Integer,
        ForeignKey("organization.org_id", ondelete="SET NULL"),
        nullable=True
    ) # better: keep user even if org deleted

    # --- Availability & Skills ---
    availability = Column(JSON, nullable=True)             # weekly availability
    language_expertise = Column(JSON, nullable=True)       # {"en": 4.5, "zh": 3.0}
    skill_score = Column(Float, nullable=True)             # overall skill score
    skill_level = Column(String, nullable=True)           
    qa_approval_rate = Column(Float, nullable=True)        # average QA pass rate
    completed_task_count = Column(Integer, default=0)      # total tasks completed

    # --- Relationships ---
    uploaded_files = relationship("File", back_populates="uploader")
    events = relationship("EventLog", back_populates="user")
    assignments = relationship("Assignment", back_populates="user", cascade="all, delete-orphan")

    roles = relationship("Role", secondary=user_roles, back_populates="users")

    # PM links
    client_projects = relationship("Project", back_populates="client_pm")
    # make sure our PM has access to projects and further assign annotators
    managed_projects = relationship("Project", back_populates="our_pm")

    # Historical job links
    # Records which annotators have worked on this job before
    # (for feedback loops / reassignment tracking)
    previous_jobs = relationship(
        "AnnotationJob",
        secondary="job_previous_annotators",
        back_populates="previous_annotators"
    )

    # --- Availability ---
    # Store weekly availability / working hours in JSON
    # Example:
    # {
    #   "monday": ["09:00-12:00", "13:00-17:00"],
    #   "tuesday": ["10:00-18:00"],
    #   "wednesday": []
    # }

## 3.5 Annotation Job Table

In [38]:
# --------------------------
# File Annotation Job Table
# --------------------------
class AnnotationJob(Base):
    __tablename__ = "annotation_job"
    __table_args__ = {"extend_existing": True}

    job_id = Column(Integer, primary_key=True, autoincrement=True)

    file_id = Column(Integer, ForeignKey("file.file_id", ondelete="CASCADE"), nullable=False)
    project_id = Column(Integer, ForeignKey("project.project_id", ondelete="CASCADE"), nullable=False)

    # New attributes
    language = Column(Enum(Language, name="annotation_job_language_enum"), nullable=True)
    priority = Column(Enum(JobPriority, name="job_priority_enum"), default=JobPriority.medium, nullable=False)

    status = Column(Enum(AnnotationJobStatus, name="annotation_job_status_enum"),
                    default=AnnotationJobStatus.not_started, nullable=False)

    review_status = Column(Enum(ReviewStatus, name="review_status_enum"),
                           default=ReviewStatus.pending, nullable=False)

    is_active = Column(Boolean, default=True, nullable=False)
    deleted_at = Column(DateTime, nullable=True)

    due_date = Column(DateTime, nullable=True)
    completed_at = Column(DateTime, nullable=True)

    created_at = Column(DateTime, default=func.now(), nullable=False)
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), nullable=False)

    # Relationships
    file = relationship("File", back_populates="annotation_jobs")
    project = relationship("Project", back_populates="annotation_jobs")

    reviews = relationship("Review", back_populates="job", cascade="all, delete-orphan")
    assignments = relationship("Assignment", back_populates="job", cascade="all, delete-orphan")

    # Historical annotators (M2M self join via Assignment/User)
    previous_annotators = relationship(
        "User",
        secondary="job_previous_annotators",
        back_populates="previous_jobs"
    )

  class AnnotationJob(Base):


## 3.6 Event Log Table

In [None]:
# Event Log Table
class EventLog(Base):
    __tablename__ = "event_log"
    __table_args__ = {"extend_existing": True}

    event_id = Column(Integer, primary_key=True, autoincrement=True)

    entity_type = Column(Enum(EntityType, name="entity_type_enum"), nullable=False)
    entity_id = Column(Integer, nullable=False)      # e.g. file_id

    event_type = Column(Enum(EventType, name="event_type_enum"), nullable=False)

    user_id = Column(Integer, ForeignKey("user.user_id"), nullable=True)
    event_time = Column(DateTime, default=func.now())

    user = relationship("User", back_populates="events")

    event_metadata = Column(JSONB, nullable=True)  # use JSONB for flexible key/value storage

    file_id = Column(Integer, ForeignKey("file.file_id"), nullable=True)
file = relationship("File", back_populates="events")

file_version_id = Column(Integer, ForeignKey("file_version.version_id"), nullable=True)
file_version = relationship("FileVersion", back_populates="events")


## 3.7 Review Table

In [12]:
# --------------------------
# Review Table
# --------------------------
class Review(Base):
    __tablename__ = "review"
    __table_args__ = {"extend_existing": True}

    review_id = Column(Integer, primary_key=True, autoincrement=True)

    # Link to the job being reviewed
    job_id = Column(Integer, ForeignKey("annotation_job.job_id"), nullable=False)

    # Reviewer (user with reviewer role)
    reviewer_id = Column(Integer, ForeignKey("user.user_id"), nullable=False)

    # Review decision
    status = Column(
        Enum(ReviewStatus, name="review_status_enum"),
        default=ReviewStatus.pending,
        nullable=False
    )

    # Optional comments from reviewer
    feedback = Column(Text, nullable=True)

    # Audit timestamps
    created_at = Column(DateTime, default=func.now(), nullable=False)
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), nullable=False)

    # --------------------------
    # Relationships
    # --------------------------
    job = relationship("AnnotationJob", back_populates="reviews")
    reviewer = relationship("User")

## 3.8 Assignment Table

In [13]:
# --------------------------
# Assignment Table
# --------------------------
class Assignment(Base):
    __tablename__ = "assignment"
    __table_args__ = {"extend_existing": True}

    assignment_id = Column(Integer, primary_key=True, autoincrement=True)

    # Link to the annotation job
    job_id = Column(Integer, ForeignKey("annotation_job.job_id", ondelete="CASCADE"), nullable=False)

    # Who is assigned
    user_id = Column(Integer, ForeignKey("user.user_id", ondelete="CASCADE"), nullable=False)

    # Role in this job (annotator, reviewer, qc)
    role = Column(Enum(AssignmentRole, name="assignment_role_enum"), nullable=False)

    # Status of this assignment (separate from job status)
    status = Column(String, default="assigned")  
    # e.g. assigned, accepted, in_progress, completed

    # Audit fields
    assigned_at = Column(DateTime, default=func.now(), nullable=False)
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), nullable=False)

    # Relationships
    job = relationship("AnnotationJob", back_populates="assignments")
    user = relationship("User", back_populates="assignments")
    

## 3.9 Role&Permission Table

In [14]:
# --------------------------
# Role Table
# --------------------------
class Role(Base):
    __tablename__ = "role"
    __table_args__ = {"extend_existing": True}

    role_id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, unique=True, nullable=False)   # e.g. "organization_admin", "pm", "annotator", "reviewer"

    # Relationships
    users = relationship("User", secondary=user_roles, back_populates="roles")
    permissions = relationship("Permission", secondary=role_permissions, back_populates="roles")

# --------------------------
# Permission Table
# --------------------------
class Permission(Base):
    __tablename__ = "permission"
    __table_args__ = {"extend_existing": True}

    permission_id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, unique=True, nullable=False)   # e.g. "upload_file", "assign_job", "review_annotation"

    # Relationships
    roles = relationship("Role", secondary=role_permissions, back_populates="permissions")

## 3.10 Organization Table

In [18]:
# ------------------------------
# Organization Table
# ------------------------------
class Organization(Base):
    __tablename__ = "organization"
    __table_args__ = {"extend_existing": True}

    org_id = Column(Integer, primary_key=True, autoincrement=True)
    name = Column(String, unique=True, nullable=False)   # e.g. "Acme Corp"
    description = Column(Text, nullable=True)            # optional, for notes
    date_created = Column(DateTime, default=func.now(), nullable=False)
    date_updated = Column(DateTime, default=func.now(), onupdate=func.now(), nullable=False)

    # Relationships
    users = relationship("User", back_populates="organization")
    projects = relationship("Project", back_populates="organization")

## 3.11 Export / Report tables
stores generated ZIPs or audit PDFs, those should also have an S3 pointer (storage_path).

cascade behavior:    
If an export log is deleted, should the join rows in exported_file also be deleted?    
Usually yes ‚Üí add cascade="all, delete-orphan" on ExportedFile if you model it as a class.    
If you keep exported_file as a raw Table, SQLAlchemy will handle cleanup when ExportLog is deleted.    


In [None]:
class ExportLog(Base):
    __tablename__ = "export_log"

    export_id = Column(Integer, primary_key=True, autoincrement=True)
    project_id = Column(Integer, ForeignKey("project.project_id"), nullable=False)
    requested_by = Column(Integer, ForeignKey("user.user_id"), nullable=False)

    # Where the final package (ZIP, PDF, TAR, etc.) lives in S3/MinIO
    storage_path = Column(String, nullable=False)

    # Optional metadata
    checksum = Column(String, nullable=True)
    #included_file_ids = Column(JSON, nullable=True)  # list of files packaged
    #included_versions = Column(JSON, nullable=True)  # if version-level tracking matters

    status = Column(Enum("pending", "completed", "failed", name="export_status_enum"), default="pending")

    date_requested = Column(DateTime, default=func.now(), nullable=False)
    date_completed = Column(DateTime, nullable=True)

    # Relationships
    project = relationship("Project", back_populates="exports")
    requested_user = relationship("User", foreign_keys=[requested_by])
    file_versions = relationship(
    "FileVersion",
    secondary="exported_file",    # uses the join table
    back_populates="exports"
    )
    exported_files = relationship(
        "ExportedFile",
        back_populates="export",
        cascade="all, delete-orphan"
    )
