From fa75414fb4ca56cfa025b80342935a7eb56ab18b Mon Sep 17 00:00:00 2001
From: AkhileshNegi <akhileshnegi.an3@gmail.com>
Date: Thu, 13 Nov 2025 14:08:07 +0530
Subject: [PATCH 1/8] add evaluation constraints migration and status indexes

---
 ...e69806207_evaluation_update_constraints.py | 89 +++++++++++++++++++
 backend/app/models/batch_job.py               | 87 +++++++++++++++---
 backend/app/models/evaluation.py              | 20 +++--
 3 files changed, 179 insertions(+), 17 deletions(-)
 create mode 100644 backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py

diff --git a/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py b/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
new file mode 100644
index 00000000..e90aadf5
--- /dev/null
+++ b/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
@@ -0,0 +1,89 @@
+"""evaluation update constraints
+
+Revision ID: 633e69806207
+Revises: 6fe772038a5a
+Create Date: 2025-11-13 11:36:16.484694
+
+"""
+from alembic import op
+
+# revision identifiers, used by Alembic.
+revision = "633e69806207"
+down_revision = "6fe772038a5a"
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # Remove SET NULL behavior from evaluation_run batch_job foreign keys
+    # This ensures evaluation runs fail if their batch job is deleted (maintain referential integrity)
+    op.drop_constraint(
+        "fk_evaluation_run_embedding_batch_job_id", "evaluation_run", type_="foreignkey"
+    )
+    op.drop_constraint(
+        "evaluation_run_batch_job_id_fkey", "evaluation_run", type_="foreignkey"
+    )
+    op.drop_constraint(
+        "openai_conversation_organization_id_fkey1",
+        "openai_conversation",
+        type_="foreignkey",
+    )
+    op.drop_constraint(
+        "openai_conversation_project_id_fkey1",
+        "openai_conversation",
+        type_="foreignkey",
+    )
+    op.create_foreign_key(
+        "evaluation_run_batch_job_id_fkey",
+        "evaluation_run",
+        "batch_job",
+        ["batch_job_id"],
+        ["id"],
+    )
+    op.create_foreign_key(
+        "fk_evaluation_run_embedding_batch_job_id",
+        "evaluation_run",
+        "batch_job",
+        ["embedding_batch_job_id"],
+        ["id"],
+    )
+
+
+def downgrade():
+    # Restore SET NULL behavior to evaluation_run batch_job foreign keys
+    op.drop_constraint(
+        "fk_evaluation_run_embedding_batch_job_id", "evaluation_run", type_="foreignkey"
+    )
+    op.drop_constraint(
+        "evaluation_run_batch_job_id_fkey", "evaluation_run", type_="foreignkey"
+    )
+    op.create_foreign_key(
+        "evaluation_run_batch_job_id_fkey",
+        "evaluation_run",
+        "batch_job",
+        ["batch_job_id"],
+        ["id"],
+        ondelete="SET NULL",
+    )
+    op.create_foreign_key(
+        "fk_evaluation_run_embedding_batch_job_id",
+        "evaluation_run",
+        "batch_job",
+        ["embedding_batch_job_id"],
+        ["id"],
+        ondelete="SET NULL",
+    )
+    op.create_foreign_key(
+        "openai_conversation_organization_id_fkey1",
+        "openai_conversation",
+        "organization",
+        ["organization_id"],
+        ["id"],
+    )
+    op.create_foreign_key(
+        "openai_conversation_project_id_fkey1",
+        "openai_conversation",
+        "project",
+        ["project_id"],
+        ["id"],
+    )
diff --git a/backend/app/models/batch_job.py b/backend/app/models/batch_job.py
index 3ef07f7f..c0355425 100644
--- a/backend/app/models/batch_job.py
+++ b/backend/app/models/batch_job.py
@@ -1,7 +1,8 @@
 from datetime import datetime
 from typing import TYPE_CHECKING, Any, Optional
 
-from sqlalchemy import Column
+import sqlalchemy as sa
+from sqlalchemy import Column, Index, Integer, String, Text
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlmodel import Field, Relationship, SQLModel
 
@@ -16,55 +17,117 @@ class BatchJob(SQLModel, table=True):
     """Batch job table for tracking async LLM batch operations."""
 
     __tablename__ = "batch_job"
+    __table_args__ = (
+        Index("idx_batch_job_status_org", "provider_status", "organization_id"),
+        Index("idx_batch_job_status_project", "provider_status", "project_id"),
+    )
 
     id: int | None = Field(default=None, primary_key=True)
 
     # Provider and job type
-    provider: str = Field(description="LLM provider name (e.g., 'openai', 'anthropic')")
+    provider: str = Field(
+        sa_column=Column(
+            String,
+            nullable=False,
+            comment="LLM provider name (e.g., 'openai', 'anthropic')",
+        ),
+        description="LLM provider name (e.g., 'openai', 'anthropic')",
+    )
     job_type: str = Field(
-        description="Type of batch job (e.g., 'evaluation', 'classification', 'embedding')"
+        sa_column=Column(
+            String,
+            nullable=False,
+            index=True,
+            comment="Type of batch job (e.g., 'evaluation', 'classification', 'embedding')",
+        ),
+        description="Type of batch job (e.g., 'evaluation', 'classification', 'embedding')",
     )
 
     # Batch configuration - stores all provider-specific config
     config: dict[str, Any] = Field(
         default_factory=dict,
-        sa_column=Column(JSONB()),
+        sa_column=Column(
+            JSONB(),
+            nullable=False,
+            comment="Complete batch configuration",
+        ),
         description="Complete batch configuration including model, temperature, instructions, tools, etc.",
     )
 
     # Provider-specific batch tracking
     provider_batch_id: str | None = Field(
-        default=None, description="Provider's batch job ID (e.g., OpenAI batch_id)"
+        default=None,
+        sa_column=Column(
+            String,
+            nullable=True,
+            comment="Provider's batch job ID",
+        ),
+        description="Provider's batch job ID (e.g., OpenAI batch_id)",
     )
     provider_file_id: str | None = Field(
-        default=None, description="Provider's input file ID"
+        default=None,
+        sa_column=Column(String, nullable=True, comment="Provider's input file ID"),
+        description="Provider's input file ID",
     )
     provider_output_file_id: str | None = Field(
-        default=None, description="Provider's output file ID"
+        default=None,
+        sa_column=Column(String, nullable=True, comment="Provider's output file ID"),
+        description="Provider's output file ID",
     )
 
     # Provider status tracking
     provider_status: str | None = Field(
         default=None,
+        sa_column=Column(
+            String,
+            nullable=True,
+            comment="Provider-specific status (e.g., OpenAI: validating, in_progress, completed, failed)",
+        ),
         description="Provider-specific status (e.g., OpenAI: validating, in_progress, finalizing, completed, failed, expired, cancelling, cancelled)",
     )
 
     # Raw results (before parent-specific processing)
     raw_output_url: str | None = Field(
-        default=None, description="S3 URL of raw batch output file"
+        default=None,
+        sa_column=Column(
+            String, nullable=True, comment="S3 URL of raw batch output file"
+        ),
+        description="S3 URL of raw batch output file",
     )
     total_items: int = Field(
-        default=0, description="Total number of items in the batch"
+        default=0,
+        sa_column=Column(
+            Integer,
+            nullable=False,
+            comment="Total number of items in the batch",
+        ),
+        description="Total number of items in the batch",
     )
 
     # Error handling
     error_message: str | None = Field(
-        default=None, description="Error message if batch failed"
+        default=None,
+        sa_column=Column(Text, nullable=True, comment="Error message if batch failed"),
+        description="Error message if batch failed",
     )
 
     # Foreign keys
-    organization_id: int = Field(foreign_key="organization.id")
-    project_id: int = Field(foreign_key="project.id")
+    organization_id: int = Field(
+        sa_column=Column(
+            Integer,
+            sa.ForeignKey("organization.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        )
+    )
+    project_id: int = Field(
+        sa_column=Column(
+            Integer,
+            sa.ForeignKey("project.id", ondelete="CASCADE"),
+            nullable=False,
+            index=True,
+        )
+    )
 
     # Timestamps
     inserted_at: datetime = Field(
diff --git a/backend/app/models/evaluation.py b/backend/app/models/evaluation.py
index 57a83d35..aa2b568c 100644
--- a/backend/app/models/evaluation.py
+++ b/backend/app/models/evaluation.py
@@ -2,7 +2,8 @@
 from typing import TYPE_CHECKING, Any, Optional
 
 from pydantic import BaseModel, Field
-from sqlalchemy import JSON, Column, Text, UniqueConstraint
+from sqlalchemy import JSON, Column, ForeignKey, Index, Integer, Text, UniqueConstraint
+from sqlalchemy.dialects.postgresql import JSONB
 from sqlmodel import Field as SQLField
 from sqlmodel import Relationship, SQLModel
 
@@ -90,10 +91,10 @@ class EvaluationDataset(SQLModel, table=True):
         default=None, description="Optional description of the dataset"
     )
 
-    # Dataset metadata stored as JSON
+    # Dataset metadata stored as JSONB
     dataset_metadata: dict[str, Any] = SQLField(
         default_factory=dict,
-        sa_column=Column(JSON),
+        sa_column=Column(JSONB, nullable=False),
         description=(
             "Dataset metadata (original_items_count, total_items_count, "
             "duplication_factor)"
@@ -132,6 +133,10 @@ class EvaluationRun(SQLModel, table=True):
     """Database table for evaluation runs."""
 
     __tablename__ = "evaluation_run"
+    __table_args__ = (
+        Index("idx_eval_run_status_org", "status", "organization_id"),
+        Index("idx_eval_run_status_project", "status", "project_id"),
+    )
 
     id: int = SQLField(default=None, primary_key=True)
 
@@ -142,7 +147,7 @@ class EvaluationRun(SQLModel, table=True):
     # Config field - dict requires sa_column
     config: dict[str, Any] = SQLField(
         default_factory=dict,
-        sa_column=Column(JSON),
+        sa_column=Column(JSON, nullable=False),
         description="Evaluation configuration",
     )
 
@@ -164,7 +169,12 @@ class EvaluationRun(SQLModel, table=True):
     )
     embedding_batch_job_id: int | None = SQLField(
         default=None,
-        foreign_key="batch_job.id",
+        sa_column=Column(
+            Integer,
+            ForeignKey("batch_job.id"),
+            nullable=True,
+            comment="Reference to the batch_job for embedding-based similarity scoring",
+        ),
         description="Reference to the batch_job for embedding-based similarity scoring",
     )
 

From 237227bc6385deb5562345063d0684e1f6ecc083 Mon Sep 17 00:00:00 2001
From: AkhileshNegi <akhileshnegi.an3@gmail.com>
Date: Thu, 13 Nov 2025 16:11:55 +0530
Subject: [PATCH 2/8] moving to JSONB

---
 ...e69806207_evaluation_update_constraints.py | 30 +++++++++++++++++++
 backend/app/models/batch_job.py               | 15 ++--------
 backend/app/models/evaluation.py              | 16 ++++------
 3 files changed, 38 insertions(+), 23 deletions(-)

diff --git a/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py b/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
index e90aadf5..c4569f22 100644
--- a/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
+++ b/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
@@ -6,6 +6,8 @@
 
 """
 from alembic import op
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
 revision = "633e69806207"
@@ -15,6 +17,20 @@
 
 
 def upgrade():
+    op.alter_column(
+        "evaluation_run",
+        "config",
+        existing_type=postgresql.JSON(astext_type=sa.Text()),
+        type_=postgresql.JSONB(astext_type=sa.Text()),
+        existing_nullable=False,
+    )
+    op.alter_column(
+        "evaluation_run",
+        "score",
+        existing_type=postgresql.JSON(astext_type=sa.Text()),
+        type_=postgresql.JSONB(astext_type=sa.Text()),
+        existing_nullable=True,
+    )
     # Remove SET NULL behavior from evaluation_run batch_job foreign keys
     # This ensures evaluation runs fail if their batch job is deleted (maintain referential integrity)
     op.drop_constraint(
@@ -50,6 +66,20 @@ def upgrade():
 
 
 def downgrade():
+    op.alter_column(
+        "evaluation_run",
+        "score",
+        existing_type=postgresql.JSONB(astext_type=sa.Text()),
+        type_=postgresql.JSON(astext_type=sa.Text()),
+        existing_nullable=True,
+    )
+    op.alter_column(
+        "evaluation_run",
+        "config",
+        existing_type=postgresql.JSONB(astext_type=sa.Text()),
+        type_=postgresql.JSON(astext_type=sa.Text()),
+        existing_nullable=False,
+    )
     # Restore SET NULL behavior to evaluation_run batch_job foreign keys
     op.drop_constraint(
         "fk_evaluation_run_embedding_batch_job_id", "evaluation_run", type_="foreignkey"
diff --git a/backend/app/models/batch_job.py b/backend/app/models/batch_job.py
index c0355425..47e8ad63 100644
--- a/backend/app/models/batch_job.py
+++ b/backend/app/models/batch_job.py
@@ -1,7 +1,6 @@
 from datetime import datetime
 from typing import TYPE_CHECKING, Any, Optional
 
-import sqlalchemy as sa
 from sqlalchemy import Column, Index, Integer, String, Text
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlmodel import Field, Relationship, SQLModel
@@ -113,20 +112,10 @@ class BatchJob(SQLModel, table=True):
 
     # Foreign keys
     organization_id: int = Field(
-        sa_column=Column(
-            Integer,
-            sa.ForeignKey("organization.id", ondelete="CASCADE"),
-            nullable=False,
-            index=True,
-        )
+        foreign_key="organization.id", nullable=False, ondelete="CASCADE", index=True
     )
     project_id: int = Field(
-        sa_column=Column(
-            Integer,
-            sa.ForeignKey("project.id", ondelete="CASCADE"),
-            nullable=False,
-            index=True,
-        )
+        foreign_key="project.id", nullable=False, ondelete="CASCADE", index=True
     )
 
     # Timestamps
diff --git a/backend/app/models/evaluation.py b/backend/app/models/evaluation.py
index aa2b568c..ae44b347 100644
--- a/backend/app/models/evaluation.py
+++ b/backend/app/models/evaluation.py
@@ -2,7 +2,7 @@
 from typing import TYPE_CHECKING, Any, Optional
 
 from pydantic import BaseModel, Field
-from sqlalchemy import JSON, Column, ForeignKey, Index, Integer, Text, UniqueConstraint
+from sqlalchemy import Column, Index, Text, UniqueConstraint
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlmodel import Field as SQLField
 from sqlmodel import Relationship, SQLModel
@@ -147,7 +147,7 @@ class EvaluationRun(SQLModel, table=True):
     # Config field - dict requires sa_column
     config: dict[str, Any] = SQLField(
         default_factory=dict,
-        sa_column=Column(JSON, nullable=False),
+        sa_column=Column(JSONB, nullable=False),
         description="Evaluation configuration",
     )
 
@@ -164,17 +164,13 @@ class EvaluationRun(SQLModel, table=True):
         default=None,
         foreign_key="batch_job.id",
         description=(
-            "Reference to the batch_job that processes this evaluation " "(responses)"
+            "Reference to the batch_job that processes this evaluation (responses)"
         ),
     )
     embedding_batch_job_id: int | None = SQLField(
         default=None,
-        sa_column=Column(
-            Integer,
-            ForeignKey("batch_job.id"),
-            nullable=True,
-            comment="Reference to the batch_job for embedding-based similarity scoring",
-        ),
+        foreign_key="batch_job.id",
+        nullable=True,
         description="Reference to the batch_job for embedding-based similarity scoring",
     )
 
@@ -194,7 +190,7 @@ class EvaluationRun(SQLModel, table=True):
     # Score field - dict requires sa_column
     score: dict[str, Any] | None = SQLField(
         default=None,
-        sa_column=Column(JSON, nullable=True),
+        sa_column=Column(JSONB, nullable=True),
         description="Evaluation scores (e.g., correctness, cosine_similarity, etc.)",
     )
 

From 9585979b285ae0ee67e3e198de2ebc567f2dc042 Mon Sep 17 00:00:00 2001
From: AkhileshNegi <akhileshnegi.an3@gmail.com>
Date: Thu, 13 Nov 2025 16:40:32 +0530
Subject: [PATCH 3/8] remove embedding_batch_job_id column comment from create migration

---
 .../versions/6fe772038a5a_create_evaluation_run_table.py   | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py b/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
index c9fd595a..2803f594 100644
--- a/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
+++ b/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
@@ -170,12 +170,7 @@ def upgrade():
         sa.Column("dataset_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
         sa.Column("config", sa.JSON(), nullable=False),
         sa.Column("batch_job_id", sa.Integer(), nullable=True),
-        sa.Column(
-            "embedding_batch_job_id",
-            sa.Integer(),
-            nullable=True,
-            comment="Reference to the batch_job for embedding-based similarity scoring",
-        ),
+        sa.Column("embedding_batch_job_id", sa.Integer(), nullable=True),
         sa.Column("dataset_id", sa.Integer(), nullable=False),
         sa.Column("status", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
         sa.Column(

From 284cac4b8c09c621ef5815aefd271cb87bc0771a Mon Sep 17 00:00:00 2001
From: AkhileshNegi <akhileshnegi.an3@gmail.com>
Date: Wed, 19 Nov 2025 11:05:03 +0530
Subject: [PATCH 4/8] formatting fixes

---
 ...e69806207_evaluation_update_constraints.py |  3 ++-
 ...fe772038a5a_create_evaluation_run_table.py |  6 ++---
 backend/app/models/batch_job.py               | 23 +++++++++++++++----
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py b/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
index c4569f22..5cecb19c 100644
--- a/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
+++ b/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
@@ -5,8 +5,9 @@
 Create Date: 2025-11-13 11:36:16.484694
 
 """
-from alembic import op
+
 import sqlalchemy as sa
+from alembic import op
 from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
diff --git a/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py b/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
index 2803f594..a873bd5e 100644
--- a/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
+++ b/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
@@ -5,11 +5,11 @@
 Create Date: 2025-11-05 22:47:18.266070
 
 """
-from alembic import op
+
 import sqlalchemy as sa
-from sqlalchemy.dialects import postgresql
 import sqlmodel.sql.sqltypes
-
+from alembic import op
+from sqlalchemy.dialects import postgresql
 
 # revision identifiers, used by Alembic.
 revision = "6fe772038a5a"
diff --git a/backend/app/models/batch_job.py b/backend/app/models/batch_job.py
index 47e8ad63..9b9d4942 100644
--- a/backend/app/models/batch_job.py
+++ b/backend/app/models/batch_job.py
@@ -37,9 +37,13 @@ class BatchJob(SQLModel, table=True):
             String,
             nullable=False,
             index=True,
-            comment="Type of batch job (e.g., 'evaluation', 'classification', 'embedding')",
+            comment=(
+                "Type of batch job (e.g., 'evaluation', 'classification', 'embedding')"
+            ),
+        ),
+        description=(
+            "Type of batch job (e.g., 'evaluation', 'classification', 'embedding')"
         ),
-        description="Type of batch job (e.g., 'evaluation', 'classification', 'embedding')",
     )
 
     # Batch configuration - stores all provider-specific config
@@ -50,7 +54,10 @@ class BatchJob(SQLModel, table=True):
             nullable=False,
             comment="Complete batch configuration",
         ),
-        description="Complete batch configuration including model, temperature, instructions, tools, etc.",
+        description=(
+            "Complete batch configuration including model, temperature, "
+            "instructions, tools, etc."
+        ),
     )
 
     # Provider-specific batch tracking
@@ -80,9 +87,15 @@ class BatchJob(SQLModel, table=True):
         sa_column=Column(
             String,
             nullable=True,
-            comment="Provider-specific status (e.g., OpenAI: validating, in_progress, completed, failed)",
+            comment=(
+                "Provider-specific status (e.g., OpenAI: validating, "
+                "in_progress, completed, failed)"
+            ),
+        ),
+        description=(
+            "Provider-specific status (e.g., OpenAI: validating, in_progress, "
+            "finalizing, completed, failed, expired, cancelling, cancelled)"
         ),
-        description="Provider-specific status (e.g., OpenAI: validating, in_progress, finalizing, completed, failed, expired, cancelling, cancelled)",
     )
 
     # Raw results (before parent-specific processing)

From ff3b4c1443d8145c7022ce4b8c90119c5a64ca10 Mon Sep 17 00:00:00 2001
From: AkhileshNegi <akhileshnegi.an3@gmail.com>
Date: Wed, 19 Nov 2025 11:20:58 +0530
Subject: [PATCH 5/8] remove redundant sa_column definitions from BatchJob

---
 backend/app/models/batch_job.py | 47 +++------------------------------
 1 file changed, 4 insertions(+), 43 deletions(-)

diff --git a/backend/app/models/batch_job.py b/backend/app/models/batch_job.py
index 9b9d4942..68b79762 100644
--- a/backend/app/models/batch_job.py
+++ b/backend/app/models/batch_job.py
@@ -1,7 +1,7 @@
 from datetime import datetime
 from typing import TYPE_CHECKING, Any, Optional
 
-from sqlalchemy import Column, Index, Integer, String, Text
+from sqlalchemy import Column, Index, Text
 from sqlalchemy.dialects.postgresql import JSONB
 from sqlmodel import Field, Relationship, SQLModel
 
@@ -25,22 +25,10 @@ class BatchJob(SQLModel, table=True):
 
     # Provider and job type
     provider: str = Field(
-        sa_column=Column(
-            String,
-            nullable=False,
-            comment="LLM provider name (e.g., 'openai', 'anthropic')",
-        ),
         description="LLM provider name (e.g., 'openai', 'anthropic')",
     )
     job_type: str = Field(
-        sa_column=Column(
-            String,
-            nullable=False,
-            index=True,
-            comment=(
-                "Type of batch job (e.g., 'evaluation', 'classification', 'embedding')"
-            ),
-        ),
+        index=True,
         description=(
             "Type of batch job (e.g., 'evaluation', 'classification', 'embedding')"
         ),
@@ -49,11 +37,7 @@ class BatchJob(SQLModel, table=True):
     # Batch configuration - stores all provider-specific config
     config: dict[str, Any] = Field(
         default_factory=dict,
-        sa_column=Column(
-            JSONB(),
-            nullable=False,
-            comment="Complete batch configuration",
-        ),
+        sa_column=Column(JSONB, nullable=False),
         description=(
             "Complete batch configuration including model, temperature, "
             "instructions, tools, etc."
@@ -63,35 +47,20 @@ class BatchJob(SQLModel, table=True):
     # Provider-specific batch tracking
     provider_batch_id: str | None = Field(
         default=None,
-        sa_column=Column(
-            String,
-            nullable=True,
-            comment="Provider's batch job ID",
-        ),
         description="Provider's batch job ID (e.g., OpenAI batch_id)",
     )
     provider_file_id: str | None = Field(
         default=None,
-        sa_column=Column(String, nullable=True, comment="Provider's input file ID"),
         description="Provider's input file ID",
     )
     provider_output_file_id: str | None = Field(
         default=None,
-        sa_column=Column(String, nullable=True, comment="Provider's output file ID"),
         description="Provider's output file ID",
     )
 
     # Provider status tracking
     provider_status: str | None = Field(
         default=None,
-        sa_column=Column(
-            String,
-            nullable=True,
-            comment=(
-                "Provider-specific status (e.g., OpenAI: validating, "
-                "in_progress, completed, failed)"
-            ),
-        ),
         description=(
             "Provider-specific status (e.g., OpenAI: validating, in_progress, "
             "finalizing, completed, failed, expired, cancelling, cancelled)"
@@ -101,25 +70,17 @@ class BatchJob(SQLModel, table=True):
     # Raw results (before parent-specific processing)
     raw_output_url: str | None = Field(
         default=None,
-        sa_column=Column(
-            String, nullable=True, comment="S3 URL of raw batch output file"
-        ),
         description="S3 URL of raw batch output file",
     )
     total_items: int = Field(
         default=0,
-        sa_column=Column(
-            Integer,
-            nullable=False,
-            comment="Total number of items in the batch",
-        ),
         description="Total number of items in the batch",
     )
 
     # Error handling
     error_message: str | None = Field(
         default=None,
-        sa_column=Column(Text, nullable=True, comment="Error message if batch failed"),
+        sa_column=Column(Text, nullable=True),
         description="Error message if batch failed",
     )
 

From d854bbe1114b1ae768850296ddbea98b7ccb65bf Mon Sep 17 00:00:00 2001
From: AkhileshNegi <akhileshnegi.an3@gmail.com>
Date: Wed, 19 Nov 2025 11:26:45 +0530
Subject: [PATCH 6/8] set ON DELETE SET NULL on evaluation_run batch_job foreign keys in upgrade

---
 .../versions/633e69806207_evaluation_update_constraints.py      | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py b/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
index 5cecb19c..7503c603 100644
--- a/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
+++ b/backend/app/alembic/versions/633e69806207_evaluation_update_constraints.py
@@ -56,6 +56,7 @@ def upgrade():
         "batch_job",
         ["batch_job_id"],
         ["id"],
+        ondelete="SET NULL",
     )
     op.create_foreign_key(
         "fk_evaluation_run_embedding_batch_job_id",
@@ -63,6 +64,7 @@ def upgrade():
         "batch_job",
         ["embedding_batch_job_id"],
         ["id"],
+        ondelete="SET NULL",
     )
 
 

From 7181054600b9924ee30c3ccf31b67772314522d8 Mon Sep 17 00:00:00 2001
From: AkhileshNegi <akhileshnegi.an3@gmail.com>
Date: Wed, 19 Nov 2025 11:48:29 +0530
Subject: [PATCH 7/8] cleanup comments

---
 ...fe772038a5a_create_evaluation_run_table.py | 62 +++----------------
 1 file changed, 9 insertions(+), 53 deletions(-)

diff --git a/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py b/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
index a873bd5e..d636000d 100644
--- a/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
+++ b/backend/app/alembic/versions/6fe772038a5a_create_evaluation_run_table.py
@@ -23,68 +23,26 @@ def upgrade():
     op.create_table(
         "batch_job",
         sa.Column("id", sa.Integer(), nullable=False),
-        sa.Column(
-            "provider",
-            sa.String(),
-            nullable=False,
-            comment="LLM provider name (e.g., 'openai', 'anthropic')",
-        ),
-        sa.Column(
-            "job_type",
-            sa.String(),
-            nullable=False,
-            comment="Type of batch job (e.g., 'evaluation', 'classification', 'embedding')",
-        ),
+        sa.Column("provider", sa.String(), nullable=False),
+        sa.Column("job_type", sa.String(), nullable=False),
         sa.Column(
             "config",
             postgresql.JSONB(astext_type=sa.Text()),
             nullable=False,
             server_default=sa.text("'{}'::jsonb"),
-            comment="Complete batch configuration",
-        ),
-        sa.Column(
-            "provider_batch_id",
-            sa.String(),
-            nullable=True,
-            comment="Provider's batch job ID",
-        ),
-        sa.Column(
-            "provider_file_id",
-            sa.String(),
-            nullable=True,
-            comment="Provider's input file ID",
-        ),
-        sa.Column(
-            "provider_output_file_id",
-            sa.String(),
-            nullable=True,
-            comment="Provider's output file ID",
-        ),
-        sa.Column(
-            "provider_status",
-            sa.String(),
-            nullable=True,
-            comment="Provider-specific status (e.g., OpenAI: validating, in_progress, completed, failed)",
-        ),
-        sa.Column(
-            "raw_output_url",
-            sa.String(),
-            nullable=True,
-            comment="S3 URL of raw batch output file",
         ),
+        sa.Column("provider_batch_id", sa.String(), nullable=True),
+        sa.Column("provider_file_id", sa.String(), nullable=True),
+        sa.Column("provider_output_file_id", sa.String(), nullable=True),
+        sa.Column("provider_status", sa.String(), nullable=True),
+        sa.Column("raw_output_url", sa.String(), nullable=True),
         sa.Column(
             "total_items",
             sa.Integer(),
             nullable=False,
             server_default=sa.text("0"),
-            comment="Total number of items in the batch",
-        ),
-        sa.Column(
-            "error_message",
-            sa.Text(),
-            nullable=True,
-            comment="Error message if batch failed",
         ),
+        sa.Column("error_message", sa.Text(), nullable=True),
         sa.Column("organization_id", sa.Integer(), nullable=False),
         sa.Column("project_id", sa.Integer(), nullable=False),
         sa.Column("inserted_at", sa.DateTime(), nullable=False),
@@ -136,9 +94,7 @@ def upgrade():
             "object_store_url", sqlmodel.sql.sqltypes.AutoString(), nullable=True
         ),
         sa.Column(
-            "langfuse_dataset_id",
-            sqlmodel.sql.sqltypes.AutoString(),
-            nullable=True,
+            "langfuse_dataset_id", sqlmodel.sql.sqltypes.AutoString(), nullable=True
         ),
         sa.Column("organization_id", sa.Integer(), nullable=False),
         sa.Column("project_id", sa.Integer(), nullable=False),

From 68fe7b824905955ba98dc6aecc25fd4643aa4bb7 Mon Sep 17 00:00:00 2001
From: AkhileshNegi <akhileshnegi.an3@gmail.com>
Date: Wed, 19 Nov 2025 11:55:47 +0530
Subject: [PATCH 8/8] add ondelete SET NULL to EvaluationRun batch job foreign keys

---
 backend/app/models/evaluation.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/backend/app/models/evaluation.py b/backend/app/models/evaluation.py
index ae44b347..d86db892 100644
--- a/backend/app/models/evaluation.py
+++ b/backend/app/models/evaluation.py
@@ -163,6 +163,7 @@ class EvaluationRun(SQLModel, table=True):
     batch_job_id: int | None = SQLField(
         default=None,
         foreign_key="batch_job.id",
+        ondelete="SET NULL",
         description=(
             "Reference to the batch_job that processes this evaluation (responses)"
         ),
@@ -171,6 +172,7 @@ class EvaluationRun(SQLModel, table=True):
         default=None,
         foreign_key="batch_job.id",
         nullable=True,
+        ondelete="SET NULL",
         description="Reference to the batch_job for embedding-based similarity scoring",
     )