In [1]:
!pip uninstall -y numpy pandas chromadb langchain langchain-core langchain-community langchain-openai


Found existing installation: numpy 1.26.4
Uninstalling numpy-1.26.4:
  Successfully uninstalled numpy-1.26.4
Found existing installation: pandas 2.2.2
Uninstalling pandas-2.2.2:
  Successfully uninstalled pandas-2.2.2
Found existing installation: chromadb 0.4.24
Uninstalling chromadb-0.4.24:
  Successfully uninstalled chromadb-0.4.24
Found existing installation: langchain 0.1.16
Uninstalling langchain-0.1.16:
  Successfully uninstalled langchain-0.1.16
Found existing installation: langchain-core 0.1.53
Uninstalling langchain-core-0.1.53:
  Successfully uninstalled langchain-core-0.1.53
Found existing installation: langchain-community 0.0.33
Uninstalling langchain-community-0.0.33:
  Successfully uninstalled langchain-community-0.0.33
Found existing installation: langchain-openai 0.0.8
Uninstalling langchain-openai-0.0.8:
  Successfully uninstalled langchain-openai-0.0.8


In [2]:
!pip install -q "numpy==1.26.4" \
  "pandas==2.2.2" \
  "chromadb==0.4.24" \
  "langchain==0.1.16" \
  "langchain-core==0.1.53" \
  "langchain-community==0.0.33" \
  "langchain-openai==0.0.8" \
  openpyxl tiktoken


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
opencv-contrib-python 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
db-dtypes 1.4.3 requires packaging>=24.2.0, but you have packaging 23.2 which is incompatible.
jaxlib 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
opencv-python-headless 4.12.0.88 requires numpy<2.3.0,>=2; python_version >= "3.9", but you have numpy 1.26.4 which is incompatible.
jax 0.7.2 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
pytensor 2.35.1 requires numpy>=2.0, but you have numpy 1.26.4 which is incompatible.
google-adk 1.16.0 requires opentelemetry-api<=1.37.0,>=1.37.0, but you have opentelemetry-api 1.38.0 which is incompatible.
google-adk 1.16.0 requires opentelemetry-sdk<=1.37.0,>=1.37.0, but you have opentelemet

In [None]:
# ============================================================
# 2. Imports and setup
# ============================================================
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from langchain.prompts import ChatPromptTemplate
import os, time, pandas as pd

# --- OpenAI credentials ---
# The API key must never be written into the notebook: anything saved here is
# shared with everyone who can read the file. Any key that was ever pasted
# into this cell should be treated as leaked and revoked.
# Use the key already present in the environment; otherwise prompt for it
# without echoing the input.
from getpass import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")



# ============================================================
# 4. Build simple RAG database
# ============================================================
# The knowledge base is one short text document (three validation rules plus
# one invalid and one valid example). It is saved to disk and embedded into a
# Chroma collection persisted under ./vector_db/validation.
RULES_PATH = "data/validation_rules.txt"

validation_rules = """
Rule 1  Operation sequence validity ‚Äì operation i+1 must start after or at the end of i.
Rule 2  Machine conflict ‚Äì no overlapping operations on the same machine.
Rule 3  Time consistency ‚Äì End = Start + Duration.
Example invalid case : Job0O1 starts before Job0O0 ends ‚Üí precedence violation.
Example valid case : all operations non-overlapping and consistent.
"""

# Both target directories have to exist before anything is written: without
# the second makedirs, opening the rules file fails on a fresh runtime.
os.makedirs("vector_db/validation", exist_ok=True)
os.makedirs(os.path.dirname(RULES_PATH), exist_ok=True)

# Explicit encoding because the rules text contains non-ASCII characters.
with open(RULES_PATH, "w", encoding="utf-8") as f:
    f.write(validation_rules)

embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
rag_db = Chroma.from_texts(
    # Embed the in-memory text directly instead of re-opening the file
    # (the re-opened handle was never closed).
    texts=[validation_rules],
    embedding=embeddings,
    # A fixed id means re-running this cell upserts the same document rather
    # than appending another copy to the persisted collection.
    ids=["validation_rules"],
    persist_directory="./vector_db/validation",
)
rag_db.persist()

# ============================================================
# 5. LLM + parser setup
# ============================================================
# Chat model used for every validation prompt (temperature 0).
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# Fields the model is asked to return; the parser derives both the format
# instructions embedded in each prompt and the reply parsing from them.
schemas = [
    ResponseSchema(name=field_name, description=field_description)
    for field_name, field_description in (
        ("overall", "Either VALID or INVALID"),
        ("details", "Short reason or 'All constraints satisfied'"),
    )
]
parser = StructuredOutputParser.from_response_schemas(schemas)
format_instructions = parser.get_format_instructions()

# Prompt templates -----------------------------------------------------------
# Three prompt variants, from least to most guidance. Each one expects the
# placeholders {format_instructions} and {schedule}; the RAG variant also
# expects {context} (retrieved background text).

SIMPLE_PROMPT_TEXT = """You are a schedule validation assistant.
Check whether this Job Shop Schedule is VALID or INVALID.

Return ONLY valid JSON following the format below.

{format_instructions}

Schedule:
{schedule}
"""

DETAILED_PROMPT_TEXT = """You are an expert in Job Shop Scheduling validation.

Verify these constraints:
1. Operation sequence validity
2. Machine conflict-free usage
3. Time consistency

Return ONLY valid JSON.

{format_instructions}

Schedule:
{schedule}
"""

RAG_PROMPT_TEXT = """You are an expert in Job Shop Scheduling validation.
Use this background knowledge:
{context}

Determine if the schedule is VALID or INVALID.

Return ONLY valid JSON.

{format_instructions}

Schedule:
{schedule}
"""

# Bare question, no list of constraints.
simple_template = ChatPromptTemplate.from_template(SIMPLE_PROMPT_TEXT)

# Names the three constraints to verify.
detailed_template = ChatPromptTemplate.from_template(DETAILED_PROMPT_TEXT)

# Supplies retrieved background knowledge through {context}.
rag_template = ChatPromptTemplate.from_template(RAG_PROMPT_TEXT)

# ============================================================
# 6. Run evaluation pipeline
# ============================================================
# Every schedule in the first half of the workbook is judged three times
# (simple prompt, detailed prompt, RAG-augmented prompt). One dict per
# schedule -- raw replies plus parsed verdicts -- is appended to `results`.

PAUSE_PER_ITEM_S = 0.5       # short pause after every schedule
COOLDOWN_EVERY = 5           # take a longer break after this many schedules
COOLDOWN_S = 20              # length of that longer break, in seconds
PARSE_ERROR = "PARSE_ERROR"  # verdict recorded when a reply cannot be parsed


def _ask_llm(template, **template_vars):
    """Format `template`, send it to `llm`, and parse the reply with `parser`.

    Args:
        template: a ChatPromptTemplate (simple, detailed or RAG variant).
        **template_vars: values for the template placeholders other than
            `format_instructions`, which is always filled in here.

    Returns:
        (raw_text, parsed): the model's reply text and a dict with the
        "overall" and "details" keys. When the reply cannot be parsed the
        dict holds PARSE_ERROR as "overall" and the parser's message as
        "details", so one malformed reply does not abort the run and throw
        away everything collected so far (results are only saved after the
        loop finishes).
    """
    messages = template.format_messages(
        format_instructions=format_instructions, **template_vars
    )
    raw_text = llm.invoke(messages).content
    try:
        parsed = parser.parse(raw_text)
    except ValueError as err:
        # LangChain's OutputParserException derives from ValueError.
        parsed = {"overall": PARSE_ERROR, "details": f"Unparseable model output: {err}"}
    return raw_text, parsed


df = pd.read_excel("/content/sample_schedules_with_validity.xlsx")
results = []

df_half = df.head(len(df) // 2)   # first half only
for i, (_, row) in enumerate(df_half.iterrows(), start=1):
    qid = row["ID"]
    schedule = row["Question"]
    expected = row["Expected_Output"]
    n_mach, n_jobs = row["num_machines"], row["num_jobs"]

    print(f"üîç Checking {qid} ({n_jobs} jobs √ó {n_mach} machines)‚Ä¶")

    # ---------- Simple ----------
    simple_out, simple_json = _ask_llm(simple_template, schedule=schedule)

    # ---------- Detailed ----------
    detailed_out, detailed_json = _ask_llm(detailed_template, schedule=schedule)

    # ---------- RAG ----------
    # Retrieve background text from the vector store and pass it as context.
    docs = rag_db.similarity_search(schedule, k=2)
    context = "\n".join(d.page_content for d in docs)
    rag_out, rag_json = _ask_llm(rag_template, context=context, schedule=schedule)

    print(
        f"üü¢ Simple: {simple_json['overall']}, "
        f"üü° Detailed: {detailed_json['overall']}, "
        f"üîµ RAG: {rag_json['overall']}"
    )

    results.append(
        {
            "ID": qid,
            "Expected_Output": expected,
            "num_machines": n_mach,
            "num_jobs": n_jobs,
            # Raw JSON text (for traceability)
            "Simple_Prompt_Out": simple_out,
            "Detailed_Prompt_Out": detailed_out,
            "RAG_Out": rag_out,
            # Parsed structured results
            "Simple_Result": simple_json["overall"],
            "Simple_Details": simple_json["details"],
            "Detailed_Result": detailed_json["overall"],
            "Detailed_Details": detailed_json["details"],
            "RAG_Result": rag_json["overall"],
            "RAG_Details": rag_json["details"],
        }
    )

    time.sleep(PAUSE_PER_ITEM_S)
    # Longer cool-down after every COOLDOWN_EVERY schedules.
    if i % COOLDOWN_EVERY == 0:
        print("‚è≥ Cooling down to avoid rate limit...")
        time.sleep(COOLDOWN_S)

# ============================================================
# 7. Save results
# ============================================================
# Write one row per evaluated schedule to an Excel workbook, creating the
# output folder first if it does not exist yet.
out_path = "outputs/validity_agent_structured.xlsx"
os.makedirs(os.path.dirname(out_path), exist_ok=True)

results_frame = pd.DataFrame(results)
results_frame.to_excel(out_path, index=False)

print(f"\nResults saved to {out_path}")

# ============================================================
# 8. Quick accuracy summary
# ============================================================
# For each prompt variant, compare the parsed verdict with the expected label
# and report the percentage of matching rows.

def _normalise_labels(labels):
    """Return `labels` (a pandas Series) as trimmed, upper-case strings.

    Casting to str first keeps the comparison from failing when a cell is
    not a string (for example a missing value).
    """
    return labels.astype(str).str.strip().str.upper()


df_out = pd.DataFrame(results)
if df_out.empty:
    # No rows means no "Expected_Output" column; report it instead of
    # failing with a KeyError.
    print("\n Accuracy Summary: no results to score.")
else:
    expected_labels = _normalise_labels(df_out["Expected_Output"])
    for col in ["Simple_Result", "Detailed_Result", "RAG_Result"]:
        df_out[f"{col}_Match"] = expected_labels == _normalise_labels(df_out[col])

    acc = (
        df_out[[c for c in df_out.columns if c.endswith("_Match")]].mean() * 100
    ).round(1)
    print("\n Accuracy Summary (% correct vs expected):")
    print(acc)


ERROR:chromadb.telemetry.product.posthog:Failed to send telemetry event ClientStartEvent: capture() takes 1 positional argument but 3 were given
ERROR:chromadb.telemetry.product.posthog:Failed to send telemetry event ClientCreateCollectionEvent: capture() takes 1 positional argument but 3 were given


üîç Checking V001 (10 jobs √ó 6 machines)‚Ä¶
üü¢ Simple: INVALID, üü° Detailed: INVALID, üîµ RAG: INVALID
üîç Checking V002 (6 jobs √ó 4 machines)‚Ä¶
üü¢ Simple: INVALID, üü° Detailed: VALID, üîµ RAG: VALID
üîç Checking V003 (8 jobs √ó 5 machines)‚Ä¶
üü¢ Simple: INVALID, üü° Detailed: INVALID, üîµ RAG: INVALID
üîç Checking V004 (15 jobs √ó 10 machines)‚Ä¶
üü¢ Simple: INVALID, üü° Detailed: INVALID, üîµ RAG: INVALID
üîç Checking V005 (10 jobs √ó 9 machines)‚Ä¶
üü¢ Simple: INVALID, üü° Detailed: INVALID, üîµ RAG: INVALID
‚è≥ Cooling down to avoid rate limit...
üîç Checking V006 (8 jobs √ó 4 machines)‚Ä¶
üü¢ Simple: INVALID, üü° Detailed: INVALID, üîµ RAG: INVALID
üîç Checking V007 (7 jobs √ó 7 machines)‚Ä¶
üü¢ Simple: INVALID, üü° Detailed: INVALID, üîµ RAG: INVALID
üîç Checking V008 (3 jobs √ó 3 machines)‚Ä¶
üü¢ Simple: VALID, üü° Detailed: VALID, üîµ RAG: VALID
üîç Checking V009 (6 jobs √ó 2 machines)‚Ä¶
üü¢ Simple: VALID, üü° Detailed: VALID, üîµ RA

In [8]:
import pandas as pd

# Reload the saved results workbook and render it as a table.
# The path is relative to the working directory, the same location the
# workbook is written to, so the cell does not depend on a /content mount.
file_path = "outputs/validity_agent_structured.xlsx"
df_results = pd.read_excel(file_path)
display(df_results)

Unnamed: 0,ID,Expected_Output,num_machines,num_jobs,Simple_Prompt_Out,Detailed_Prompt_Out,RAG_Out,Simple_Result,Simple_Details,Detailed_Result,Detailed_Details,RAG_Result,RAG_Details
0,V001,INVALID,6,10,"```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...",INVALID,Job 1 Operation 1 on Machine 4 ends before it ...,INVALID,Time inconsistency in Job 1 Operation 1: end t...,INVALID,Operation sequence violation: Job 1 Operation ...
1,V002,VALID,4,6,"```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""VALID"",\n\t""details""...","```json\n{\n\t""overall"": ""VALID"",\n\t""details""...",INVALID,Job 1 Operation 1 on Machine 3 starts before J...,VALID,All constraints satisfied,VALID,All constraints satisfied
2,V003,INVALID,5,8,"```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...",INVALID,Operation dependencies are violated; some oper...,INVALID,Operation sequence validity issue: Job 2 Opera...,INVALID,Operation sequence validity violation: Job 2 O...
3,V004,INVALID,10,15,"```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...",INVALID,Job 5 Operation 7 on Machine 0 has an incorrec...,INVALID,Machine conflict detected: Machine 0 has overl...,INVALID,Operation 5 of Job 5 on Machine 0 ends before ...
4,V005,INVALID,9,10,"```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...",INVALID,Operation 4 of Job 4 ends before it starts (17...,INVALID,Time inconsistency in Job 4 Operation 6 on Mac...,INVALID,Operation 4 of Job 4 on Machine 4 has an end t...
5,V006,INVALID,4,8,"```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...",INVALID,"Job 7 Operation 1 on Machine 1 starts at 674, ...",INVALID,Machine conflict on Machine 0 between Job 6 Op...,INVALID,Machine conflict on Machine 0: Job 7 Operation...
6,V007,INVALID,7,7,"```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...","```json\n{\n\t""overall"": ""INVALID"",\n\t""detail...",INVALID,Job 0 Operation 0 on Machine 0 has incorrect e...,INVALID,Operation sequence validity and time consisten...,INVALID,Operation sequence validity violation: Job 0 O...
7,V008,VALID,3,3,"```json\n{\n\t""overall"": ""VALID"",\n\t""details""...","```json\n{\n\t""overall"": ""VALID"",\n\t""details""...","```json\n{\n\t""overall"": ""VALID"",\n\t""details""...",VALID,All constraints satisfied,VALID,All constraints satisfied,VALID,All constraints satisfied
8,V009,VALID,2,6,"```json\n{\n\t""overall"": ""VALID"",\n\t""details""...","```json\n{\n\t""overall"": ""VALID"",\n\t""details""...","```json\n{\n\t""overall"": ""VALID"",\n\t""details""...",VALID,All constraints satisfied,VALID,All constraints satisfied,VALID,All constraints satisfied
9,V010,VALID,4,9,"```json\n{\n\t""overall"": ""VALID"",\n\t""details""...","```json\n{\n\t""overall"": ""VALID"",\n\t""details""...","```json\n{\n\t""overall"": ""VALID"",\n\t""details""...",VALID,All constraints satisfied,VALID,All constraints satisfied,VALID,All constraints satisfied
