## Logic Checker Pipeline
- checks the essay from a logical perspective
- including the relevance of the topic etc

In [1]:
import os
from typing import List, Optional
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

In [3]:
# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [4]:
#loading the documents
pdf_path = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\raw\\essay_content.pdf"
loader = PyPDFLoader(pdf_path)
docs = loader.load()
if not docs:
        print("Error: No documents found.")
docs

[Document(metadata={'producer': 'Microsoft® Word 2024', 'creator': 'Microsoft® Word 2024', 'creationdate': '2025-12-16T14:12:31+08:00', 'author': 'hp404sk7@outlook.com', 'moddate': '2025-12-16T14:12:31+08:00', 'source': 'C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\raw\\essay_content.pdf', 'total_pages': 3, 'page': 0, 'page_label': '1'}, page_content='2. Student Essay Submission \nTitle \nBeyond Prohibition: Integrating Generative AI into Higher Education Assessment \nand Learning \n \nIntroduction \nThe rapid emergence of Generative Artificial Intelligence (GenAI) tools, such as ChatGPT \nand Claude, has fundamentally disrupted the landscape of higher education. While \ndigital tools have long supported academic study, GenAI’s ability to synthesise complex \ninformation and generate human-like text presents unprecedented challenges to \nestablished educational norms. \nA key concern among educators is the threat these tools pose to academic integrity, \nparticular

In [5]:
# Define the "Logic & Relevance" Schema ---

class LogicalFallacy(BaseModel):
    fallacy_type: str = Field(..., description="Name of the fallacy (e.g., 'Circular Reasoning', 'Straw Man', 'Hasty Generalization').")
    location_snippet: str = Field(..., description="The quote from the text containing the fallacy.")
    explanation: str = Field(..., description="Why this argument is logically flawed.")

class RelevanceAnalysis(BaseModel):
    is_off_topic: bool = Field(..., description="True if the essay completely fails to address the prompt.")
    score: int = Field(..., description="Score 1-10. How directly does it answer the specific question asked?")
    thesis_alignment: str = Field(..., description="Analysis of whether the thesis statement directly addresses the prompt.")
    missing_key_points: List[str] = Field(..., description="List of key concepts related to the question that the student failed to mention.")

class StructureAnalysis(BaseModel):
    has_clear_intro: bool = Field(..., description="Does it have a distinct introduction?")
    has_clear_conclusion: bool = Field(..., description="Does it have a distinct conclusion?")
    flow_score: int = Field(..., description="Score 1-10. How well do paragraphs transition and build upon each other?")
    structural_weaknesses: List[str] = Field(..., description="List of specific structural issues (e.g., 'Sudden topic change in Para 3').")

class LogicAnalysisResult(BaseModel):
    relevance: RelevanceAnalysis = Field(..., description="Analysis of how well the essay answers the prompt.")
    structure: StructureAnalysis = Field(..., description="Analysis of the essay's organization.")
    identified_fallacies: List[LogicalFallacy] = Field(default_factory=list, description="List of logical errors found in the argumentation.")
    argument_strength_score: int = Field(..., description="Score 1-10 on the overall persuasiveness and soundness of arguments.")
    summary_critique: str = Field(..., description="A concise summary of the logical quality for the final grader.")

In [6]:
#Setup the Model (DeepSeek via OpenAI API) ---
llm = ChatOpenAI(
    model="deepseek-ai/DeepSeek-V3",
    openai_api_key=OPENAI_API_KEY,
    openai_api_base="https://api.siliconflow.cn/v1",
    temperature=0  # Keep it 0 for consistent analysis
)

# Bind the robust schema
structured_llm = llm.with_structured_output(LogicAnalysisResult)

In [None]:
# Define the Prompt ---
system_prompt = """
You are a strict Essay Editor and Logic Expert.
Your task is to ruthlessly evaluate the **Relevance** and **Logic** of the student's essay against the provided Question.

**Analysis Directives:**
1. **Relevance is Paramount:** If the essay is well-written but answers the wrong question, it must receive a low Relevance Score and `is_off_topic=True`.
2. **Logical Rigor:** Hunt for logical fallacies. Does the conclusion follow from the premises? Are the claims supported by evidence or just asserted?
3. **Structure:** Check for standard academic form (Intro -> Body Arguments -> Conclusion).

Output strictly in the requested JSON format.
"""

In [8]:
prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "Here is the student's essay:\n\n{text}")
])

chain = prompt | structured_llm

In [None]:
# execute 
full_text = "\n\n".join([d.page_content for d in docs])
print(f"Analyzing {len(docs)} pages ({len(full_text)} characters)...")
try:
    # Invoke the chain with the full text
    result = chain.invoke({"text": full_text})
    result_json = result.dict()

except Exception as e:
    print(f"Error during analysis: {e}")

result_json 

Analyzing 3 pages (5227 characters)...


C:\Users\HP\AppData\Local\Temp\ipykernel_27804\2905930422.py:7: PydanticDeprecatedSince20: The `dict` method is deprecated; use `model_dump` instead. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.12/migration/
  result_json = result.dict()


{'relevance': {'is_off_topic': False,
  'score': 10,
  'thesis_alignment': 'The essay effectively addresses the question of integrating Generative AI into higher education assessment and learning, maintaining strong relevance throughout.',
  'missing_key_points': []},
 'structure': {'has_clear_intro': True,
  'has_clear_conclusion': True,
  'flow_score': 9,
  'structural_weaknesses': []},
 'identified_fallacies': [],
 'argument_strength_score': 9,
 'summary_critique': 'The essay presents a well-structured and logically rigorous argument for integrating Generative AI into higher education. It effectively balances the discussion between challenges to academic integrity and the benefits for student learning, culminating in a persuasive call for assessment reform. The argument is supported by relevant evidence and maintains strong relevance to the topic throughout.'}

In [10]:
# export to JSON file
LOGIC_ANALYSIS_OUTPUT_PATH = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\processed\\logic_analysis_output.json"

import json
from pathlib import Path

Path(LOGIC_ANALYSIS_OUTPUT_PATH).write_text(json.dumps(result_json, indent=2))
print(f"Output saved to {LOGIC_ANALYSIS_OUTPUT_PATH}")

Output saved to C:\Users\HP\Documents\repos\essay-checker-agentic-rag\data\processed\logic_analysis_output.json


### Final

In [None]:
import os
from typing import List, Optional
from dotenv import load_dotenv
from pydantic import BaseModel, Field
from langchain_community.document_loaders import PyPDFLoader
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate

# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# --- Step 1: Define the "Logic & Relevance" Schema ---

class LogicalFallacy(BaseModel):
    fallacy_type: str = Field(..., description="Name of the fallacy (e.g., 'Circular Reasoning', 'Straw Man', 'Hasty Generalization').")
    location_snippet: str = Field(..., description="The quote from the text containing the fallacy.")
    explanation: str = Field(..., description="Why this argument is logically flawed.")

class RelevanceAnalysis(BaseModel):
    is_off_topic: bool = Field(..., description="True if the essay completely fails to address the prompt.")
    score: int = Field(..., description="Score 1-10. How directly does it answer the specific question asked?")
    thesis_alignment: str = Field(..., description="Analysis of whether the thesis statement directly addresses the prompt.")
    missing_key_points: List[str] = Field(..., description="List of key concepts related to the question that the student failed to mention.")

class StructureAnalysis(BaseModel):
    has_clear_intro: bool = Field(..., description="Does it have a distinct introduction?")
    has_clear_conclusion: bool = Field(..., description="Does it have a distinct conclusion?")
    flow_score: int = Field(..., description="Score 1-10. How well do paragraphs transition and build upon each other?")
    structural_weaknesses: List[str] = Field(..., description="List of specific structural issues (e.g., 'Sudden topic change in Para 3').")

class LogicAnalysisResult(BaseModel):
    relevance: RelevanceAnalysis = Field(..., description="Analysis of how well the essay answers the prompt.")
    structure: StructureAnalysis = Field(..., description="Analysis of the essay's organization.")
    identified_fallacies: List[LogicalFallacy] = Field(default_factory=list, description="List of logical errors found in the argumentation.")
    argument_strength_score: int = Field(..., description="Score 1-10 on the overall persuasiveness and soundness of arguments.")
    summary_critique: str = Field(..., description="A concise summary of the logical quality for the final grader.")

# --- Step 2: Setup the Model (DeepSeek via OpenAI API) ---
llm = ChatOpenAI(
    model="deepseek-ai/DeepSeek-V3",
    openai_api_key=OPENAI_API_KEY,
    openai_api_base="https://api.siliconflow.cn/v1",
    temperature=0 
)

structured_llm = llm.with_structured_output(LogicAnalysisResult)

# --- Step 3: Refined "Strict Editor" Prompt ---
system_prompt = """
You are a strict Academic Journal Editor and Logic Expert.
Your task is to ruthlessly evaluate the **Relevance** and **Logic** of the student's essay against the provided Question.

**Analysis Directives:**
1. **Relevance is Paramount:** If the essay is well-written but answers the wrong question, it must receive a low Relevance Score and `is_off_topic=True`.
2. **Logical Rigor:** Hunt for logical fallacies. Does the conclusion follow from the premises? Are the claims supported by evidence or just asserted?
3. **Structure:** Check for standard academic form (Intro -> Body Arguments -> Conclusion).

Output strictly in the requested JSON format.
"""

prompt = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", "Essay Question: {question}\n\nStudent Essay Content:\n{essay_content}")
])

chain = prompt | structured_llm

# --- Step 4: Execution Logic ---

def run_logic_check(pdf_path: str, essay_question: str):
    print(f"Loading PDF from: {pdf_path}")
    loader = PyPDFLoader(pdf_path)
    docs = loader.load()
    
    if not docs:
        print("Error: No documents found.")
        return None

    # Merge full text for comprehensive logical analysis
    full_text = "\n\n".join([d.page_content for d in docs])
    
    print(f"Analyzing Logic against Question: '{essay_question[:50]}...'")
    
    try:
        # Pass both the QUESTION and the CONTENT
        result = chain.invoke({
            "question": essay_question, 
            "essay_content": full_text
        })
        
        return result.dict()

    except Exception as e:
        print(f"Error during logic analysis: {e}")
        return None

# --- Usage Example ---
pdf_path = r"C:\Users\HP\Documents\repos\essay-checker-agentic-rag\data\raw\essay_content.pdf"
question = "Discuss the socio-economic impacts of the Industrial Revolution in 19th-century Britain."

result_json = run_logic_check(pdf_path, question)

if result_json:
    import json
    print(json.dumps(result_json, indent=2))