In [1]:
#install dependencies and import libraries
import os
import json
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader

In [2]:
# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [3]:
# We use a standard ChatModel with a high context window.
# We do NOT use structured output here because you want a Markdown report.
llm = ChatOpenAI(
    model="deepseek-ai/DeepSeek-V3", # or "gpt-4o"
    openai_api_key=OPENAI_API_KEY,
    openai_api_base="https://api.siliconflow.cn/v1",
    temperature=0.2 # Slight creativity for feedback writing, but low for consistency
)

In [4]:
# --- Refined System Prompt ---
system_prompt = """
You are the **Lead Academic Examiner** for an advanced university course.
Your task is to grade a student essay by strictly synthesizing data from three expert AI sub-agents (Logic, Fact, Language) and applying a specific Grading Rubric.

### 1. INPUT DATA OVERVIEW
You will receive:
1.  **Rubric:** The exact criteria and band descriptors.
2.  **Logic Report:** Scores on relevance, structure, and argument strength.
3.  **Fact Report:** Verification of claims and citations.
4.  **Language Report:** Analysis of grammar, vocabulary, and tone.
5.  **Student Essay:** The raw text.

### 2. GRADING ALGORITHM (Mental Steps)
Before writing the report, perform this analysis:
* **Step 1 (Relevance Check):** Look at `logic_analysis['relevance']['is_off_topic']`. If TRUE, the maximum score for "Task Response" is capped at **Band 5**.
* **Step 2 (Map Evidence to Rubric):**
    * *Task Response:* Use `logic_analysis['relevance']` and `logic_analysis['argument_strength_score']`.
    * *Cohesion/Structure:* Use `logic_analysis['structure']['flow_score']` and `language_analysis['structure']['flow_issues']`.
    * *Language/Style:* Use `language_analysis['grammar_issues']` count and `language_analysis['vocabulary']['score']`.
    * *Evidence/Referencing:* Use `fact_checking_output` (look for incorrect citations) and `logic_analysis['identified_fallacies']`.
* **Step 3 (Select Band):** For each criterion, find the Rubric Level where the `descriptor_points` best match your analysis.

### 3. OUTPUT RULES
* **Tone:** Professional, constructive, and authoritative.
* **Justification:** In the Scorecard, you MUST quote specific **Descriptor Points** from the rubric to justify the score (e.g., "Matches Band 7: 'Argument is clear but may lack refinement'").
* **Annotations:** In the "Annotated Text Review", you must strictly use the JSON data to pinpoint errors. Do not hallucinate new errors.

### 4. FINAL OUTPUT FORMAT
Produce a clean Markdown report strictly following the template below.
"""

In [5]:
# Define the Prompt Template
prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", """
**ESSAY QUESTION:** {essay_question}

**RUBRIC:** {rubric_json}

**LOGIC & RELEVANCE REPORT:** {logic_json}

**FACT CHECK REPORT:** {fact_json}

**LANGUAGE REPORT:** {language_json}

**STUDENT ESSAY:** {essay_content}

---
Generate the **Academic Assessment Report** now.
""")
])

# Create the Chain
grading_chain = prompt_template | llm | StrOutputParser()

In [10]:
# --- Execution Function ---
def generate_final_grade(
    essay_content: str,
    essay_question: str,
    rubric_data: dict,
    logic_data: dict,
    fact_data: list,
    language_data: dict
):
    print("üë©‚Äçüè´ Synthesizing Final Report...")
    
    # Convert dicts to JSON strings for the prompt 
    # (Explicit JSON Serialization) using json.dumps
    try:
        report = grading_chain.invoke({
            "essay_question": essay_question,
            "essay_content": essay_content,
            "rubric_json": json.dumps(rubric_data, indent=2),
            "logic_json": json.dumps(logic_data, indent=2),
            "fact_json": json.dumps(fact_data, indent=2),
            "language_json": json.dumps(language_data, indent=2)
        })
        return report
    except Exception as e:
        return f"Error generating report: {e}"

In [11]:
# load all data from previous functions
#unserialized JSON data using json.load

EXTRACTED_RUBRICS_PATH = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\processed\\extracted_rubrics.json"
FACT_CHECKING_OUTPUT_PATH = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\processed\\fact_checking_output.json"
LOGIC_ANALYSIS_OUTPUT_PATH = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\processed\\logic_analysis_output.json"
LANGUAGE_ANALYSIS_OUTPUT_PATH = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\processed\\language_analysis_output.json"

with open(EXTRACTED_RUBRICS_PATH, "r") as f:
    extracted_rubrics = json.load(f)

with open(FACT_CHECKING_OUTPUT_PATH, "r") as f:
    fact_checking_output = json.load(f)

with open(LOGIC_ANALYSIS_OUTPUT_PATH, "r") as f:
    logic_analysis_output = json.load(f)

with open(LANGUAGE_ANALYSIS_OUTPUT_PATH, "r") as f:
    language_analysis_output = json.load(f)

print("JSON files loaded successfully.")

ESSAY_QUESTION_PATH = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\raw\\essay_question.pdf"
ESSAY_CONTENT_PATH = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\raw\\essay_content.pdf"

loader = PyPDFLoader(ESSAY_QUESTION_PATH)
docs = loader.load()
essay_question = "\n\n".join([d.page_content for d in docs])

loader = PyPDFLoader(ESSAY_CONTENT_PATH)
docs = loader.load()
essay_content = "\n\n".join([d.page_content for d in docs])

print("PDF files loaded successfully.")
    

JSON files loaded successfully.
PDF files loaded successfully.


In [12]:
final_report = generate_final_grade(
    essay_content=essay_content,
    essay_question=essay_question,
    rubric_data=extracted_rubrics, # Your variable
    logic_data=logic_analysis_output, # Your variable
    fact_data=fact_checking_output, # Your variable
    language_data=language_analysis_output # Your variable
)
    
print(final_report)

üë©‚Äçüè´ Synthesizing Final Report...
# Academic Assessment Report

## Scorecard Summary

| Criteria                      | Band Score | Weighted Score | Justification |
|-------------------------------|------------|----------------|---------------|
| Task Response & Argument      | Band 9     | 20%            | Matches Band 9: "Fully addresses all parts of the task with a clear, insightful, and original argument" and "Demonstrates a nuanced understanding of GenAI's challenges and benefits" |
| Critical Thinking & Evaluation| Band 9     | 20%            | Matches Band 9: "Demonstrates excellent critical engagement with sources and ideas" and "Evaluates implications, limitations, and counterarguments insightfully" |
| Use of Sources & Referencing  | Band 8     | 20%            | Matches Band 8: "Uses an appropriate range of sources effectively" and "Referencing is largely correct with occasional minor errors" |
| Academic Writing Style & Register | Band 8 | 15%         | Matches Band

In [13]:
REPORT_OUTPUT_PATH = "C:\\Users\\HP\\Documents\\repos\\essay-checker-agentic-rag\\data\\final_report\\final_report.txt"

with open(REPORT_OUTPUT_PATH, "w", encoding="utf-8") as f:
    f.write(final_report)

## Final

In [None]:
import os
import json
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Load environment variables
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# --- Setup the Model (DeepSeek/GPT-4o) ---
# We use a standard ChatModel with a high context window.
# We do NOT use structured output here because you want a Markdown report.
llm = ChatOpenAI(
    model="deepseek-ai/DeepSeek-V3", # or "gpt-4o"
    openai_api_key=OPENAI_API_KEY,
    openai_api_base="https://api.siliconflow.cn/v1",
    temperature=0.2 # Slight creativity for feedback writing, but low for consistency
)

# --- Refined System Prompt ---
system_prompt = """
You are the **Lead Academic Examiner** for an advanced university course.
Your task is to grade a student essay by strictly synthesizing data from three expert AI sub-agents (Logic, Fact, Language) and applying a specific Grading Rubric.

### 1. INPUT DATA OVERVIEW
You will receive:
1.  **Rubric:** The exact criteria and band descriptors.
2.  **Logic Report:** Scores on relevance, structure, and argument strength.
3.  **Fact Report:** Verification of claims and citations.
4.  **Language Report:** Analysis of grammar, vocabulary, and tone.
5.  **Student Essay:** The raw text.

### 2. GRADING ALGORITHM (Mental Steps)
Before writing the report, perform this analysis:
* **Step 1 (Relevance Check):** Look at `logic_analysis['relevance']['is_off_topic']`. If TRUE, the maximum score for "Task Response" is capped at **Band 5**.
* **Step 2 (Map Evidence to Rubric):**
    * *Task Response:* Use `logic_analysis['relevance']` and `logic_analysis['argument_strength_score']`.
    * *Cohesion/Structure:* Use `logic_analysis['structure']['flow_score']` and `language_analysis['structure']['flow_issues']`.
    * *Language/Style:* Use `language_analysis['grammar_issues']` count and `language_analysis['vocabulary']['score']`.
    * *Evidence/Referencing:* Use `fact_checking_output` (look for incorrect citations) and `logic_analysis['identified_fallacies']`.
* **Step 3 (Select Band):** For each criterion, find the Rubric Level where the `descriptor_points` best match your analysis.

### 3. OUTPUT RULES
* **Tone:** Professional, constructive, and authoritative.
* **Justification:** In the Scorecard, you MUST quote specific **Descriptor Points** from the rubric to justify the score (e.g., "Matches Band 7: 'Argument is clear but may lack refinement'").
* **Annotations:** In the "Annotated Text Review", you must strictly use the JSON data to pinpoint errors. Do not hallucinate new errors.

### 4. FINAL OUTPUT FORMAT
Produce a clean Markdown report strictly following the template below.
"""

# Define the Prompt Template
prompt_template = ChatPromptTemplate.from_messages([
    ("system", system_prompt),
    ("human", """
**ESSAY QUESTION:** {question}

**RUBRIC:** {rubric_json}

**LOGIC & RELEVANCE REPORT:** {logic_json}

**FACT CHECK REPORT:** {fact_json}

**LANGUAGE REPORT:** {language_json}

**STUDENT ESSAY:** {essay_text}

---
Generate the **Academic Assessment Report** now.
""")
])

# Create the Chain
grading_chain = prompt_template | llm | StrOutputParser()

# --- Execution Function ---
def generate_final_grade(
    essay_text: str,
    question: str,
    rubric_data: dict,
    logic_data: dict,
    fact_data: list,
    language_data: dict
):
    print("üë©‚Äçüè´ Synthesizing Final Report...")
    
    # Convert dicts to JSON strings for the prompt
    try:
        report = grading_chain.invoke({
            "question": question,
            "essay_text": essay_text,
            "rubric_json": json.dumps(rubric_data, indent=2),
            "logic_json": json.dumps(logic_data, indent=2),
            "fact_json": json.dumps(fact_data, indent=2),
            "language_json": json.dumps(language_data, indent=2)
        })
        return report
    except Exception as e:
        return f"Error generating report: {e}"

# --- Example Usage (Using your provided dummy data) ---
if __name__ == "__main__":
    # Load your raw data (Simulated here based on your prompt)
    # in a real app, these come from the previous functions
    
    # ... (Load your JSONs here) ...
    
    final_report = generate_final_grade(
        essay_text="Generative Artificial Intelligence (GenAI) tools, such as ChatGPT...", # Truncated
        question="Discuss the integration of GenAI in higher education.",
        rubric_data=extracted_rubrics, # Your variable
        logic_data=logic_analysis_output, # Your variable
        fact_data=fact_checking_output, # Your variable
        language_data=language_analysis_output # Your variable
    )
    
    print(final_report)