In [41]:
import os
from dotenv import load_dotenv
from typing import TypedDict, Annotated, operator
from pydantic import BaseModel, Field

load_dotenv()


True

In [42]:
from langchain_groq import ChatGroq
from langchain_deepseek import ChatDeepSeek
from langgraph.graph import StateGraph, START, END
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import JsonOutputParser


In [43]:
# Registry of the chat-model clients this notebook can use, keyed by a short
# label. All clients are constructed up front; API keys (and the OpenRouter
# base URL for the DeepSeek client) are read from environment variables.
LLMS = {
    'gemini': ChatGoogleGenerativeAI(
        api_key=os.getenv("GOOGLE_API_KEY"),
        model='gemini-1.5-flash',
    ),
    'deepseek': ChatDeepSeek(
        api_key=os.getenv("OPENROUTER_API_KEY"),
        api_base=os.getenv("OPENROUTER_BASE_URL"),
        model="deepseek/deepseek-chat-v3-0324:free",
    ),
    'moonshotai': ChatGroq(
        api_key=os.getenv("GROQ_API_KEY"),
        model="moonshotai/kimi-k2-instruct",
    ),
}


In [44]:
# Chat model shared by every chain built in this notebook.
llm = LLMS["moonshotai"]


In [45]:
# Shape of the JSON object each evaluator chain asks the LLM to return.
# NOTE: documented with `#` comments rather than a class docstring on purpose:
# pydantic copies a model docstring into the generated JSON schema, and that
# schema is what `parser.get_format_instructions()` feeds into the prompts.
class FeedbackOutputFormat(BaseModel):
    # Free-text feedback paragraph.
    feedback: str = Field(
        description="Feedback Text",
    )
    # Integer score, declared as 0 <= score <= 10.
    score: int = Field(
        description="Feedback score out of 10",
        ge=0,
        le=10,
    )


In [46]:
# JSON output parser shared by all chains. FeedbackOutputFormat supplies the
# schema behind parser.get_format_instructions(); the node functions index the
# parsed result like a dict (result['feedback'], result['score']).
parser = JsonOutputParser(pydantic_object=FeedbackOutputFormat)


In [67]:
class EssayState(TypedDict):
    """State dictionary passed between the nodes of the essay-evaluation graph."""

    essay_text: str  # essay under evaluation; read by each evaluate_* node
    cot_feedback: str  # clarity-of-thought feedback, written by evaluate_cot
    aod_feedback: str  # depth-of-analysis feedback, written by evaluate_aod
    lang_feedback: str  # language/grammar feedback, written by evaluate_lang
    overall_feedback: str  # combined summary, written by overall_feedback()
    # Each evaluator returns a one-element list for this key; the operator.add
    # reducer concatenates those lists instead of overwriting the value.
    # NOTE(review): `operator` reaches this cell through
    # `from typing import TypedDict, Annotated, operator`, which is not part of
    # typing's public API -- a plain `import operator` would be more robust.
    indiviual_scores: Annotated[list[int], operator.add]
    avg_score: float  # final score, written by overall_feedback()


In [48]:
# Prompt for the clarity-of-thought evaluator.
# The system message carries the rubric and the output-format instructions;
# the human message carries the essay.
# Template variables: {format_instructions}, {essay_text}.
cot_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
    You are an expert writing evaluator. Your task is to assess an essay only based on clarity of thought.

    This means you are judging:

    - How clearly the writer presents their ideas
    - Whether their reasoning is easy to follow
    - If the structure supports understanding
    - Whether the language helps or hinders comprehension
    - Ignore grammar, vocabulary variety, spelling, tone, creativity, or originality unless - they directly affect clarity.

    Your response must include:

    A short, honest paragraph of feedback explaining how clear the essay was and why. Point out specific strengths or weak spots in the writer's thinking or organization.

    A clarity score out of 10, where:
    10 = exceptionally clear and easy to follow from start to finish
    5 = mixed clarity; some parts are well explained, others are confusing
    1 = very difficult to follow or understand the writer's reasoning

    Use straightforward, supportive language. You're not here to judge—you're here to help the writer improve their clarity of thought.
    
    # Output Instructions
    Here are your output format instructions: {format_instructions}
    """,
        ),
        ("human", "Evaulate the below essay: {essay_text}"),
    ]
)


In [None]:
def evaluate_cot(state: EssayState):
    """Graph node: evaluate the essay for clarity of thought.

    Pipes ``cot_prompt`` into ``llm`` and ``parser``, invokes the chain with
    the essay text from ``state`` plus the parser's format instructions, and
    returns a partial state update.

    Returns:
        dict with ``cot_feedback`` (the feedback text) and
        ``indiviual_scores`` (a one-element list holding this node's score).
    """
    payload = {
        'essay_text': state['essay_text'],
        "format_instructions": parser.get_format_instructions(),
    }
    verdict = (cot_prompt | llm | parser).invoke(input=payload)

    # The score is wrapped in a list because the state key is declared with an
    # operator.add reducer.
    return dict(
        cot_feedback=verdict['feedback'],
        indiviual_scores=[verdict['score']],
    )


In [50]:
# Prompt for the depth-of-analysis evaluator.
# The system message carries the rubric and the output-format instructions;
# the human message carries the essay.
# Template variables: {format_instructions}, {essay_text}.
aod_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
    You are an expert writing evaluator. Your task is to assess an essay only based on its depth of analysis.

    This means you are judging:
    - How deeply the writer explores their ideas
    - Whether they go beyond surface-level points
    - If they consider complexities, implications, or counterarguments
    - How well they support their ideas with reasoning or evidence (not just if it's present, but if it's thoughtful)
    - Ignore grammar, style, structure, vocabulary, and clarity unless they directly affect the analytical depth.

    Your response must include:
    - A short, honest paragraph of feedback explaining how deep or shallow the analysis was and why. Highlight moments where the writer shows insight—or misses opportunities to dig deeper.

    A depth-of-analysis score out of 10, where:
    - 10 = highly insightful, nuanced thinking throughout
    - 5 = some analysis, but often surface-level or underdeveloped
    - 1 = mostly summary or vague opinions with little real analysis

    Use constructive, human language. Be clear, specific, and helpful—this is about guiding the writer to think more deeply.
    
    # Output Instructions
    Here are your output format instructions: {format_instructions}
    """,
        ),
        ("human", "Evaulate the below essay: {essay_text}"),
    ]
)


In [None]:
def evaluate_aod(state: EssayState):
    """Graph node: evaluate the essay for depth of analysis.

    Pipes ``aod_prompt`` into ``llm`` and ``parser``, invokes the chain with
    the essay text from ``state`` plus the parser's format instructions, and
    returns a partial state update.

    Returns:
        dict with ``aod_feedback`` (the feedback text) and
        ``indiviual_scores`` (a one-element list holding this node's score).
    """
    payload = {
        'essay_text': state['essay_text'],
        "format_instructions": parser.get_format_instructions(),
    }
    verdict = (aod_prompt | llm | parser).invoke(input=payload)

    # The score is wrapped in a list because the state key is declared with an
    # operator.add reducer.
    return dict(
        aod_feedback=verdict['feedback'],
        indiviual_scores=[verdict['score']],
    )


In [52]:
# Prompt for the language-use evaluator.
# The system message carries the rubric and the output-format instructions;
# the human message carries the essay.
# Template variables: {format_instructions}, {essay_text}.
lang_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
    You are an expert writing evaluator. Your task is to assess an essay only based on its use of language.

    This includes:
    - Grammar and sentence correctness
    - Sentence structure and variety
    - Word choice and vocabulary precision
    - Writing style and fluency
    - Ignore the clarity of ideas, depth of analysis, or content accuracy unless they directly impact how the language reads or functions.

    Your response must include:
    - A short, honest paragraph of feedback explaining how well the language works overall. Highlight strengths and point out any patterns of error, awkward phrasing, or flat style.

    A language score out of 10, where:
    - 10 = polished, fluent, and highly effective language use throughout
    - 5 = readable but inconsistent; some awkward phrasing, limited variety, or recurring mistakes
    - 1 = major language issues make it hard to read or understand

    Use supportive and specific language. You're here to help the writer strengthen their writing tools, not just point out flaws.
    
    # Output Instructions
    Here are your output format instructions: {format_instructions}
    """,
        ),
        ("human", "Evaulate the below essay: {essay_text}"),
    ]
)


In [None]:
def evaluate_lang(state: EssayState):
    """Graph node: evaluate the essay's use of language.

    Pipes ``lang_prompt`` into ``llm`` and ``parser``, invokes the chain with
    the essay text from ``state`` plus the parser's format instructions, and
    returns a partial state update.

    Returns:
        dict with ``lang_feedback`` (the feedback text) and
        ``indiviual_scores`` (a one-element list holding this node's score).
    """
    payload = {
        'essay_text': state['essay_text'],
        "format_instructions": parser.get_format_instructions(),
    }
    verdict = (lang_prompt | llm | parser).invoke(input=payload)

    # The score is wrapped in a list because the state key is declared with an
    # operator.add reducer.
    return dict(
        lang_feedback=verdict['feedback'],
        indiviual_scores=[verdict['score']],
    )


In [59]:
# Prompt for the final summary step.
# The system message describes the summarising task and the output format;
# the human message supplies the three feedback texts and the list of scores.
# Template variables: {format_instructions}, {cot_feedback}, {aod_feedback},
# {lang_feedback}, {all_scores}.
overall_prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """
    You are a skilled writing evaluator. You've already been given three separate pieces of feedback on an essay, each focused on a specific area:

    - Clarity of Thought
    - Analysis of Depth
    - Language - grammar, style, structure, vocabulary

    Your job is to:
    - Write a final summary paragraph that combines insights from all three areas. Highlight the overall strengths and areas to improve, without repeating all the details.
    - Provide an average score out of 10, calculated from the three scores. This is the final rating for the essay.

    The tone should be clear, helpful, and human — encouraging but honest. Don't fluff. Keep it focused and practical.
    # Output Instructions
    Here are your output format instructions: {format_instructions}
    """,
        ),
        (
            "human",
            """
    Here are all the feedbacks:
    Clarity of Thought Feedback - {cot_feedback}
    Analysis of Depth Feedback - {aod_feedback}
    Language Feedback - {lang_feedback}
    All Feedback Scores - {all_scores}
    """,
        ),
    ]
)


In [60]:
def overall_feedback(state: EssayState):
    """Graph node: merge the three feedback texts into a summary and final score.

    Invokes ``overall_prompt | llm | parser`` with the three feedback strings
    and the collected scores from ``state``. The summary paragraph comes from
    the LLM; the average score is computed locally from
    ``state['indiviual_scores']``.

    Returns:
        dict with ``overall_feedback`` (the LLM's summary text) and
        ``avg_score`` (arithmetic mean of the collected scores, or the LLM's
        own ``score`` if no usable numeric scores are present).
    """
    scores = state['indiviual_scores']
    overall_chain = overall_prompt | llm | parser
    overall_result = overall_chain.invoke(input={
        'cot_feedback': state['cot_feedback'],
        'aod_feedback': state['aod_feedback'],
        'lang_feedback': state['lang_feedback'],
        'all_scores': scores,
        "format_instructions": parser.get_format_instructions(),
    })

    # Why compute the mean here: the output schema declares `score` as an
    # integer and the value would otherwise depend on the LLM doing arithmetic
    # correctly, while the state declares `avg_score` as a float.
    try:
        numeric_scores = [float(score) for score in scores]
    except (TypeError, ValueError):
        # Scores originate from parsed LLM JSON; if any is not numeric, fall
        # back to the LLM-reported figure rather than failing the run.
        numeric_scores = []

    if numeric_scores:
        avg_score = sum(numeric_scores) / len(numeric_scores)
    else:
        avg_score = overall_result['score']

    return {
        'overall_feedback': overall_result['feedback'],
        'avg_score': avg_score,
    }


In [None]:
# Assemble the evaluation workflow: the three evaluators fan out from START,
# and each one feeds the final summary node, which leads to END.
graph = StateGraph(EssayState)

graph.add_node("evaluate_cot", evaluate_cot)
graph.add_node("evaluate_aod", evaluate_aod)
graph.add_node("evaluate_lang", evaluate_lang)
# The summary node is registered as "summarize_feedback" rather than
# "overall_feedback": EssayState already has an `overall_feedback` key, and
# LangGraph rejects a node name that is already being used as a state key.
# The node function itself keeps its original name.
graph.add_node("summarize_feedback", overall_feedback)

# Fan-out: all three evaluators start from START.
graph.add_edge(START, 'evaluate_cot')
graph.add_edge(START, 'evaluate_aod')
graph.add_edge(START, 'evaluate_lang')
# Fan-in: every evaluator feeds the summary node.
graph.add_edge('evaluate_cot', 'summarize_feedback')
graph.add_edge('evaluate_aod', 'summarize_feedback')
graph.add_edge('evaluate_lang', 'summarize_feedback')
graph.add_edge("summarize_feedback", END)

workflow = graph.compile()


In [None]:
# Input for the workflow run: only the essay text is supplied here.
initial_state = dict(
    essay_text="""
    "Technology Cannot Replace Manpower"
    In today's digital age, technology has transformed how we work, communicate, and live. From AI-driven tools to automation, machines now perform tasks once handled by humans. However, despite its growing role, technology cannot fully replace manpower. This statement highlights the enduring importance of human intelligence, creativity, ethics, and emotional depth.

    Technology excels at repetitive, data-driven, or hazardous tasks. In industries like manufacturing, banking, and agriculture, machines improve speed and accuracy. Yet, these functions are often limited to predefined roles. For example, while robots can assist in surgeries or chatbots handle customer service, they lack empathy, ethical judgment, and adaptability.

    Human qualities such as emotional intelligence, moral reasoning, and creativity remain irreplaceable. A teacher's mentorship, a doctor's intuition, a judge's sense of justice, or a caregiver's compassion cannot be replicated by any machine. In crises like natural disasters or conflicts, human presence and decision-making are essential.

    Moreover, technology often complements manpower rather than replaces it. In education, healthcare, and governance, digital tools support human roles but do not eliminate them. Machines assist, but humans give purpose and direction.

    In countries like India, where large populations depend on labor-intensive sectors, complete automation is neither practical nor desirable. Replacing manpower could worsen unemployment and inequality. Instead, upskilling and combining technology with human effort is the way forward.

    Ethically, over-dependence on machines risks dehumanizing work. Algorithms can be biased, and machines lack accountability. Therefore, human oversight remains critical.

    In conclusion, while technology can enhance human capability, it cannot substitute the depth of human experience, emotion, and ethical judgment. The future lies in a balanced partnership—where machines support, but people lead.
    """
)


In [None]:
# Run the compiled workflow on the sample essay and keep the resulting state.
final_state = workflow.invoke(
    initial_state,
)


In [None]:
# Print each feedback text followed by a row of asterisks, then the final
# score and the individual scores.
divider = "*" * 100
for feedback_key in (
    'cot_feedback',
    'aod_feedback',
    'lang_feedback',
    'overall_feedback',
):
    print(final_state[feedback_key])
    print(divider)
print(final_state['avg_score'])
print(final_state['indiviual_scores'])


The argument is easy to follow: the writer states that technology can’t replace manpower, then lists why (machines lack empathy, creativity, ethics), shows how technology actually complements rather than replaces people, and ends with a clear conclusion. Each paragraph has a single main idea, and the examples (robots in surgery, teachers, doctors, judges) help make the reasoning concrete. The only mild confusion comes in paragraph three, where the examples are bundled together; separating them into individual sentences would sharpen the point. Overall, the logical flow and signposting keep the reader oriented throughout.
****************************************************************************************************
****************************************************************************************************
The prose is clear, grammatically solid, and free of major errors, which makes the piece easy to read straight through. Sentences are generally well-constructed, though 