##### Creating an automatic grader: With LangChain

In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.schema import BaseOutputParser

In [None]:
# import sk - need a .py file named sk.py that contains the OpenAI access token assigned to the variable my_sk
from sk import my_sk

In [None]:
# Chat model used for grading; temperature is set to 0 and the key comes from sk.py.
chat_model = ChatOpenAI(
    temperature=0,
    openai_api_key=my_sk,
)

In [None]:
# Grading prompt: the model plays a history teacher and is shown the question,
# the reference answer, and the student's answer via the three placeholders.

prompt_template_text = """You are a high school history teacher grading homework assignments. \
Based on the homework question indicated by “**Q:**” and the correct answer indicated by “**A:**”, your task is to determine whether the student's answer is correct. \
Grading is binary; therefore, student answers can be correct or wrong. \
Simple misspellings are okay.

**Q:** {question}
**A:** {correct_answer}

**Student's Answer:** {student_answer}
"""

# The placeholder names above must match the declared input variables.
prompt = PromptTemplate(
    template=prompt_template_text,
    input_variables=["question", "correct_answer", "student_answer"],
)

In [None]:
# Wire the prompt template to the chat model.
chain = LLMChain(prompt=prompt, llm=chat_model)

In [None]:
# Try the chain on a single example.

# Grading inputs: the question, its reference answer, and one student's response.
question = "Who was the 35th president of the United States of America?"
correct_answer = "John F. Kennedy"
student_answer = "JFK"

# Last expression of the cell, so the notebook displays the chain's result.
chain.run(
    {
        "question": question,
        "correct_answer": correct_answer,
        "student_answer": student_answer,
    }
)

In [None]:
# Grade several student answers to the same question, one chain call per answer.
student_answer_list = [
    "John F. Kennedy",
    "JFK",
    "FDR",
    "John F. Kenedy",
    "John Kennedy",
    "Jack Kennedy",
    "Jacqueline Kennedy",
    "Robert F. Kenedy",
]

for student_answer in student_answer_list:
    grading_inputs = {
        "question": question,
        "correct_answer": correct_answer,
        "student_answer": student_answer,
    }
    verdict = chain.run(grading_inputs)
    print(f"{student_answer} - {verdict}")
    # Blank lines between results keep multi-line model responses readable.
    print("\n")

# output

# John F. Kennedy - Correct


# JFK - Correct


# FDR - Student's Answer is wrong.


# John F. Kenedy - Correct


# John Kennedy - Correct


# Jack Kennedy - Grade: Correct

# The student's answer, "Jack Kennedy," is a simple misspelling of the correct answer, "John F. Kennedy." However, the student's answer still accurately identifies the 35th president of the United States of America.


# Jacqueline Kennedy - Student's Answer: Jacqueline Kennedy

# Grade: Wrong

# Explanation: The student's answer is incorrect. The 35th president of the United States of America was John F. Kennedy, not Jacqueline Kennedy.


# Robert F. Kenedy - Student's Answer is wrong.

In [None]:
# Now suppose we need the output in a different form, for example to add a point when the answer is right. For that we need something a little more elaborate, so let's define a toy output parser as follows.

In [None]:
# define output parser
class GradeOutputParser(BaseOutputParser):
    """Convert the grader model's free-text verdict into a boolean.

    ``parse`` returns ``True`` when the response reads as a "correct" grade
    and ``False`` when it reads as a "wrong" grade.
    """

    def parse(self, text: str) -> bool:
        """Parse the output of an LLM call.

        Args:
            text: Raw text returned by the model.

        Returns:
            ``False`` if the text is empty or contains a negative verdict
            ("wrong", "incorrect", "not correct"); ``True`` otherwise.
        """
        # The prompt does not pin down an output format, so the model may
        # phrase a failing grade in several ways. Checking only for "wrong"
        # would score a response such as "Incorrect" as a correct answer.
        negative_markers = ("wrong", "incorrect", "not correct")

        normalized = text.strip().lower()
        if not normalized:
            # An empty response carries no verdict; do not award credit for it.
            return False
        return not any(marker in normalized for marker in negative_markers)

In [None]:
# Rebuild the chain so its text output is passed through the grade parser.
grade_parser = GradeOutputParser()
chain = LLMChain(prompt=prompt, llm=chat_model, output_parser=grade_parser)

In [None]:
# Grade every student answer and print the chain's parsed result next to it.
for student_answer in student_answer_list:
    grading_inputs = {
        "question": question,
        "correct_answer": correct_answer,
        "student_answer": student_answer,
    }
    is_correct = chain.run(grading_inputs)
    print(f"{student_answer} - {is_correct}")

# Output:

# John F. Kennedy - True
# JFK - True
# FDR - False
# John F. Kenedy - True
# John Kennedy - True
# Jack Kennedy - True
# Jacqueline Kennedy - False
# Robert F. Kenedy - False