In [2]:
import dspy
import os
from dotenv import load_dotenv
load_dotenv()

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
GOOGLE_API_KEY = os.getenv('GOOGLE_API_KEY')
TEMPERATURE = 0.7

lm_gpt = dspy.LM('openai/gpt-4o', api_key=OPENAI_API_KEY, temperature=TEMPERATURE)
lm_gemini = dspy.LM('gemini/gemini-1.5-pro', api_key=GOOGLE_API_KEY, temperature=TEMPERATURE)
# dspy.configure(lm=lm_gpt)


* 'fields' has been removed
  from .autonotebook import tqdm as notebook_tqdm


# Generate answer

In [3]:
dspy.configure(lm=lm_gemini)
def gen_answer(question, ms = None, target_score = None):
    prompt = (
        f"Please answer the following A level Economic essay question in about 300 words.\n"
        f"Question: {question}\n"
    )
    if ms:
        prompt += f"Please answer according to the mark scheme:\n {ms}\n"
    
    if target_score:
        prompt += f"Please answer aim for a score of {target_score}.\n"

    return lm_gemini(prompt)

gen_answer("Assess whether all market economies should become mixed economies.")

['A pure market economy, devoid of government intervention, exists only in theory.  In practice, all functioning economies are to some degree “mixed,” incorporating elements of both market-based allocation and government intervention.  The question, therefore, is not whether economies *should* become mixed, but rather *to what extent* they should embrace government involvement.  A complete shift towards a centrally planned economy is rarely advocated today, given historical failures of such systems.  The debate centers on the optimal balance.\n\nArguments for greater government involvement in market economies stem from market failures.  Information asymmetry, where one party in a transaction has more information than the other, can lead to exploitation.  Externalities, like pollution, represent costs or benefits not reflected in market prices, requiring government intervention through regulation or taxation.  Public goods, such as national defense, are non-excludable and non-rivalrous,

In [56]:
# read csv file
import pandas as pd
df = pd.read_csv('/Users/chen/Desktop/Econ/papers/all_papers.csv')
df["answer"] = df.apply(lambda row: gen_answer(row["question"]), axis=1)
df["answer_ms"] = df.apply(lambda row: gen_answer(row["question"], row["mark_scheme"]), axis=1)

# save to csv
df.to_csv('all_papers_with_answers.csv', index=False)



In [57]:
# read csv file
import pandas as pd
df = pd.read_csv('all_papers_with_answers.csv')
df["answer_ms50"] = df.apply(lambda row: gen_answer(row["question"], row["mark_scheme"],row["total_marks"] * 0.5), axis=1)
df["answer_ms75"] = df.apply(lambda row: gen_answer(row["question"], row["mark_scheme"],row["total_marks"] * 0.75), axis=1)

# save to csv
df.to_csv('all_papers_with_answers.csv', index=False)

# Evaluate

In [68]:
dspy.configure(lm=lm_gpt)
def evaluate_answer(question, answer, ms):

    prompt = f"""
You are a strict AP Economics grader. Your task is to evaluate a student's answer based on the provided rubric. Please adhere strictly to the rubric without any deviation, and provide detailed reasoning for the score as well as suggestions for improvement.

Question: 
{question}

Student Answer: 
{answer}

Rubric: 
{ms}


Scoring Requirements:
1. Score:
Assign a clear score based on the rubric.
The score must strictly follow the rubric; do not adjust arbitrarily.
Do not deduct points for missing graphs or diagrams, since the student is unable to include them in this response.


2. Rationale:
Explain the specific reasons for the assigned score, including how the student's answer meets or does not meet the rubric criteria.
If the answer partially meets the criteria, specify lost points, referencing the rubric.
Clearly list the specific deduction points, stating exactly why the answer lost points.
If the rubric requires a graph for full credit, do not deduct points for its absence, but clearly describe what a complete and correct graph should include to earn full points.


3. Suggestions for Improvement:
Provide specific suggestions to help the student improve their answer.
Indicate what content needs to be added, modified, or refined to meet the rubric standards.

Please grade strictly according to the rubric and provide clear rationale and improvement suggestions. 
And please provide the whole evaluation both in English and Chinese.
"""
    print(prompt)

    return lm_gpt(prompt)

In [67]:
# read csv file
import pandas as pd
df = pd.read_csv('all_papers_with_answers.csv')
df["evaluation"] = df.apply(lambda row: evaluate_answer(row["question"], row["answer"], row["mark_scheme"]), axis=1)
df["evaluation_ms"] = df.apply(lambda row: evaluate_answer(row["question"], row["answer_ms"], row["mark_scheme"]), axis=1)
df["evaluation_ms50"] = df.apply(lambda row: evaluate_answer(row["question"], row["answer_ms50"], row["mark_scheme"]), axis=1)
df["evaluation_ms75"] = df.apply(lambda row: evaluate_answer(row["question"], row["answer_ms75"], row["mark_scheme"]), axis=1)

# save to csv
df.to_csv('all_papers_with_answers&evaluation.csv', index=False)


In [69]:
# read csv file
import pandas as pd
df = pd.read_csv('all_papers_with_answers.csv')
# test a single row
row = df.iloc[0]
evaluate_answer(row["question"], row["answer"], row["mark_scheme"])



You are a strict AP Economics grader. Your task is to evaluate a student's answer based on the provided rubric. Please adhere strictly to the rubric without any deviation, and provide detailed reasoning for the score as well as suggestions for improvement.

Question: 
With the help of a diagram, assess the extent to which a government can intervene to correct market failure caused by negative externalities in air travel.

Student Answer: 
['Air travel generates significant negative externalities, primarily through noise and air pollution, contributing to climate change and respiratory problems. These costs are not reflected in the market price of air travel, leading to overconsumption and market failure. Governments can intervene to correct this, although the effectiveness of these interventions varies.\n\nOne common intervention is taxation. A tax on air travel, such as an excise duty on fuel or a per-passenger levy, internalizes the externality by increasing the price closer to the 

["**English Version:**\n\n**Score:**\n\nAO1 Knowledge and Understanding and AO2 Analysis: 12/14  \nAO3 Evaluation: 5/6\n\n**Rationale:**\n\nAO1 and AO2:\n\n- The student's response adequately identifies market failure due to negative externalities in air travel, specifically citing noise and air pollution. This corresponds well with the rubric's expectation of addressing allocative inefficiency due to externalities. \n- The answer provides a clear explanation of how taxation can internalize externalities and uses a theoretical diagram to illustrate the shift from private marginal cost (PMC) to social marginal cost (SMC). This aligns with the rubric's requirement for a diagram showing market equilibrium and allocatively efficient levels.\n- The student also discusses other forms of government intervention, such as regulation and emissions trading schemes, demonstrating a strong understanding of various approaches.\n- However, while the answer is comprehensive, it misses an explicit defi