## Setup
Initialize the Firebase and OpenAI dependencies.

In [None]:
!python3 -m pip install firebase_admin
import firebase_admin
from firebase_admin import credentials, firestore
import pandas as pd

# Initialize the Firebase app from the service-account key file.
cred = credentials.Certificate("tmp-feedback-test-firebase-adminsdk-flei0-26191ff34a.json")
try:
    # Re-running this cell must not fail: initialize_app() raises ValueError
    # when the default app already exists, so probe for it first.
    firebase_admin.get_app()
except ValueError:
    # get_app() raises ValueError when no default app has been created yet.
    firebase_admin.initialize_app(cred)
# Firestore client used by the cells below to read essays and write feedback.
db = firestore.client()

In [None]:
!python3 -m pip install openai
from openai import OpenAI
import json

# Read the OpenAI API key from a local JSON file; the key is stored under 'key'.
with open('openai-stanford-api-key.json') as file:
    data = json.load(file)

# Client used for all chat-completion requests in this notebook.
client = OpenAI(api_key=data['key'])

## For now, just generate feedback for one test essay.

In [8]:
# System message template. The {grade} placeholder is filled in with
# str.format() before the request is sent.
system_prompt = "You are an {grade}th grade English Language Arts teacher."
# User message template. Placeholders filled in with str.format():
#   {grade}        - grade level, used to pitch the language of the feedback
#   {instructions} - the assignment instructions
#   {essay}        - the student's writing
# The prompt asks the model for a JSON object with two fields:
#   inlineFeedback  - a list of 3-7 pairs, each with an 'excerpt' and a 'comment'
#   overallFeedback - a single text field
# NOTE: this text is sent to the model verbatim, so any edit to the wording
# can change the generated feedback.
task_prompt = """You will be provided information about a writing assignment and a piece of student writing, denoted by triple quotes. 
    Your task is to provide both inline feedback and a overall summary for your student's work.
    
    Your feedback should help your student see about how their work aligns with assignment instructions and evaluation standards, 
    help your student to reflect on and improve their writing process, and suggest specific actions they can take to revise and improve their work. 

    Your feedback should foreground the student's voice and meaning-making process and open up interpretive possibilities.
    You should use conversational student-friendly language appropriate for {grade}th grade. 

    Format your response as JSON output with two fields, inlineFeedback and overallFeedback. 
    The field inlineFeedback should be a list of 3-7 pairs, each containing an exact 'excerpt' from the student's essay and a corresponding feedback 'comment'. 
    Choose short excerpts that are most directly relevant to the feedback comment. 
    The field overallFeedback should be a single text field. 

    Assignment Instructions: \"\"\"{instructions}\"\"\" \n
    Student Writing:\"\"\"{essay}\"\"\" \n
    """

In [None]:
import re

def escape_reg_exp(text):
    """Return *text* with regex metacharacters escaped so it matches literally."""
    escaped = re.escape(text)
    return escaped

def create_flexible_pattern(excerpt):
    """Compile a case-insensitive regex for *excerpt* that tolerates whitespace differences.

    The excerpt is trimmed and split on whitespace runs; each word is escaped
    and the words are rejoined with ``\\s+`` so that any amount of whitespace
    between words matches.
    """
    tokens = re.split(r'\s+', excerpt.strip())
    escaped_tokens = [escape_reg_exp(token) for token in tokens]
    return re.compile(r'\s+'.join(escaped_tokens), flags=re.IGNORECASE)

def find_excerpt_indices(original_text, excerpt):
    """Locate *excerpt* inside *original_text*, ignoring case and whitespace differences.

    Args:
        original_text: The text to search in.
        excerpt: The passage to look for.

    Returns:
        A dict with 'start_index' and 'end_index' (taken from the regex
        match's start() and end()) for the first match, or None when the
        excerpt is empty, is not a string, or does not occur in the text.
    """
    # An empty or whitespace-only excerpt would compile to an empty pattern,
    # which "matches" at position 0 and yields a bogus zero-length span.
    if not isinstance(excerpt, str) or not excerpt.strip():
        return None

    match = create_flexible_pattern(excerpt).search(original_text)
    if match is None:
        return None  # No match found

    return {'start_index': match.start(), 'end_index': match.end()}

In [None]:
import json
testessayid = 18271  # pick this known essay
essay = db.document(f'annotation/{testessayid}').get()
if not essay.exists:
    # to_dict() returns None for a missing document, which would otherwise
    # surface below as an opaque "NoneType is not subscriptable" error.
    raise LookupError(f"No annotation document found for essay {testessayid}")
essay_info = essay.to_dict()

# Fill the prompt templates with this essay's grade, instructions and text.
system = system_prompt.format(grade=essay_info['grade'])
prompt = task_prompt.format(
    grade=essay_info['grade'],
    instructions=essay_info['assignmentInstructions'],
    essay=essay_info['essay'],
)

# Request a JSON-object response so the reply can be parsed with json.loads.
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": system},
        {"role": "user", "content": prompt},
    ],
)

# Parse the first choice; the upload cell reads 'inlineFeedback' and
# 'overallFeedback' from the parsed result.
res = response.choices[0].message.content
data = json.loads(res)

In [22]:
# load inline comments to firebase
for index, value in enumerate(data['inlineFeedback']):
    commentid = f"gencomment{index}"
    matches = find_excerpt_indices(essay_info['essay'], value['excerpt'])
    if matches is None:
        # The excerpt could not be located in the essay, so there is no span
        # to anchor the comment to. Report it instead of dropping it silently.
        print(f"Skipping {commentid}: excerpt not found in essay: {value['excerpt']!r}")
        continue

    gen_inline_data = {
        'excerpt': value['excerpt'],
        'gen_comment': value['comment'],
        'status': 'pending',
        'startIndex': matches['start_index'],
        'endIndex': matches['end_index'],
    }
    db.document(f'annotation/{testessayid}/genInlineFeedback/{commentid}').set(gen_inline_data)

# Store the overall feedback text in a single document under the essay.
gen_overall_data = {'gen_text': data['overallFeedback']}
overall_doc = db.document(f'annotation/{testessayid}/genOverallFeedback/data')
overall_doc.set(gen_overall_data)

update_time {
  seconds: 1708894108
  nanos: 910956000
}