In [1]:
import os
import json
from openai import OpenAI
import instructor
from pydantic import BaseModel, Field
from typing import List
import numpy as np
from utils import get_structured_feedback, remove_duplicates, Evaluation, CriteraFeedback

In [2]:
openai_client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)

instructor_client = instructor.from_openai(openai_client)

In [3]:
remove_duplicates(openai_client=openai_client, strings=['Clear and logical organization of the essay', 'Clear and logical organization'])

Strings: ['Clear and logical organization of the essay', 'Clear and logical organization']
Similarity matrix: [[1.         0.81707956]
 [0.81707956 1.        ]]
Threshold: 0.8170795552569061



['Clear and logical organization of the essay']

In [5]:
# read messages from user1-messages.json
with open("user1-messages.json", "r") as f:
	chat_history = json.load(f)

### Storing all evaluations from a conversation

In [6]:
feedbacks = get_structured_feedback(instructor_client, chat_history)

In [7]:
feedbacks

[Evaluation(introduction=CriteraFeedback(strengths=['Clear and compelling framing of the argument on environmental conservation'], weaknesses=[], suggestions=[]), structure=CriteraFeedback(strengths=['Thoughtful organization of the essay from the importance of conservation to specific examples'], weaknesses=['Consider developing the counterargument section further', 'Add transition sentences between paragraphs for better flow'], suggestions=['Develop counterargument with more details or statistics', 'Include transition sentences for seamless flow']), argumentation=CriteraFeedback(strengths=['Use of examples like protecting natural habitats and global warming to solidify the argument'], weaknesses=[], suggestions=[]), evidence=CriteraFeedback(strengths=['Use of examples to strengthen the argument'], weaknesses=[], suggestions=[]), conclusion=CriteraFeedback(strengths=['Effective restatement of thesis and summary of main points'], weaknesses=[], suggestions=[])),
 Evaluation(introduction

### Combining evaluations from a chat session to remove duplicates

In [8]:
def aggregate_feedback(feedbacks: List[Evaluation]):
    # Initialize lists for each feedback category
    aggregated = {
        "introduction": {"strengths": [], "weaknesses": [], "suggestions": []},
        "structure": {"strengths": [], "weaknesses": [], "suggestions": []},
        "argumentation": {"strengths": [], "weaknesses": [], "suggestions": []},
        "evidence": {"strengths": [], "weaknesses": [], "suggestions": []},
        "conclusion": {"strengths": [], "weaknesses": [], "suggestions": []}
    }
    
    # Aggregate all feedbacks
    for feedback in feedbacks:
        for key in aggregated:
            critera_feedback = getattr(feedback, key)
            aggregated[key]['strengths'].extend(critera_feedback.strengths)
            aggregated[key]['weaknesses'].extend(critera_feedback.weaknesses)
            aggregated[key]['suggestions'].extend(critera_feedback.suggestions)
    
    # Deduplicate lists
    for key in aggregated:
        for subkey in aggregated[key]:
            aggregated[key][subkey] = remove_duplicates(openai_client, aggregated[key][subkey])

    # Construct a new Evaluation object
    return Evaluation(
        introduction=CriteraFeedback(**aggregated['introduction']),
        structure=CriteraFeedback(**aggregated['structure']),
        argumentation=CriteraFeedback(**aggregated['argumentation']),
        evidence=CriteraFeedback(**aggregated['evidence']),
        conclusion=CriteraFeedback(**aggregated['conclusion'])
    )

In [9]:
aggregated_feedback = aggregate_feedback(feedbacks)

Strings: ['Clear and compelling framing of the argument on environmental conservation', 'Clear and compelling framing of the argument in the introduction and conclusion', 'Thoughtful organization of the essay structure', 'Effectively framing the argument in the introduction and conclusion']
Similarity matrix: [[1.         0.62856473 0.37782844 0.52671438]
 [0.62856473 1.         0.51110325 0.88345688]
 [0.37782844 0.51110325 1.         0.53058314]
 [0.52671438 0.88345688 0.53058314 1.        ]]
Threshold: 0.7319746548059236

Strings: ['Consider developing the counterargument section further']
Similarity matrix: [[1.]]
Threshold: nan



  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)


Strings: ['Thoughtful organization of the essay from the importance of conservation to specific examples', 'Clear and logical organization of paragraphs', 'Clear and logical organization of the essay']
Similarity matrix: [[1.         0.46101496 0.60352818]
 [0.46101496 1.         0.80592049]
 [0.60352818 0.80592049 1.        ]]
Threshold: 0.7650005338986422

Strings: ['Consider developing the counterargument section further', 'Add transition sentences between paragraphs for better flow', 'Consider enhancing the transition sentences between paragraphs', 'Consider adding transition sentences between paragraphs']
Similarity matrix: [[1.         0.30751865 0.44118206 0.40139504]
 [0.30751865 1.         0.82514842 0.86408588]
 [0.44118206 0.82514842 1.         0.89971539]
 [0.40139504 0.86408588 0.89971539 1.        ]]
Threshold: 0.8671867427526172

Strings: ['Develop counterargument with more details or statistics', 'Include transition sentences for seamless flow', 'Consider enhancing the 

### Semantic deduplication doesn't seem to work with a dynamic threshold

In [10]:
for criteria, feedback in aggregated_feedback:
	print(f"{criteria}:")
	print(f"Strengths: {feedback.strengths}")
	print(f"Weaknesses: {feedback.weaknesses}")
	print(f"Suggestions: {feedback.suggestions}")
	print("\n")

introduction:
Strengths: ['Clear and compelling framing of the argument on environmental conservation']
Weaknesses: []
Suggestions: ['Consider developing the counterargument section further']


structure:
Strengths: ['Thoughtful organization of the essay from the importance of conservation to specific examples']
Weaknesses: ['Consider developing the counterargument section further']
Suggestions: ['Develop counterargument with more details or statistics']


argumentation:
Strengths: ['Use of examples like protecting natural habitats and global warming to solidify the argument']
Weaknesses: ['Consider developing the counterargument section with more details or statistics to strengthen the position']
Suggestions: ['Consider developing the counterargument section with more details or statistics to strengthen the position']


evidence:
Strengths: ['Use of examples to strengthen the argument']
Weaknesses: []
Suggestions: []


conclusion:
Strengths: ['Effective restatement of thesis and summa