In [1]:
from typing import Dict, List
from brewval.model import Prompt, Label
from brewval.eval import Evaluator

from langchain.llms import OpenAI, BaseLLM

In [2]:
# Few-shot emotion-classification prompt. It names the six emotion classes,
# shows two worked "Description -> Emotion" examples (sadness, fear), and ends
# with a templated pair. `{description}` is filled from the 'description' entry
# of each Label's variables; `{result}` is presumably where brewval places or
# expects the answer -- TODO confirm against brewval.model.Prompt.
# The class names must be spelled correctly ("happiness"), because this list is
# the vocabulary the model is asked to answer with.
prompt = Prompt(
"""There are 6 basic emotions: happiness, sadness, fear, disgust, anger, surprise.
Emotion can be detected from its description.
Description: Feelings of disappointment, grief, hopelessness, disinterest, and dampened mood.
Emotion: sadness
Description: muscles become tense, your heart rate and respiration increase, and your mind becomes more alert, priming your body to either run from the danger or stand and fight
Emotion: fear
Description: {description}
Emotion: {result}""")

# Evaluation cases as (emotion, description) pairs. The description text is
# passed to Label under the 'description' key, which matches the
# `{description}` placeholder in the prompt template. The emotion string is
# presumably the expected answer -- confirm against brewval.model.Label.
labelled_descriptions = [
    ('fear', 'heart rate and respiration increase'),
    ('surprise', 'quite brief and is characterized by a physiological startle response following something unexpected'),
    ('anger', 'Characterized by feelings of hostility, agitation, frustration, and antagonism towards others.'),
]

# One Label per case, in the same order as the table above.
labels = [
    Label(emotion, {'description': description_text})
    for emotion, description_text in labelled_descriptions
]

In [3]:
# Models under comparison, keyed by the display name used in the results.
# temperature=0 is set explicitly: the LangChain OpenAI wrapper otherwise
# samples with its non-zero default temperature, which makes the accuracy
# figures vary from run to run and the comparison non-reproducible.
models: Dict[str, BaseLLM] = {
    'OpenAI[davinci-003]': OpenAI(model_name='text-davinci-003', temperature=0),
    'OpenAI[davinci-002]': OpenAI(model_name='text-davinci-002', temperature=0),
    'OpenAI[ada-001]': OpenAI(model_name='text-ada-001', temperature=0)
}

In [4]:
# Run the same prompt and labelled cases against every configured model.
evaluator = Evaluator(models)
results = evaluator.evaluate(prompt, labels)

# Report one line per model; accuracy is scaled by 100 to show a percentage.
for result in results:
    model_name = result.model_name
    accuracy_percent = result.accuracy * 100
    print(f'Model {model_name} accuracy: {accuracy_percent}%')

Model OpenAI[davinci-003] accuracy: 100.0%
Model OpenAI[davinci-002] accuracy: 33.33333333333333%
Model OpenAI[ada-001] accuracy: 0.0%
