Skip to content

Commit

Permalink
feat: Add user frustration eval (#2928)
Browse files Browse the repository at this point in the history
* added user frustration eval

* Clean up notebook

---------

Co-authored-by: jlopatec <jlopatec@gmail.com>
  • Loading branch information
anticorrelator and jlopatec committed Apr 19, 2024
1 parent b787f5b commit 406938b
Show file tree
Hide file tree
Showing 3 changed files with 1,579 additions and 0 deletions.
4 changes: 4 additions & 0 deletions packages/phoenix-evals/src/phoenix/evals/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
SQL_GEN_EVAL_PROMPT_TEMPLATE,
TOXICITY_PROMPT_RAILS_MAP,
TOXICITY_PROMPT_TEMPLATE,
USER_FRUSTRATION_PROMPT_RAILS_MAP,
USER_FRUSTRATION_PROMPT_TEMPLATE,
)
from .evaluators import (
HallucinationEvaluator,
Expand Down Expand Up @@ -76,6 +78,8 @@
"SQL_GEN_EVAL_PROMPT_TEMPLATE",
"CODE_FUNCTIONALITY_PROMPT_RAILS_MAP",
"CODE_FUNCTIONALITY_PROMPT_TEMPLATE",
"USER_FRUSTRATION_PROMPT_RAILS_MAP",
"USER_FRUSTRATION_PROMPT_TEMPLATE",
"NOT_PARSABLE",
"run_evals",
"LLMEvaluator",
Expand Down
61 changes: 61 additions & 0 deletions packages/phoenix-evals/src/phoenix/evals/default_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,59 @@

CODE_FUNCTIONALITY_PROMPT_RAILS_MAP = OrderedDict({True: "bug_free", False: "is_bug"})

# Binary-classification prompt for judging whether the user ended the
# conversation frustrated. The single `{conversation}` placeholder is filled
# with the full user/assistant transcript; the model must answer with exactly
# one rail word: "frustrated" or "ok".
# Fixes vs. first draft: dropped the stray "where" in "conversation where
# between", and corrected the "midly" typo — typos in prompts degrade
# instruction-following.
USER_FRUSTRATION_PROMPT_BASE_TEMPLATE = """
You are given a conversation between a user and an assistant.
Here is the conversation:
[BEGIN DATA]
*****************
Conversation:
{conversation}
*****************
[END DATA]
Examine the conversation and determine whether or not the user got frustrated from the experience.
Frustration can range from mildly frustrated to extremely frustrated. If the user seemed frustrated
at the beginning of the conversation but seemed satisfied at the end, they should not be deemed
as frustrated. Focus on how the user left the conversation.
Your response must be a single word, either "frustrated" or "ok", and should not
contain any text or characters aside from that word. "frustrated" means the user was left
frustrated as a result of the conversation. "ok" means that the user did not get frustrated
from the conversation.
"""

# Chain-of-thought variant of the frustration prompt: asks the model for a
# step-by-step EXPLANATION followed by a LABEL ("frustrated" or "ok"), so the
# eval can surface reasoning alongside the classification. Same
# `{conversation}` placeholder as the base template.
# Fixes vs. first draft: dropped the stray "where" in "conversation where
# between", corrected "midly" -> "mildly", and corrected the "frusterated"
# misspelling (twice) — the misspelled word risked leaking into model output
# and breaking exact-match rail parsing.
USER_FRUSTRATION_PROMPT_TEMPLATE_WITH_EXPLANATION = """
You are given a conversation between a user and an assistant.
Here is the conversation:
[BEGIN DATA]
*****************
Conversation:
{conversation}
*****************
[END DATA]
Examine the conversation and determine whether or not the user got frustrated from the experience.
Frustration can range from mildly frustrated to extremely frustrated. If the user seemed frustrated
at the beginning of the conversation but seemed satisfied at the end, they should not be deemed
as frustrated. Focus on how the user left the conversation.
You are going to respond with an EXPLANATION and LABEL.
Please read the text carefully, then write out in a step by step manner an
EXPLANATION as to why you think the user is frustrated.
Your LABEL response must be a single word, either "frustrated" or "ok", and should not
contain any text or characters aside from that word. "frustrated" means the user was left
frustrated as a result of the conversation. "ok" means that the user did not get frustrated
from the conversation.
Example response:
************
EXPLANATION: An explanation of your reasoning for why the user is frustrated
LABEL: "frustrated" or "ok"
************
"""

USER_FRUSTRATION_PROMPT_RAILS_MAP = OrderedDict({True: "frustrated", False: "ok"})

RAG_RELEVANCY_PROMPT_TEMPLATE = ClassificationTemplate(
rails=list(RAG_RELEVANCY_PROMPT_RAILS_MAP.values()),
template=RAG_RELEVANCY_PROMPT_BASE_TEMPLATE,
Expand Down Expand Up @@ -640,6 +693,13 @@
scores=[1, 0],
)

# Ready-to-use classification template for the user-frustration eval.
# Bundles the plain and explanation prompt variants with the output rails
# and their numeric scores: rails come from the rails map's values
# (["frustrated", "ok"]), and scores=[1, 0] aligns positionally with them,
# so "frustrated" scores 1 and "ok" scores 0 — same pattern as the other
# ClassificationTemplate instances in this module.
USER_FRUSTRATION_PROMPT_TEMPLATE = ClassificationTemplate(
    rails=list(USER_FRUSTRATION_PROMPT_RAILS_MAP.values()),
    template=USER_FRUSTRATION_PROMPT_BASE_TEMPLATE,
    explanation_template=USER_FRUSTRATION_PROMPT_TEMPLATE_WITH_EXPLANATION,
    scores=[1, 0],
)


class EvalCriteria(Enum):
RELEVANCE = RAG_RELEVANCY_PROMPT_TEMPLATE
Expand All @@ -652,3 +712,4 @@ class EvalCriteria(Enum):
HUMAN_VS_AI = HUMAN_VS_AI_PROMPT_TEMPLATE
SQL_GEN_EVAL = SQL_GEN_EVAL_PROMPT_TEMPLATE
CODE_FUNCTIONALITY = CODE_FUNCTIONALITY_PROMPT_TEMPLATE
USER_FRUSTRATION = USER_FRUSTRATION_PROMPT_TEMPLATE
1,514 changes: 1,514 additions & 0 deletions tutorials/evals/evaluate_user_frustration_classifications.ipynb

Large diffs are not rendered by default.

0 comments on commit 406938b

Please sign in to comment.