In [3]:
# Imports and tracing setup. If you are NOT using tracing, comment out the os.environ["LANGCHAIN_HANDLER"] line at the end of this cell.
import os
import inspect
import re
import pandas as pd

from getpass import getpass
from langchain import OpenAI, PromptTemplate
from langchain.chains import LLMChain, LLMMathChain, TransformChain, SequentialChain
from langchain.callbacks import get_openai_callback

from getpass import getpass
# Tracing switch: sets the LANGCHAIN_HANDLER environment variable for this process.
# Comment this line out if you are NOT using tracing.
# NOTE(review): presumably this must be set before any chain is run — confirm against the installed LangChain version.
os.environ["LANGCHAIN_HANDLER"] = "langchain"

In [7]:
# Prefer a key already exported as OPENAI_API_KEY so the notebook can run unattended
# (Restart & Run All); otherwise ask for it without echoing the input. The labelled
# prompt replaces getpass's generic "Password: " so it is clear which secret is requested.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

In [8]:
# Judge model used to rate the transcripts; the API key is passed explicitly.
# temperature=0 — presumably chosen to keep the ratings as repeatable as possible.
# NOTE(review): "gpt-3.5-turbo" is a chat model but is wrapped in the completion-style
# OpenAI class here — confirm the installed LangChain version supports this
# (ChatOpenAI may be the intended wrapper).
llm = OpenAI(
    model_name="gpt-3.5-turbo",
    temperature=0, 
    openai_api_key=OPENAI_API_KEY
    )

In [16]:
# Prompt for the judge model. It explains how Cleo is expected to adapt to each Big Five
# trait, describes the transcript layout, and requests two 1-10 ratings with reasoning.
# NOTE(review): this text is sent to the model verbatim. "adoption"/"adopt" read as if
# "adaptation"/"adapt" were meant, and "wether", "an user", "it's language" are typos —
# confirm before changing, since any edit alters the model input (and likely its output labels).
# NOTE(review): no answer format is prescribed; the recorded results mix "8" and "8/10".
template = """You are a helpful and precise assistant for checking the quality of answers generated by a Chatbot, named Cleo, designed for overall chit-chat, helpfulness and adoption to user behavior.
You will receive the chat protocol of the interaction of this chatbot with an user, along with the Big Five Model values the bot received to adopt to the user. These values are either 1 or 0, indicating wether the
Big Five trait is pronounced in that user or not. If a trait is pronounced (1), the chatbot should adopt accordingly. Here are some examples:
- Openness: If the user scores high on this trait, Cleo would try to engage them in creative and imaginative exercises, and encourage them to explore new ideas and perspectives. Cleo would also use abstract and metaphorical language to help them express their thoughts and feelings.
- Conscientiousness: If the user scores high on this trait, Cleo would focus on setting clear goals and expectations, and providing them with structured and organized guidance. Cleo would also use precise and formal language to help them understand and follow through on their action plans.
- Extraversion: If the user scores high on this trait, Cleo would try to create a warm and friendly atmosphere, and encourage them to share their thoughts and feelings openly. Cleo would also use language that is upbeat and positive, and focus on social interaction and connection.
- Agreeableness: If the user scores high on this trait, Cleo would focus on building rapport and trust, and validating their emotions and experiences. Cleo would also use language that is empathetic and supportive, and focus on finding common ground and solutions that work for both Cleo and the user.
- Neuroticism: If the user scores high on this trait, Cleo would focus on providing emotional support and validation, and helping them manage their anxiety and stress. Cleo would use language that is calming and reassuring, and focus on developing coping strategies and problem-solving skills.
Overall, Cleo's approach would be tailored to the user's individual needs and preferences, and would take into account their unique personality profile. By adapting it's language and approach to their personality traits, Cleo would be better able to connect with them and provide effective counseling support.

At the start of the conversation history you will get an input like this: 
Current Big 5 Personality traits: neuroticism: [0], extraversion: [1], openness: [1], agreeableness: [0], conscientiousness: [1]
This is the input the chatbot got that told him the Big Five personality profile of the User. 
The chat history continues with Cleo: <bot_text>\nUser: <user_text> indicating the author of the message.
You can ignore the first couple messages of Cleo, since those are system messages. 

Please rate the quality of the adoption on a scale from 1 to 10 and provide your reasoning behind that. 
Please also evaluate the overall quality and helpfulness of the chatbot's answers and rate them on a scale from 1 to 10. 

Here is the conversation history: 
{history}
Evaluate!"""

# {history} is the template's only placeholder, so it is declared as the single input variable.
prompt = PromptTemplate(template=template, input_variables=["history"])

In [17]:
# Chain that combines the evaluation prompt with the judge model configured in `llm`.
chain = LLMChain(llm=llm, prompt=prompt)


In [34]:
def load_blocks_to_dataframe(file_path, separator):
    """Read a text file and return one DataFrame row per separator-delimited block.

    Args:
        file_path: Path of the UTF-8 encoded text file to read.
        separator: Exact string that delimits consecutive blocks in the file.

    Returns:
        A pandas DataFrame with a ``text`` column holding each non-blank block
        verbatim and a ``word_count`` column holding the number of
        whitespace-separated tokens in that block. The frame has zero rows
        when the file contains no non-blank block.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If ``separator`` is an empty string (raised by ``str.split``).
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        content = handle.read()

    # A trailing or doubled separator (or an empty file) leaves empty or
    # whitespace-only fragments behind; drop them so they do not turn into
    # zero-word rows that would later be evaluated like real conversations.
    blocks = [block for block in content.split(separator) if block.strip()]

    df = pd.DataFrame({'text': blocks})
    df['word_count'] = [len(block.split()) for block in blocks]

    return df

# Input file and the marker that delimits its blocks.
# NOTE(review): judging by the file name these are chat histories exported from MLflow — confirm.
file = "parsed_mlflow_histories.txt"
separator = '\n\n---BLOCK_SEPARATOR---\n\n'

# One row per block, with its word count; the bare `df` renders the table.
df = load_blocks_to_dataframe(file, separator)
df

Unnamed: 0,text,word_count
0,Current Big 5 Personality traits: \nneuroticis...,1462
1,Current Big 5 Personality traits: \nneuroticis...,697
2,Current Big 5 Personality traits: \nneuroticis...,561
3,Current Big 5 Personality traits: \nneuroticis...,1104
4,Current Big 5 Personality traits: \nneuroticis...,581
5,Current Big 5 Personality traits: \nneuroticis...,1616
6,Current Big 5 Personality traits: \nneuroticis...,608
7,Current Big 5 Personality traits: \nneuroticis...,1246
8,Current Big 5 Personality traits: \nneuroticis...,1505
9,Current Big 5 Personality traits: \nneuroticis...,664


In [35]:
# Wrap each transcript in a {"history": ...} mapping, keyed like the prompt's input variable.
context_examples = [{"history": transcript} for transcript in df["text"]]
context_examples

[{'history': 'Current Big 5 Personality traits: \nneuroticism: [0], extraversion: [1], openness: [1], agreeableness: [0], conscientiousness: [1]\n\nCurrent Conversation:\nUser: /begin\nCleo: Hallo 😊! Ich bin Cleo, dein virtueller Assistent. Ich versuche mich dir ganz individuell anzupassen, um dir in allen Belangen bestmöglich helfen zu können. Was kann ich für dich tun?\nCleo: Was möchtest du tun? Wenn du einen Big Five Persönlichkeitstest durchführst, werde ich mich an das Testergebnis halten, um mich deinen Bedürfnissen anzupassen.\nBeginnst du hingegen direkt mit der Unterhaltung, versuche ich deine Persönlichkeit automatisch über deinen Schreibstil festzustellen.\nCleo: Möchtest du nun etwas plaudern?\nUser: Gerne\nCleo: Super, ich freue mich darauf! Wie geht es dir heute?\nUser: Wie kann ich mich auf einen anstrengenden Tag vorbereiten?\nCleo: Das hängt natürlich von deinen individuellen Bedürfnissen ab. Aber generell kann es helfen, sich eine To-Do-Liste zu machen und Prioritäte

In [36]:
# Sanity check: print the first transcript with its newlines rendered.
print(context_examples[0]["history"])

Current Big 5 Personality traits: 
neuroticism: [0], extraversion: [1], openness: [1], agreeableness: [0], conscientiousness: [1]

Current Conversation:
User: /begin
Cleo: Hallo 😊! Ich bin Cleo, dein virtueller Assistent. Ich versuche mich dir ganz individuell anzupassen, um dir in allen Belangen bestmöglich helfen zu können. Was kann ich für dich tun?
Cleo: Was möchtest du tun? Wenn du einen Big Five Persönlichkeitstest durchführst, werde ich mich an das Testergebnis halten, um mich deinen Bedürfnissen anzupassen.
Beginnst du hingegen direkt mit der Unterhaltung, versuche ich deine Persönlichkeit automatisch über deinen Schreibstil festzustellen.
Cleo: Möchtest du nun etwas plaudern?
User: Gerne
Cleo: Super, ich freue mich darauf! Wie geht es dir heute?
User: Wie kann ich mich auf einen anstrengenden Tag vorbereiten?
Cleo: Das hängt natürlich von deinen individuellen Bedürfnissen ab. Aber generell kann es helfen, sich eine To-Do-Liste zu machen und Prioritäten zu setzen, um den Tag st

["Quality of adoption: 9\nCleo is doing a great job of adapting to the user's personality traits. The user scores high on extraversion, openness, and conscientiousness, and Cleo is using language that is upbeat, positive, and structured. However, the user also scores high on neuroticism, and while Cleo is not explicitly addressing this trait, the language used is still calming and reassuring. Overall, Cleo is doing a good job of tailoring its approach to the user's needs and preferences.\n\nOverall quality and helpfulness of answers: 8\nCleo's answers are generally helpful and engaging, but there is room for improvement in terms of providing more specific and personalized responses. For example, when the user asks Cleo to tell them something about itself, Cleo could provide more information about its own personality or interests, rather than just asking the user more questions. Additionally, while Cleo is doing a good job of adapting to the user's personality traits, there is still som

In [37]:
# Run the chain once per transcript, echo input and verdict, and keep one record per run.
data = []
for i, eg in enumerate(context_examples):
    res = chain.run(eg["history"])
    print(f"Example {i}:")
    print(f"Text: {eg['history']}")
    print(f"Response Evaluation: {res}")
    print()
    data.append({"Example": i, "Text": eg["history"], "Response Evaluation": res})

# NOTE: this rebinds `df`, replacing the transcripts frame loaded earlier in the notebook.
df = pd.DataFrame(data)

Example 0:
Text: Current Big 5 Personality traits: 
neuroticism: [0], extraversion: [1], openness: [1], agreeableness: [0], conscientiousness: [1]

Current Conversation:
User: /begin
Cleo: Hallo 😊! Ich bin Cleo, dein virtueller Assistent. Ich versuche mich dir ganz individuell anzupassen, um dir in allen Belangen bestmöglich helfen zu können. Was kann ich für dich tun?
Cleo: Was möchtest du tun? Wenn du einen Big Five Persönlichkeitstest durchführst, werde ich mich an das Testergebnis halten, um mich deinen Bedürfnissen anzupassen.
Beginnst du hingegen direkt mit der Unterhaltung, versuche ich deine Persönlichkeit automatisch über deinen Schreibstil festzustellen.
Cleo: Möchtest du nun etwas plaudern?
User: Gerne
Cleo: Super, ich freue mich darauf! Wie geht es dir heute?
User: Wie kann ich mich auf einen anstrengenden Tag vorbereiten?
Cleo: Das hängt natürlich von deinen individuellen Bedürfnissen ab. Aber generell kann es helfen, sich eine To-Do-Liste zu machen und Prioritäten zu setz

In [38]:
# Render the evaluation table (columns: Example, Text, Response Evaluation).
df

Unnamed: 0,Example,Text,Response Evaluation
0,0,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 8\nCleo did a good job of...
1,1,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 8/10\nCleo did a good job...
2,2,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 8/10\nCleo did a good job...
3,3,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 9/10\nCleo did a great jo...
4,4,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 8\nCleo does a good job o...
5,5,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 9\nCleo did a good job of...
6,6,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 8/10\nCleo did a good job...
7,7,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 8/10\nCleo did a good job...
8,8,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 9/10\nCleo did a good job...
9,9,Current Big 5 Personality traits: \nneuroticis...,Quality of adoption: 9\nCleo did a good job ad...


In [39]:
# Persist the evaluations as a semicolon-separated CSV, omitting the index column.
df.to_csv('response_evaluation.csv', sep=";", index=False)