In [1]:
from tqdm import tqdm

# Register tqdm's pandas integration; the `progress_apply` call on df_train
# further down relies on this having been run.
tqdm.pandas()

# Loading Dataset

In [2]:
from datasets import load_dataset

# Load the "raw" configuration of the go_emotions dataset.
dataset = load_dataset("go_emotions", "raw")
# Switch the output format to pandas *before* slicing: later cells treat
# df_train as a DataFrame (they read `.columns` and index it by column list).
dataset.set_format(type="pandas")
# Materialise the whole train split.
df_train = dataset["train"][:]

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Columns from position 9 onward of the loaded split are treated as the
# emotion labels. The names are read from the split itself rather than from
# df_train: df_train is narrowed at the end of this cell, so slicing its own
# columns would give a different (wrong) list if the cell were run a second time.
emotions = dataset["train"].column_names[9:]
columns = ["text"] + emotions
# select only texts and emotions
df_train = df_train[columns]

In [4]:
def _active_emotions(row):
    """Return the names of the columns in `row` whose value equals 1."""
    return row.index[row == 1].tolist()


# Add a `ds_emotions` column: for every row, the list of emotion names (strings)
# whose flag is 1. Applied row-wise with a tqdm progress bar.
df_train["ds_emotions"] = df_train[emotions].progress_apply(_active_emotions, axis=1)

  1%|          | 1663/211225 [00:00<00:12, 16625.73it/s]

100%|██████████| 211225/211225 [00:12<00:00, 16482.69it/s]


In [5]:
# Does "neutral" ever appear together with other emotions?
# Walk over every distinct label combination and print those containing it.
values = df_train["ds_emotions"].value_counts().keys()
for value in values:
    if "neutral" not in value:
        continue
    print(value)

['neutral']


In [18]:
# Fixed seed so the same rows are drawn on every run.
seed = 7102023
# Draw 20 random rows, then discard the per-emotion flag columns so that only
# the remaining columns (text and ds_emotions) are kept.
samples = df_train.sample(n=20, random_state=seed).drop(columns=emotions)

Unnamed: 0,text,ds_emotions
160394,Oh wow I genuinely thought this was funny can’...,"[amusement, realization, surprise]"
33084,Are those potatoes on the bottom right...?,[confusion]
127168,No. People just realized how dogs make good co...,[realization]
90564,I love this meme,"[admiration, amusement, love]"
12743,Not really. My group of friends are awesome in...,[approval]
51254,"""You're a good man [NAME]."" Yo [NAME], are you...",[neutral]
164497,The chicken and bacon one sounds absolutely ba...,[gratitude]
127480,But thats for [NAME] and our secret getaway pl...,[amusement]
30742,You're going to seriously regret this in a few...,[remorse]
129702,Credit? For what? Using an old Soviet tactic t...,[curiosity]


In [7]:
# Hand-written label lists, one entry per sampled row (20 entries for the 20
# rows drawn into `samples`), in the same positional order as `samples`.
# NOTE(review): "gt" presumably stands for ground truth — confirm who assigned
# these labels and by what procedure.
gt_emotions = [
    ["neutral"],
    ["approval"],
    ["neutral"],
    ["disgust"],
    ["disapproval"],
    ["caring", "joy"],
    ["love", "approval"],
    ["sadness", "remorse"],
    ["disgust"],
    ["curiosity"],
    ["amusement", "anger"],
    ["neutral"],
    ["joy"],
    ["pride"],
    ["disapproval", "annoyance"],
    ["anger"],
    ["annoyance", "disapproval"],
    ["neutral"],
    ["disapproval"],
    ["neutral"],
]

# Attach the lists as a new column; the assignment is positional, so the list
# order above must match the row order of `samples`.
samples["gt_emotions"] = gt_emotions

# LLM Labeling

In [139]:
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

# System role text sent with every request.
SYSTEM_MESSAGE = (
    "You're an AI expert trained to analyze and categorize emotions present in text. "
    "Your goal is to analyze each piece of text according to instructions"
)

# Human-turn template. The emotion list is baked in right here (at definition
# time) from `emotions`; `{comment}` stays a placeholder that is filled in when
# the chain is invoked.
PROMPT_TEMPLATE = (
    "Given this comment {comment} , make an analysis of the emotions "
    "explicitly and implicitly present in the comment according to this "
    f"list of emotions ONLY {emotions} "
    "and after the analysis write the class/es that apply according to "
    "the given list (up to 3 classes) inside <answer> containing the classes"
)

# Two-message chat prompt: system instructions followed by the human request.
# (A few-shot block could be inserted between the two messages.)
_prompt_messages = [
    ("system", SYSTEM_MESSAGE),
    # few_shot_prompt,
    ("human", PROMPT_TEMPLATE),
]
final_prompt = ChatPromptTemplate.from_messages(_prompt_messages)

In [None]:
import os

# Read the API keys from environment variables instead of typing them into the
# notebook, so secrets never end up in the saved file or its history.
# Each key falls back to "" when its variable is not set.
groq_api = os.environ.get("GROQ_API_KEY", "")
openai_api = os.environ.get("OPENAI_API_KEY", "")

In [150]:
# Model 1: ChatOpenAI client pointed at the Groq base URL, requesting the
# llama3-8b-8192 model with temperature 1 and authenticated with the Groq key.
chat1 = ChatOpenAI(
    openai_api_key=groq_api,
    openai_api_base="https://api.groq.com/openai/v1",
    model_name="llama3-8b-8192",
    temperature=1,
)

# Pipeline 1: prompt -> model 1 -> plain-string output.
chain1 = final_prompt | chat1 | StrOutputParser()

# Model 2: ChatOpenAI with library defaults; only the OpenAI key is supplied.
chat2 = ChatOpenAI(api_key=openai_api)

# Pipeline 2: same prompt and parser, different model.
chain2 = final_prompt | chat2 | StrOutputParser()

In [146]:
# Take the first sampled row and show its text next to the dataset's own labels.
first_row = samples.iloc[0]
comment = first_row["text"]
label = first_row["ds_emotions"]
print(f"comment: {comment}")
print(f"label: {label}")
print("*" * 20)

# Send the same comment through chain1 five times, in order, keeping each reply
# under its own name (chat1 is configured with temperature=1).
# Only chain1 is queried here; chain2 is not invoked in this cell.
response1, response2, response3, response4, response5 = [
    chain1.invoke({"comment": comment}) for _ in range(5)
]

comment: Oh wow I genuinely thought this was funny can’t believe you copped it so hard
label: ['amusement', 'realization', 'surprise']
********************


In [149]:
# Show the third of the five chain1 replies for manual inspection.
print(response3)

After analyzing the comment, I found the following emotions:

* Amusement: The comment starts with "Oh wow I genuinely thought this was funny", indicating that the speaker finds something amusing.
* Approval: The phrase "can't believe you copped it so hard" suggests that the speaker is impressed or approving of someone's actions.
* Amusement: The phrase "copped it so hard" is likely being used in a humorous way, implying that the speaker finds the situation amusing.

According to the list of emotions, the classes that apply are:

<answer>
['amusement', 'approval']
</answer>

Note that there is no explicit anger, annoyance, or disapproval in the comment, despite the phrase "copped it so hard" potentially being used in a humorous way. The tone of the comment is overall lighthearted and playful.


In [None]:
# # Few Shot
# from langchain_core.prompts import FewShotChatMessagePromptTemplate

# examples = [
#     {"input": "2+2", "output": "4"},
#     {"input": "2+3", "output": "5"},
# ]

# example_prompt = ChatPromptTemplate.from_messages(
#     [
#         ("human", "{input}"),
#         ("ai", "{output}"),
#     ]
# )
# few_shot_prompt = FewShotChatMessagePromptTemplate(
#     example_prompt=example_prompt,
#     examples=examples,
# )

In [28]:
# NOTE(review): this rebinds `final_prompt`, replacing the emotion-analysis
# prompt defined earlier with an unrelated math prompt whose only variable is
# `{input}` (not `{comment}`). Chains that were already built keep the old
# prompt, but re-running the chain-building cell after this one would pick up
# this prompt instead. Looks like leftover example code — confirm whether it
# should be removed or given its own name.
final_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a wondrous wizard of math."),
        # few_shot_prompt,
        ("human", "{input}"),
    ]
)