In [1]:
import requests
import json
import pandas as pd
import numpy as np
import re
from sklearn.metrics import f1_score, classification_report, confusion_matrix

In [2]:
# ==== CONFIGURATION ====
API_BASE = "http://194.171.191.227:30080"  # Server URL
TOKEN = "sk-ba75d292d92e457fa1615f217b11fe09"  # Replace with your API token
MODEL_NAME = "llama3.2:3b"  # Using a lightweight model for efficiency
DATA_PATH = "nlp_features_extra.xlsx"  # Replace with your dataset path
RESULTS_LOG = "prompt_engineering_log.csv"  # File to store prompt test results
NUM_SAMPLES = 20  # Number of samples to test prompts on
TARGET_EMOTIONS = ["happiness", "sadness", "anger", "surprise", "fear", "disgust", "neutral"]

In [3]:
# ==== LOAD DATA ====
# df = pd.read_excel(DATA_PATH, engine="openpyxl")

# # Select a subset of data for evaluation
# df_sample = df.sample(NUM_SAMPLES, random_state=42)

# # Ensure dataset contains required columns
# if "ru_text" not in df_sample.columns or "general_emotion" not in df_sample.columns:
#     raise ValueError("Dataset must contain 'ru_text' and 'general_emotion' columns.")

# # Convert emotion labels to lowercase for consistency
# df_sample["general_emotion"] = df_sample["general_emotion"].str.lower()

In [4]:
df = pd.read_excel(DATA_PATH, engine="openpyxl")

# Suppose we want 10 examples of each emotion (fear, neutral, etc.)
# We will create a dictionary that specifies how many samples we want per emotion.
desired_counts = {
    "fear": 10,
    "neutral": 10,
    "happiness": 10,
    "sadness": 10,
    "anger": 10,
    "disgust": 10,
    "surprise": 10
}

# Optional: we can store the subsets in a list and then concatenate them
dfs_list = []

for emotion, n_samples in desired_counts.items():
    # Filter the dataframe for the given emotion
    subset = df[df["general_emotion"] == emotion]
    
    # If the subset has fewer rows than n_samples, sample(frac=1) 
    # will just give all rows for that emotion (or you can handle it differently)
    if len(subset) >= n_samples:
        chosen = subset.sample(n=n_samples, random_state=42)
    else:
        chosen = subset.sample(frac=1, random_state=42)  # or handle the case as you wish
    dfs_list.append(chosen)

# Concatenate all sampled subsets
df_sample = pd.concat(dfs_list, ignore_index=True)

# Now df_sample is your balanced subset with the desired distribution.
# Double-check the distribution:
print(df_sample["general_emotion"].value_counts())

general_emotion
fear         10
neutral      10
happiness    10
sadness      10
anger        10
disgust      10
surprise     10
Name: count, dtype: int64


In [5]:
# ==== FUNCTION TO QUERY LLM ====
# def query_llm(prompt):
#     """ Sends a request to the locally deployed LLM and returns the model's response. """
#     url = f"{API_BASE}/api/chat/completions"
#     headers = {
#         "Authorization": f"Bearer {TOKEN}",
#         "Content-Type": "application/json"
#     }
#     data = {
#         "model": MODEL_NAME,
#         "messages": [{"role": "user", "content": prompt}],
#         "temperature": 0.0
#     }
    
#     response = requests.post(url, headers=headers, json=data)
    
#     if response.status_code == 200:
#         return response.json()["choices"][0]["message"]["content"].strip()
#     else:
#         print(f"Error {response.status_code}: {response.text}")
#         return None

In [6]:
def query_llm_system(messages):
    """
    Sends a request with both system and user messages.
    """
    url = f"{API_BASE}/api/chat/completions"
    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json"
    }
    data = {
        "model": MODEL_NAME,
        "messages": messages,
        "temperature": 0.0,
        "top_p": 0.9,
        "top_k": 400,
        "repetition_penalty": 1.0,
        "context_length": 1000
    }
    response = requests.post(url, headers=headers, json=data)
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"].strip()
    else:
        print(f"Error {response.status_code}: {response.text}")
        return None

In [7]:
# ==== FUNCTION TO EXTRACT EMOTION FROM TEXT ====
def extract_emotion(text):
    """
    Extracts the first occurrence of a target emotion or its synonym from the given text.
    Uses regex to search for any of the target words.
    If a synonym is found, it maps it to the corresponding target emotion.
    Returns the extracted emotion if found, otherwise 'neutral'.
    """
    # 1. Try to extract from JSON format
    json_match = re.search(r'\"emotion\"\s*:\s*\"(.*?)\"', text, re.IGNORECASE)
    if json_match:
        emotion_found = json_match.group(1).lower()
        if emotion_found in TARGET_EMOTIONS:
            return emotion_found
    
    # 2. If not found, fallback to your synonyms mapping
    # Define synonyms mapping (all in lowercase)
    synonym_mapping = {
        "elation": "happiness",
        "joy": "happiness",
        "cheerfulness": "happiness",
        "amusement": "happiness",
        "admiration": "happiness",
        "gratitude": "happiness",
        "optimism": "happiness",
        "pride": "happiness",
        "excitement": "happiness",
        "love": "happiness",
        "relief": "happiness",
        "caring": "happiness",
        "approval": "happiness",
    
        "disappointment": "sadness",
        "grief": "sadness",
        "sadness": "sadness",
        "remorse": "sadness",
        "sorrow": "sadness",
        "melancholy": "sadness",
    
        "annoyance": "anger",
        "disapproval": "anger",
        "anger": "anger",
    
        "surprise": "surprise",
        "realization": "surprise",
        "confusion": "surprise",
    
        "fear": "fear",
        "nervousness": "fear",
    
        "disgust": "disgust",
        "emberrasment": "disgust",
    
        "neutral": "neutral"
    }
    # Combine target emotions and synonyms into one list
    all_keywords = TARGET_EMOTIONS + list(synonym_mapping.keys())
    # Create a regex pattern to match any keyword as a whole word, case-insensitive
    pattern = r'\b(' + '|'.join(all_keywords) + r')\b'
    # pattern = r"classification is:\s*(\w+)"
    match = re.search(pattern, text, flags=re.IGNORECASE)
    if match:
        word = match.group(1).lower()
        # Map synonym if necessary
        return synonym_mapping.get(word, word)
    else:
        return "neutral"

In [8]:
# ==== BASELINE PROMPT ====
def baseline_prompt(sentence):
    return f"Analyze the following sentence and classify it as one of the six core emotions (happiness, sadness, anger, surprise, fear, disgust) or neutral:\n\n{sentence}"

In [9]:
# ==== EXPERIMENTAL PROMPTS ====
def few_shot_prompt(sentence):
    """ Few-shot prompting: providing examples to improve accuracy. """
    return f"""
Analyze the following sentence and classify it as one of the six core emotions (happiness, sadness, anger, surprise, fear, disgust) or neutral.

Examples:
1. "I just won a million dollars!" → happiness
2. "I can't believe he betrayed me." → anger
3. "She cried when she heard the news." → sadness
4. "This roller coaster is thrilling!" → surprise

Now classify the following sentence:
"{sentence}"
"""


def structured_prompt(sentence):
    """ Structured instructions with format constraints. """
    return f"""
Analyze the following sentence and determine the primary emotion expressed.
- Available emotions: happiness, sadness, anger, surprise, fear, disgust, neutral
- Return the answer in JSON format: {{ "emotion": "emotion_name" }}

Sentence: "{sentence}"
"""


def definition_prompt(sentence):
    """ Providing explicit definitions for emotions. """
    return f"""
Analyze the following sentence and classify it based on the provided definitions.

Definitions:
- Happiness: A positive, joyful feeling
- Sadness: A feeling of loss or disappointment
- Anger: A strong feeling of displeasure
- Surprise: A reaction to something unexpected
- Fear: A response to perceived danger
- Disgust: A feeling of strong dislike
- Neutral: No strong emotional reaction

Sentence: "{sentence}"

Which category best describes this sentence?
"""

In [10]:
def refined_structured_prompt(sentence):
    """
    Structured instructions with more explicit directions and examples.
    """
    return f"""
You are an assistant specialized in emotion analysis. 
Your task is to classify the following sentence into exactly one of these categories:
[happiness, sadness, anger, surprise, fear, disgust, neutral].

Instructions:
1. Read the sentence carefully.
2. Decide which single emotion from the list best fits the sentence.
3. Return ONLY the final emotion in JSON format, like: {{ "emotion": "anger" }}

Here is the sentence:
\"{sentence}\"

If you are not sure which category fits best, choose "neutral".
"""

def strict_format_prompt(sentence):
    """
    Prompt that forces the model to respond in a strict JSON format.
    """
    return f"""
You are an advanced AI for emotion classification. 
Classify the sentence into exactly one of these emotions:
[happiness, sadness, anger, surprise, fear, disgust, neutral].

RETURN ONLY IN THIS EXACT FORMAT (no extra text):
{{"emotion":"VALUE"}}

Sentence: "{sentence}"
"""

def improved_few_shot_prompt(sentence):
    return f"""
You are an AI for emotion classification. Classify each sentence into 
[happiness, sadness, anger, surprise, fear, disgust, neutral].

Examples:
1. "I just won a million dollars!" => happiness
2. "I hate it when you yell at me." => anger
3. "I feel so lonely without you." => sadness
4. "This roller coaster is thrilling!" => surprise
5. "I find rotten food gross." => disgust
6. "I feel anxious before the exam." => fear
7. "I'm doing okay, nothing special." => neutral

Now classify this sentence: "{sentence}"
"""

In [11]:
def system_prompt(sentence):
    """
    Prompt that uses a system message to strongly guide the model.
    """
    messages = [
        {"role": "system", "content": (
            "You are a sophisticated emotion classification AI. "
            "You will respond ONLY with one of these categories: "
            "[happiness, sadness, anger, surprise, fear, disgust, neutral]."
            "No additional text or explanation."
        )},
        {"role": "user", "content": f"Classify the emotion of this sentence:\n{sentence}"}
    ]
    return messages

def system_few_shot_prompt(sentence):
    """
    Combines a system instruction with a few-shot approach.
    """
    system_content = (
        "You are a sophisticated emotion classification AI. "
        "You will respond ONLY with the single emotion from this list: "
        "[happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        "If the sentence does not clearly express any emotion, choose 'neutral'. "
        "No additional text or explanation."
    )
    
    examples_text = (
        "Here are examples:\n"
        "1. 'good bot' => happiness\n"
        "1. 'Enjoy your dinner' => happiness\n"
        "1. 'it doesn t count if you re drunk ? Wow.. Imagine that being used as an excuse in other situations.' => happiness\n"
        
        "2. 'I hate it when you yell at me.' => anger\n"
        "2. 'I hate it when you yell at me.' => anger\n"
        "2. 'I hate it when you yell at me.' => anger\n"
        
        "3. 'I'm so lonely without you.' => sadness\n"
        "4. 'This roller coaster is thrilling!' => surprise\n"
        "5. 'I find rotten food gross.' => disgust\n"
        "6. 'I feel anxious before the exam.' => fear\n"
        "7. 'I'm doing okay, nothing special.' => neutral\n"
        "8. 'This is not terrible, but not great either.' => neutral\n"
    )
    
    user_prompt = (
        f"Classify the emotion of this sentence:\n{sentence}\n\n"
        "Return only one emotion from the list above."
    )
    
    messages = [
        {"role": "system", "content": system_content},
        {"role": "user", "content": examples_text},
        {"role": "user", "content": user_prompt}
    ]
    return messages

In [12]:
# ==== FUNCTION TO TEST PROMPTS ====
def test_prompt(prompt_function, prompt_name):
    """ 
    Runs a test on a given prompt format and logs results.
    It queries the LLM, extracts the emotion keyword from the response, 
    and compares it to the true label.
    """
    predictions = []
    
    print(f"\nRunning test for: {prompt_name}")
    
    for _, row in df_sample.iterrows():
        sentence = row["text"]
        true_label = row["general_emotion"]
        
        # Generate prompt and query the LLM
        prompt = prompt_function(sentence)
        # predicted_label = query_llm(prompt)
        predicted_label = query_llm_system(prompt)
        
        # Post-process output (convert to lowercase, strip spaces)
        if predicted_label:
            predicted_label = predicted_label.lower().strip()
            # Extract emotion keyword from the response using regex mapping
            predicted_label = extract_emotion(predicted_label)
        else:
            predicted_label = "neutral"
        
        # Ensure only valid labels are considered
        if predicted_label not in TARGET_EMOTIONS:
            predicted_label = "neutral"  # Default to neutral if unclear
        
        predictions.append((sentence, true_label, predicted_label))
    
    # Convert results to DataFrame
    results_df = pd.DataFrame(predictions, columns=["Sentence", "True_Label", "Predicted_Label"])
    
    # Compute evaluation metrics
    f1 = f1_score(results_df["True_Label"], results_df["Predicted_Label"], average="macro")
    unique_labels = sorted(results_df["True_Label"].unique())  # Get unique true labels from the sample
    report = classification_report(
        results_df["True_Label"], 
        results_df["Predicted_Label"], 
        labels=unique_labels,            # Specify labels present in the sample
        target_names=unique_labels,      # Use the same order for target_names
        zero_division=0
    )
    conf_matrix = confusion_matrix(results_df["True_Label"], results_df["Predicted_Label"], labels=unique_labels)
    
    print(f"\n=== Results for {prompt_name} ===")
    print(f"F1 Score: {f1:.4f}")
    print("Classification Report:\n", report)
    
    # Save results to log file
    results_df["Prompt_Type"] = prompt_name
    results_df.to_csv(RESULTS_LOG, mode="a", header=not pd.io.common.file_exists(RESULTS_LOG), index=False)

    return f1

In [13]:
# ==== RUN EXPERIMENTS ====
# f1_baseline = test_prompt(baseline_prompt, "Baseline") #58(2) 42(5)
# f1_few_shot = test_prompt(few_shot_prompt, "Few-Shot") #54
# f1_structured = test_prompt(structured_prompt, "Structured") #47
# f1_definition = test_prompt(definition_prompt, "Definition-Based") #30
# f1_refined_structured = test_prompt(refined_structured_prompt, "refined_structured_prompt") #61(2) 52(5)
# f1_strict_format = test_prompt(strict_format_prompt, "strict_format_prompt") #67(2) 56(5)
# f1_improved_few_shot = test_prompt(improved_few_shot_prompt, "improved_few_shot_prompt") #55(2) 50(5)

# f1_system = test_prompt(system_prompt, "system_prompt")
f1_system_few_shot = test_prompt(system_few_shot_prompt, "system_few_shot_prompt")


Running test for: system_few_shot_prompt

=== Results for system_few_shot_prompt ===
F1 Score: 0.4981
Classification Report:
               precision    recall  f1-score   support

       anger       0.40      0.40      0.40        10
     disgust       0.44      0.80      0.57        10
        fear       1.00      0.50      0.67        10
   happiness       0.53      0.80      0.64        10
     neutral       0.50      0.10      0.17        10
     sadness       0.83      0.50      0.62        10
    surprise       0.36      0.50      0.42        10

    accuracy                           0.51        70
   macro avg       0.58      0.51      0.50        70
weighted avg       0.58      0.51      0.50        70



In [14]:
# ==== SELECT BEST PROMPT ====
best_prompt = max(
    [("Baseline", f1_baseline),
     # ("Few-Shot", f1_few_shot),
     # ("Structured", f1_structured),
     # ("Definition-Based", f1_definition)],
     ("refined_structured", f1_refined_structured)],
     ("strict_format", f1_strict_format)],
     ("improved_few_shot", f1_improved_few_shot)],
    key=lambda x: x[1]
)

print(f"\n✅ Best prompt: {best_prompt[0]} with F1-score: {best_prompt[1]:.4f}")

SyntaxError: closing parenthesis ']' does not match opening parenthesis '(' on line 2 (2848820042.py, line 8)