In [1]:
import json
import os
import re

import pandas as pd
# import numpy as np
import requests
from sklearn.metrics import f1_score, classification_report

In [None]:
# ==== CONFIGURATION ====
# Endpoint and credentials are read from the environment so they can be
# changed without editing the notebook.
API_BASE = os.environ.get("LLM_API_BASE", "http://194.171.191.227:30080")
# SECURITY / FIXME: this fallback token is committed in plain text. It is kept
# only so existing runs keep working; rotate it, export LLM_API_TOKEN instead,
# and then delete the literal below.
TOKEN = os.environ.get("LLM_API_TOKEN", "sk-ba75d292d92e457fa1615f217b11fe09")
MODEL_NAME = "llama3.3:latest"
DATA_PATH = "nlp_features.xlsx"

# Number of consecutive sentences sent to the model in one request.
GROUP_SIZE = 5

# The closed label set; anything else predicted by the model is mapped to "neutral".
TARGET_EMOTIONS = ["happiness", "sadness", "anger", "surprise", "fear", "disgust", "neutral"]

In [3]:
# ==== LOAD DATA ====


In [4]:
# ==== FUNCTION TO QUERY LLM ====
def query_llm(messages, timeout=600):
    """
    Send a chat-completion request to the LLM endpoint and return the reply text.

    Parameters:
        messages: list of chat messages (dicts with "role" and "content").
        timeout: seconds to wait for the server, passed to requests.post.
            Without a timeout a stalled connection would hang the whole run.

    Returns:
        The stripped text of the first choice, or None when the request fails,
        the server answers with a non-200 status, or the response body does not
        have the expected choices[0].message.content structure. Failures are
        printed, never raised, so a long batch run is not aborted by one call.
    """
    url = f"{API_BASE}/api/chat/completions"
    headers = {
        "Authorization": f"Bearer {TOKEN}",
        "Content-Type": "application/json"
    }
    data = {
        "model": MODEL_NAME,
        "messages": messages,
        "temperature": 0.01,
        "top_p": 0.9,
        "top_k": 600,
        # NOTE(review): penalties below 1.0 usually *reward* repetition; the value
        # was chosen empirically (see the F1 notes in main) - confirm backend semantics.
        "repetition_penalty": 0.1,
        "max_tokens": 4000,
        "context_length": 2048
    }

    try:
        response = requests.post(url, headers=headers, json=data, timeout=timeout)
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported like HTTP errors.
        print(f"Request failed: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error {response.status_code}: {response.text}")
        return None

    try:
        content = response.json()["choices"][0]["message"]["content"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # Body was not JSON or did not contain the expected fields.
        print(f"Unexpected response format: {exc!r}")
        return None

    # The content may legitimately be null; treat that as "no answer".
    return content.strip() if isinstance(content, str) else None

In [5]:
# ==== FUNCTION TO EXTRACT EMOTION FROM TEXT ====
def extract_emotion(emotion_str):
    """
    Normalize a model-provided emotion label to the allowed label set.

    The value is lower-cased and stripped of surrounding whitespace. If the
    result is one of TARGET_EMOTIONS it is returned, otherwise "neutral".
    Non-string values (for example a JSON null decoded to None) also map to
    "neutral" instead of raising AttributeError.
    """
    if not isinstance(emotion_str, str):
        return "neutral"
    label = emotion_str.lower().strip()
    return label if label in TARGET_EMOTIONS else "neutral"

In [None]:
def make_prompt_for_chunk(chunk):
    """
    Build the chat messages for one chunk of consecutive sentences.

    The model is asked to consider all lines as shared context and to produce
    one emotion label per line, as a JSON object with an "emotions" array of
    {"line", "emotion"} items.

    The number of lines announced in the prompt is the actual number of rows in
    the chunk, so a shorter final chunk is not described as a full group.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.

    Returns:
        A two-element list: the system message and the user message.
    """
    lines = chunk["Translation"].tolist()
    n_lines = len(lines)

    # Number the lines from 1 so the model can refer to them in its answer.
    numbered_lines = "\n".join(f"{idx}) {text}" for idx, text in enumerate(lines, start=1))

    # System instructions
    system_content = (
        f"You are an advanced AI. You are given {n_lines} consecutive lines of text in English. "
        "You can consider the entire context of all lines to decide the emotion for each line. "
        "Emotions must be one of [happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        "If the sentence is purely factual or there is no clear positive or negative emotion, respond with neutral. "
        f"Return a JSON object with a field 'emotions' which is an array of {n_lines} items. "
        "Each item should have the structure: {\"line\": <line_number>, \"emotion\": \"emotion_name\"}.\n"
        "No extra text or explanation. Only valid JSON.\n"
    )

    examples_text = """
        Example:
        Line 1: "From Monday to Friday, five completely different people go to visit each other." => neutral
        Line 2: "Each of them has the same conditions when buying products." => neutral
        Line 3: "And only three hours to make a dinner of three dishes." => neutral
        Line 4: "In which everyone can prove that it is he who is an unsurpassed owner." => happiness
        Line 5: "But grandfather forbade." => anger
        Line 6: "It smells." => disgust
    """

    user_content = (
        f"{examples_text}\n"
        "Analyze the following sentences and determine the primary emotion expressed.\n"
        "- Available emotions: happiness, sadness, anger, surprise, fear, disgust, neutral\n"
        f"Sentences: \n{numbered_lines}\n\n"
        "Please output JSON like this:\n"
        "{\"emotions\": [\n"
        "  {\"line\": 1, \"emotion\": \"neutral\"},\n"
        "  {\"line\": 2, \"emotion\": \"happiness\"},\n"
        "  ... etc ...\n"
        "]}\n\n"
        "Remember to use only the 7 valid labels."
    )

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]

In [7]:
def make_prompt_for_chunk_extended_few_shot(chunk, group_size=GROUP_SIZE):
    """
    Build chat messages with ten stand-alone few-shot examples before the task.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.
        group_size: number of lines announced to the model in the prompt.

    Returns:
        A two-element list: the system message and the user message.
    """
    # Convert chunk to numbered lines, one per row, each terminated by a newline.
    lines_str = "".join(
        f"{i}) {row['Translation']}\n"
        for i, (_, row) in enumerate(chunk.iterrows(), start=1)
    )

    system_content = (
        "You are an advanced AI specialized in emotion classification. "
        f"You receive {group_size} lines in English. You can use the entire context of all lines to decide. "
        "Assign an emotion to each line from [happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        "If a line is purely factual or no clear positive/negative emotion, choose 'neutral'. "
        # This piece must be an f-string: previously the literal text
        # "{group_size}" was sent to the model instead of the number.
        f"Output a single JSON object with an array 'emotions' of length {group_size}, "
        "where each item is {\"line\": i, \"emotion\": \"...\"}. "
        "No explanation, just JSON."
    )

    # Additional 10 examples (some neutral, some emotional)
    examples_text = """
Here are examples of individual lines and their emotions:

1) "I just won the lottery!" => happiness
2) "Why did this terrible thing happen to me?!" => surprise
3) "I feel so depressed these days..." => sadness
4) "He completely ruined my day." => anger
5) "This smells awful, I'm disgusted." => disgust
6) "I'm anxious about the exam." => fear
7) "It's just a normal day, nothing special." => neutral
8) "We are simply going to the store." => neutral
9) "I'm so excited for the trip tomorrow!" => happiness
10) "He quietly opened the door and left." => neutral
"""

    user_content = (
        f"{examples_text}\n"
        f"Now classify these {group_size} lines:\n{lines_str}\n"
        "Return JSON exactly like:\n"
        "{\"emotions\": [\n"
        "  {\"line\": 1, \"emotion\": \"neutral\"},\n"
        "  {\"line\": 2, \"emotion\": \"happiness\"},\n"
        "  ... etc ...\n"
        "]}\n"
    )

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]

In [8]:
def make_prompt_for_chunk_hidden_cot(chunk, group_size=GROUP_SIZE):
    """
    Build chat messages that ask the model to reason internally but output only JSON.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.
        group_size: number of lines announced to the model; used consistently
            in both the system and the user message.

    Returns:
        A two-element list: the system message and the user message.
    """
    lines_str = "".join(
        f"{i}) {row['Translation']}\n"
        for i, (_, row) in enumerate(chunk.iterrows(), start=1)
    )

    system_content = (
        f"You are an advanced AI. You are given {group_size} consecutive lines of text in English. "
        "You can consider the entire context of all lines to decide the emotion for each line. "
        "Emotions must be one of [happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        # The longer "purely factual ... respond with neutral" instruction was
        # replaced by the short rule below for this variant.
        # Trailing space added so the next sentence is not glued on ("neutral.Return").
        "If uncertain, choose neutral. "
        f"Return a JSON object with a field 'emotions' which is an array of {group_size} items. "
        "Each item should have the structure: {\"line\": <line_number>, \"emotion\": \"emotion_name\"}.\n"
        "Think step by step internally, but do NOT reveal your chain-of-thought. "
        "Finally, output only valid JSON. "
    )

    examples_text = """
        Example:
        Line 1: "From Monday to Friday, five completely different people go to visit each other." => neutral
        Line 2: "Each of them has the same conditions when buying products." => neutral
        Line 3: "And only three hours to make a dinner of three dishes." => neutral
        Line 4: "In which everyone can prove that it is he who is an unsurpassed owner." => happiness
        Line 5: "But grandfather forbade." => anger
        Line 6: "It smells." => disgust
    """

    user_content = (
        f"{examples_text}\n"
        f"Analyze the next {group_size} lines:\n{lines_str}\n\n"
        "Return JSON in the form:\n"
        "{\"emotions\": [\n"
        "  {\"line\": 1, \"emotion\": \"neutral\"},\n"
        "  {\"line\": 2, \"emotion\": \"happiness\"},\n"
        "  ... etc ...\n"
        "]}\n\n"
        "No explanation, no chain-of-thought, just the JSON result. "
    )

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]

In [9]:
def make_prompt_for_chunk_rare_emphasis(chunk, group_size=GROUP_SIZE):
    """
    Build chat messages that nudge the model towards the rarer negative emotions.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.
        group_size: number of lines mentioned in the user message.

    Returns:
        A two-element list: the system message and the user message.
    """
    # One "N) text" entry per row, each followed by a newline.
    numbered = "".join(
        f"{position}) {record['Translation']}\n"
        for position, (_, record) in enumerate(chunk.iterrows(), start=1)
    )

    system_sentences = [
        "You are a specialized emotion classifier. ",
        "You can consider the entire context of all lines to decide the emotion for each line. ",
        "Often lines are neutral, but if there's ANY clue of negativity, it might be anger, fear, or disgust. ",
        "If positive clue => happiness. If uncertain => neutral. ",
        "Emotions: [happiness, sadness, anger, surprise, fear, disgust, neutral]. ",
        "If the sentence is purely factual or there is no clear positive or negative emotion, respond with neutral. ",
        "Output JSON only, no commentary.",
    ]
    system_prompt = "".join(system_sentences)

    few_shot = """
        Example:
        Line 1: "From Monday to Friday, five completely different people go to visit each other." => neutral
        Line 2: "Each of them has the same conditions when buying products." => neutral
        Line 3: "And only three hours to make a dinner of three dishes." => neutral
        Line 4: "In which everyone can prove that it is he who is an unsurpassed owner." => happiness
        Line 5: "But grandfather forbade." => anger
        Line 6: "It smells." => disgust
    """

    # Assemble the user message piece by piece: examples, task intro, lines, format.
    user_prompt = few_shot + "\n"
    user_prompt += f"These {group_size} lines may have subtle negative or positive clues.\n"
    user_prompt += numbered + "\n\n"
    user_prompt += 'Return a JSON: {"emotions":[{"line":1,"emotion":"..."}, ...]}'

    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt}
    return [system_message, user_message]

In [10]:
def make_prompt_for_chunk_ultimate(chunk, group_size=GROUP_SIZE):
    """
    Build chat messages combining hidden chain-of-thought with a ten-example few-shot.

    Design of the prompt:
    1) The system message presents the model as a multilingual emotion
       classifier that reasons internally without revealing its reasoning.
    2) The user message carries few-shot examples covering the emotions,
       including neutral.
    3) Unemotional or uncertain sentences map to neutral; negativity points to
       anger/fear/disgust; clear positivity to happiness.
    4) The expected answer is strict JSON: {"emotions": [{...}, ...]}.
    5) The chunk's rows are appended as numbered lines.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.
        group_size: number of lines announced to the model. The prompt used to
            hard-code "5" regardless of this argument.

    Returns:
        A two-element list: the system message and the user message.
    """
    # Build numbered lines from the chunk
    lines_str = "".join(
        f"{i}) {row['Translation']}\n"
        for i, (_, row) in enumerate(chunk.iterrows(), start=1)
    )

    # SYSTEM role content
    system_content = (
        "You are an advanced multilingual emotion classification AI. "
        f"You will receive exactly {group_size} lines of text. Think step by step internally, but do NOT reveal your chain-of-thought. "
        f"Finally, output only valid JSON with an array of {group_size} items. "
        "Each item must have the form: {\"line\": i, \"emotion\": \"...\"}. "
        "Emotions are strictly one from this list: [happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        "If the sentence is purely factual or uncertain, choose 'neutral'. "
        "If you see negativity, consider anger, fear, or disgust. If it's clearly positive, choose happiness. "
        "If the text is not in English, you may do an internal translation, but return final emotion in the JSON."
    )

    # Extended few-shot examples
    few_shot_examples = (
        "Example lines and their assigned emotions:\n"
        "1) 'I just won a million dollars!' => happiness\n"
        "2) 'Why on earth did this happen to me?!' => surprise\n"
        "3) 'I'm feeling so down and lonely.' => sadness\n"
        "4) 'He ruined my entire plan, I'm furious.' => anger\n"
        "5) 'This rotten smell is disgusting.' => disgust\n"
        "6) 'I can't sleep, I'm so worried.' => fear\n"
        "7) 'It's just an ordinary day, nothing special.' => neutral\n"
        "8) 'We are heading to the supermarket at noon.' => neutral\n"
        "9) 'I love this wonderful surprise party!' => happiness\n"
        "10) 'What is that creepy noise outside?' => fear\n"
    )

    user_content = (
        f"{few_shot_examples}\n\n"
        f"Now, analyze the following {group_size} lines. "
        "Decide the best-fitting emotion for each, using the entire context. "
        "Finally, return only valid JSON. For example:\n"
        "{\"emotions\": [\n"
        "  {\"line\":1, \"emotion\":\"neutral\"},\n"
        "  {\"line\":2, \"emotion\":\"anger\"},\n"
        "  ...\n"
        "]}\n\n"
        f"Here are the {group_size} lines:\n{lines_str}\n"
    )

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]

In [11]:
def make_prompt_for_chunk_super_complex(chunk, group_size=GROUP_SIZE):
    """
    Build chat messages with the most detailed instructions and scenario-based examples.

    Design of the prompt:
    1. Hidden chain-of-thought plus a few-shot with more complex examples drawn
       from negative, positive and neutral situations.
    2. The domain context (e.g. TV show, cooking show) is mentioned so the model
       expects many plain factual phrases and labels them neutral.
    3. Rare emotions (fear, disgust, anger) are emphasized, with neutral as the
       fallback when unsure.
    4. Ambiguous wording should resolve to neutral.
    5. Non-English content is allowed via internal translation.
    6. Besides short examples, the few-shot includes "scripted" phrases that
       mention the context.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.
        group_size: number of lines announced to the model. The prompt used to
            hard-code "5" regardless of this argument.

    Returns:
        A two-element list: the system message and the user message.
    """
    # Combine lines from chunk
    lines_str = "".join(
        f"{i}) {row['Translation']}\n"
        for i, (_, row) in enumerate(chunk.iterrows(), start=1)
    )

    # SYSTEM message with advanced instructions
    system_content = (
        "You are an advanced multilingual emotion classification AI. "
        f"You will be provided with exactly {group_size} consecutive lines of text (possibly from a reality TV show or daily conversation). "
        "You must carefully analyze them, leveraging the entire context. "
        "Internally, you may reason step-by-step (chain-of-thought), but do NOT reveal that reasoning. "
        "Finally, you must respond ONLY with valid JSON, containing an array of objects, one per line. "
        "Each object must be: {\"line\": <i>, \"emotion\": <label>}. "
        "The label must be exactly one from [happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        "If the line is purely factual (no strong emotional words or tone), or if you are uncertain, use 'neutral'. "
        "If there's negative sentiment, consider anger/fear/disgust. If there's positive vibe, consider happiness. "
        "If it suggests sadness or emotional pain, use 'sadness'. If there's a sense of shock or wonder, use 'surprise'. "
        "You may do an internal translation if the text is not in English. Absolutely no extra commentary beyond the JSON."
    )

    # Extended examples with advanced domain references, negativity, neutrality, context, etc.
    few_shot_examples = (
        # Introduce advanced scenario-based lines
        "ADVANCED FEW-SHOT EXAMPLES:\n\n"
        "Line A: \"I can't believe she stole my recipe!\" => anger\n"
        "Line B: \"Everything tastes so bland, I'm not sure if I like it.\" => neutral\n"
        "Line C: \"I'm so delighted to share my secret sauce with everyone.\" => happiness\n"
        "Line D: \"Why did I even sign up for this show? It's terrifying here.\" => fear\n"
        "Line E: \"The smell of rotten eggs is making me sick.\" => disgust\n"
        "Line F: \"I feel so lonely in this competition, no one supports me.\" => sadness\n"
        "Line G: \"What a shocking twist that he used store-bought dough!\" => surprise\n"
        "Line H: \"We have to cook a three-course meal in under two hours.\" => neutral\n"
        "Line I: \"I absolutely adore how they decorated the table.\" => happiness\n"
        "Line J: \"This dish is awful, I'm so disappointed.\" => sadness (or anger if she blames someone, use best judgment)\n"
        "Line K: \"I'm not really feeling anything right now.\" => neutral\n"
        "Line L: \"Is that a spider crawling under the counter?!\" => fear\n"
        "Line M: \"He was so rude, I'm furious at his behavior.\" => anger\n"
        "Line N: \"Gosh, it's unbelievably stinky in here.\" => disgust\n"
        "Line O: \"Everything is just as usual, day by day.\" => neutral\n"
    )

    user_content = (
        f"{few_shot_examples}\n\n"
        f"NOW YOU HAVE {group_size} NEW LINES:\n"
        f"{lines_str}\n"
        "YOUR TASK:\n"
        "1) Possibly do an internal chain-of-thought. Do NOT reveal it.\n"
        "2) Assign EXACTLY ONE emotion from [happiness, sadness, anger, surprise, fear, disgust, neutral] per line.\n"
        "3) If uncertain or purely factual, choose 'neutral'.\n"
        "4) Output only JSON in the form:\n"
        "{\"emotions\":[\n"
        "  {\"line\":1,\"emotion\":\"neutral\"},\n"
        "  {\"line\":2,\"emotion\":\"anger\"},\n"
        "  ...\n"
        "]}\n"
        "No extra commentary, no explanation, no text besides valid JSON.\n"
    )

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]

In [12]:
def make_prompt_for_chunk_minimal(chunk, group_size=5):
    """
    Build the shortest prompt variant: label set, neutral rule and output format only.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.
        group_size: number of lines announced to the model.

    Returns:
        A two-element list: the system message and the user message.
    """
    lines_str = "".join(
        f"{i}) {row['Translation']}\n"
        for i, (_, row) in enumerate(chunk.iterrows(), start=1)
    )

    system_content = (
        "You are an AI that classifies each line of text into exactly one of the following emotions: "
        "[happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        "If the line is factual or has no strong emotion, choose neutral. "
        # Must be an f-string: previously the literal "{group_size}" reached the model.
        f"Output only valid JSON with 'emotions' array of length {group_size}."
    )
    user_content = (
        f"Classify these {group_size} lines:\n{lines_str}\n\n"
        "Return a JSON like:\n"
        "{\"emotions\":[{\"line\":1,\"emotion\":\"neutral\"},...]} \n"
        "No extra explanation."
    )

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]

In [13]:
def make_prompt_for_chunk_examples_in_system(chunk, group_size=5):
    """
    Build chat messages whose few-shot examples live in the system message.

    Three example sentences per emotion are listed in the system prompt; the
    user message only carries the numbered lines to classify and the expected
    JSON shape.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.
        group_size: number of lines mentioned in the user message.

    Returns:
        A two-element list: the system message and the user message.
    """
    # (sentence, label) pairs, three per emotion, rendered below as " - '...' => label".
    labelled_examples = (
        ("In which everyone can prove that it is he who is an unsurpassed owner.", "happiness"),
        ("Ready to take part in our show.", "happiness"),
        ("To show everyone that a woman can always gain her female happiness,", "happiness"),
        ("My mother Natalya Konstantinna was a biologist, but in 41 the war began,", "sadness"),
        ("I have never met him, the consequences are irreversible.", "sadness"),
        ("Yes, in the 43rd year, dad left back to the front.", "sadness"),
        ("Ladies from 30 react to me a little more skeptical, or something.", "anger"),
        ("But such a portrait is at home, I would not hang it.", "anger"),
        ("Perhaps because I would not want to have a portrait", "anger"),
        ("Oh, oh.", "surprise"),
        ("And the culture of communication is generally different things.", "surprise"),
        ("It's clear.", "surprise"),
        ("Very tasty, but I'm afraid that there", "fear"),
        ("I'm afraid to go to him.", "fear"),
        ("Frankly, I did not know who would come to me,", "fear"),
        ("It smells.", "disgust"),
        ("They have a taste of such, you know, land.", "disgust"),
        ("The cheese is already covered with mold.", "disgust"),
        ("But he was very hungry", "neutral"),
        ("The feeling is like", "neutral"),
        ("My name is Dmitry. I am the director of the patent bureau.", "neutral"),
    )

    system_prompt = (
        "You are an AI that classifies lines into [happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        "If the line is purely factual, choose 'neutral'. "
        "Examples:\n"
    )
    for sentence, label in labelled_examples:
        system_prompt += f" - '{sentence}' => {label}\n"
    system_prompt += 'Only output valid JSON: {"emotions": [ ... ]}.'

    # Numbered lines are newline-separated, with no trailing newline.
    numbered = []
    for position, (_, record) in enumerate(chunk.iterrows(), start=1):
        numbered.append(f"{position}) {record['Translation']}")
    numbered_block = "\n".join(numbered)

    user_prompt = f"Classify the following {group_size} lines:\n{numbered_block}\n\n"
    user_prompt += 'Return JSON: {"emotions":[{"line":1,"emotion":"..."},...]}\n'

    system_message = {"role": "system", "content": system_prompt}
    user_message = {"role": "user", "content": user_prompt}
    return [system_message, user_message]

In [14]:
def make_prompt_for_chunk_with_synonyms(chunk, group_size=5):
    """
    Build chat messages that map fine-grained sub-emotions onto the seven target labels.

    The system message lists which sub-emotions (joy, grief, annoyance, ...)
    collapse into which target label; the user message carries three example
    sentences per label followed by the numbered lines to classify.

    Parameters:
        chunk: DataFrame slice with a "Translation" column holding the text.
        group_size: number of lines announced to the model. The prompt used to
            hard-code "5" regardless of this argument; the default keeps the
            previous wording.

    Returns:
        A two-element list: the system message and the user message.
    """
    # Build numbered lines from chunk
    lines_str = "".join(
        f"{i}) {row['Translation']}\n"
        for i, (_, row) in enumerate(chunk.iterrows(), start=1)
    )

    system_content = (
        "You are an advanced emotion classification AI. "
        f"You will receive exactly {group_size} lines of text. If the text is purely factual, choose 'neutral'. "
        "You have the following emotion mapping for synonyms:\n"
        "- joy, amusement, admiration, gratitude, optimism, pride, excitement, love, relief, caring, approval => happiness\n"
        "- disappointment, grief => sadness\n"
        "- annoyance, disapproval => anger\n"
        "- realization, confusion => surprise\n"
        "- nervousness => fear\n"
        "- embarrassment => disgust\n"
        "- curiosity => neutral\n"
        "Finally, all these subemotions map to [happiness, sadness, anger, surprise, fear, disgust, neutral]. "
        "Return only valid JSON: {\"emotions\":[ {\"line\":1,\"emotion\":\"...\"}, ... ]}. "
        "No extra text."
    )

    # Few-shot examples: three sentences per target label.
    examples_text = (
        "Examples:\n"
        " - 'In which everyone can prove that it is he who is an unsurpassed owner.' => happiness\n"
        " - 'Ready to take part in our show.' => happiness\n"
        " - 'To show everyone that a woman can always gain her female happiness,' => happiness\n"

        " - 'My mother Natalya Konstantinna was a biologist, but in 41 the war began,' => sadness\n"
        " - 'I have never met him, the consequences are irreversible.' => sadness\n"
        " - 'Yes, in the 43rd year, dad left back to the front.' => sadness\n"

        " - 'Ladies from 30 react to me a little more skeptical, or something.' => anger\n"
        " - 'But such a portrait is at home, I would not hang it.' => anger\n"
        " - 'Perhaps because I would not want to have a portrait' => anger\n"

        " - 'Oh, oh.' => surprise\n"
        " - 'And the culture of communication is generally different things.' => surprise\n"
        " - 'It's clear.' => surprise\n"

        " - 'Very tasty, but I'm afraid that there' => fear\n"
        " - 'I'm afraid to go to him.' => fear\n"
        " - 'Frankly, I did not know who would come to me,' => fear\n"

        " - 'It smells.' => disgust\n"
        " - 'They have a taste of such, you know, land.' => disgust\n"
        " - 'The cheese is already covered with mold.' => disgust\n"

        " - 'But he was very hungry' => neutral\n"
        " - 'The feeling is like' => neutral\n"
        " - 'My name is Dmitry. I am the director of the patent bureau.' => neutral\n"
    )

    user_content = (
        f"{examples_text}\n\n"
        f"Here are {group_size} lines:\n{lines_str}\n"
        "Assign exactly one final emotion from [happiness, sadness, anger, surprise, fear, disgust, neutral]."
    )

    return [
        {"role": "system", "content": system_content},
        {"role": "user", "content": user_content}
    ]

In [15]:
def parse_json_emotions(raw_response):
    """
    Extract the list of {"line": ..., "emotion": ...} items from a model reply.

    Expected structure:
    {
      "emotions": [
        {"line": 1, "emotion": "..."},
        ...
      ]
    }

    The reply is tried as-is first. If that is not valid JSON (models often add
    prose or Markdown code fences), the outermost {...} span and then the
    outermost [...] span are tried. The first candidate that decodes as JSON
    decides the result: a dict contributes its "emotions" value, a bare array
    is accepted directly.

    Returns:
        A list containing only the dict items of the decoded array. An empty
        list is returned when the input is not a string, nothing decodes, the
        "emotions" key is missing, or the value is not an array.
    """
    if not isinstance(raw_response, str):
        return []

    text = raw_response.strip()
    candidates = [text]
    for pattern in (r"\{.*\}", r"\[.*\]"):
        # DOTALL so the span may cover a multi-line JSON document.
        match = re.search(pattern, text, flags=re.DOTALL)
        if match and match.group(0) != text:
            candidates.append(match.group(0))

    for candidate in candidates:
        try:
            data = json.loads(candidate)
        except ValueError:
            # json.JSONDecodeError is a ValueError; try the next candidate.
            continue
        if isinstance(data, dict):
            data = data.get("emotions")
        if not isinstance(data, list):
            return []
        # Drop stray non-dict entries so callers can rely on .get().
        return [item for item in data if isinstance(item, dict)]
    return []

In [17]:
def _labels_by_line(emotions_list):
    """Map 1-based line numbers to the raw emotion strings returned by the model."""
    labels = {}
    if not isinstance(emotions_list, list):
        return labels
    for item in emotions_list:
        if not isinstance(item, dict):
            continue
        try:
            # Models sometimes return the line number as a string ("1").
            line_no = int(item.get("line"))
        except (TypeError, ValueError):
            continue
        emotion = item.get("emotion")
        # Keep the first usable entry per line.
        if isinstance(emotion, str) and line_no not in labels:
            labels[line_no] = emotion
    return labels


def main():
    """
    Run the batch emotion-classification experiment end to end.

    Reads DATA_PATH, sends the sentences to the LLM in groups of GROUP_SIZE,
    maps each returned label onto TARGET_EMOTIONS (defaulting to "neutral"),
    prints the macro F1 and a classification report, and writes the
    per-sentence predictions to batch_prompt_results.csv.

    Raises:
        ValueError: if the dataset lacks the 'Translation' or
            'general_emotion' column.
    """
    # Load entire DataFrame
    df = pd.read_excel(DATA_PATH, engine="openpyxl")

    # Check columns
    if "Translation" not in df.columns or "general_emotion" not in df.columns:
        raise ValueError("Dataset must contain 'Translation' and 'general_emotion' columns.")

    # Convert to lowercase so gold labels compare equal to the normalized predictions
    df["general_emotion"] = df["general_emotion"].str.lower()

    predictions = []
    failed_chunks = 0

    # Iterate over the DataFrame in steps of GROUP_SIZE; the last chunk may be shorter.
    for start_idx in range(0, len(df), GROUP_SIZE):
        chunk = df.iloc[start_idx : start_idx + GROUP_SIZE]

        # Prompt variants tried, with the macro F1 each achieved (GROUP_SIZE in parentheses):
        # messages = make_prompt_for_chunk(chunk) #0.4513(10) #0.4868(5)
        # messages = make_prompt_for_chunk_extended_few_shot(chunk) #0.3239(10) #0.4535(5)
        # messages = make_prompt_for_chunk_hidden_cot(chunk) #0.3519(10) 0.3284(5)
        # messages = make_prompt_for_chunk_rare_emphasis(chunk) #0.42056(10) #?(5)
        # messages = make_prompt_for_chunk_ultimate(chunk) #0.4279(5)
        # messages = make_prompt_for_chunk_super_complex(chunk) #0.4481(5)
        # messages = make_prompt_for_chunk_minimal(chunk) #0.4256(5)
        # messages = make_prompt_for_chunk_examples_in_system(chunk) #0.4691(5) -> repetition_penalty - lower
        #
        # Active variant, history of its score:
        #   0.4897(5) -> 0.4966 ("repetition_penalty": 0.22 -> 0.1)
        #   -> 0.5761 (added examples in the prompt)
        #   -> 0.5587 ("temperature": 0.1 -> 0.01, "repetition_penalty": 0.1 -> 0.01)
        #   -> 0.5617 ("top_k": 400 -> 600, "repetition_penalty": 0.01 -> 0.1, "context_length": 1000 -> 2048)
        # group_size is the real chunk length so a short final chunk is announced correctly.
        messages = make_prompt_for_chunk_with_synonyms(chunk, group_size=len(chunk))

        raw_resp = query_llm(messages)

        # Attempt to parse the JSON
        emotions_list = parse_json_emotions(raw_resp if raw_resp else "")
        if not emotions_list:
            # Every line of this chunk will fall back to "neutral"; count it so
            # the fallback does not silently distort the metrics.
            failed_chunks += 1
        raw_labels = _labels_by_line(emotions_list)

        # Match the returned labels with the chunk rows by 1-based position.
        for position, (df_index, row) in enumerate(chunk.iterrows(), start=1):
            raw_label = raw_labels.get(position)
            predicted_label = extract_emotion(raw_label) if raw_label is not None else "neutral"
            predictions.append((df_index, row["Translation"], row["general_emotion"], predicted_label))

    if failed_chunks:
        print(f"Warning: {failed_chunks} chunk(s) had no usable LLM answer and were labelled 'neutral'.")

    # Build a results DataFrame
    results_df = pd.DataFrame(predictions, columns=["df_index","Sentence","True_Label","Pred_Label"])

    # Evaluate
    # (If your dataset has many emotions, also use confusion_matrix etc.)
    f1 = f1_score(results_df["True_Label"], results_df["Pred_Label"], average="macro")
    print("Overall F1 (macro):", f1)
    print(classification_report(
        results_df["True_Label"],
        results_df["Pred_Label"],
        labels=TARGET_EMOTIONS,
        zero_division=0
    ))

    # Optionally save results
    results_df.to_csv("batch_prompt_results.csv", index=False)
    print("Done. Saved predictions to batch_prompt_results.csv")


if __name__ == "__main__":
    main()

Overall F1 (macro): 0.5617707461269942
              precision    recall  f1-score   support

   happiness       0.74      0.66      0.70       375
     sadness       0.50      0.65      0.57        20
       anger       0.41      0.49      0.44        37
    surprise       0.36      0.45      0.40        60
        fear       0.56      1.00      0.71         5
     disgust       0.27      0.67      0.38        12
     neutral       0.73      0.72      0.73       535

    accuracy                           0.68      1044
   macro avg       0.51      0.66      0.56      1044
weighted avg       0.69      0.68      0.68      1044

Done. Saved predictions to batch_prompt_results.csv


In [None]:
# previous attempts
"""
Focused on checking system_prompts (50 examples of each emotion, llama3.2:3b, English text):
=== Results for system_prompt ===
F1 Score: 0.2963
=== Results for system_few_shot_prompt ===
F1 Score: 0.4389

Added control of generation parameters: temperature: 0.0, top_p: 0.9, top_k: 50, repetition_penalty: 1.1:
=== Results for system_prompt ===
F1 Score: 0.3159
=== Results for system_few_shot_prompt ===
F1 Score: 0.4031

Changed repetition_penalty: 1.0:
=== Results for system_prompt ===
F1 Score: 0.3412
=== Results for system_few_shot_prompt ===
F1 Score: 0.4083

Changed repetition_penalty: 1.0, top_k: 400:
=== Results for system_prompt ===
F1 Score: 0.3059
=== Results for system_few_shot_prompt ===
F1 Score: 0.4225

Changed 10 examples of each emotion:
=== Results for system_prompt ===
F1 Score: 0.3364
=== Results for system_few_shot_prompt ===
F1 Score: 0.4327

Added “context_length”: 100:
=== Results for system_prompt ===
F1 Score: 0.3055
=== Results for system_few_shot_prompt ===
F1 Score: 0.4563

Try “context_length”: 2048:
=== Results for system_prompt ===
F1 Score: 0.2639
=== Results for system_few_shot_prompt ===
F1 Score: 0.4399

Try “context_length”: 1000:
=== Results for system_prompt ===
F1 Score: 0.3256
=== Results for system_few_shot_prompt ===
F1 Score: 0.4634

Trying “context_length”: 1500:
=== Results for system_prompt ===
F1 Score: 0.3230
=== Results for system_few_shot_prompt ===
F1 Score: 0.3959

Trying “context_length”: 500:
=== Results for system_prompt ===
F1 Score: 0.2752
=== Results for system_few_shot_prompt ===
F1 Score: 0.4227

Trying “context_length”: 750:
=== Results for system_prompt ===
F1 Score: 0.3038
=== Results for system_few_shot_prompt ===
F1 Score: 0.4070

Trying “context_length”: 1250:
=== Results for system_prompt ===
F1 Score: 0.2690
=== Results for system_few_shot_prompt ===
F1 Score: 0.4600

Trying “context_length”: 1125:
=== Results for system_prompt ===
F1 Score: 0.2954
=== Results for system_few_shot_prompt ===
F1 Score: 0.4133

Best result was with context_length 1000, so I kept it.
"""




"""
llama3.3:latest, GROUP_SIZE = 5, "temperature": 0.01, "repetition_penalty": 1.0, "max_tokens": 1000, "context_length": 2048
f1: 0,43

GROUP_SIZE = 10, "repetition_penalty": 0.5
f1: 0,45

GROUP_SIZE = 7, "temperature": 0, "repetition_penalty": 0.1, slightly changed the prompt - in user_content I repeat the task. 
f1: 0,46

"repetition_penalty": 0.2, "max_tokens": 5000
f1: 0,46

GROUP_SIZE = 10, "temperature": 0.1, "top_p": 0.9, "repetition_penalty": 0.2, "max_tokens": 2000
f1: 0,4634

"temperature": 0.0, "top_k": 400,
f1: 0,4714
"""