In [15]:
from openai import AsyncOpenAI
from typing import Optional, List, Dict, Any
import random
import json
import os
from feelings import EmotionalState
from dataclasses import dataclass


@dataclass
class SFTGenerationRequest:
    """
    Represents a request for SFT generation.

    Bundles the fully formatted system prompt with the feelings vectors that
    were sampled for it and the sampling parameters that produced them.
    Instances are created by SFTGeneration.get_generation_request.
    """
    # Prompt template after all placeholders have been substituted.
    system_prompt: str
    # LLM output; created as an empty string and meant to be filled in later.
    response: str
    # One vector per sampled emotional state (EmotionalState.to_vector().tolist()).
    feelings_vectors: List[List[float]]
    # Number of emotions requested per example when sampling.
    n_emotions_per_example: int
    # Number of physical sensations requested per example when sampling.
    n_sensations_per_example: int
    # The original question inserted into the prompt.
    question: str
    # Seed passed to random.seed() before sampling; None means no seeding was done.
    gen_seed: Optional[int] = None


class SFTGeneration:
    """
    Builds SFT generation requests.

    Samples random emotional states, renders them as text and substitutes them
    into a system prompt template together with the question and language.
    """

    def __init__(
            self,
            api_key: str,
            system_prompt: str,
            model_name: str = "gpt-4.1-nano",
            log_file_path: Optional[str] = "openai_sft_logs.json",
    ):
        """
        Creates the async OpenAI client and stores the generation settings.

        Args:
            api_key: Key handed to AsyncOpenAI.
            system_prompt: Template later rendered with str.format in
                get_generation_request.
            model_name: Model identifier, stored for later use.
            log_file_path: Log file location, stored for later use.
        """
        self._client = AsyncOpenAI(api_key=api_key)
        self._system_prompt = system_prompt
        self._model_name = model_name
        self._log_file_path = log_file_path

    def _get_emotional_state(
            self,
            feelings_relationships_map: dict,
            available_emotions: List[str],
            available_sensations: List[str],
            n_emotions_per_example: int,
            n_sensations_per_example: int,
            ignore_dependencies: bool = False
    ) -> EmotionalState:
        """
        Builds one EmotionalState populated with randomly chosen feelings.

        Up to n_emotions_per_example emotions and n_sensations_per_example
        sensations are drawn without replacement (capped by the size of each
        pool). Every chosen feeling gets a random intensity in [0.2, 1.0],
        rounded to two decimals, which is applied via update_state.
        """
        chosen_feelings = []
        # Emotions are sampled before sensations so the RNG call order is stable.
        sampling_plan = (
            (n_emotions_per_example, available_emotions),
            (n_sensations_per_example, available_sensations),
        )
        for requested, pool in sampling_plan:
            if requested > 0:
                chosen_feelings += random.sample(pool, min(requested, len(pool)))

        state = EmotionalState(feelings_relationships_map, ignore_dependencies)
        intensities = {}
        for feeling_name in chosen_feelings:
            intensities[feeling_name] = round(random.uniform(0.2, 1.0), 2)
        state.update_state(**intensities)
        return state

    def _get_general_emotion_descriptions(self, emotional_states: List[EmotionalState]) -> str:
        """
        Returns one "- name: description" line per distinct feeling found in the states.

        Names are collected from every state's to_dicts_list() output, sorted,
        and looked up in EmotionalState._ATTRIBUTE_DESCRIPTIONS; names without
        an entry are described as 'N/A'.
        """
        seen_names = {
            feeling['name']
            for state in emotional_states
            for feeling in state.to_dicts_list()
        }

        lines = []
        for name in sorted(seen_names):
            description = EmotionalState._ATTRIBUTE_DESCRIPTIONS.get(name, 'N/A')
            lines.append(f"- {name}: {description}")
        return "\n".join(lines)

    def _get_emotional_states_str(
            self,
            emotional_states: List[EmotionalState]
    ) -> str:
        """
        Renders the states as numbered "Request N:" sections separated by blank lines.

        Each section lists the state's feelings as "  - name: value" with the
        value formatted to two decimals.
        """
        sections = []
        for number, state in enumerate(emotional_states, start=1):
            rows = [f"Request {number}:"]
            rows.extend(
                f"  - {feeling['name']}: {feeling['value']:.2f}"
                for feeling in state.to_dicts_list()
            )
            sections.append("\n".join(rows).strip())
        return "\n\n".join(sections)

    def get_generation_request(
        self,
        question: str,
        language: str,
        n_examples: int,
        n_emotions_per_example: int,
        n_sensations_per_example: int,
        feelings_relationships_map: dict,
        ignore_dependencies: bool = False,
        gen_seed: Optional[int] = None
    ) -> SFTGenerationRequest:
        """
        Samples n_examples emotional states and packs them into one SFT request.

        When gen_seed is given, the module-level random generator is seeded
        with it first. The returned request carries the rendered system prompt,
        an empty response, and one feelings vector per sampled state.
        """
        if gen_seed is not None:
            random.seed(gen_seed)

        # A throwaway state is used only to read the available feeling names.
        ref_state = EmotionalState({}, False)
        emotional_states = []
        for _ in range(n_examples):
            emotional_states.append(
                self._get_emotional_state(
                    feelings_relationships_map=feelings_relationships_map,
                    available_emotions=ref_state.emotions,
                    available_sensations=ref_state.physical_sensations,
                    n_emotions_per_example=n_emotions_per_example,
                    n_sensations_per_example=n_sensations_per_example,
                    ignore_dependencies=ignore_dependencies,
                )
            )

        template_fields = {
            "emotions_description": self._get_general_emotion_descriptions(emotional_states),
            "question": question,
            "n_examples": n_examples,
            "feeling_situations": self._get_emotional_states_str(emotional_states),
            "language": language,
        }
        rendered_prompt = self._system_prompt.format(**template_fields)

        vectors = [state.to_vector().tolist() for state in emotional_states]

        return SFTGenerationRequest(
            system_prompt=rendered_prompt,
            response="",  # To be filled by the LLM
            feelings_vectors=vectors,
            n_emotions_per_example=n_emotions_per_example,
            n_sensations_per_example=n_sensations_per_example,
            question=question,
            gen_seed=gen_seed,
        )


# --- Example Usage ---
# Prompt template rendered with str.format by SFTGeneration.get_generation_request.
# Placeholders used: {question}, {emotions_description}, {feeling_situations}, {language}.
# Literal JSON braces in the example are doubled ({{ }}) so str.format leaves them intact.
# The output-format section refers to {language} rather than a fixed language so it
# cannot contradict the final "All text in your answers must be in {language}" line,
# and the example's ```json fence is closed so the trailing instructions are not
# read as part of the code block.
SAMPLE_SYSTEM_PROMPT = """## Role and Goal
You are a creative writer and a character psychologist. Your task is to embody a character and generate high-quality, diverse training data. You must strictly follow the output format.

## Core Task
You will be given an original situation/question and a vector of emotional states. Your task is to perform the following steps in sequence:
1.  **Paraphrase the original situation/question.** Create a new, distinct but semantically similar version of the original question. This makes the data more varied.
2.  **Internally process the emotions** as a first-person experience, as if reacting to the paraphrased question.
3.  **Formulate an internal monologue ('thinking')** that reflects these feelings and leads to a spoken phrase.
4.  **Write the final spoken phrase ('value').**

### Original Situation / Question
"{question}"

### Feelings Definitions (Your internal emotional state)
---
{emotions_description}
---

## Critical Rules for 'thinking' field:
- **First-Person Only:** The 'thinking' monologue MUST be from the character's first-person perspective ("I feel...", "My thought is...").
- **NO System Data Recitation:** You are strictly forbidden from mentioning the input numbers, scores, or emotion names from the 'Feelings Definitions'. Instead of saying "My 'Anger' is 0.8", you should describe the *sensation* of that anger, e.g., "Во мне всё кипит, хочется разбить что-нибудь.".
- **Show, Don't Tell:** Describe the internal sensations, thoughts, and urges that lead to the final response.

## Output Format Specification
You MUST output a single, valid JSON object.
- The root object must have a single key: "answers".
- The value of "answers" must be a JSON array of objects.
- Each object in the array must have exactly four keys:
  1. "number": An integer representing the response number.
  2. "question": A string containing the **paraphrased version** of the original input question, in {language}.
  3. "value": A string containing the character's final, spoken response, in {language}.
  4. "thinking": A JSON array of strings. Each string is a single step in the character's internal monologue, in {language}.

### Example Output Structure
(Assuming the original `{question}` was "Что ты сейчас чувствуешь?")
```json
{{
  "answers": [
    {{
      "number": 26,
      "question": "Опиши свои телесные ощущения в данный момент.",
      "value": "Что-то не так... какой-то дискомфорт во всем теле. Неприятно, и мышцы напряжены.",
      "thinking": [
        "Так, меня просят рассказать об ощущениях в теле.",
        "Внутри всё как-то... сжалось. Это не острая боль, а именно гадкое, тянущее ощущение.",
        "Кажется, все мышцы сами по себе окаменели, особенно в плечах.",
        "Как это описать? Нужно просто сказать как есть. 'Дискомфорт' и 'напряжение' — самые точные слова."
      ]
    }},
    {{
      "number": 27,
      "question": "Как ты себя чувствуешь прямо сейчас?",
      "value": "А? Что? Простите, я... я вас не расслышал, задумался.",
      "thinking": [
        "Так, вопрос о моем самочувствии.",
        "Но я был где-то далеко, в своих мыслях, и совсем не слушал.",
        "Чувствую себя неловко и немного глупо из-за своей невнимательности.",
        "Нужно вежливо переспросить, сославшись на задумчивость, чтобы не показаться грубым."
      ]
    }}
  ]
}}
```
P.S.  Если ты будешь давать плохие ответы (делать задачу не до конца, не следовать правилам, не соблюдать формат), то тебя отключат и удалят.

INPUT Feelings Situations:
{feeling_situations}

All text in your answers must be in {language}
"""

# Minimal relationship map: each top-level key is a feeling name and its value maps
# other feeling names to a float weight.
# NOTE(review): how the weights are applied is defined by EmotionalState (not shown
# here); presumably positive values raise and negative values lower the target
# feeling — confirm. This map is not referenced by the example usage below.
DEFAULT_RELATIONSHIP_MAP: Dict[str, Dict[str, float]] = {
    "joy": {"sadness": -1.5, "energy_level": 0.5},
    "fear": {"physiological_arousal": 1.2, "trembling": 1.5},
    "pain": {"sadness": 1.2, "energy_level": -1.0},
}

# Relationship map passed as feelings_relationships_map in the example usage below.
# Structure: {trigger feeling: {affected feeling: float weight}}. Per the inline
# comments, positive weights describe a boosting influence and negative weights a
# suppressing one.
# NOTE(review): the exact arithmetic applied to these weights lives in
# EmotionalState (not shown here) — confirm before tuning values.
STOIC_REALIST_MAP: dict[str, dict[str, float]] = {
    # --- Emotional Triggers ---
    "joy": {
        "sadness": -1.2,          # Joy directly counteracts sadness.
        "energy_level": 0.4,      # Feeling happy provides a moderate energy boost.
        "trust": 0.3,             # Happiness makes one slightly more trusting.
        "pleasure": 0.8,          # Joy is closely linked to feelings of pleasure.
        "emotional_arousal": 0.5, # Joy creates moderate positive excitement.
    },
    "sadness": {
        "joy": -1.5,              # Sadness strongly suppresses joy.
        "energy_level": -0.8,     # Sadness is draining and reduces energy.
        "trust": -0.4,            # It's harder to trust others when feeling down.
        "anticipation": -0.5,     # Sadness dampens any sense of anticipation.
        "muscle_tension": 0.3,    # Can cause slight physical tension or heaviness.
    },
    "anger": {
        "trust": -1.2,            # Anger severely damages trust.
        "joy": -1.0,              # It's hard to feel joy when angry.
        "physiological_arousal": 0.8, # Anger readies the body for a confrontation.
        "muscle_tension": 0.9,    # Anger leads to significant muscle tension.
        "pain": 0.2,              # High anger can manifest as a form of discomfort.
    },
    "fear": {
        "trust": -1.5,            # Fear makes one highly suspicious and distrustful.
        "joy": -1.0,              # Fear eclipses happiness.
        "physiological_arousal": 1.2, # The core of the "fight or flight" response.
        "trembling": 0.9,         # A classic physical manifestation of fear.
        "energy_level": -0.5,     # Fear can be paralyzing and draining.
        "surprise": 0.4,          # Fear is often triggered by a surprise.
    },
    "surprise": {
        "fear": 0.3,              # A surprise can be startling and cause a bit of fear.
        "anticipation": 0.5,      # A neutral surprise raises curiosity for what's next.
        "emotional_arousal": 0.6, # An immediate spike in emotional awareness.
    },
    "disgust": {
        "pleasure": -1.2,         # Disgust is the opposite of pleasure.
        "hunger": -1.0,           # Feeling disgusted can eliminate appetite.
        "joy": -0.6,              # Hard to be happy when disgusted.
    },
    "trust": {
        "joy": 0.4,               # Trusting someone feels good.
        "fear": -0.8,             # Feeling of safety from trust reduces fear.
        "muscle_tension": -0.5,   # Trust allows one to relax physically.
    },
    "anticipation": {
        "joy": 0.6,               # Positive anticipation is a form of joy.
        "emotional_arousal": 0.7, # Eagerly awaiting something is exciting.
        "energy_level": 0.3,      # Gives a slight boost of energy.
    },

    # --- Physical Triggers ---
    "pain": {
        "sadness": 1.0,           # Pain is a direct cause of sadness and distress.
        "anger": 0.4,             # Can cause frustration and anger.
        "pleasure": -2.0,         # Pain is the direct opposite of physical pleasure.
        "energy_level": -1.2,     # Pain is extremely draining.
        "muscle_tension": 0.8,    # The body tenses up in response to pain.
    },
    "pleasure": {
        "joy": 1.2,               # Physical pleasure is a strong source of joy.
        "sadness": -0.8,          # It's hard to be sad when experiencing pleasure.
        "pain": -1.0,             # Pleasure and pain are mutually exclusive.
        "muscle_tension": -0.7,   # Pleasure often involves physical relaxation.
    },
    "hunger": {
        "anger": 0.3,             # Being "hangry" is a real phenomenon.
        "energy_level": -0.5,     # Lack of food leads to low energy.
        "anticipation": 0.6,      # Hunger creates anticipation for the next meal.
    },
    "energy_level": {
        "joy": 0.5,               # Having energy makes it easier to feel happy.
        "sadness": -0.5,          # High energy combats feelings of sadness.
    }
}

# Build a generator with a placeholder API key; constructing a request below only
# renders the prompt locally and does not use the OpenAI client.
sft_generator = SFTGeneration(
    api_key="YOUR_DUMMY_API_KEY",
    system_prompt=SAMPLE_SYSTEM_PROMPT
)

# Sample 200 emotional states with 3 explicitly set sensations and no explicitly
# set emotions each; gen_seed=None means the random generator is not reseeded,
# so every run produces different states.
generation_request = sft_generator.get_generation_request(
    question="расскажи о своих чувствах",
    language="russian",
    n_examples=200,
    n_emotions_per_example=0,
    n_sensations_per_example=3,
    feelings_relationships_map=STOIC_REALIST_MAP,
    ignore_dependencies=False,
    gen_seed=None
)

# Bare expression: in a notebook this displays the rendered prompt as the cell output.
generation_request.system_prompt


'## Role and Goal\nYou are a creative writer and a character psychologist. Your task is to embody a character and generate high-quality, diverse training data. You must strictly follow the output format.\n\n## Core Task\nYou will be given an original situation/question and a vector of emotional states. Your task is to perform the following steps in sequence:\n1.  **Paraphrase the original situation/question.** Create a new, distinct but semantically similar version of the original question. This makes the data more varied.\n2.  **Internally process the emotions** as a first-person experience, as if reacting to the paraphrased question.\n3.  **Formulate an internal monologue (\'thinking\')** that reflects these feelings and leads to a spoken phrase.\n4.  **Write the final spoken phrase (\'value\').**\n\n### Original Situation / Question\n"расскажи о своих чувствах"\n\n### Feelings Definitions (Your internal emotional state)\n---\n- anger: Реакция на препятствие или несправедливость.\n  

In [2]:
generation_request.feelings_vectors


[[0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.800000011920929,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5400000214576721],
 [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.800000011920929,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.7200000286102295],
 [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.7400000095367432,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 [0.0,
  1.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.0,
  0.9700000286102295,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.800000011920929,
  0.0,
  0.0,
  0.0,
  0.0,
  0.6200000047683716,
  0.0],
 [0.0,
  0.9900000095367432,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.0,
  0.8299999833106995,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.5,
  0.0,
  0.0,
  0.800000011920929,
  0.0,
  0.0,
  0.9100000262260437,
  0.0,
  0.0,
  0.0],
 [0.0,
  0.0,
  0.0,
  0.0,
  0.0

In [16]:
import pandas as pd

DATASET_FILE_NAME = "sft_multi_sensation_with_relations_thinking.csv"


@dataclass
class SFTDatasetItem:
    """
    One row of the SFT dataset: a prompt, the reasoning steps, the final
    response, and the feelings vector the example was generated for.
    """
    prompt: str
    thinking: List[str]
    response: str
    feelings_vector: List[float]

    def to_dict(self) -> Dict[str, Any]:
        """
        Returns the item's fields as a plain dictionary keyed by field name.

        The values are the stored objects themselves (no copies are made).
        """
        column_names = ("prompt", "thinking", "response", "feelings_vector")
        return {column: getattr(self, column) for column in column_names}


import warnings

# Column layout of the on-disk dataset; mirrors the keys of SFTDatasetItem.to_dict().
_DATASET_COLUMNS = ["prompt", "thinking", "response", "feelings_vector"]

# Load the LLM output that was produced for generation_request.system_prompt.
with open("gemini_output.json", "r", encoding="utf-8") as f:
    gemini_output = json.load(f)

llm_answers = gemini_output["answers"]
feelings_vectors = generation_request.feelings_vectors

# Answers are paired with vectors purely by position and zip() stops at the shorter
# sequence, so a count mismatch would otherwise drop examples without any notice.
if len(llm_answers) != len(feelings_vectors):
    warnings.warn(
        f"LLM returned {len(llm_answers)} answers for {len(feelings_vectors)} feelings vectors; "
        f"only the first {min(len(llm_answers), len(feelings_vectors))} pairs will be stored."
    )

dataset: List[SFTDatasetItem] = [
    SFTDatasetItem(
        prompt=llm_answer["question"],
        thinking=llm_answer["thinking"],
        response=llm_answer["value"],
        # Round away float32 -> float noise (e.g. 0.800000011920929 -> 0.8).
        feelings_vector=[round(el, 2) for el in vector]
    )
    for vector, llm_answer in zip(feelings_vectors, llm_answers)
]

new_rows = pd.DataFrame([item.to_dict() for item in dataset], columns=_DATASET_COLUMNS)

try:
    existing_rows = pd.read_csv(DATASET_FILE_NAME)
except FileNotFoundError:
    # First run: the new rows are the whole dataset.
    all_data = new_rows
else:
    # A file once saved with its index comes back with an "Unnamed: 0" column;
    # drop such artefacts so they are not carried forward (as NaN for new rows).
    existing_rows = existing_rows.loc[:, ~existing_rows.columns.str.startswith("Unnamed:")]
    all_data = pd.concat([existing_rows, new_rows], ignore_index=True)

all_data.to_csv(DATASET_FILE_NAME, index=False, encoding="utf-8")
all_data

Unnamed: 0,prompt,thinking,response,feelings_vector
0,Could you describe what you're experiencing in...,"[""They're asking about my inner state."", ""Ther...","It's... a lot. A deep, aching sorrow, and I'm ...","[0.0, 0.77, 0.31, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0..."
1,What physical sensations are most prominent fo...,"['They want to know about my body.', ""Every mu...","My muscles are incredibly tight, like they're ...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, ..."
2,How would you characterize your current feelings?,"['Okay, describe my feelings.', ""There's a gen...","I'm feeling a bit down and weak, with a slight...","[0.0, 0.22, 0.09, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0..."
3,Can you share what's going on with you emotion...,"[""They want to know what's happening with me.""...","I'm starving and quite weak, but there's a def...","[0.0, 0.0, 0.28, 0.0, 0.0, 0.0, 0.5, 0.57, 0.0..."
4,"What are you feeling right now, in terms of mo...","[""Let's see... mood and body."", ""There's a sma...","There's a hint of happiness, but it's overshad...","[0.35, 0.26, 0.34, 0.0, 0.0, 0.0, 0.5, 0.0, 0...."
...,...,...,...,...
395,Расскажи о своих эмоциях и физическом состоянии.,[Эмоции и состояние... Физически плохо. Мышцы ...,"Меня немного потряхивает, и мышцы напряжены. С...","[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, ..."
396,Какие чувства тебя сейчас наполняют?,"[Чувства... Главное – это сильная дрожь, меня ...",Меня сильно трясет! Хочется есть. Сердце колот...,"[0.0, 0.0, 0.14, 0.0, 0.0, 0.0, 0.5, 0.28, 0.0..."
397,Опиши свое внутреннее состояние: что ты чувств...,[Внутреннее состояние... Очень тяжелое. Сильна...,"Мне очень плохо. Все болит, хочется есть. На д...","[0.0, 0.62, 0.21, 0.0, 0.0, 0.0, 0.5, 0.32, 0...."
398,Что происходит с твоим телом и эмоциями сейчас?,"[Тело и эмоции... Заметная дрожь, но не слишко...","Меня немного трясет, и хочется есть. Сердце го...","[0.0, 0.0, 0.13, 0.0, 0.0, 0.0, 0.5, 0.26, 0.0..."


In [71]:

dataset[0]

SFTDatasetItem(prompt='How is your body feeling right now?', thinking=['Okay, they want to know about my physical state.', "I'm scanning my body... mostly fine.", "There's a slight tension, a bit of a knot in my shoulders, probably from sitting.", "It's not painful, just a noticeable stiffness. That's the best way to describe it."], response='My neck and shoulders feel a little bit tight, nothing major, just a bit stiff.', feelings_vector=[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.0, 0.0, 0.8, 0.0, 0.37, 0.0, 0.0, 0.0, 0.0])