In [None]:
!pip install --force-reinstall -U ipywidgets
!pip install --force-reinstall unsloth

!pip3 install --force-reinstall torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118

In [None]:
import unsloth


In [None]:
# import unsloth

import torch

print(f"Версия PyTorch: {torch.__version__}")
print(f"Доступна ли CUDA: {torch.cuda.is_available()}")

if torch.cuda.is_available():
    print(f"Версия CUDA для PyTorch: {torch.version.cuda}")
    print(f"Имя GPU: {torch.cuda.get_device_name(0)}")
else:
    print(">>> CUDA недоступна. Установлена CPU-версия PyTorch или есть проблема с совместимостью.")

In [1]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import ast
from datasets import Dataset
from typing import Optional, List, Dict, Any

import unsloth
from unsloth import FastLanguageModel
from transformers import AutoConfig, TrainingArguments, DataCollatorForLanguageModeling
from trl import SFTTrainer

# ==============================================================================
# 1. DEFINITION OF THE CUSTOM MODEL WRAPPER
# (This class remains unchanged)
# ==============================================================================
class EmotionUnslothModel(nn.Module):
    """
    An unsloth-optimized wrapper that includes a trainable vector projector.
    This class takes a raw, fixed-size emotion vector, projects it to the
    model's hidden dimension, and then injects it into the forward pass.
    """
    def __init__(
        self,
        model_name_or_path: str,
        raw_emotion_vector_size: int,
        lora_rank: int = 16,
        lora_alpha: int = 16,
        use_4bit: bool = True,
        max_seq_length: int = 2048,
    ):
        """
        Initializes the EmotionUnslothModel with a vector projector.

        Args:
            model_name_or_path (str): The name or path of the base model.
            raw_emotion_vector_size (int): The dimension of the input emotion vector.
            lora_rank (int): The rank for LoRA decomposition.
            lora_alpha (int): The alpha parameter for LoRA.
            use_4bit (bool): Whether to load the model in 4-bit.
            max_seq_length (int): The maximum sequence length for the model.
        """
        super().__init__()
        self.model, self.tokenizer = FastLanguageModel.from_pretrained(
            model_name=model_name_or_path,
            max_seq_length=max_seq_length,
            load_in_4bit=use_4bit,
            cache_dir="./model_cache",
        )
        model_hidden_size = self.model.config.hidden_size
        self.vector_projector = nn.Linear(
            in_features=raw_emotion_vector_size,
            out_features=model_hidden_size,
            bias=False
        )
        self.vector_projector.to("cuda", self.model.dtype)
        self.peft_model = FastLanguageModel.get_peft_model(
            self.model,
            r=lora_rank,
            lora_alpha=lora_alpha,
            lora_dropout=0,
            bias="none",
            use_gradient_checkpointing=True,
            random_state=42,
            target_modules=[
                "q_proj", "k_proj", "v_proj", "o_proj",
                "gate_proj", "up_proj", "down_proj"
            ],
        )
        for param in self.vector_projector.parameters():
            param.requires_grad = True
        self.peft_model.print_trainable_parameters()

    def forward(
        self,
        input_ids: torch.LongTensor,
        attention_mask: torch.Tensor,
        emotion_vector: torch.Tensor,
        labels: Optional[torch.LongTensor] = None,
        **kwargs,
    ) -> Dict[str, torch.Tensor]:
        """
        Performs the forward pass with projection and injection.
        """
        projected_vector = self.vector_projector(emotion_vector)
        embedding_layer = self.peft_model.get_input_embeddings()
        token_embeddings = embedding_layer(input_ids)
        combined_embeddings = token_embeddings + projected_vector.unsqueeze(1)
        model_outputs = self.peft_model(
            inputs_embeds=combined_embeddings,
            attention_mask=attention_mask,
            labels=labels,
            return_dict=True
        )
        return model_outputs

# ==============================================================================
# 2. DEFINITION OF THE CUSTOM DATA COLLATOR
# (This class remains unchanged)
# ==============================================================================
class DataCollatorForEmotionLM(DataCollatorForLanguageModeling):
    """
    Custom data collator that handles tokenizing text and stacking emotion vectors.
    """
    def __call__(
        self,
        features: List[Dict[str, Any]]
    ) -> Dict[str, Any]:
        """
        Processes a list of features to create a batch.
        """
        emotion_vectors = [feature.pop("emotion_vector") for feature in features]
        batch = super().__call__(features)
        batch['emotion_vector'] = torch.stack(emotion_vectors)
        return batch


def process_and_tokenize_example(
    example: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Processes a single dataset example by formatting text, parsing the
    emotion vector, and tokenizing the text.
    """
    thinking = "\n".join(ast.literal_eval(example["thinking"]))
    assistant_output = f"<thinking>{thinking}</thinking>\n{example['response']}"
    formatted_text = prompt_template.format(example["prompt"], assistant_output)
    vector_as_list = ast.literal_eval(example["feelings_vector"])
    emotion_vector = torch.tensor(vector_as_list, dtype=model_dtype)
    tokenized_example = tokenizer(
        formatted_text,
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
        padding=False,
        return_tensors=None,
    )
    tokenized_example["labels"] = tokenized_example["input_ids"][:]
    tokenized_example["emotion_vector"] = emotion_vector
    return tokenized_example

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.4.1+cu124 with CUDA 1204 (you have 2.6.0+cu118)
    Python  3.12.5 (you have 3.12.10)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


Unsloth: Failed to patch Gemma3ForConditionalGeneration.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
MODEL_NAME = "unsloth/Qwen3-0.6B-unsloth-bnb-4bit"
MAX_SEQ_LENGTH = 2048
RAW_EMOTION_VECTOR_SIZE = 16

# --- Model Initialization ---
print("Initializing the model...")
emotion_model_wrapper = EmotionUnslothModel(
    model_name_or_path=MODEL_NAME,
    raw_emotion_vector_size=RAW_EMOTION_VECTOR_SIZE,
    max_seq_length=MAX_SEQ_LENGTH
)
model_dtype = emotion_model_wrapper.model.dtype
tokenizer = emotion_model_wrapper.tokenizer

Initializing the model...


  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.3.19: Fast Qwen3 patching. Transformers: 4.51.3.
   \\   /|    NVIDIA GeForce RTX 3090. Num GPUs = 1. Max memory: 24.0 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu118. CUDA: 8.6. CUDA Toolkit: 11.8. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Making `model.base_model.model.model` require gradients
trainable params: 10,092,544 || all params: 606,142,464 || trainable%: 1.6650


In [3]:
DATA_FILE_PATH = "sft_one_emotion_thinking.csv"

# --- Data Preparation ---
print(f"Preparing the dataset from '{DATA_FILE_PATH}'...")
prompt_template = "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n{}<|im_end|>"
raw_dataset = Dataset.from_csv(DATA_FILE_PATH)

def process_and_tokenize_example(
    example: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Processes a single dataset example by formatting text, parsing the
    emotion vector, and tokenizing the text.
    """
    thinking = "\n".join(ast.literal_eval(example["thinking"]))
    assistant_output = f"<thinking>{thinking}</thinking>\n{example['response']}"
    formatted_text = prompt_template.format(example["prompt"], assistant_output)
    vector_as_list = ast.literal_eval(example["feelings_vector"])
    emotion_vector = torch.tensor(vector_as_list, dtype=model_dtype)
    tokenized_example = tokenizer(
        formatted_text,
        truncation=True,
        max_length=MAX_SEQ_LENGTH,
        padding=False,
        return_tensors=None,
    )
    tokenized_example["labels"] = tokenized_example["input_ids"][:]
    tokenized_example["emotion_vector"] = emotion_vector
    return tokenized_example

tokenized_dataset = raw_dataset.map(
    process_and_tokenize_example,
    remove_columns=raw_dataset.column_names
)


Preparing the dataset from 'sft_one_emotion_thinking.csv'...


Map:   0%|          | 0/600 [00:00<?, ? examples/s]

In [6]:


# --- Inference Example ---
print("\n--- Пример генерации текста (инференс) ---")

inference_example = raw_dataset[0]
inference_prompt = inference_example["prompt"]
inference_vector_str = inference_example["feelings_vector"]
inference_vector_list = ast.literal_eval(inference_vector_str)

print(f"Prompt: '{inference_prompt}'")
print(f"Using feelings_vector: {inference_vector_list}...")

formatted_prompt_for_inference = prompt_template.format(inference_prompt, "")
inputs = tokenizer(formatted_prompt_for_inference, return_tensors="pt").to("cuda")

emotion_vector_tensor = torch.tensor([inference_vector_list], dtype=model_dtype).to("cuda")
projected_vector = emotion_model_wrapper.vector_projector(emotion_vector_tensor)
embedding_layer = emotion_model_wrapper.peft_model.get_input_embeddings()
token_embeddings = embedding_layer(inputs.input_ids)
combined_embeddings = token_embeddings + projected_vector.unsqueeze(1)

# --- CORRECTED CALL TO .generate() ---
# Unsloth's fast generation path requires `input_ids` to be passed,
# even when providing `inputs_embeds`.
generated_ids = emotion_model_wrapper.peft_model.generate(
    input_ids=inputs.input_ids,  # This argument is required by Unsloth
    inputs_embeds=combined_embeddings,
    attention_mask=inputs.attention_mask,
    max_new_tokens=100,
    use_cache=True,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
)
full_generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=False)
assistant_part = full_generated_text.split("<|im_start|>assistant\n")[-1]

print("\n--- Сгенерированный текст ---")
print(assistant_part)



--- Пример генерации текста (инференс) ---
Prompt: 'How are you feeling at this moment?'
Using feelings_vector: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.77, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]...

--- Сгенерированный текст ---
<|im_end|>ícuenciaincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincincinc


In [7]:
inference_example

{'prompt': 'How are you feeling at this moment?',
 'thinking': '["They\'re asking how I am.", \'I feel this incredible buzz, a thrumming energy deep inside me.\', "It\'s like waiting for the curtain to rise on the best show ever.", "My mind is racing with all the wonderful possibilities. It\'s so close!", \'I should convey this excitement, this feeling that something great is just around the corner.\']',
 'response': "I'm on the edge of my seat! Something amazing is about to happen, I can feel it.",
 'feelings_vector': '[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.77, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'}

In [8]:
# ==============================================================================
# 3. MAIN TRAINING SCRIPT
# ==============================================================================

# --- Trainer Setup ---
print("Setting up the trainer...")
data_collator = DataCollatorForEmotionLM(
    tokenizer=tokenizer,
    mlm=False
)

trainer = SFTTrainer(
    model=emotion_model_wrapper.peft_model,
    tokenizer=tokenizer,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
    max_seq_length=MAX_SEQ_LENGTH,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_ratio=0.1,
        num_train_epochs=5,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_strategy="steps",
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=42,
        output_dir="outputs",
        save_strategy="epoch",
    ),
)

# --- Start Training ---
print("Starting training...")
trainer.train()
print("Training finished!")

# --- Inference Example ---
print("\n--- Пример генерации текста (инференс) ---")



print(f"Prompt: '{inference_prompt}'")
print(f"Using feelings_vector: {inference_vector_list}...")


formatted_prompt_for_inference = prompt_template.format(inference_prompt, "")
inputs = tokenizer(formatted_prompt_for_inference, return_tensors="pt").to("cuda")

emotion_vector_tensor = torch.tensor([inference_vector_list], dtype=model_dtype).to("cuda")
projected_vector = emotion_model_wrapper.vector_projector(emotion_vector_tensor)
embedding_layer = emotion_model_wrapper.peft_model.get_input_embeddings()
token_embeddings = embedding_layer(inputs.input_ids)
combined_embeddings = token_embeddings + projected_vector.unsqueeze(1)

# --- CORRECTED CALL TO .generate() ---
# Unsloth's fast generation path requires `input_ids` to be passed,
# even when providing `inputs_embeds`.
generated_ids = emotion_model_wrapper.peft_model.generate(
    input_ids=inputs.input_ids,  # This argument is required by Unsloth
    inputs_embeds=combined_embeddings,
    attention_mask=inputs.attention_mask,
    max_new_tokens=100,
    use_cache=True,
    do_sample=True,
    top_p=0.9,
    temperature=0.7,
    pad_token_id=tokenizer.eos_token_id,
)

full_generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
assistant_part = full_generated_text.split("<|im_start|>assistant\n")[-1]

print("\n--- Сгенерированный текст ---")
print(full_generated_text)




Setting up the trainer...
Starting training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 600 | Num Epochs = 5 | Total steps = 375
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 10,092,544/6,000,000,000 (0.17% trained)
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Step,Training Loss
10,4.4914
20,3.3559
30,2.3567
40,1.9114
50,1.6591
60,1.5863
70,1.5094
80,1.3684
90,1.2826
100,1.2443


Training finished!

--- Пример генерации текста (инференс) ---
Prompt: 'How are you feeling at this moment?'
Using feelings_vector: [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.5, 0.77, 0.0, 0.8, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]...

--- Сгенерированный текст ---
system
You are a helpful assistant.
user
How are you feeling at this moment?
assistant
脐{}kökökökökökökökökökökökökökökökökö dokładnieodakökökökökökökökökökökökökökö sekököyyyyyyyyyyyyyyyyyyyyyyy(kkökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökökö


In [None]:
import pandas as pd
import datasets

ASSISTANT_TEMPLATE = """<|im_start|>user\n{user_prompt}<|im_end|><|im_start|>assistant\n<think>{thinking}</think>{assistant_answer}<|im_end|>\n"""
ASSISTANT_TEMPLATE = """\n<think>{thinking}</think>{assistant_answer}\n"""

def generate_conversation(examples):
    user_message = examples["prompt"]
    thinking = examples["thinking"]
    response = examples["response"]

    conversations = []
    for index in range(len(user_message)):
        thinking_text = "\n".join(eval(thinking[index]))
        assistant_answer = ASSISTANT_TEMPLATE.format(
            user_prompt=user_message[index],
            thinking=thinking_text,
            assistant_answer=response[index]
        )
        conversations.append([
            {"role" : "user",      "content" : user_message[index]},
            {"role" : "assistant", "content" : assistant_answer},
        ])
    return { "conversations": conversations, }


one_emotion_dataset  = pd.read_csv("sft_one_emotion_thinking.csv")
one_emotion_dataset = datasets.Dataset.from_pandas(one_emotion_dataset)
one_emotion_dataset = one_emotion_dataset.map(
    generate_conversation,
    remove_columns=one_emotion_dataset.column_names,
    batched=True,
    batch_size=1000,
)

In [None]:
import pandas as pd
one_emotion_dataset  = pd.read_csv("sft_one_emotion_thinking.csv")
one_emotion_dataset

In [None]:
one_emotion_dataset