In [1]:
# !pip install unsloth openai datasets transformers trl


In [2]:
import os
os.environ['HF_HOME'] = '/root/autodl-tmp/cache/'
os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [3]:

import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from openai import OpenAI
import time
from tqdm import tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
# 读取环境变量中的 API Key
api_key = os.environ.get("DeepSeek_API_KEY")


client = OpenAI(
    api_key=api_key,
    base_url="https://api.deepseek.com"
)


In [5]:
max_seq_length = 2048
dtype = torch.float16  # 适用于 4-bit 量化
load_in_4bit = True
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)


==((====))==  Unsloth 2025.3.10: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    NVIDIA vGPU-32GB. Num GPUs = 1. Max memory: 31.503 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [6]:
prompt_style = """Below is an instruction that describes a task, paired with
an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by
step chain of thoughts to ensure a logical and accurate response.
### Instruction:
You are an expert in sentiment analysis with advanced knowledge in understanding
and interpreting emotions from text.
Please analyze the sentiment of the following text and output 0 (negative) or 1 (positive).
### Text:
{}
### Response:
<think>{}"""

train_prompt_style = """Below is an instruction that describes a task, paired with
an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by
step chain of thoughts to ensure a logical and accurate response.
### Instruction:
You are an expert in sentiment analysis with advanced knowledge in understanding
and interpreting emotions from text.
Please analyze the sentiment of the following text and output 0 (negative) or 1 (positive).
### Text:
{} 
### Response: 
<think> 
{} 
</think> 
{}"""


In [7]:
def generate_cot(text, label, max_retries=3):
    """
    生成思维链的函数
    :param text: 需要分析的文本
    :param label: 情感标签（0/1）
    :param max_retries: 最大重试次数
    :return: 生成的CoT字符串
    """
    sentiment_map = {0: "negative", 1: "positive"}

    prompt = f"""As a sentiment analysis expert, generate a step-by-step Chain of Thought (CoT) in English to explain why the following text is {sentiment_map[label]}. 
The CoT should follow this structure:
1. Identify key sentiment-bearing words/phrases
2. Analyze contextual clues
3. Consider linguistic patterns
4. Synthesize overall sentiment
5. Conclude with the final sentiment label (0 for negative, 1 for positive)

Text: {text}
CoT:"""

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": "system", "content": "You are an expert in sentiment analysis and logical reasoning."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.7,
                max_tokens=300,
                stream=False
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            if attempt < max_retries - 1:
                print(f"Attempt {attempt+1} failed, retrying...")
                time.sleep(2)
            else:
                print(f"Failed after {max_retries} attempts: {str(e)}")
                return "CoT generation failed"


In [8]:
def add_cot_to_dataset(dataset, sample_size=None):
    """
    为数据集添加CoT字段
    :param dataset: 原始数据集
    :param sample_size: 采样数量（测试时使用）
    :return: 包含CoT的新数据集
    """
    if sample_size:
        dataset = dataset.select(range(min(sample_size, len(dataset))))

    texts = dataset["sentence"]  # SST-2数据集的文本字段是"sentence"
    labels = dataset["label"]

    cots = []
    for text, label in tqdm(zip(texts, labels), total=len(texts)):
        cot = generate_cot(text, label)
        cots.append(cot)
        time.sleep(1)  # 控制API调用频率

    return dataset.add_column("Complex_CoT", cots)


In [9]:
dataset = load_dataset("glue", "sst2", split="train[:500]")#使用500条数据训练

# 生成CoT数据
enhanced_dataset = add_cot_to_dataset(dataset, sample_size=10)


100%|██████████| 10/10 [03:11<00:00, 19.13s/it]


In [10]:
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    inputs = examples["sentence"]
    cots = examples["Complex_CoT"]
    outputs = examples["label"]
    texts = []
    for input, cot, output in zip(inputs, cots, outputs):
        text = train_prompt_style.format(input, cot, output) + EOS_TOKEN
        texts.append(text)
    return {
        "text": texts,
    }

formatted_dataset = enhanced_dataset.map(formatting_prompts_func, batched=True)


Map: 100%|██████████| 10/10 [00:00<00:00, 1403.06 examples/s]


In [11]:
print(formatted_dataset[0]['Complex_CoT'])

1. **Identify key sentiment-bearing words/phrases**:  
   - "hide" implies concealment or secrecy, which often carries a negative connotation.  
   - "new secretions" is an unusual and somewhat unsettling phrase, suggesting something unnatural or undesirable.  
   - "parental units" is a detached and impersonal way to refer to parents, which can indicate emotional distance or resentment.  

2. **Analyze contextual clues**:  
   - The act of hiding something from parents suggests a lack of trust or fear of judgment.  
   - The phrase "new secretions" is ambiguous but evokes a sense of something being hidden because it is inappropriate, embarrassing, or harmful.  
   - The use of "parental units" instead of "parents" adds a cold, clinical tone, further distancing the speaker emotionally.  

3. **Consider linguistic patterns**:  
   - The choice of words like "hide" and "secretions" creates a sense of discomfort or unease.  
   - The impersonal term "parental units" dehumanizes the parent

In [12]:
print(formatted_dataset[0]['text'])

Below is an instruction that describes a task, paired with
an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by
step chain of thoughts to ensure a logical and accurate response.
### Instruction:
You are an expert in sentiment analysis with advanced knowledge in understanding
and interpreting emotions from text.
Please analyze the sentiment of the following text and output 0 (negative) or 1 (positive).
### Text:
hide new secretions from the parental units  
### Response: 
<think> 
1. **Identify key sentiment-bearing words/phrases**:  
   - "hide" implies concealment or secrecy, which often carries a negative connotation.  
   - "new secretions" is an unusual and somewhat unsettling phrase, suggesting something unnatural or undesirable.  
   - "parental units" is a detached and impersonal way to refer to parents, which can indicate emotional distance or resentment.  

2

In [13]:
def inference_example(text):
    FastLanguageModel.for_inference(model)
    inputs = tokenizer([prompt_style.format(text, "")], return_tensors="pt").to("cuda")
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=1200,
        use_cache=True,
    )
    response = tokenizer.batch_decode(outputs)
    return response[0].split("### Response:")[1]


# 训练前推理示例
text = "I absolutely loved the movie! The acting was superb and the storyline was captivating."
print("训练前推理结果：")
print(inference_example(text))

训练前推理结果：

<think>
Okay, so I need to figure out the sentiment of the given text. The text is: "I absolutely loved the movie! The acting was superb and the storyline was captivating." Hmm, let's break this down.

First, the user starts with "I absolutely loved the movie!" That's a strong positive expression. "Loved" is a very positive word. Then they mention the acting was superb. "Superb" is another positive adjective, indicating high praise for the acting. 

Next, they say the storyline was captivating. "Captivating" implies something that holds attention and is enjoyable, which is another positive aspect. So, all the words used here are positive and express approval and enjoyment.

I don't see any negative words or phrases in there. There's no criticism or negative feedback. The user is clearly expressing enthusiasm and satisfaction with the movie.

So, considering all these points, the sentiment should definitely be positive. Therefore, the score should be 1.
</think>

1<｜end▁of▁sen

In [14]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)


Unsloth 2025.3.10 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [15]:
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        # warmup_steps=5,
        # max_steps=60, #max_steps=60：总共训练 60 步（适用于短暂测试，完整训练一般使用 num_train_epochs）
        num_train_epochs = 1,
        warmup_ratio = 0.1,
        learning_rate=2e-4,
        # fp16=not is_bfloat16_supported(), #fp16=not is_bfloat16_supported()：如果不支持 bfloat16，就使用 fp16。
        # bf16=is_bfloat16_supported(), #bf16=is_bfloat16_supported()：如果支持 bfloat16，就使用 bf16。
        fp16=True,
        logging_steps=10,
        optim="adamw_8bit",#采用 AdamW 优化器的 8-bit 版本（减少显存占用，提高大模型训练效率）
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none"  # 关闭 wandb
    ),
)

trainer_stats = trainer.train()


Unsloth: Tokenizing ["text"] (num_proc=2): 100%|██████████| 10/10 [00:01<00:00,  7.53 examples/s]
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 10 | Num Epochs = 1 | Total steps = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 41,943,040/4,670,623,744 (0.90% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss


In [16]:
print("训练后推理结果：")
print(inference_example(text))

训练后推理结果：

<think>
Okay, so I need to figure out the sentiment of the given text. The user provided a text: "I absolutely loved the movie! The acting was superb and the storyline was captivating." My task is to determine whether the sentiment is negative (0) or positive (1). 

First, I'll break down the text. The phrase "I absolutely loved the movie" immediately stands out. "Loved" is a strong positive word. It expresses a high level of satisfaction or enthusiasm. 

Next, the user mentions the acting was superb. "Superb" is another positive adjective, indicating excellence or outstanding quality. This reinforces the positive sentiment.

Then, the storyline is described as captivating. "Captivating" suggests that the story held the reader's attention and interest, which is another positive trait. 

Looking at the overall structure, the user is giving positive feedback on two key aspects of the movie: acting and storyline. There are no negative words or phrases that would indicate dissati

In [17]:
from huggingface_hub import login

hf_token = os.getenv("HF_TOKEN")  # 前提是设置了token的环境变量
login(token=hf_token)
new_model_local = "DeepSeek-R1-Sentiment-COT"
model.save_pretrained(new_model_local)
tokenizer.save_pretrained(new_model_local)
model.save_pretrained_merged(new_model_local, tokenizer, save_method="merged_16bit")


Unsloth: Merging 4bit and LoRA weights to 16bit...
Unsloth: Will use up to 522.53 out of 755.51 RAM for saving.
Unsloth: Saving model... This might take 5 minutes ...


100%|██████████| 32/32 [00:00<00:00, 41.78it/s]


Unsloth: Saving tokenizer... Done.


KeyboardInterrupt: 

In [None]:

new_model_online = "MelodyOfTears/DeepSeek-R1-Sentiment-COT"
model.push_to_hub(new_model_online)
tokenizer.push_to_hub(new_model_online)
model.push_to_hub_merged(new_model_online, tokenizer, save_method="merged_16bit")