In [None]:
!pip install unsloth openai


In [None]:
import os
# os.environ['HF_HOME'] = '/root/autodl-tmp/cache/'
# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

In [None]:

import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from openai import OpenAI
import time
from tqdm import tqdm


In [None]:
# Kaggle-specific: the API key is read from the notebook's secret store
# instead of being written into the notebook.
from kaggle_secrets import UserSecretsClient

user_secrets = UserSecretsClient()

# DeepSeek API key, stored under the secret name "DeepSeek_API_KEY".
api_key = user_secrets.get_secret("DeepSeek_API_KEY")

# OpenAI SDK client pointed at the DeepSeek endpoint.
client = OpenAI(base_url="https://api.deepseek.com", api_key=api_key)


In [None]:
# Loading configuration for the base model.
max_seq_length = 2048
load_in_4bit = True
dtype = torch.float16  # float16, chosen for use with 4-bit quantization

# Load the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/DeepSeek-R1-Distill-Llama-8B",
    load_in_4bit=load_in_4bit,
    dtype=dtype,
    max_seq_length=max_seq_length,
)


In [None]:
# Shared prompt header. The inference and training templates are both built
# from this one string so that a formatted training example starts with exactly
# the same characters as the inference prompt (no stray trailing spaces), which
# keeps the prompt the model is trained on identical to the one it is queried with.
_PROMPT_PREFIX = """Below is an instruction that describes a task, paired with
an input that provides further context.
Write a response that appropriately completes the request.
Before answering, think carefully about the question and create a step-by
step chain of thoughts to ensure a logical and accurate response.
### Instruction:
You are an expert in sentiment analysis with advanced knowledge in understanding
and interpreting emotions from text.
Please analyze the sentiment of the following text and output 0 (negative) or 1 (positive).
### Text:
{}
### Response:
<think>"""

# Inference template. Placeholders: (text, text to place right after "<think>").
prompt_style = _PROMPT_PREFIX + "{}"

# Training template. Placeholders: (text, chain of thought, label).
train_prompt_style = _PROMPT_PREFIX + "\n{}\n</think>\n{}"


In [None]:
import random
import time

def generate_cot(text, label, max_retries=10, base_delay=2, max_delay=60):
    """
    Generate a chain-of-thought (CoT) explanation for a labelled sentence.

    Sends the request through the module-level ``client`` and retries failed
    calls with exponential backoff plus random jitter.

    :param text: Text to analyse.
    :param label: Sentiment label, 0 (negative) or 1 (positive).
    :param max_retries: Maximum number of API attempts.
    :param base_delay: Wait in seconds before the first retry; doubles on each retry.
    :param max_delay: Upper bound in seconds for a single backoff wait (jitter is added on top).
    :return: The generated CoT string, or "CoT generation failed" when no attempt succeeded
             (including when ``max_retries`` is zero or negative).
    :raises KeyError: If ``label`` is neither 0 nor 1.
    """
    sentiment_map = {0: "negative", 1: "positive"}

    prompt = f"""As a sentiment analysis expert, generate a step-by-step Chain of Thought (CoT) in English to explain why the following text is {sentiment_map[label]}. 
The CoT should follow this structure:
1. Identify key sentiment-bearing words/phrases
2. Analyze contextual clues
3. Consider linguistic patterns
4. Synthesize overall sentiment
5. Conclude with the final sentiment label (0 for negative, 1 for positive)

Text: {text}
CoT:"""

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="deepseek-chat",
                messages=[
                    {"role": "system", "content": "You are an expert in sentiment analysis and logical reasoning."},
                    {"role": "user", "content": prompt}
                ],
                temperature=0.4,
                max_tokens=300,
                stream=False
            )
            return response.choices[0].message.content.strip()
        except Exception as e:
            if attempt < max_retries - 1:
                # Exponential backoff, capped so late retries do not stall for
                # many minutes, plus jitter to avoid synchronized retries.
                wait_time = min(base_delay * (2 ** attempt), max_delay) + random.uniform(0, 1)
                print(f"Attempt {attempt+1} failed, retrying in {wait_time:.2f} seconds...")
                time.sleep(wait_time)
            else:
                print(f"Failed after {max_retries} attempts: {str(e)}")

    # Reached when every attempt failed, or when max_retries <= 0 and the loop
    # never ran; always return the sentinel string rather than an implicit None.
    return "CoT generation failed"



In [None]:
def add_cot_to_dataset(dataset, sample_size=None):
    """
    Add a "Complex_CoT" column holding a generated chain of thought per row.

    :param dataset: Source dataset; must expose "sentence" and "label" columns.
    :param sample_size: If given, only the first ``sample_size`` rows are
        processed and returned (useful for quick tests). ``None`` keeps all rows.
    :return: A new dataset (restricted to the sampled rows) with the extra
        "Complex_CoT" column.
    :raises ValueError: If ``sample_size`` is negative.
    """
    if sample_size is not None:
        if sample_size < 0:
            raise ValueError("sample_size must be non-negative")
        # Honour sample_size so test runs do not issue one API call per row
        # of the full dataset.
        dataset = dataset.select(range(min(sample_size, len(dataset))))

    texts = dataset["sentence"]  # the SST-2 text field is named "sentence"
    labels = dataset["label"]

    cots = []
    for text, label in tqdm(zip(texts, labels), total=len(texts)):
        cots.append(generate_cot(text, label))
        time.sleep(1)  # throttle the API call rate

    return dataset.add_column("Complex_CoT", cots)


In [None]:
# Train on the first 500 rows of the SST-2 training split.
dataset = load_dataset(path="glue", name="sst2", split="train[:500]")

# Attach a generated chain of thought to every row.
enhanced_dataset = add_cot_to_dataset(dataset=dataset)


In [None]:
# The tokenizer's end-of-sequence token, appended to every training example.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """
    Build the "text" column for a batch of examples.

    Each row's sentence, chain of thought and label are inserted into
    ``train_prompt_style`` and ``EOS_TOKEN`` is appended.

    :param examples: Batch mapping with "sentence", "Complex_CoT" and "label" lists.
    :return: Dict with a single "text" key holding the formatted strings.
    """
    rows = zip(examples["sentence"], examples["Complex_CoT"], examples["label"])
    return {
        "text": [
            train_prompt_style.format(sentence, reasoning, label) + EOS_TOKEN
            for sentence, reasoning, label in rows
        ],
    }

formatted_dataset = enhanced_dataset.map(formatting_prompts_func, batched=True)


In [None]:
# Show the generated chain of thought of the first row as a sanity check.
print(formatted_dataset[0]['Complex_CoT'])

In [None]:
# Show the fully formatted training text of the first row.
print(formatted_dataset[0]['text'])

In [None]:
def inference_example(text, max_new_tokens=1200):
    """
    Generate a response for ``text`` using the module-level model and tokenizer.

    Calls ``FastLanguageModel.for_inference(model)``, formats ``text`` with
    ``prompt_style`` and generates on the "cuda" device.

    :param text: Input text to classify.
    :param max_new_tokens: Maximum number of tokens to generate.
    :return: Everything in the decoded output that follows the first
        "### Response:" marker.
    """
    FastLanguageModel.for_inference(model)
    prompt = prompt_style.format(text, "")
    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    outputs = model.generate(
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        use_cache=True,
    )
    decoded = tokenizer.batch_decode(outputs)[0]
    # maxsplit=1 keeps the whole response even if the generated text repeats
    # the "### Response:" marker; a plain split would cut it at the repeat.
    return decoded.split("### Response:", 1)[1]


# Inference example before training.
text = "I absolutely loved the movie! The acting was superb and the storyline was captivating."
print("训练前推理结果：")
pre_training_output = inference_example(text)
print(pre_training_output)

In [None]:
# Names of the modules passed to get_peft_model as LoRA targets.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the loaded model for LoRA fine-tuning; `model` is rebound to the result.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    target_modules=lora_target_modules,
    use_gradient_checkpointing="unsloth",
    use_rslora=False,
    loftq_config=None,
    random_state=3407,
)


In [None]:
from unsloth import is_bfloat16_supported

# Training hyper-parameters.
# - One full epoch with a warmup ratio is used here; for a short smoke test,
#   swap these for max_steps=60 and warmup_steps=5.
# - fp16 is forced on; the alternative of choosing fp16/bf16 through
#   is_bfloat16_supported() is intentionally not used.
training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    warmup_ratio=0.1,
    weight_decay=0.01,
    optim="adamw_8bit",  # 8-bit AdamW (lower memory use)
    fp16=True,
    logging_steps=10,
    report_to="none",  # turn off wandb reporting
)

# Supervised fine-tuning on the "text" column of the formatted dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=formatted_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=training_args,
)

trainer_stats = trainer.train()


In [None]:
# Run inference on the same `text` again after training, for a before/after comparison.
print("训练后推理结果：")
print(inference_example(text))

In [None]:
# Read the Hugging Face access token from the Kaggle secret store.
hf_token = user_secrets.get_secret("HF_TOKEN")
# The original cell called `login(token=hf_token)`, but `login` is never
# imported in this notebook, so the cell stopped with a NameError. Exporting
# the token through the environment lets later Hub calls authenticate instead.
os.environ["HF_TOKEN"] = hf_token
os.environ["HUGGING_FACE_HUB_TOKEN"] = hf_token  # variable name read by older huggingface_hub releases

# Save the model and tokenizer locally, then the merged 16-bit model.
new_model_local = "DeepSeek-R1-Sentiment-COT"
model.save_pretrained(new_model_local)
tokenizer.save_pretrained(new_model_local)
model.save_pretrained_merged(new_model_local, tokenizer, save_method="merged_16bit")


In [None]:
new_model_online = "MelodyOfTears/DeepSeek-R1-Sentiment-COT"
# Pass the token explicitly to every upload so this cell does not depend on a
# prior global Hub login having succeeded.
model.push_to_hub(new_model_online, token=hf_token)
tokenizer.push_to_hub(new_model_online, token=hf_token)
model.push_to_hub_merged(new_model_online, tokenizer, save_method="merged_16bit", token=hf_token)