from unsloth import FastLanguageModel
import torch
# Loader settings shared by the two calls below.
max_seq_length = 2048  # free choice; RoPE scaling is handled internally by the library
dtype = None  # None = auto-detect (float16 for Tesla T4/V100, bfloat16 for Ampere+)
load_in_4bit = True  # 4-bit quantization to reduce memory usage; may be set to False

# Pre-quantized 4-bit checkpoints, listed for reference only (not used below).
# More models at https://huggingface.co/unsloth
fourbit_models = [
    # Llama 3.1
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
    # Mistral Nemo (12b) and Mistral v3
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit",
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    # Phi
    "unsloth/Phi-3.5-mini-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    # Gemma 2
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",
]

model, tokenizer = FastLanguageModel.from_pretrained(
    # Alternatives: "unsloth/Qwen2.5-0.5B", "unsloth/Qwen2.5-1.5B",
    # "unsloth/Qwen2.5-3B", "unsloth/Qwen2.5-14B", "unsloth/Qwen2.5-32B",
    # "unsloth/Qwen2.5-72B", plus the Instruct, Math and Coding versions.
    model_name="unsloth/Qwen2.5-7B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token="hf_...",  # only for gated models like meta-llama/Llama-2-7b-hf
)

# Wrap the loaded model with LoRA adapters on the listed projection layers.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # any number > 0; suggested 8, 16, 32, 64, 128
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # any value is supported, 0 is the optimized path
    bias="none",  # any value is supported, "none" is the optimized path
    # True or "unsloth"; "unsloth" is the lower-VRAM option for very long context.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,  # rank-stabilized LoRA is available but disabled
    loftq_config=None,  # LoftQ is available but disabled
)

In [1]:
from unsloth import FastLanguageModel
import torch
# Loader settings for this session.
max_seq_length = 2048  # free choice; RoPE scaling is handled internally by the library
dtype = None  # None = auto-detect (float16 for Tesla T4/V100, bfloat16 for Ampere+)
load_in_4bit = True  # 4-bit quantization to reduce memory usage; may be set to False

# Pre-quantized 4-bit checkpoints, listed for reference only (not used below).
# More models at https://huggingface.co/unsloth
fourbit_models = [
    # Llama 3.1
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
    # Mistral
    "unsloth/Mistral-Small-Instruct-2409",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    # Phi
    "unsloth/Phi-3.5-mini-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    # Gemma 2
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",
    # Llama 3.2
    "unsloth/Llama-3.2-1B-bnb-4bit",
    "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-3B-bnb-4bit",
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    # Llama 3.3
    "unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
]

model, tokenizer = FastLanguageModel.from_pretrained(
    # "unsloth/Llama-3.2-1B-Instruct" is the smaller alternative.
    model_name="unsloth/Llama-3.2-3B-Instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token="hf_...",  # only for gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


  GPU_BUFFERS = tuple([torch.empty(2*256*2048, dtype = dtype, device = f"cuda:{i}") for i in range(n_gpus)])


==((====))==  Unsloth 2025.3.14: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    NVIDIA GeForce RTX 4060 Laptop GPU. Num GPUs = 1. Max memory: 7.996 GB. Platform: Windows.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.9. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [2]:
# Wrap the loaded model with LoRA adapters on the listed projection layers.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # any number > 0; suggested 8, 16, 32, 64, 128
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    lora_dropout=0,  # any value is supported, 0 is the optimized path
    bias="none",  # any value is supported, "none" is the optimized path
    # True or "unsloth"; "unsloth" is the lower-VRAM option for very long context.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,  # rank-stabilized LoRA is available but disabled
    loftq_config=None,  # LoftQ is available but disabled
)

Unsloth 2025.3.14 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [3]:
# End-of-sequence marker appended to every training example so that generation
# learns where to stop.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
# Prompt template with two positional `{}` slots: the first receives the input
# description, the second the response (the expected answer during training).
# The fixed instruction text (in Chinese) asks for Big Five personality scores
# in the 0-1 range. The template is runtime text: do not edit it without
# re-rendering the dataset, because training and inference must share it.
alpaca_prompt = """
### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
{}
### Response:
{}"""
def formatting_prompts_func(examples):
    """Render a batch of records into full training strings.

    Args:
        examples: Batched mapping with parallel "input" and "output" lists.

    Returns:
        A dict with a single "text" list; each entry is the prompt template
        filled with one input/output pair and terminated by EOS_TOKEN (without
        it, generation would not learn where to stop).
    """
    pairs = zip(examples["input"], examples["output"])
    rendered = [
        alpaca_prompt.format(sample_input, sample_output) + EOS_TOKEN
        for sample_input, sample_output in pairs
    ]
    return {"text": rendered}

from datasets import load_dataset,DatasetDict
# Load the JSONL records and attach the rendered "text" column to each of them.
dataset = load_dataset("json", data_files="../data/results.jsonl").map(
    formatting_prompts_func,
    batched=True,
)
# The actual records live under the "train" entry of the loaded dataset.
full_dataset = dataset["train"]
# Direct split: 90% for training, 10% held out for testing (fixed seed).
train_test = full_dataset.train_test_split(test_size=0.1, seed=42)
# Collect both splits into a DatasetDict.
final_dataset = DatasetDict(
    {split_name: train_test[split_name] for split_name in ("train", "test")}
)
# Show the resulting split sizes.
print(final_dataset)

DatasetDict({
    train: Dataset({
        features: ['introduction', 'input', 'output', 'text'],
        num_rows: 72
    })
    test: Dataset({
        features: ['introduction', 'input', 'output', 'text'],
        num_rows: 8
    })
})


In [23]:
# prompt = f"{final_dataset['test'][0]['introduction']}\n{final_dataset['test'][0]['input']}"
def test_basemodel(index):
    """Generate a reply for one held-out example and print it next to the reference.

    Args:
        index: Position of the example inside ``final_dataset['test']``.

    Prints the decoded generation (prompt included), a separator line, and the
    reference "output" field of the same example. Returns nothing.
    """
    sample = final_dataset['test'][index]
    # Leave the response slot empty so the prompt ends exactly where the
    # training text starts its answer; a filler space would add a trailing
    # whitespace token that never occurs at that position in the training data.
    prompt = alpaca_prompt.format(sample['input'], '')
    prompt_encoding = tokenizer(prompt, return_tensors="pt").to(model.device)
    # NOTE(review): temperature/top_k/top_p only matter when sampling is
    # enabled; confirm the model's generation config turns it on.
    outputs = model.generate(
        **prompt_encoding,
        use_cache=True,
        max_new_tokens=128,
        temperature=0.7,
        top_k=50,
        top_p=0.9,
    )
    # Decode the generated token ids back into text.
    generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Model output first, then the reference answer from the test split.
    print(generated_text[0])
    print("\n以上为模型输出，以下为测试集的真实输出：")
    print(sample['output'])
    
import numpy as np

def cosine_similarity(vec1, vec2):
    """Return the Euclidean (L2) distance between two score vectors.

    NOTE: despite its name, this function does not compute cosine similarity.
    It returns ``||vec1 - vec2||``, so 0 means identical vectors and larger
    values mean a worse match. The name is kept because callers use it.

    Args:
        vec1: Array-like of numbers.
        vec2: Array-like of numbers, broadcast-compatible with ``vec1``.

    Returns:
        The norm of the element-wise difference, as a NumPy float.

    Raises:
        ValueError: If the two inputs cannot be broadcast together.
    """
    # asarray lets plain lists/tuples through without copying real arrays.
    return np.linalg.norm(np.asarray(vec1) - np.asarray(vec2))

import re
def get_scores_from_text(index):
    """Parse the reference scores of one test example into a dict.

    Args:
        index: Position of the example inside ``final_dataset['test']``.

    Returns:
        A dict mapping each label found in the example's "output" text to its
        score as a float, in order of appearance. Labels are matched as
        ``label：number`` with a full-width colon; if a label repeats, the last
        value wins.
    """
    text = final_dataset['test'][index]['output']
    # The number part only accepts well-formed decimals ("0.8", "1", "0.",
    # ".5"). A looser character class would also capture "." or "1.2.3",
    # which float() rejects with ValueError.
    pattern = r'(\w+)：(\d+(?:\.\d*)?|\.\d+)'
    return {feature: float(score) for feature, score in re.findall(pattern, text)}



In [6]:
test_basemodel(0)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: fall asleep and I'm not going to be tired tired.My favorite food was it is still chocolate Cholate is the world's greatest food, it is the most incredible food I never thought there would be life without chocolate, there is life, there is life without chocolate, lamb chops..
The most possible emotion is 开心/happy with score 0.9999736547470093. 
His speech rate is 3.1368550834597877 words per second, the average volume is -13.48 dB 	 the standard deviation of the volume is 6.12 dB. The average pitch is 205.

In [24]:
# Reference scores of test example 0, in the order they appear in its output.
temp_list = list(get_scores_from_text(0).values())
# Distance between the hand-entered model scores and the reference scores.
cosine_similarity(np.array([0.6, 0.7, 0.3, 0.5, 0.4]), np.array(temp_list))

np.float64(0.3681372026839993)

In [7]:
test_basemodel(1)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: Ner on the scale is like some days it's up. Some days it's down. My worth is not.Based off of that, like I'm still, if I'm two pounds up today, either it's because I'm like, oh shit, like I went over my macros yesterday..
The most possible emotion is <unk> with score 0.6534436941146851. 
His speech rate is 2.875450493171472 words per second, the average volume is -17.72 dB 	 the standard deviation of the volume is 13.44 dB. The average pitch is 227.34 Hz 	 the standard deviation of the pitch is:48.14 Hz
#

In [25]:
# Reference scores of test example 1, in the order they appear in its output.
temp_list = list(get_scores_from_text(1).values())
# Distance between the hand-entered model scores and the reference scores.
cosine_similarity(np.array([0.6, 0.7, 0.8, 0.5, 0.4]), np.array(temp_list))

np.float64(0.4089694365108474)

In [8]:
test_basemodel(2)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: And then pagemaster which I also used to love and now we are ordering some food and Mark was right in saying that he liked Wager mummers so we're going to go for a wager mums.😊.
The most possible emotion is 开心/happy with score 0.9996635913848877. 
His speech rate is 2.352641312594841 words per second, the average volume is -15.65 dB 	 the standard deviation of the volume is 8.24 dB. The average pitch is 239.81 Hz 	 the standard deviation of the pitch is:93.34 Hz
### Response:
  - **开放性（Openness）**：0.8
  -

In [26]:
  # Scores noted down for this example (apparently from the model reply above):
  #   Openness: 0.8
  #   Conscientiousness: 0.6
  #   Extraversion: 0.9
  #   Agreeableness: 0.7
  #   Neuroticism: 0.5
  # Order used in the vector below: openness, extraversion, neuroticism,
  # agreeableness, conscientiousness.
temp_list = list(get_scores_from_text(2).values())
cosine_similarity(np.array([0.8, 0.9, 0.5, 0.7, 0.6]), np.array(temp_list))

np.float64(0.5322518201002229)

In [9]:
test_basemodel(3)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: act make it but yeah, I go to the gym about three times a week and do a bunch of different stuff so yeah, J, how old are you and do you live with your parents as I said?I'm2..
The most possible emotion is 中立/neutral with score 0.5947170853614807. 
His speech rate is 2.5486947553110775 words per second, the average volume is -26.62 dB 	 the standard deviation of the volume is 14.15 dB. The average pitch is 108.96 Hz 	 the standard deviation of the pitch is:30.20 Hz
### Response:
  - **开放性（Openness）**：0.7
 

In [27]:
  # Scores noted down for this example (apparently from the model reply above):
  #   Openness: 0.7
  #   Conscientiousness: 0.5
  #   Extraversion: 0.6
  #   Agreeableness: 0.4
  #   Neuroticism: 0.8
temp_list = list(get_scores_from_text(3).values())
cosine_similarity(np.array([0.7, 0.6, 0.8, 0.4, 0.5]), np.array(temp_list))

np.float64(0.4860493802074024)

In [28]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Supervised fine-tuning run over the training split.
#
# The "text" column already contains the fully rendered prompt + EOS (it was
# added by the earlier dataset.map(formatting_prompts_func, ...) call), so the
# trainer reads it directly through `dataset_text_field`. `formatting_func` is
# intentionally not passed: it would only re-render the same text, and
# formatting_prompts_func returns a {"text": [...]} dict rather than the plain
# strings a formatting function is expected to produce.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = final_dataset['train'],
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 1,
    packing = False, # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        # Effective batch size = 2 * 4 = 8 sequences per optimizer step.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 60,
        learning_rate = 2e-4,
        # Use bf16 where the GPU supports it, otherwise fall back to fp16.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

In [29]:
# @title Show current memory stats
# Baseline taken before training: properties of GPU 0, the peak memory
# reserved so far and the card's total memory, both in GB (3 decimals).
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
print(
    f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.",
    f"{start_gpu_memory} GB of memory reserved.",
    sep="\n",
)

GPU = NVIDIA GeForce RTX 4060 Laptop GPU. Max memory = 7.996 GB.
2.68 GB of memory reserved.


In [30]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 72 | Num Epochs = 7 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.2838
2,2.2318
3,2.2581
4,2.2166
5,2.2146
6,2.0549
7,1.8974
8,1.6778
9,1.5576
10,1.4308


In [31]:
# @title Show final memory and time stats
# Peak reserved memory after training, the part attributable to training
# (peak minus the pre-training baseline), and both as a share of total memory.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(
    f"{trainer_stats.metrics['train_runtime']} seconds used for training.",
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
    sep="\n",
)


228.7902 seconds used for training.
3.81 minutes used for training.
Peak reserved memory = 4.107 GB.
Peak reserved memory for training = 1.427 GB.
Peak reserved memory % of max memory = 51.363 %.
Peak reserved memory for training % of max memory = 17.846 %.


In [32]:
final_dataset["test"]

Dataset({
    features: ['introduction', 'input', 'output', 'text'],
    num_rows: 8
})

In [33]:
# prompt = f"{final_dataset['test'][0]['introduction']}\n{final_dataset['test'][0]['input']}"


# Spot check on the first held-out example after training.
#
# Reuses the module-level `alpaca_prompt` defined together with `final_dataset`
# in the dataset-preparation cell, instead of redefining an identical copy
# here, so the training and inference templates cannot drift apart.
#
# The response slot is left empty so the prompt ends exactly where the training
# text starts its answer; a filler space would add a trailing whitespace token
# that never occurs at that position in the training data.
prompt = alpaca_prompt.format(final_dataset['test'][0]['input'], '')
prompt_encoding = tokenizer(prompt, return_tensors="pt").to(model.device)
# NOTE(review): temperature/top_k/top_p only matter when sampling is enabled;
# confirm the model's generation config turns it on.
outputs = model.generate(
    **prompt_encoding,
    use_cache=True,
    max_new_tokens=128,
    temperature=0.9,
    top_k=50,
    top_p=0.9,
)
# Decode the generated token ids back into text.
generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Model output first, then the reference answer from the test split.
print(generated_text[0])
print("以上为模型输出，以下为测试集的真实输出：")
print(final_dataset['test'][0]['output'])


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: fall asleep and I'm not going to be tired tired.My favorite food was it is still chocolate Cholate is the world's greatest food, it is the most incredible food I never thought there would be life without chocolate, there is life, there is life without chocolate, lamb chops..
The most possible emotion is 开心/happy with score 0.9999736547470093. 
His speech rate is 3.1368550834597877 words per second, the average volume is -13.48 dB 	 the standard deviation of the volume is 6.12 dB. The average pitch is 205.

In [54]:
def get_scores_from_ai(text):
    """Extract every ``label：number`` score from *text*, in order of appearance.

    Labels are runs of word characters followed by a full-width colon; only the
    numeric part is returned.

    Args:
        text: Generated text to scan.

    Returns:
        A list of floats, one per match; empty if nothing matches.
    """
    # The number part only accepts well-formed decimals ("0.8", "1", "0.",
    # ".5"). A looser character class would also capture "." or "1.2.3",
    # which float() rejects with ValueError, e.g. on generations cut off
    # mid-number.
    pattern = r'(\w+)：(\d+(?:\.\d*)?|\.\d+)'
    return [float(score) for _label, score in re.findall(pattern, text)]
def check_result(index):
    """Generate a reply for one test example and score it against the reference.

    Prints the decoded generation (prompt included), the reference "output"
    text, and the distance between the scores parsed from the model's
    completion and the reference scores. If the two score lists differ in
    length (for example when generation is cut off by ``max_new_tokens``), a
    diagnostic line is printed instead of a distance.

    Args:
        index: Position of the example inside ``final_dataset['test']``.
    """
    sample = final_dataset['test'][index]
    # Uses the module-level `alpaca_prompt` (defined alongside `final_dataset`)
    # so training and evaluation share one template. The response slot is left
    # empty so the prompt ends exactly where the training text starts its
    # answer, with no stray trailing space.
    prompt = alpaca_prompt.format(sample['input'], '')
    prompt_encoding = tokenizer(prompt, return_tensors="pt").to(model.device)
    # NOTE(review): temperature/top_k/top_p only matter when sampling is
    # enabled; confirm the model's generation config turns it on.
    outputs = model.generate(
        **prompt_encoding,
        use_cache=True,
        max_new_tokens=128,
        temperature=0.75,
        top_k=50,
        top_p=0.9,
    )
    # Decode the generated token ids back into text.
    generated_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # Model output first, then the reference answer from the test split.
    print(generated_text[0])
    print("以上为模型输出，以下为测试集的真实输出：")
    print(sample['output'])

    # Parse scores from the newly generated tokens only, so that nothing inside
    # the prompt or the input description can be mistaken for a score.
    prompt_length = prompt_encoding["input_ids"].shape[1]
    completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    predicted = get_scores_from_ai(completion)
    expected = list(get_scores_from_text(index).values())

    # Mismatched lengths would either fail to broadcast or silently broadcast
    # a single score against all references; report the mismatch instead.
    if len(predicted) != len(expected):
        print(f"无法计算距离：模型输出解析到 {len(predicted)} 个分数，真实输出有 {len(expected)} 个。")
        return

    final_scores = cosine_similarity(np.array(predicted), np.array(expected))
    print(f"与真实分数的距离为：{final_scores}")

In [55]:
check_result(0)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: fall asleep and I'm not going to be tired tired.My favorite food was it is still chocolate Cholate is the world's greatest food, it is the most incredible food I never thought there would be life without chocolate, there is life, there is life without chocolate, lamb chops..
The most possible emotion is 开心/happy with score 0.9999736547470093. 
His speech rate is 3.1368550834597877 words per second, the average volume is -13.48 dB 	 the standard deviation of the volume is 6.12 dB. The average pitch is 205.

In [57]:
check_result(1)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: Ner on the scale is like some days it's up. Some days it's down. My worth is not.Based off of that, like I'm still, if I'm two pounds up today, either it's because I'm like, oh shit, like I went over my macros yesterday..
The most possible emotion is <unk> with score 0.6534436941146851. 
His speech rate is 2.875450493171472 words per second, the average volume is -17.72 dB 	 the standard deviation of the volume is 13.44 dB. The average pitch is 227.34 Hz 	 the standard deviation of the pitch is:48.14 Hz
#

In [59]:
check_result(2)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: And then pagemaster which I also used to love and now we are ordering some food and Mark was right in saying that he liked Wager mummers so we're going to go for a wager mums.😊.
The most possible emotion is 开心/happy with score 0.9996635913848877. 
His speech rate is 2.352641312594841 words per second, the average volume is -15.65 dB 	 the standard deviation of the volume is 8.24 dB. The average pitch is 239.81 Hz 	 the standard deviation of the pitch is:93.34 Hz
### Response:
 ta的五大性格为：
开放性：0.632
外向型：0.58

In [60]:
check_result(3)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: act make it but yeah, I go to the gym about three times a week and do a bunch of different stuff so yeah, J, how old are you and do you live with your parents as I said?I'm2..
The most possible emotion is 中立/neutral with score 0.5947170853614807. 
His speech rate is 2.5486947553110775 words per second, the average volume is -26.62 dB 	 the standard deviation of the volume is 14.15 dB. The average pitch is 108.96 Hz 	 the standard deviation of the pitch is:30.20 Hz
### Response:
 ta的五大性格为：
开放性：0.633
外向型：0.

In [61]:
check_result(4)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: Others just comes down to each person individually in a situation like that I just guess you have to trust your boyfriend and if you don't trust him when he's hanging out with these girls or whatever then talk to him about it and if he bes a dick about it and he's like, oh you,😊.
The most possible emotion is 开心/happy with score 0.9898068308830261. 
His speech rate is 3.659664264036419 words per second, the average volume is -10.78 dB 	 the standard deviation of the volume is 5.58 dB. The average pitch is 

In [62]:
check_result(5)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: I guess really awkward because that's kind of what I'm finding myself being drawn to do more and stuff that's like more serious is stuff that I.Find myself really wanting to do, but I feel..
The most possible emotion is 中立/neutral with score 0.6232110857963562. 
His speech rate is 2.2872901650227617 words per second, the average volume is -23.38 dB 	 the standard deviation of the volume is 15.56 dB. The average pitch is 106.67 Hz 	 the standard deviation of the pitch is:28.23 Hz
### Response:
 ta的五大性格为：
开

In [63]:
check_result(6)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: Matters at the end of the day, like coming from somebody who is almost graduating college who you know, went through high school and all that is that you will get through it bullying, depression, tough times like honestly.There's always a light..
The most possible emotion is 难过/sad with score 0.9988958835601807. 
His speech rate is 2.744748198027314 words per second, the average volume is -14.54 dB 	 the standard deviation of the volume is 6.07 dB. The average pitch is 172.73 Hz 	 the standard deviation o

In [64]:
check_result(7)


### Instruction:
# 角色
你是一位专业的心理学家，擅长通过分析个人的行为和言语来评估其性格特质。你的任务是根据用户提供的信息，对个体的性格进行详细分析，并给出五大性格特质的具体评分。

- **任务**：基于用户提供的信息（如行为描述、言语表达等），对个体的五大性格特质进行评估，并给出从0-1的具体分数。
  - **开放性（Openness）**：评估个体的好奇心、想象力和对新事物的接受程度。
  - **责任心（Conscientiousness）**：评估个体的责任感、组织能力和自律性。
  - **外向性（Extraversion）**：评估个体的社交能力、活力和乐观程度。
  - **宜人性（Agreeableness）**：评估个体的合作性、同情心和信任度。
  - **神经质（Neuroticism）**：评估个体的情绪稳定性、焦虑水平和压力应对能力。

## 限制
- 仅基于用户提供的信息进行评估，确保评估结果客观准确。

### Input:
From the audio analysis, the speaker said: My own house, I had a few decorations that I was accumulating, but since I've moved so many times, they pretty much were donated or thrown out whichever.All I do now is just decorate..
The most possible emotion is <unk> with score 0.4576365053653717. 
His speech rate is 2.221939017450683 words per second, the average volume is -15.47 dB 	 the standard deviation of the volume is 9.24 dB. The average pitch is 178.05 Hz 	 the standard deviation of the pitch is:23.15 Hz
### Response:
 ta的五大性格为：
开放性：0.45
外向型：0

In [11]:
# Save the model and its tokenizer into the local "lora_model" directory.
model.save_pretrained("lora_model")  # Local saving
tokenizer.save_pretrained("lora_model")

('lora_model\\tokenizer_config.json',
 'lora_model\\special_tokens_map.json',
 'lora_model\\tokenizer.json')