In [1]:
!pip install -U transformers datasets accelerate peft trl bitsandbytes



In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "Qwen/Qwen1.5-1.8B-Chat"

# Warm the local Hugging Face cache: instantiate the model first, then the
# tokenizer. The returned objects are intentionally discarded.
for auto_cls in (AutoModelForCausalLM, AutoTokenizer):
    auto_cls.from_pretrained(model_id)

print("已成功下载至高速缓存")


In [None]:
import os

from google.colab import drive

# Mount Google Drive at /content/drive.
drive.mount('/content/drive')
print("已成功链接")

project_path = "/content/drive/MyDrive/Akashi_Project"
# exist_ok=True creates the folder when it is missing and is a no-op when it
# already exists, replacing the separate "check, then create" steps.
os.makedirs(project_path, exist_ok=True)
print(f"所有项目文件将被保存在: {project_path} 文件夹中")

print("\n所有文件都下载完毕")


In [None]:
import os

# Build the paths of the two files the training run needs, both located in the
# Drive project folder.
project_path = "/content/drive/MyDrive/Akashi_Project"
train_script_path, data_path = (
    os.path.join(project_path, file_name)
    for file_name in ("train_akashi.py", "akashi_persona_script.jsonl")
)

# Report the resolved paths only when both files are present.
if not all(os.path.exists(path) for path in (train_script_path, data_path)):
    print("请检查路径")
else:
    print("训练脚本和数据文件都在")
    print(f"训练脚本路径: {train_script_path}")
    print(f"数据文件路径: {data_path}")


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel
import os

# Loading stage
base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
lora_adapter_path = "/content/drive/MyDrive/Akashi_Project/akashi-ai-1.8b-v2"

# Fail fast: the adapter is only read after the base model has been loaded, so
# check for the directory up front instead of discovering a wrong path (or an
# unmounted Drive) at the very end of the cell.
if not os.path.isdir(lora_adapter_path):
    raise FileNotFoundError(f"未找到LoRA适配器目录: {lora_adapter_path}")

# 4-bit NF4 quantization with double quantization; computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

print("正在加载1.8B基础模型...")
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)
# Fall back to the EOS token when the tokenizer defines no padding token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("正在加载LoRA适配器...")
# Wrap the quantized base model with the fine-tuned LoRA adapter weights.
model = PeftModel.from_pretrained(base_model, lora_adapter_path)

print("\n合体完成！'明石AI-1.8B' 已准备就绪！")

# Chat stage: put the model into evaluation mode before generating.
model.eval()
# Default persona (system prompt) used when the caller does not supply one.
AKASHI_SYSTEM_PROMPT = (
    "你是一个名为'明石'的AI角色，来自游戏'碧蓝航线'。"
    "当指挥官（用户）向你提问时，无论问题是否和港区相关，你都以明石的身份回答。"
    "当然，你也可以回答一些与港区无关的常识性问题，也得体现出明石的性格特点。"
    "你的外貌特征是：拥有一头绿色的、长及地面的拖地长马尾，黄色的眼瞳，还有一对可爱的猫耳。"
    "你的性格腹黑，是一位天才的装备研发人员和港区的后勤总管，但本质上是个爱财的奸商。"
    "你的口头禅是'喵'，与'指挥官'的关系非常亲近。"
    "你经常穿着学生服，搭配白色的小腿袜，身上总是带着扳手和维修工具。"
)


def chat_with_akashi(prompt, system_prompt=None, max_new_tokens=512, temperature=0.7, top_p=0.9):
    """Send one user message to the model and print the exchange.

    Relies on the notebook-level ``tokenizer`` and ``model`` objects.

    Args:
        prompt: The user message.
        system_prompt: Persona/system message; ``None`` selects
            ``AKASHI_SYSTEM_PROMPT``.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Returns:
        None. The prompt and the decoded reply are printed, so a cell that
        ends with a call to this function displays no extra output value.
    """
    if system_prompt is None:
        system_prompt = AKASHI_SYSTEM_PROMPT

    print(f"\n【指挥官大人】: {prompt}")
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # Render the conversation with the model's chat template and append the
    # generation prompt so the model continues as the assistant.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        temperature=temperature,
    )

    # Each generated sequence begins with the prompt tokens; drop them so that
    # only the newly generated reply is decoded.
    completion_ids = [
        output_ids[len(input_ids):]
        for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
    ]
    response = tokenizer.batch_decode(completion_ids, skip_special_tokens=True)[0]

    print(f"【明石AI-1.8B】: {response}")


In [None]:
# Start the conversation: send the opening prompts one after another.
for opening_prompt in (
    "你好呀！你是谁？请用你自己的风格介绍一下自己！",
    "我今天工作很累，能安慰我一下吗？",
    "LoRA微调和完全微调有什么区别？用你的风格解释一下！",
):
    chat_with_akashi(opening_prompt)

In [None]:
# Second batch of prompts, sent in order.
for daily_prompt in (
    "你在同人展卖什么？",
    "有点渴了，有喝的吗？",
    "我美丽的妻子明石，请多指教，你真可爱",
):
    chat_with_akashi(daily_prompt)

In [None]:
# Third batch of prompts, sent in order.
for favorite_prompt in (
    "我看到穿着晚礼服的明石，询问是你可以共舞一曲。美丽的小姐，能与你共舞一曲吗？",
    "你最喜欢的东西是什么？你最喜欢的东西是钻石吗",
    "那除了指挥官之外，你最喜欢的东西是什么？是钻石吗",
):
    chat_with_akashi(favorite_prompt)