# 快速开始

## 1. 准备模型权重

### huggingface下载

In [None]:
# Install (or upgrade) the Hugging Face Hub client used by the download command below.
!pip install --upgrade huggingface_hub

# Download the model into Shanghai_AI_Laboratory/internlm2-chat-7b
!huggingface-cli download internlm/internlm2-chat-7b \
    --resume-download \
    --local-dir-use-symlinks False \
    --local-dir Shanghai_AI_Laboratory/internlm2-chat-7b

### modelscope下载

In [None]:
# Install (or upgrade) the ModelScope client used by the download command below.
!pip install --upgrade modelscope

# Download the model, using the current directory as the cache directory.
!python -c "from modelscope import snapshot_download as download_model; download_model('Shanghai_AI_Laboratory/internlm2-chat-7b', cache_dir='.')"

## 2. 准备微调数据集

### huggingface下载
```bash
git clone https://huggingface.co/datasets/burkelibbey/colors
```

### modelscope下载
```bash
git clone https://www.modelscope.cn/datasets/fanqiNO1/colors.git
```

## 3. 准备配置文件
复制一个配置文件到当前目录  
xtuner copy-cfg internlm2_7b_qlora_colorist_e5 .  
- 模型：internlm2_7b
- 微调方法：qlora
- 微调数据集：colorist
- 训练轮次：5epochs

## 4. 修改配置文件

In [None]:
# Edits to apply to the copied config file, shown as a diff: lines starting
# with `-` are the original values, lines starting with `+` are their
# replacements, and all other lines are unchanged context.
#######################################################################
#                          PART 1  Settings                           #
#######################################################################
# Model: point at the local weights directory instead of the hub model id.
- pretrained_model_name_or_path = 'internlm/internlm2-7b'
+ pretrained_model_name_or_path = './Shanghai_AI_Laboratory/internlm2-chat-7b'

# Data: read the local train.jsonl (presumably from the cloned `colors`
# dataset — confirm the path) and switch to the internlm2_chat prompt template.
- data_path = 'burkelibbey/colors'
+ data_path = './colors/train.jsonl'
- prompt_template = PROMPT_TEMPLATE.default
+ prompt_template = PROMPT_TEMPLATE.internlm2_chat

...
#######################################################################
#                      PART 3  Dataset & Dataloader                   #
#######################################################################
train_dataset = dict(
    type=process_hf_dataset,
    # data_path is now a local file, so it is passed to the `json` loader as
    # the `train` split rather than being used as the dataset path itself.
-   dataset=dict(type=load_dataset, path=data_path),
+   dataset=dict(type=load_dataset, path='json', data_files=dict(train=data_path)),
    tokenizer=tokenizer,
    max_length=max_length,
    dataset_map_fn=colors_map_fn,
    template_map_fn=dict(
        type=template_map_fn_factory, template=prompt_template),
    remove_unused_columns=True,
    shuffle_before_pack=True,
    pack_to_max_length=pack_to_max_length)

## 5. 启动微调
```bash
# 单机单卡
xtuner train ./internlm2_7b_qlora_colorist_e5_copy.py
# 单机多卡
NPROC_PER_NODE=${GPU_NUM} xtuner train ./internlm2_7b_qlora_colorist_e5_copy.py
# slurm 情况
srun ${SRUN_ARGS} xtuner train ./internlm2_7b_qlora_colorist_e5_copy.py --launcher slurm
```

## 6. 模型转换
训练获得的.pth文件转为huggingface格式

In [None]:
# Create the directory that will hold the Hugging Face-format weights.
# `-p` keeps the cell re-runnable: no error if the directory already exists.
!mkdir -p work_dirs/internlm2_7b_qlora_colorist_e5_copy/iter_720_hf

# Convert the trained .pth checkpoint to Hugging Face format.
# Arguments, in order: config file, source .pth checkpoint, output directory.
# The leading `!` runs the line as a shell command; without it the code cell
# would be parsed as Python and fail.
!xtuner convert pth_to_hf internlm2_7b_qlora_colorist_e5_copy.py \
    work_dirs/internlm2_7b_qlora_colorist_e5_copy/iter_720.pth \
    work_dirs/internlm2_7b_qlora_colorist_e5_copy/iter_720_hf

## 7. LoRA合并

In [None]:
# Create the directory that will hold the merged weights.
# `-p` keeps the cell re-runnable: no error if the directory already exists.
!mkdir -p work_dirs/internlm2_7b_qlora_colorist_e5_copy/merged

# Merge the adapter into the base model.
# Arguments, in order: base model directory, Hugging Face-format adapter
# directory, output directory. Output shards are capped at 2GB each.
# The leading `!` runs the line as a shell command; without it the code cell
# would be parsed as Python and fail.
!xtuner convert merge Shanghai_AI_Laboratory/internlm2-chat-7b \
    work_dirs/internlm2_7b_qlora_colorist_e5_copy/iter_720_hf \
    work_dirs/internlm2_7b_qlora_colorist_e5_copy/merged \
    --max-shard-size 2GB

## 8. 模型对话

合并后模型

In [None]:
# Chat with the merged model, using the internlm2_chat prompt template and
# the colorist system template.
# NOTE(review): this is a shell command with no `!` prefix; presumably it is
# meant to be run from a terminal rather than executed in the notebook — confirm.
xtuner chat work_dirs/internlm2_7b_qlora_colorist_e5_copy/merged \
    --system-template colorist \
    --prompt-template internlm2_chat

未合并模型，直接与 LLM + LoRA Adapter 进行对话

In [None]:
# Chat with the base LLM plus the LoRA adapter directly, without merging.
# Every line except the last must end with `\` so the shell treats the
# options as part of the same command.
# NOTE(review): this is a shell command with no `!` prefix; presumably it is
# meant to be run from a terminal rather than executed in the notebook — confirm.
xtuner chat Shanghai_AI_Laboratory/internlm2-chat-7b \
    --adapter work_dirs/internlm2_7b_qlora_colorist_e5_copy/iter_720_hf \
    --prompt-template internlm2_chat \
    --system-template colorist