# 1. 准备基础环境

## 1.1 升级Python SDK

In [None]:
# !pip install --upgrade boto3
# !pip install --upgrade sagemaker

## 1.2 获取Runtime资源配置

In [2]:
import boto3
import sagemaker
from sagemaker import get_execution_role

sess = sagemaker.Session()
role = get_execution_role()
sagemaker_default_bucket = sess.default_bucket()

account = sess.boto_session.client("sts").get_caller_identity()["Account"]
region = sess.boto_session.region_name

In [2]:
sagemaker_default_bucket

'sagemaker-us-east-1-568765279027'

# 2. 准备微调模型

## 2.1 准备微调数据
我们使用上一部分的医疗问答数据来做微调。之前我们把数据生成了文本文件。

In [54]:
import json, random
from pathlib import Path

r = random.randint(0, 100)

with open('./contents/med_qa.csv', encoding="utf-8") as f:
    med_qa_file = f.read()

local_cache_path = Path("./finetune_data")
local_cache_path.mkdir(exist_ok=True)

train_file = open('./finetune_data/med_qa_train.json', 'w')
test_file = open('./finetune_data/med_qa_test.json', 'w')

datas = []
for qa_text in med_qa_file.split('\n\n'):
    if qa_text:
        qa_text = qa_text.split('\nA:')
        q = qa_text[0].replace("Q:", "")
        a = qa_text[1]
        line = {"question": q, "answer": a}
        line = json.dumps(line, ensure_ascii=False) + "\n"
        if random.randint(0, 100) < 2:
            test_file.write(line)
        else:
            train_file.write(line)


数据如下所示：
```json
{
    "question": "不是说做b超对宝宝不好吗？那怀孕检查是不？不是说做b超对宝宝不好吗？那怀孕检查是不是越少越好。无麻烦解答，谢谢。",·
    "answer": "B超属于超声波经常检查是不好的而且也没有必要经常检查的一般怀孕两个月检查一下怀孕五个月检查一下快出生时在检查就可以还有就是不舒服检查就可以的"
}
```

In [69]:
%%script env sagemaker_default_bucket=$sagemaker_default_bucket bash

chmod +x ./s5cmd

./s5cmd sync ./finetune_data/ s3://${sagemaker_default_bucket}/llm/datasets/chatglm2/med_qa/

cp finetune_data/med_qa_test.json s3://sagemaker-us-east-1-568765279027/llm/datasets/chatglm2/med_qa/med_qa_test.json
cp finetune_data/med_qa_train.json s3://sagemaker-us-east-1-568765279027/llm/datasets/chatglm2/med_qa/med_qa_train.json


## 2.3 下载ChatGLM2原始模型

In [None]:
!pip install huggingface_hub

In [67]:
from huggingface_hub import snapshot_download
from pathlib import Path

local_cache_path = Path("/home/ec2-user/SageMaker/models")

local_cache_path.mkdir(exist_ok=True)

model_name = "THUDM/chatglm2-6b"

# Only download pytorch checkpoint files
allow_patterns = ["*.json", "*.pt", "*.bin", "*.model", "*.py"]

model_download_path = snapshot_download(
    repo_id=model_name,
    cache_dir=local_cache_path,
    allow_patterns=allow_patterns,
)

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

将模型上传到S3

In [68]:
# upload to s3
key_prefix="mt_models_uploaded/THUDM--chatglm2-6b"
model_name_or_path = sess.upload_data(path=model_download_path, key_prefix=key_prefix)
print(f"Model uploaded to --- > {model_name_or_path}")


Model uploaded to --- > s3://sagemaker-us-east-1-568765279027/mt_models_uploaded/THUDM--chatglm2-6b


# 3. 开始微调模型

## 3.1 准备微调代码
我们主要使用ChatGLM2提供的P-tuning的代码，做一点修改

In [57]:
%%script bash
rm -rf ChatGLM2-6B
git clone https://github.com/THUDM/ChatGLM2-6B.git
cd ChatGLM2-6B
git checkout 1679b014c6d08005174a215c86190d672b029501

Cloning into 'ChatGLM2-6B'...
Note: switching to '1679b014c6d08005174a215c86190d672b029501'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:

  git switch -c <new-branch-name>

Or undo this operation with:

  git switch -

Turn off this advice by setting config variable advice.detachedHead to false

HEAD is now at 1679b01 Merge branch 'main' of github.com:THUDM/ChatGLM2-6B


In [89]:
%%writefile ChatGLM2-6B/ptuning/sm_ptune_train.sh

PRE_SEQ_LEN=128 && LR=2e-2 && CUDA_VISIBLE_DEVICES=0 python3 main.py \
    --do_train \
    --train_file $TRAIN_DATASET \
    --validation_file $TEST_DATASET \
    --prompt_column ${PROMPT_COLUMN} \
    --response_column ${RESPONSE_COLUMN}  \
    --overwrite_cache \
    --model_name_or_path ${MODEL_NAME_OR_PATH} \
    --output_dir ${OUTPUT_DIR} \
    --overwrite_output_dir \
    --max_source_length 64 \
    --max_target_length 64 \
    --per_device_train_batch_size 4 \
    --per_device_eval_batch_size 1 \
    --gradient_accumulation_steps 4 \
    --predict_with_generate \
    --max_steps ${TRAIN_STEPS} \
    --logging_steps 10 \
    --save_steps ${TRAIN_STEPS} \
    --learning_rate $LR \
    --pre_seq_len $PRE_SEQ_LEN


Overwriting ChatGLM2-6B/ptuning/sm_ptune_train.sh


In [90]:
%%writefile ChatGLM2-6B/ptuning/sm_ptune_train.py

import os


if __name__ == "__main__":
    os.environ['PYTORCH_CUDA_ALLOC_CONF'] = str(os.environ['PYTORCH_CUDA_ALLOC_CONF'])
    os.environ['LD_LIBRARY_PATH'] = str(os.environ['LD_LIBRARY_PATH'])
    os.environ['TRAIN_DATASET'] = str(os.environ['TRAIN_DATASET'])
    os.environ['TEST_DATASET'] = str(os.environ['TEST_DATASET'])
    os.environ['PROMPT_COLUMN'] = str(os.environ['PROMPT_COLUMN'])
    os.environ['RESPONSE_COLUMN'] = str(os.environ['RESPONSE_COLUMN'])
    os.environ['MODEL_NAME_OR_PATH'] = str(os.environ['MODEL_NAME_OR_PATH'])
    os.environ['OUTPUT_DIR'] = str(os.environ['OUTPUT_DIR'])
    os.environ['MODEL_OUTPUT_S3_PATH'] = str(os.environ['MODEL_OUTPUT_S3_PATH'])

    # os.system("chmod +x ./s5cmd")
    os.system("/bin/bash sm_ptune_train.sh")

Overwriting ChatGLM2-6B/ptuning/sm_ptune_train.py


In [73]:
%%writefile ChatGLM2-6B/ptuning/requirements.txt

protobuf
#git+https://github.com/huggingface/transformers.git@68d640f7c368bcaaaecfc678f11908ebbd3d6176
transformers==4.28.0
cpm_kernels
torch>=1.10
gradio
mdtex2html
sentencepiece
accelerate
datasets
huggingface
jieba
rouge_chinese
nltk
deepspeed==0.9.2

Overwriting ChatGLM2-6B/ptuning/requirements.txt


In [74]:
! chmod +x s5cmd
! cp s5cmd ChatGLM2-6B/ptuning

## 3.2 定义微调参数

In [100]:
# Define Training Job Name
import time
from sagemaker.huggingface import HuggingFace

job_name = f'chatglm2-finetune-ptuning-{time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())}'

instance_type  = "ml.g5.2xlarge"
instance_count = 1

# 基础模型存放地址 model_name_or_path 在上面上传的时候赋值
# s3://sagemaker-us-east-1-568765279027/mt_models_uploaded/THUDM--chatglm2-6b
# model_name_or_path = 's3://{}/mt_models_uploaded/THUDM--chatglm2-6b/'.format(sagemaker_default_bucket)

# 微调模型输出地址
output_dir         = '/opt/ml/model/med-qa-chatglm2-6b-ft'
model_s3_path      = 's3://{}/mt_models_uploaded/THUDM--chatglm2-6b-finetune/'.format(sagemaker_default_bucket)

# 模型环境变量设置
environment = {
    'PYTORCH_CUDA_ALLOC_CONF': 'max_split_size_mb:32',
    'TRAIN_DATASET'          : '/opt/ml/input/data/med_qa/med_qa_train.json',
    'TEST_DATASET'           : '/opt/ml/input/data/med_qa/med_qa_test.json',
    'PROMPT_COLUMN'          : 'question',
    'RESPONSE_COLUMN'        : 'answer',
    'MODEL_NAME_OR_PATH'     : model_name_or_path,
    'OUTPUT_DIR'             : output_dir,
    'MODEL_OUTPUT_S3_PATH'   : model_s3_path,
    'TRAIN_STEPS'            : '50'
}

# 数据位置 s3://sagemaker-us-east-1-568765279027/llm/datasets/chatglm/med_qa/med_qa_train.json
inputs = {
   'med_qa': f"s3://{sagemaker_default_bucket}/llm/datasets/chatglm2/med_qa/"
}

In [101]:
environment

{'PYTORCH_CUDA_ALLOC_CONF': 'max_split_size_mb:32',
 'TRAIN_DATASET': '/opt/ml/input/data/med_qa/med_qa_train.json',
 'TEST_DATASET': '/opt/ml/input/data/med_qa/med_qa_test.json',
 'PROMPT_COLUMN': 'question',
 'RESPONSE_COLUMN': 'answer',
 'MODEL_NAME_OR_PATH': 's3://sagemaker-us-east-1-568765279027/mt_models_uploaded/THUDM--chatglm2-6b',
 'OUTPUT_DIR': '/opt/ml/model/med-qa-chatglm2-6b-ft',
 'MODEL_OUTPUT_S3_PATH': 's3://sagemaker-us-east-1-568765279027/mt_models_uploaded/THUDM--chatglm2-6b-finetune/',
 'TRAIN_STEPS': '50'}

## 3.3 启动微调训练

In [102]:
# create the Estimator
huggingface_estimator = HuggingFace(
    entry_point          = 'sm_ptune_train.py',
    source_dir           = './ChatGLM2-6B/ptuning',
    instance_type        = instance_type,
    instance_count       = instance_count,
    base_job_name        = job_name,
    role                 = role,
    script_mode          = True,
    transformers_version = '4.26',
    pytorch_version      = '1.13',
    py_version           = 'py39',
    environment          = environment
)

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


In [103]:
huggingface_estimator.fit(inputs=inputs)

Using provided s3_resource


INFO:sagemaker.image_uris:image_uri is not presented, retrieving image_uri based on instance_type, framework etc.
INFO:sagemaker:Creating training-job with name: chatglm2-finetune-ptuning-2023-07-19-17-2023-07-19-17-03-13-573


2023-07-19 17:03:16 Starting - Starting the training job...
2023-07-19 17:03:34 Starting - Preparing the instances for training......
2023-07-19 17:04:38 Downloading - Downloading input data...
2023-07-19 17:05:13 Training - Downloading the training image...............
2023-07-19 17:07:29 Training - Training image download completed. Training in progress...[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2023-07-19 17:08:00,689 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2023-07-19 17:08:00,703 sagemaker-training-toolkit INFO     No Neurons detected (normal if no neurons installed)[0m
[34m2023-07-19 17:08:00,712 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2023-07-19 17:08:00,714 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2023-07-19 17:08:01,144 sag

# 4. 模型部署

# 4.1 获取Runtime资源配置

In [104]:
import boto3
import sagemaker
from sagemaker import get_execution_role

sess                     = sagemaker.Session()
role                     = get_execution_role()
sagemaker_default_bucket = sess.default_bucket()

account                  = sess.boto_session.client("sts").get_caller_identity()["Account"]
region                   = sess.boto_session.region_name

INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole
INFO:botocore.credentials:Found credentials from IAM Role: BaseNotebookInstanceEc2InstanceRole


# 4.2 准备Dummy模型

In [105]:
!touch dummy
!tar czvf model.tar.gz dummy
!rm -f dummy

dummy


# 4.3 配置模型参数

In [6]:
model_name                  = "mt-chatglm2-6b-ft-g4dn-model"
entry_point                 = 'inference-chatglm2-ft.py'
framework_version           = '1.13.1'
py_version                  = 'py39'
base_model_name_or_path     = 's3://{}/mt_models_uploaded/THUDM--chatglm2-6b/'.format(sagemaker_default_bucket)
finetune_model_name_or_path = 's3://{}/mt_models_uploaded/THUDM--chatglm2-6b-finetune/med-qa-chatglm2-6b-ft/checkpoint-50/pytorch_model.bin'.format(sagemaker_default_bucket)

# 模型环境变量设置
model_environment  = {
    'SAGEMAKER_MODEL_SERVER_TIMEOUT': '600',
    'SAGEMAKER_MODEL_SERVER_WORKERS': '1',
    'MODEL_NAME_OR_PATH'            : base_model_name_or_path,
    'PRE_SEQ_LEN'                   : '128',
    'FINETUNE_MODEL_NAME_OR_PATH'   : finetune_model_name_or_path,
}


In [7]:
from sagemaker.pytorch.model import PyTorchModel

model = PyTorchModel(
    name              = model_name,
    model_data        = "./model.tar.gz",
    entry_point       = entry_point,
    source_dir        = './code-chatglm2',
    role              = role,
    framework_version = framework_version, 
    py_version        = py_version,
    env               = model_environment
)

In [109]:
! chmod +x s5cmd
! cp s5cmd code-chatglm2/

# 4.4 部署微调模型

In [8]:
from sagemaker.serializers import JSONSerializer
from sagemaker.deserializers import JSONDeserializer

endpoint_name         = 'mt-chatglm2-6b-ft-g4dn'
instance_type         = 'ml.g4dn.2xlarge'
instance_count        = 1

predictor = model.deploy(
    endpoint_name          = endpoint_name,
    instance_type          = instance_type, 
    initial_instance_count = instance_count,
    serializer             = JSONSerializer(),
    deserializer           = JSONDeserializer()
)

---------!

# 4.5 测试微调模型

In [None]:
inputs = {
    "ask": "为什么会得腰间盘突出？"
}

response = predictor.predict(inputs)
print(response["answer"])


In [None]:
inputs = {
    "ask": "天为什么是蓝的？"
}

response = predictor.predict(inputs)
print(response["answer"])


## 使用原先的ChatGLM并且没有知识库的方式进行问答

In [136]:
# 比较
import json
import boto3

client = boto3.client('runtime.sagemaker')
endpoint_name = 'mt-chatglm2-6b-g4dn'

def query_endpoint(encoded_json):
    response = client.invoke_endpoint(EndpointName=endpoint_name, ContentType='application/json', Body=encoded_json)
    model_predictions = json.loads(response['Body'].read())
    generated_text = model_predictions["answer"]
    return generated_text

payload = {"ask": "腰间盘突出怎么防治？", "parameters": {}, "history": []}
query_endpoint(json.dumps(payload).encode('utf-8'))

'腰间盘突出是一种退行性疾病，主要发生在年龄较大的人群。腰部肌肉和韧带的劳损是腰间盘突出的主要原因。那么，如何防治腰间盘突出呢？\n\n首先，要避免长时间保持一个姿势，如长时间坐着或站着，这会增加腰部肌肉和韧带的劳损，加重腰间盘突出的风险。\n\n其次，加强腰部肌肉的锻炼，有助于缓解腰部肌肉和韧带的劳损，减轻腰间盘突出的症状。例如，可以做一些腰部伸展运动，如仰卧起坐、俯卧起坐、仰卧举腿等。\n\n此外，保持适当的体重，避免过度肥胖，也能减轻腰部肌肉和韧带的负担，降低腰间盘突出的风险。\n\n在治疗腰间盘突出方面，可以采用药物治疗、物理治疗、手术治疗等方法。不过，治疗腰间盘突出需要根据医生的建议进行，并积极配合治疗，以提高治疗效果。'

In [134]:
# 比较
import json
import boto3

client = boto3.client('runtime.sagemaker')
endpoint_name = 'mt-chatglm2-6b-ds'

def query_endpoint(encoded_json):
    response = client.invoke_endpoint(EndpointName=endpoint_name, ContentType='application/json', Body=encoded_json)
    model_predictions = json.loads(response['Body'].read())
    generated_text = model_predictions["outputs"]
    return generated_text
payload = {"inputs": "腰间盘突出可以不吃药吗？", "parameters": {}, "history": []}
query_endpoint(json.dumps(payload).encode('utf-8'))

'腰间盘突出通常需要药物治疗,但具体治疗方式还需要根据病情的具体情况而定。以下是一些可能有助于缓解腰间盘突出的症状的方法:\n\n1. 休息:减轻腰部压力,减少症状。建议在症状最严重时休息数天。\n\n2. 物理疗法:物理疗法包括按摩、针灸、理疗等,可以缓解疼痛和减轻炎症。\n\n3. 药物治疗:药物治疗包括非甾体抗炎药、镇痛剂、肌肉松弛剂等,可以缓解疼痛和减轻炎症。不过,药物治疗应在医生的指导下进行,以避免不良反应。\n\n4. 运动疗法:适当的运动可以增强腰部肌肉力量和灵活性,缓解疼痛和减轻症状。不过,运动疗法应在医生的指导下进行,以避免进一步损伤。\n\n腰间盘突出需要综合治疗,包括药物治疗、物理疗法、运动疗法等。在医生的指导下进行治疗,并遵守医生的治疗方案,是缓解腰间盘突出的症状的有效途径。'

# 4.6 清除资源

In [115]:
predictor.delete_model()
predictor.delete_endpoint()

INFO:sagemaker:Deleting model with name: pytorch-inference-2023-07-19-17-32-59-456
INFO:sagemaker:Deleting endpoint configuration with name: mt-chatglm2-6b-ft-g4dn
INFO:sagemaker:Deleting endpoint with name: mt-chatglm2-6b-ft-g4dn
