# 权限配置

In [23]:
import sagemaker
import os
# Open a SageMaker session and resolve the IAM execution role for this notebook.
sess = sagemaker.Session()
role = sagemaker.get_execution_role()

# Report the role ARN, the session's default bucket, and its region.
print(
    f"sagemaker role arn: {role}",
    f"sagemaker bucket: {sess.default_bucket()}",
    f"sagemaker session region: {sess.boto_region_name}",
    sep="\n",
)

sagemaker role arn: arn:aws:iam::847380964353:role/spot-bot-SpotSageMakerExecutionRole-TP8BLT3Z5JJL
sagemaker bucket: sagemaker-us-west-2-847380964353
sagemaker session region: us-west-2


# 数据准备

In [24]:
# Name of the dataset; the S3 key prefix below is derived from it so the two
# cannot drift apart.
dataset_name = 'zhenyun'
# S3 key prefix (inside the session's default bucket) that receives the upload.
s3_prefix = f'datasets/{dataset_name}'
# Local directory that is uploaded to S3.
WORK_DIRECTORY = './data/'
data_location = sess.upload_data(WORK_DIRECTORY, key_prefix=s3_prefix)
# Display the resulting S3 URI as the cell output.
data_location

's3://sagemaker-us-west-2-847380964353/datasets/zhenyun'

# 模型参数指定

In [26]:
from sagemaker.huggingface import HuggingFace


# Hyperparameters passed to the training job (forwarded to the entry-point script).
# The original dict literal listed 'num_train_epochs' twice (10, then 1). Python
# keeps the last value for a duplicated key, so the job actually ran with 1; the
# dead duplicate is removed here and the effective value kept. Raise it
# deliberately if a longer run is wanted.
# NOTE(review): train_file and validation_file name the same CSV (in the 'train'
# and 'test' channels respectively), so evaluation would run on the training
# data -- confirm whether a held-out file should be used instead.
hyperparameters = {
    'num_train_epochs': 1,
    'train_file': '/opt/ml/input/data/train/100014.csv',
    'validation_file': '/opt/ml/input/data/test/100014.csv',
    'output_dir': '/opt/ml/model',
    'max_seq_length': 128,
    'model_name_or_path': 'bert-base-chinese',
    'learning_rate': 2e-5,
    'per_device_train_batch_size': 32,
    'save_strategy': 'epoch',
    'save_total_limit': 1,
}

# Create the estimator: runs ./scripts/run_glue.py on a single ml.p3.2xlarge
# instance using the pinned transformers / PyTorch / Python versions.
huggingface_estimator = HuggingFace(
    entry_point='run_glue.py',
    source_dir='./scripts',
    instance_type='ml.p3.2xlarge',
    instance_count=1,
    role=role,
    transformers_version='4.6',
    pytorch_version='1.7',
    py_version='py36',
    hyperparameters=hyperparameters,
)

# 模型训练

In [27]:
# Launch the training job. Each key names an input channel; both channels
# currently point at the same uploaded CSV.
huggingface_estimator.fit(
    inputs={
        'train': data_location + '/100014.csv',
        'test': data_location + '/100014.csv',
    }
)

2022-04-28 14:23:07 Starting - Starting the training job...
2022-04-28 14:23:35 Starting - Preparing the instances for trainingProfilerReport-1651155787: InProgress
.........
2022-04-28 14:24:53 Downloading - Downloading input data...
2022-04-28 14:25:31 Training - Downloading the training image..................
2022-04-28 14:28:32 Training - Training image download completed. Training in progress.[34mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[34mbash: no job control in this shell[0m
[34m2022-04-28 14:28:28,857 sagemaker-training-toolkit INFO     Imported framework sagemaker_pytorch_container.training[0m
[34m2022-04-28 14:28:28,882 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[34m2022-04-28 14:28:28,890 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[34m2022-04-28 14:28:29,238 sagemaker-training-toolkit INFO     Installing dependencies from requirements.txt:

# 模型部署

In [16]:
# Deploy the trained model to a real-time endpoint backed by one ml.g4dn.xlarge instance.
predictor = huggingface_estimator.deploy(
    initial_instance_count=1,
    instance_type="ml.g4dn.xlarge",
)

-------!

# 模型调用

In [21]:
from sagemaker.huggingface import HuggingFaceModel, HuggingFacePredictor
import sagemaker

# Attach to an already-running endpoint by name instead of reusing `predictor`.
# NOTE(review): this endpoint name is hard-coded and is not derived from the
# deploy cell -- confirm it is the endpoint you intend to query.
huggingface_predictor = HuggingFacePredictor(endpoint_name='huggingface-pytorch-training-2022-04-20-07-39-48-503')


In [22]:
# Send a single sample text to the endpoint; the response is shown as the cell output.
huggingface_predictor.predict(
    data={'inputs': "连环画"},
)

[{'label': 'LABEL_36', 'score': 0.9989555478096008}]

In [28]:
labs=['EPP泡沫', 'T其它', 'T存储', 'T服务器', 'T电脑', 'T网络', 'T网络布线(办公)', '书写工具', '会议桌', '传感器', '低压柜', '保护帽', '保洁', '信号发生器', '光学检测设备 AOI改造和升级', '光谱仪', '其它', '其它 五金件其它', '其它(机械其它）', '其它(气动其它）', '其它(电气其它）', '其它（工具其它）', '其它（非金属材料其它）', '内衬', '冰箱', '冲压设备', '冷却设备', '切片及产品验证', '办公桌', '办公椅', '办公用纸', '功率测试设备', '加热设备', '包材模具相关', '包装设备', '医疗、健康', '印刷品', '压力测量仪', '叉车', '叉车备件', '变压器', '地面', '垫板、垫片', '塑料周转塑料箱', '塑料袋', '外箱', '存储(文件)', '存储(系统)', '实验室工作台', '密封和润滑', '封箱带', '工作台', '工作服装鞋帽', '工作椅', '常用电气', '弹簧', '影像', '性能试验', '恒温恒湿机', '手动', '扎带', '打包带', '打印设备', '投影仪', '排风机', '接插件', '控制器', '支撑板', '数字万用表', '整形设备', '文件储存', '日用杂品', '更衣柜', '服务器(项目)', '机器人', '机床附件和焊接器材', '机械切平设备', '机械脉动试验', '材料', '标签', '气动', '气动执行元件', '气动控制阀', '气动附件', '水槽', '水泵', '洗碗机', '流量测量仪', '测量', '消火栓', '润滑脂', '液压执行元件', '液压控制阀', '液压泵', '液压附件', '清洗类', '清洗设备专用备件', '温度冲击', '温湿度传感器', '灭火器', '灭火用品', '灯具', '炉', '特殊桌面设备', '电动', '电机', '电气仪表', '电气性能测试设备', '电源', '电磁接触器', '电线和电缆', '电话会议设备', '电话系统', '监控探头', '硬度计', '碎纸机', '示波器', '礼品', '空压机', '空调', '空调箱', '立体仓库备件', '管路连接件', '粗糙度仪', '紧固件', '纸护角', '缠绕膜', '耗材', '蒸箱', '螺旋, 带链及齿轮传动件', '衡器', '表面激光成型设备', '计量', '财务用品', '起重, 液压和运输', '车床', '车辆使用的周边设备', '车险', '轴及连接', '轴承', '轴法兰', '运输和起重件', '配电箱', '酸碱溶液', '金属周转器具', '钢材', '钻床', '门窗和家具配件', '阀门', '防冻液', '防护用品', '隔档', '食堂日杂', '马桶', '高位货架备件', '高分子材料分析']

In [31]:
# Look up the category name at position 36.
# NOTE(review): 36 appears to be taken by hand from the 'LABEL_36' returned by
# the predict call -- confirm, and consider parsing the index from the response.
labs[36]

'印刷品'