In [None]:
# !pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
# !pip install -i https://pypi.tuna.tsinghua.edu.cn/simple -U --user 
# datasets accelerate peft trl tensorboard bitsandbytes langchain sentencepiece transformers vllm
# !pip install tiktoken einops transformers_stream_generator

In [1]:
import os
import sys
import warnings; warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import torch as th
from datasets import (load_dataset, load_from_disk, Dataset)
from transformers import (AutoTokenizer, AutoModel, AutoModelForCausalLM, BitsAndBytesConfig,
                          TrainingArguments, DataCollatorWithPadding, DataCollatorForLanguageModeling,
                          DataCollatorForSeq2Seq, DataCollatorForTokenClassification)
from peft import (LoraConfig, get_peft_model, PeftModel, TaskType, get_peft_model_state_dict)
from trl import SFTTrainer
from vllm import (LLM, SamplingParams)



In [2]:
device = th.device("cuda" if th.cuda.is_available() else "cpu")
devive_cnt = th.cuda.device_count()
print(f"device = {device}; devive_cnt = {devive_cnt}")
print(th.__version__)
print(th.version.cuda)

device = cuda; devive_cnt = 1
2.5.1+cu121
12.1


In [3]:
path_project = "C:/my_project/MyGit/Machine-Learning-Column/hugging_face"
path_data = os.path.join(os.path.dirname(path_project), "data")
path_model = "F:/LLM"
path_output = os.path.join(os.path.dirname(path_project), "output")

# step-1: LLM

In [20]:
# https://huggingface.co/THUDM/glm-4-9b-chat
# checkpoint = "Qwen/Qwen1.5-4B-Chat"
checkpoint = "THUDM/glm-4-9b-chat"

In [21]:
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=os.path.join(path_model, checkpoint),
    cache_dir=path_model,
    force_download=False,
    local_files_only=True,
    trust_remote_code=True  # glm-4-9b-chat
)

In [24]:
# GLM-4-9B-Chat
# 如果遇见 OOM 现象，建议减少max_model_len，或者增加tp_size
max_model_len, tp_size = 32768, 1
prompt = [{"role": "user", "content": "你好"}]

In [27]:
llm = LLM(
    model=os.path.join(path_model, checkpoint),
    tensor_parallel_size=tp_size,
    max_model_len=max_model_len,
    trust_remote_code=True,
    enforce_eager=True,
    # GLM-4-9B-Chat-1M 如果遇见 OOM 现象，建议开启下述参数
    # enable_chunked_prefill=True,
    # max_num_batched_tokens=8192
)

INFO 12-15 17:05:02 config.py:350] This model supports multiple tasks: {'embedding', 'generate'}. Defaulting to 'generate'.
INFO 12-15 17:05:02 llm_engine.py:249] Initializing an LLM engine (v0.6.4.post1) with config: model='F:/LLM\\Qwen/Qwen1.5-4B-Chat', speculative_config=None, tokenizer='F:/LLM\\Qwen/Qwen1.5-4B-Chat', skip_tokenizer_init=False, tokenizer_mode=auto, revision=None, override_neuron_config=None, tokenizer_revision=None, trust_remote_code=True, dtype=torch.bfloat16, max_seq_len=32768, download_dir=None, load_format=LoadFormat.AUTO, tensor_parallel_size=1, pipeline_parallel_size=1, disable_custom_all_reduce=False, quantization=None, enforce_eager=True, kv_cache_dtype=auto, quantization_param_path=None, device_config=cuda, decoding_config=DecodingConfig(guided_decoding_backend='outlines'), observability_config=ObservabilityConfig(otlp_traces_endpoint=None, collect_model_forward_time=False, collect_model_execute_time=False), seed=0, served_model_name=F:/LLM\Qwen/Qwen1.5-4B-

ValueError: Bfloat16 is only supported on GPUs with compute capability of at least 8.0. Your NVIDIA GeForce GTX 1080 Ti GPU has compute capability 6.1. You can use float16 instead by explicitly setting the`dtype` flag in CLI, for example: --dtype=half.

In [None]:
stop_token_ids = [151329, 151336, 151338]
sampling_params = SamplingParams(temperature=0.95, max_tokens=1024, stop_token_ids=stop_token_ids)

In [None]:
inputs = tokenizer.apply_chat_template(prompt, tokenize=False, add_generation_prompt=True)
outputs = llm.generate(prompts=inputs, sampling_params=sampling_params)

In [None]:
print(outputs[0].outputs[0].text)

# step-2: prompt

In [9]:
schema = """
{
 "infoList": [
    {
     "ssid": "string",
     "securityProtocol": "string",
     "bandwidth": "string"
    }
    ]
}
"""
schema = schema.replace("\n", "").replace(" ", "")

In [10]:
content_sys = (
    "You are a helpful assistant that answers in JSON. "
    f"Here's the json schema you must adhere to: \n<json>\n{schema}\n</json>\n"
    )
print(content_sys)

You are a helpful assistant that answers in JSON. Here's the json schema you must adhere to: 
<json>
{"infoList":[{"ssid":"string","securityProtocol":"string","bandwidth":"string"}]}
</json>



In [11]:
content_usr = (
    "I'm currently configuring a wireless access point for our office network and I "
    "need to generate a JSON object that accurately represents its settings. "
    "The access point's SSID should be 'OfficeNetSecure', it uses WPA2-Enterprise "
    "as its security protocol, and it's capable of a bandwidth of up to 1300 Mbps "
    "on the 5 GHz band. This JSON object will be used to document our network "
    "configurations and to automate the setup process for additional access "
    "points in the future."
    )
suffix = "\nPlease provide a JSON presentation based on the schema.\n"
content_usr += suffix
print(content_usr)

I'm currently configuring a wireless access point for our office network and I need to generate a JSON object that accurately represents its settings. The access point's SSID should be 'OfficeNetSecure', it uses WPA2-Enterprise as its security protocol, and it's capable of a bandwidth of up to 1300 Mbps on the 5 GHz band. This JSON object will be used to document our network configurations and to automate the setup process for additional access points in the future.
Please provide a JSON presentation based on the schema.



In [13]:
messages = [
    {"role": "system", "content": content_sys},
    {"role": "user", "content": content_usr}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
print(text)

<|im_start|>system
You are a helpful assistant that answers in JSON. Here's the json schema you must adhere to: 
<json>
{"infoList":[{"ssid":"string","securityProtocol":"string","bandwidth":"string"}]}
</json>
<|im_end|>
<|im_start|>user
I'm currently configuring a wireless access point for our office network and I need to generate a JSON object that accurately represents its settings. The access point's SSID should be 'OfficeNetSecure', it uses WPA2-Enterprise as its security protocol, and it's capable of a bandwidth of up to 1300 Mbps on the 5 GHz band. This JSON object will be used to document our network configurations and to automate the setup process for additional access points in the future.
Please provide a JSON presentation based on the schema.
<|im_end|>
<|im_start|>assistant

