## Load Model From huggingface

In [3]:
import os

#os.environ["CUDA_VISIBLE_DEVICES"]="0"
#os.environ["https_proxy"] = 'http://127.0.0.1:7890'
#os.environ["http_proxy"] = 'http://127.0.0.1:7890'

from transformers import AutoTokenizer, AutoModel

device = 'cuda'
model_name = "THUDM/chatglm2-6b"
tokenizer = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True,revision="main")
model     = AutoModel    .from_pretrained(model_name,trust_remote_code=True,revision="main")


Loading checkpoint shards:   0%|          | 0/7 [00:00<?, ?it/s]

## Insert LoRA to model

In [6]:
import loralib as lora
from insert_lora import get_lora_model

In [7]:
lora_config = {
        'r': 8,
        'lora_alpha':16,
        'lora_dropout':0.1,
        'enable_lora':[True, False, True],
    }

In [8]:
model = get_lora_model(model, lora_config)

  0%|          | 0/28 [00:00<?, ?it/s]

ValueError: invalid literal for int() with base 10: 'layers'

## Dataset

In [8]:
device = 'cuda'

In [9]:
import dataset.GLM 
from torch.utils.data import DataLoader

dataset.GLM.device = device
#dataset.GLM.pad_to = 8

In [10]:
pairs = [{'prompt':'你好', 'completion':'你好, 我是ChatGLM'}]
pairs_encoded = dataset.GLM.encode_pairs(pairs, tokenizer)
train_dataset = dataset.GLM.SimpleDataset(pairs_encoded)
train_dataloader = DataLoader(dataset=train_dataset, collate_fn = dataset.GLM.collate_fn, shuffle=True, batch_size=1)

## Training

In [11]:
model.half().to(device)

ChatGLMForConditionalGeneration(
  (transformer): ChatGLMModel(
    (word_embeddings): Embedding(130528, 4096)
    (layers): ModuleList(
      (0-27): 28 x GLMBlock(
        (input_layernorm): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)
        (attention): SelfAttention(
          (rotary_emb): RotaryEmbedding()
          (query_key_value): LoraModel(
            (model): QKV_layer(
              (linear_q): Linear(
                in_features=4096, out_features=4096, bias=True
                (lora_dropout): Dropout(p=0.1, inplace=False)
                (lora_A): Linear(in_features=4096, out_features=8, bias=False)
                (lora_B): Linear(in_features=8, out_features=4096, bias=False)
              )
              (linear_k): Linear(in_features=4096, out_features=4096, bias=True)
              (linear_v): Linear(
                in_features=4096, out_features=4096, bias=True
                (lora_dropout): Dropout(p=0.1, inplace=False)
                (lora_A): Lin

In [12]:
batch = {k: v.to(device) for k, v in next(iter(train_dataloader)).items()}

In [14]:
model(**batch).loss

tensor(3.2227, device='cuda:0', dtype=torch.float16, grad_fn=<ToCopyBackward0>)

## Inference

In [19]:
import torch

In [86]:
pairs = [
    {'prompt':'周末适合哪里玩?', 'completion':'周末适合去上海'},
    {'prompt':'周末适合哪里玩?', 'completion':'周末适合去北京'},
]

pairs_encoded = dataset.GLM.encode_pairs(pairs, tokenizer, with_eos=False)
test_dataset = dataset.GLM.SimpleDataset(pairs_encoded)
test_dataloader = DataLoader(dataset=test_dataset, collate_fn = dataset.GLM.collate_fn, shuffle=True, batch_size=1)

In [87]:
batch = {k: v.to(device) for k, v in next(iter(test_dataloader)).items()}

In [88]:
outputs = model.generate(
    **batch, 
    max_length=1024,
    eos_token_id=130005,
    do_sample=True,
    temperature=0.55,
    top_p = 0.75,
    top_k = 10000,
    repetition_penalty=1.5, 
    num_return_sequences=1,

    )

In [89]:
for output in outputs:
    print(tokenizer.sp_tokenizer.decode(output))

周末适合哪里玩? 周末适合去北京旅游。北京是中国的首都,有着悠久的历史和丰富的文化遗产。在这里你可以参观历史古迹、博物馆和文化遗址等景点。此外,北京的特色美食也不容错过,例如烤鸭和炸酱面等等。
如果对历史文化感兴趣的话,建议去故宫、天安门广场、颐和园和长城等地游览;如果喜欢自然风光,可以去八达岭高速或者京承高速公路上自驾游,欣赏美丽的景色。


## Chat

In [92]:
response, history = model.chat(tokenizer, "如何缓解焦虑", history=[])

In [93]:
response

'焦虑是一种较为常见的情绪反应，当人们面临不确定性、压力或风险时，可能会感到焦虑。以下是一些缓解焦虑的方法：\n\n1. 深呼吸：深呼吸可以帮助你放松身体和心灵。尝试缓慢地吸气，然后缓慢地呼气，重复几次。\n\n2. 渐进性肌肉松弛：这是一种放松身体的技巧，通过逐步收缩和松弛肌肉来减轻身体的紧张感。你可以在背部、腿部和手臂等部位练习渐进性肌肉松弛。\n\n3. 冥想：冥想是一种可以帮助你放松心灵和减轻焦虑的技巧。你可以通过集中注意力、呼吸、放松身体和关注内心的练习来冥想。\n\n4. 运动：运动可以帮助你释放紧张感和压力，同时也可以提高身体和心理的健康状况。你可以尝试跑步、瑜伽、游泳等运动。\n\n5. 寻求支持：与家人、朋友或专业人士谈论你的问题可以帮助你减轻焦虑。你可以寻求心理咨询或与支持团体联系。\n\n6. 改善生活方式：保持健康的饮食、充足的睡眠和规律的锻炼可以帮助你改善身体和心理的健康状况。\n\n请注意，如果你的焦虑症状持续很长时间或影响到你的日常生活，请寻求专业医疗帮助。'

## Load pretrain weight

In [97]:
model.load_state_dict(torch.load('saved/chatglm-6b_alpaca_5.pt'), strict=False)

_IncompatibleKeys(missing_keys=['transformer.word_embeddings.weight', 'transformer.layers.0.input_layernorm.weight', 'transformer.layers.0.input_layernorm.bias', 'transformer.layers.0.attention.query_key_value.model.linear_q.weight', 'transformer.layers.0.attention.query_key_value.model.linear_q.bias', 'transformer.layers.0.attention.query_key_value.model.linear_k.weight', 'transformer.layers.0.attention.query_key_value.model.linear_k.bias', 'transformer.layers.0.attention.query_key_value.model.linear_v.weight', 'transformer.layers.0.attention.query_key_value.model.linear_v.bias', 'transformer.layers.0.attention.dense.weight', 'transformer.layers.0.attention.dense.bias', 'transformer.layers.0.post_attention_layernorm.weight', 'transformer.layers.0.post_attention_layernorm.bias', 'transformer.layers.0.mlp.dense_h_to_4h.weight', 'transformer.layers.0.mlp.dense_h_to_4h.bias', 'transformer.layers.0.mlp.dense_4h_to_h.weight', 'transformer.layers.0.mlp.dense_4h_to_h.bias', 'transformer.layer