In [1]:
# Hugging Face cache configuration. HF_HOME and HF_HUB_CACHE are both pointed
# at /root/autodl-tmp/hf; this cell runs before any later cell imports
# transformers/peft.
# Optional: uncomment the next line to use the hf-mirror.com endpoint instead
# of the default Hub endpoint.
# %env HF_ENDPOINT=https://hf-mirror.com
%env HF_HOME=/root/autodl-tmp/hf
%env HF_HUB_CACHE=/root/autodl-tmp/hf

env: HF_HOME=/root/autodl-tmp/hf
env: HF_HUB_CACHE=/root/autodl-tmp/hf


In [2]:
import os
import subprocess

# Copy the proxy-related variables exported by /etc/network_turbo into this
# kernel's environment (presumably so later downloads go through the proxy).
# bash is invoked directly with an argv list: wrapping `bash -c "..."` in a
# second shell via shell=True added nothing but an extra quoting layer.
result = subprocess.run(
    ["bash", "-c", "source /etc/network_turbo && env | grep proxy"],
    capture_output=True,
    text=True,
)
output = result.stdout
if result.returncode != 0:
    # Non-zero means either `source` failed (e.g. the file does not exist) or
    # grep matched nothing. Stay best-effort, but report it instead of
    # silently continuing without any proxy settings.
    print(
        "warning: no proxy variables loaded from /etc/network_turbo:",
        result.stderr.strip() or "no variable matched 'proxy'",
    )
for line in output.splitlines():
    # Split on the first '=' only, so values that contain '=' stay intact.
    var, sep, value = line.partition("=")
    if sep:
        os.environ[var] = value

In [3]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel, PeftConfig

# Directory that holds the saved PEFT artifacts: <save_dir>/<save_name>.
save_dir = '/root/autodl-tmp/models'
save_name = "google/gemma-2-9b-mingchao-ft"
out_dir = os.path.join(save_dir, save_name)

# Read the PEFT configuration stored in out_dir; a later cell takes the base
# model id from it.
peft_config = PeftConfig.from_pretrained(out_dir)

In [4]:
# Base checkpoint id recorded in the PEFT config. The bare name on the last
# line makes the notebook display the value as the cell output.
model_id = peft_config.base_model_name_or_path
model_id

'google/gemma-2-9b'

In [None]:
# 4-bit NF4 quantization with double quantization; compute dtype is bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# device_map={"": 0} puts the whole model on GPU 0.
# NOTE(review): only the base checkpoint `model_id` is loaded here. PeftModel
# is imported but the adapter in `out_dir` is never applied, so the prompts
# below query the base model. If the fine-tuned model is what should be
# tested, wrap with PeftModel.from_pretrained(model, out_dir) -- confirm intent.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
)

# This notebook only runs inference, so the tokenizer is loaded WITHOUT
# add_eos_token=True. That option appends <eos> to every encoded prompt; it is
# useful when building training examples, but at generation time the model
# would be asked to continue a sequence that has already ended.
tokenizer = AutoTokenizer.from_pretrained(model_id)

In [None]:
def get_completion(query: str, model, tokenizer) -> str:
    """Generate a sampled answer to ``query`` with ``model``.

    The query is inserted into the fixed question/answer prompt template,
    tokenized, moved to the model's device and passed to ``model.generate``
    with sampling enabled and at most 128 new tokens.

    Args:
        query: The question text to place in the prompt.
        model: Object exposing ``generate(**inputs, ...)``. Its ``device``
            attribute, when present, decides where the inputs are moved;
            otherwise ``"cuda:0"`` is used, as before.
        tokenizer: Callable tokenizer exposing ``eos_token_id`` and
            ``decode``.

    Returns:
        The decoded text of the first sequence returned by ``generate``, with
        special tokens stripped. (For a causal LM this presumably includes the
        prompt text as well -- verify against the model in use.)
    """
    # Follow the model instead of assuming GPU 0, so the helper keeps working
    # when the model is loaded on another device.
    device = getattr(model, "device", "cuda:0")

    # Must stay byte-identical to the original template, including the
    # leading/trailing newline and two-space indentation.
    prompt = "\n  ### 问题: {query}\n###回答:\n  ".format(query=query)

    model_inputs = tokenizer(
        prompt, return_tensors="pt", add_special_tokens=True
    ).to(device)

    # pad_token_id is set to the EOS id explicitly for generation.
    generated_ids = model.generate(
        **model_inputs,
        max_new_tokens=128,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

In [None]:
# Probe: ask what the game named in the query is.
result = get_completion(
    query="《鸣潮》是什么呀？",
    model=model,
    tokenizer=tokenizer,
)
print(result)

In [None]:
# Probe: ask about the game's distinguishing features.
result = get_completion(
    query="《鸣潮》的特色是什么？",
    model=model,
    tokenizer=tokenizer,
)
print(result)

In [None]:
# Probe: ask who the character named in the query is.
result = get_completion(
    query="今汐是谁？",
    model=model,
    tokenizer=tokenizer,
)
print(result)

In [None]:
# Probe: ask which skills the character has.
result = get_completion(
    query="今汐是有什么技能？",
    model=model,
    tokenizer=tokenizer,
)
print(result)

In [None]:
# Probe: ask whether pulling for the character is worthwhile.
result = get_completion(
    query="对于XP党来说，抽取今汐是否值得抽取？",
    model=model,
    tokenizer=tokenizer,
)
print(result)