# 环境配置

In [None]:
!pip install modelscope
!pip install tiktoken
!pip install transformers_stream_generator
!pip install auto-gptq
!pip install pip install optimum

## 运行代码

In [None]:
import os
# Point the ModelScope cache at a local directory. This is set before
# modelscope is imported (presumably so the library picks it up — confirm).
os.environ["MODELSCOPE_CACHE"] = "./cache/qwen_cache"

from modelscope import AutoTokenizer, AutoModelForCausalLM

# The tokenizer and the model are loaded from the same checkpoint id/revision.
QWEN_CHAT_MODEL_ID = "qwen/Qwen-1_8B-Chat-Int4"
QWEN_CHAT_REVISION = "master"

tokenizer = AutoTokenizer.from_pretrained(
    QWEN_CHAT_MODEL_ID,
    revision=QWEN_CHAT_REVISION,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    QWEN_CHAT_MODEL_ID,
    revision=QWEN_CHAT_REVISION,
    device_map="auto",
    trust_remote_code=True,
    use_flash_attn=False,
)
model = model.eval()

# Single-turn chat with no prior history; the second returned value is
# discarded. The prompt asks the model to reply in a cute anime-cat tone.
prompt = "你好呀 用二次元可爱猫咪语气和我说话"
response, _ = model.chat(tokenizer, prompt, history=None)

# Alternative call kept for reference (story-writing prompt with a system role):
# response, _ = model.chat(tokenizer, "帮我写一个关于这个的故事：a cat is standing on the beach and waving", history=None, system="你现在是一个故事书写大师")
print(response)
# Sample output recorded from a previous run:
# 你好啊！我是一只可爱的二次元猫咪哦，不知道你有什么问题需要我帮忙解答吗？

In [None]:
import os
import gradio as gr
import time

# Point the ModelScope cache at a local directory. This is set before
# modelscope is imported (presumably so the library picks it up — confirm).
os.environ["MODELSCOPE_CACHE"] = "./cache/qwen_cache"

from modelscope import AutoTokenizer, AutoModelForCausalLM

# Checkpoint shared by the tokenizer and the model used by the Gradio handlers.
CHAT_MODEL_ID = "qwen/Qwen-1_8B-Chat-Int4"
CHAT_MODEL_REVISION = "master"

tokenizer = AutoTokenizer.from_pretrained(
    CHAT_MODEL_ID,
    revision=CHAT_MODEL_REVISION,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    CHAT_MODEL_ID,
    revision=CHAT_MODEL_REVISION,
    device_map="auto",
    trust_remote_code=True,
    use_flash_attn=False,
)
model = model.eval()

def add_text(history, text):
    """Run one chat turn and return the new history plus a locked, empty textbox.

    Args:
        history: Conversation so far, forwarded to ``model.chat`` as ``history``.
        text: The message the user just submitted.

    Returns:
        A 2-tuple of the history returned by ``model.chat`` and a
        ``gr.Textbox`` with an empty value and ``interactive=False``.
    """
    # Only the updated history is used here; the reply text (first element)
    # is read back later from the last history entry.
    _reply, updated_history = model.chat(tokenizer, text, history=history)
    locked_textbox = gr.Textbox(value="", interactive=False)
    return updated_history, locked_textbox

def bot(history):
    """Replay the last reply one character at a time.

    The second item of the last history entry is cleared and then rebuilt
    character by character; after each character the function sleeps 0.05
    seconds and yields ``history``.

    Args:
        history: Sequence of mutable two-item entries; the second item of
            the last entry holds the full reply text.

    Yields:
        The same ``history`` object, mutated in place, once per character.
    """
    full_reply = history[-1][1]
    # Start from an empty reply so the text can grow incrementally.
    history[-1][1] = ""
    for next_char in full_reply:
        history[-1][1] = history[-1][1] + next_char
        time.sleep(0.05)
        yield history

# Build the chat UI: a chatbot pane above a single-line text input.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        value=[],
        elem_id="chatbot",
        bubble_full_width=False,
        height=600,
    )

    with gr.Row():
        txt = gr.Textbox(
            scale=4,
            show_label=False,
            placeholder="Enter text and press enter",
            container=False,
        )

    # Event chain on submit:
    #   1. add_text runs the model and returns a non-interactive, empty textbox,
    #   2. bot replays the reply character by character into the chatbot,
    #   3. the textbox is made interactive again.
    submit_event = txt.submit(
        fn=add_text,
        inputs=[chatbot, txt],
        outputs=[chatbot, txt],
        queue=False,
    )
    txt_msg = submit_event.then(
        fn=bot,
        inputs=chatbot,
        outputs=chatbot,
        api_name="bot_response",
    )
    txt_msg.then(
        fn=lambda: gr.Textbox(interactive=True),
        inputs=None,
        outputs=[txt],
        queue=False,
    )

demo.launch()

In [None]:
import os
# Point the ModelScope cache at a local directory. This is set before
# modelscope is imported (presumably so the library picks it up — confirm).
os.environ["MODELSCOPE_CACHE"] = "./cache/qwen_cache"
from modelscope import AutoModelForCausalLM, AutoTokenizer

# Vision-language checkpoint; the model is placed on CUDA explicitly.
QWEN_VL_MODEL_ID = "qwen/Qwen-VL-Chat-Int4"

tokenizer = AutoTokenizer.from_pretrained(QWEN_VL_MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    QWEN_VL_MODEL_ID,
    device_map="cuda",
    trust_remote_code=True,
    use_safetensors=True,
    use_flash_attn=False,
)
model = model.eval()

# The query combines one image (by URL) with a text question ("What is this?").
image_part = {"image": "https://qianwen-res.oss-cn-beijing.aliyuncs.com/Qwen-VL/assets/demo.jpeg"}
text_part = {"text": "这是什么"}
query = tokenizer.from_list_format([image_part, text_part])

response, history = model.chat(tokenizer, query=query, history=None)
print(response)