In [48]:
import os
import io
import IPython.display
from PIL import Image
import base64
import requests

# NOTE(review): this only assigns an attribute on the requests.adapters module;
# nothing in this notebook reads it back, and it is not clear that requests
# itself consults it — confirm it really enforces a 60 s timeout.
requests.adapters.DEFAULT_TIMEOUT = 60

from dotenv import load_dotenv, find_dotenv

# Load the .env file located by find_dotenv(); the return value is discarded.
_ = load_dotenv(find_dotenv())  # read local .env file
# Indexing os.environ raises KeyError when HF_API_KEY is not set, so a missing
# key fails here rather than at the first API call.
hf_api_key = os.environ['HF_API_KEY']

In [50]:
from huggingface_hub import InferenceClient

# Module-level inference client built with the HF_API_KEY value; the generate()
# helpers in later cells all stream their completions through this object.
client = InferenceClient(api_key=hf_api_key)

In [64]:
# Stream a chat completion and echo each fragment as soon as it arrives.
for message in client.chat_completion(
    model="tiiuae/falcon-7b-instruct",
    messages=[{"role": "user", "content": "Has math been invented or discovered?"}],
    max_tokens=500,
    stream=True,
):
    # A streamed delta may carry no text (its content is optional); fall back
    # to "" so the literal string "None" is never printed.
    print(message.choices[0].delta.content or "", end="")

 Is math something that has been discovered or invented?
User 

In [135]:
import gradio as gr


def generate(input, slider):
    """Return the model's complete reply to a single prompt.

    Args:
        input: Prompt text, sent to the model as one user message.
        slider: Upper bound on the number of generated tokens. Coerced to
            ``int`` because a UI slider may hand the value over as a float.

    Returns:
        All streamed text fragments concatenated into one string.
    """
    stream = client.chat_completion(
        model="tiiuae/falcon-7b-instruct",
        messages=[{"role": "user", "content": input}],
        max_tokens=int(slider),
        stream=True,
    )
    # Streamed deltas may have no text (content is optional); substitute ""
    # so str.join does not raise TypeError on a None fragment.
    return "".join(
        message.choices[0].delta.content or "" for message in stream
    )


# Single-shot completion UI. The order of `inputs` matches the positional
# parameters of generate(input, slider): prompt text first, token budget second.
demo = gr.Interface(
    fn=generate,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Slider(minimum=1, maximum=1024, value=20, label="Max new tokens"),
    ],
    outputs=[gr.Textbox(label="Completion")],
)

demo.launch()

* Running on local URL:  http://127.0.0.1:7892

To create a public link, set `share=True` in `launch()`.




In [46]:
import random


def respond(message, chat_history):
    """Answer with a randomly chosen canned reply (no LLM involved).

    Args:
        message: The text the user just submitted.
        chat_history: List of (user, bot) turns; the new turn is appended
            to this same list in place.

    Returns:
        A tuple of ("", chat_history): an empty string followed by the
        updated history list.
    """
    canned_replies = [
        "Tell me more about it",
        "Cool, but I'm not interested",
        "Hmmmm, ok then",
    ]
    reply = random.choice(canned_replies)
    chat_history.append((message, reply))
    return "", chat_history


# Chat UI for the canned-reply bot. Components are created inside the Blocks
# context in the order written here, so the statement order is left as-is.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240)  # fixed height, just to fit the notebook
    msg = gr.Textbox(label="Prompt")
    btn = gr.Button("Submit")
    # The clear button is given both the prompt box and the chat window.
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    # respond() returns ("", chat_history); with outputs=[msg, chatbot] the
    # empty string goes to the prompt box and the history to the chat window.
    btn.click(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot], outputs=[msg, chatbot])  # Press enter to submit

demo.launch()



* Running on local URL:  http://127.0.0.1:7878

To create a public link, set `share=True` in `launch()`.




In [108]:
def format_chat_prompt(message, chat_history, instruction):
    """Flatten a conversation into one plain-text prompt.

    Args:
        message: The new user message to be answered.
        chat_history: Iterable of (user_message, bot_message) pairs.
        instruction: System text placed on the first line.

    Returns:
        A string with the system line, one "User:"/"Assistant:" pair per
        past turn, then the new user message and an open "Assistant:" line.
    """
    header = f"System:{instruction}"
    past_turns = "".join(
        f"\nUser: {user_message}\nAssistant: {bot_message}"
        for user_message, bot_message in chat_history
    )
    return header + past_turns + f"\nUser: {message}\nAssistant:"


def respond(message, chat_history, instruction):
    """Produce the assistant's reply for a new message and record the turn.

    The flattened prompt is printed to stdout before the model is called.

    Args:
        message: The text the user just submitted.
        chat_history: List of (user, bot) turns; extended in place.
        instruction: System text placed at the top of the prompt.

    Returns:
        A tuple of ("", chat_history): an empty string followed by the
        updated history list.
    """
    prompt = format_chat_prompt(message, chat_history, instruction)
    print(prompt)

    reply = generate(prompt)
    chat_history.append((message, reply))
    return "", chat_history


def generate(input):
    """Return the model's complete reply to an already-formatted prompt.

    Args:
        input: Prompt text, sent to the model as one user message.

    Returns:
        All streamed text fragments concatenated into one string.
    """
    stream = client.chat_completion(
        model="tiiuae/falcon-7b-instruct",
        messages=[{"role": "user", "content": input}],
        max_tokens=1024,
        # "\nUser:" is the turn marker written by format_chat_prompt.
        stop=["\nUser:", "<|endoftext|>"],
        stream=True,
    )
    # Streamed deltas may have no text (content is optional); substitute ""
    # so str.join does not raise TypeError on a None fragment.
    return "".join(
        message.choices[0].delta.content or "" for message in stream
    )


# Chat UI backed by the model, with an editable system message. Components are
# created inside the Blocks context in the order written here, so the
# statement order is left as-is.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240)  # fixed height, just to fit the notebook
    msg = gr.Textbox(label="Prompt")
    # The accordion is created closed (open=False) and holds the system message.
    with gr.Accordion(label="Advanced options", open=False):
        system = gr.Textbox(label="System message", lines=2, value="A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")
    btn = gr.Button("Submit")
    # The clear button is given both the prompt box and the chat window.
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    # Inputs map positionally onto respond(message, chat_history, instruction);
    # it returns ("", chat_history), which is routed to [msg, chatbot].
    btn.click(respond, inputs=[msg, chatbot, system], outputs=[msg, chatbot])
    msg.submit(respond, inputs=[msg, chatbot, system], outputs=[msg, chatbot])  # Press enter to submit

demo.launch()



* Running on local URL:  http://127.0.0.1:7877

To create a public link, set `share=True` in `launch()`.




In [137]:
def format_chat_prompt(message, chat_history, instruction):
    """Render the system text, past turns and new message as one prompt string.

    Args:
        message: The new user message to be answered.
        chat_history: Iterable of (user_message, bot_message) pairs.
        instruction: System text placed on the first line.

    Returns:
        The prompt text, ending with an open "Assistant:" line for the
        model to complete.
    """
    segments = [f"System:{instruction}"]
    segments.extend(
        f"\nUser: {user_message}\nAssistant: {bot_message}"
        for user_message, bot_message in chat_history
    )
    segments.append(f"\nUser: {message}\nAssistant:")
    return "".join(segments)


def respond(message, chat_history, instruction, temperature=0.7):
    """Produce the assistant's reply for a new message and record the turn.

    Args:
        message: The text the user just submitted.
        chat_history: List of (user, bot) turns; extended in place.
        instruction: System text placed at the top of the prompt.
        temperature: Sampling temperature forwarded to generate().

    Returns:
        A tuple of ("", chat_history): an empty string followed by the
        updated history list.
    """
    prompt = format_chat_prompt(message, chat_history, instruction)
    reply = generate(prompt, temperature)
    chat_history.append((message, reply))
    return "", chat_history


def generate(input, temperature):
    """Return the model's complete reply to an already-formatted prompt.

    Args:
        input: Prompt text, sent to the model as one user message.
        temperature: Sampling temperature passed through to the API call.

    Returns:
        All streamed text fragments concatenated into one string.
    """
    stream = client.chat_completion(
        model="tiiuae/falcon-7b-instruct",
        messages=[{"role": "user", "content": input}],
        max_tokens=1024,
        # "\nUser:" is the turn marker written by format_chat_prompt.
        stop=["\nUser:", "<|endoftext|>"],
        temperature=temperature,
        stream=True,
    )
    # Streamed deltas may have no text (content is optional); substitute ""
    # so str.join does not raise TypeError on a None fragment.
    return "".join(
        message.choices[0].delta.content or "" for message in stream
    )


# Chat UI backed by the model, with an editable system message and a
# temperature control. Components are created inside the Blocks context in the
# order written here.
with gr.Blocks() as demo:
    chatbot = gr.Chatbot(height=240)  # fixed height, just to fit the notebook
    msg = gr.Textbox(label="Prompt")
    with gr.Accordion(label="Advanced options", open=False):
        system = gr.Textbox(label="System message", lines=2, value="A conversation between a user and an LLM-based AI assistant. The assistant gives helpful and honest answers.")
        temperature = gr.Slider(label="temperature", minimum=0.1, maximum=1, value=0.7, step=0.1)
    btn = gr.Button("Submit")
    clear = gr.ClearButton(components=[msg, chatbot], value="Clear console")

    # Inputs map positionally onto
    # respond(message, chat_history, instruction, temperature). The slider must
    # be listed here: without it respond() always ran with its default of 0.7
    # and moving the slider had no effect.
    chat_inputs = [msg, chatbot, system, temperature]
    btn.click(respond, inputs=chat_inputs, outputs=[msg, chatbot])
    msg.submit(respond, inputs=chat_inputs, outputs=[msg, chatbot])  # Press enter to submit

demo.launch()



* Running on local URL:  http://127.0.0.1:7893

To create a public link, set `share=True` in `launch()`.


