In [1]:
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from optimum.intel import OVModelForCausalLM

# Fine-tuned Llama-2-7b exported to OpenVINO IR, pulled from the Hugging Face Hub.
# Model card: https://huggingface.co/OjasPatil/intel-llama2-7b-ov
model_name = "OjasPatil/intel-llama2-7b-ov"

# The tokenizer and the model come from the same repo so their vocabularies match.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# OVModelForCausalLM wraps the OpenVINO model behind the usual `generate()` API.
base_model = OVModelForCausalLM.from_pretrained(model_name)

INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino


Compiling the model to CPU ...


In [2]:
# Quick smoke test of the fine-tuned model outside the Gradio UI.
message = "Do you offer support for free Intel toolkits?"
prompt = f"[INST] {message} [/INST]"
inputs = tokenizer(prompt, return_tensors="pt")
outputs = base_model.generate(**inputs, max_new_tokens=50)
# `generate()` returns prompt tokens followed by the new tokens. Slice the prompt
# off by length instead of string-replacing it out of the decoded text: the
# replace only works if decoding reproduces the prompt character-for-character,
# and it would also delete any echo of the prompt inside the answer itself.
response = tokenizer.decode(
    outputs[0][inputs["input_ids"].shape[1]:],
    skip_special_tokens=True,
).strip()
print(response)

No, free Intel toolkits do not qualify for Priority Support. However, you can still ask questions in the community forum [https://community.intel.com/t5/Software-Products/ct-p/software


In [3]:
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate one reply from the fine-tuned OpenVINO model for the chat UI.

    Args:
        message: The latest user message.
        history: Previous (user, assistant) turns. Accepted because the chat
            interface passes it, but not included in the prompt.
        system_message: Value of the "System message" textbox. Accepted for
            interface compatibility but not included in the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature forwarded to ``generate``.
        top_p: Nucleus-sampling threshold forwarded to ``generate``.

    Yields:
        The decoded model reply, without the prompt and without surrounding
        whitespace. Exactly one value is yielded (no token streaming).
    """
    prompt = f"[INST]{message}[/INST]"
    inputs = tokenizer(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[1]

    # Forward the UI controls; previously they were ignored and generation was
    # always greedy with a hard-coded 100-token limit.
    outputs = base_model.generate(
        **inputs,
        max_new_tokens=int(max_tokens),
        do_sample=True,
        temperature=float(temperature),
        top_p=float(top_p),
    )

    # The output holds prompt tokens followed by new tokens. Drop the prompt by
    # position; a string replace on the decoded text depends on decoding
    # reproducing the prompt exactly and can also eat parts of the answer.
    response = tokenizer.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    ).strip()

    yield response

# Chat front-end for the fine-tuned model. The widgets under `additional_inputs`
# are handed to `respond` after (message, history), in the order listed here.
demo = gr.ChatInterface(
    fn=respond,
    title="Intel Virtual Assistant Chatbot",
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="You are a friendly Chatbot.",
        ),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)

# demo.launch(share=True)

IMPORTANT: You are using gradio version 4.28.3, however version 4.29.0 is available, please upgrade.
--------


In [4]:
%%capture --no-display --no-stderr

# Launch the fine-tuned-model chat UI. The cell magic above must stay on the
# first line of the cell; it captures stdout while leaving stderr and rich
# display output visible.
# get_gradio_setup() supplies the keyword arguments for launch() together with
# a human-readable access message that a later cell prints.
from qarpo import get_gradio_setup

try:
    gradio_interface_args, access_details = get_gradio_setup()
    # queue(max_size=2) bounds how many requests may wait in Gradio's queue.
    demo.queue(max_size=2).launch(**gradio_interface_args)
except Exception as e:
    # NOTE(review): stdout is captured by the cell magic, so this print is
    # presumably not shown; the re-raise is what surfaces the failure.
    print(e)
    raise

In [5]:
# Show the access message (including the URL) returned by get_gradio_setup().
print(access_details)


    The Gradio App is ready. Use the following URL to access it:
        
        URL: https://notebooks.one-edge.intel.com/hub/user-redirect/proxy/8000/


    Notes: The Gradio application is configured with default secure parameters.
           Modifying or disabling such configuration might result in unexpected
           data exposure. For more information about Gradio Security, see:

           https://www.gradio.app/guides/sharing-your-app#security-and-file-access
    


In [9]:
# Shut down the Gradio server for the fine-tuned-model UI once finished with it.
demo.close()

Closing server running on port: 8000


In [7]:
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from optimum.intel import OVModelForCausalLM

# Baseline for comparison: the original Llama-2-7b chat checkpoint (not
# fine-tuned), loaded through plain transformers.
model_name_1 = "meta-llama/Llama-2-7b-chat-hf"

# Tokenizer and weights are fetched from the same checkpoint.
tokenizer_1 = AutoTokenizer.from_pretrained(model_name_1)

base_model_1 = AutoModelForCausalLM.from_pretrained(model_name_1)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    """Generate one reply from the baseline Llama-2 chat model for the chat UI.

    Args:
        message: The latest user message.
        history: Previous (user, assistant) turns. Accepted because the chat
            interface passes it, but not included in the prompt.
        system_message: Value of the "System message" textbox. Accepted for
            interface compatibility but not included in the prompt.
        max_tokens: Upper bound on the number of newly generated tokens.
        temperature: Sampling temperature forwarded to ``generate``.
        top_p: Nucleus-sampling threshold forwarded to ``generate``.

    Yields:
        The decoded model reply, without the prompt and without surrounding
        whitespace. Exactly one value is yielded (no token streaming).
    """
    # NOTE(review): this reuses the name `respond` from an earlier cell and
    # shadows it in the kernel namespace; this version targets the baseline
    # model objects `tokenizer_1` / `base_model_1`.
    prompt = f"[INST]{message}[/INST]"
    inputs = tokenizer_1(prompt, return_tensors="pt")
    prompt_length = inputs["input_ids"].shape[1]

    # Forward the UI controls; previously they were ignored and generation was
    # always greedy with a hard-coded 100-token limit.
    outputs = base_model_1.generate(
        **inputs,
        max_new_tokens=int(max_tokens),
        do_sample=True,
        temperature=float(temperature),
        top_p=float(top_p),
    )

    # The output holds prompt tokens followed by new tokens. Drop the prompt by
    # position; a string replace on the decoded text depends on decoding
    # reproducing the prompt exactly and can also eat parts of the answer.
    response = tokenizer_1.decode(
        outputs[0][prompt_length:],
        skip_special_tokens=True,
    ).strip()

    yield response

# Chat front-end for the baseline model. The widgets under `additional_inputs`
# are handed to `respond` after (message, history), in the order listed here.
demo_og = gr.ChatInterface(
    fn=respond,
    title="Llama2 7b Model",
    additional_inputs=[
        gr.Textbox(
            label="System message",
            value="You are a friendly Chatbot.",
        ),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=2048,
            step=1,
            value=512,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.7,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.1,
            maximum=1.0,
            step=0.05,
            value=0.95,
        ),
    ],
)

# demo_og.launch(share=True)

IMPORTANT: You are using gradio version 4.28.3, however version 4.29.0 is available, please upgrade.
--------


In [9]:
%%capture --no-display --no-stderr

# Launch the baseline-model chat UI. The cell magic above must stay on the
# first line of the cell; it captures stdout while leaving stderr and rich
# display output visible.
# get_gradio_setup() supplies the keyword arguments for launch() together with
# a human-readable access message that a later cell prints.
from qarpo import get_gradio_setup

try:
    # NOTE(review): this rebinds `access_details`, replacing any value left by
    # an earlier launch cell.
    gradio_interface_args, access_details = get_gradio_setup()
    # queue(max_size=2) bounds how many requests may wait in Gradio's queue.
    demo_og.queue(max_size=2).launch(**gradio_interface_args)
except Exception as e:
    # NOTE(review): stdout is captured by the cell magic, so this print is
    # presumably not shown; the re-raise is what surfaces the failure.
    print(e)
    raise

In [10]:
# Show the access message (including the URL) returned by get_gradio_setup().
print(access_details)


    The Gradio App is ready. Use the following URL to access it:
        
        URL: https://notebooks.one-edge.intel.com/hub/user-redirect/proxy/8001/


    Notes: The Gradio application is configured with default secure parameters.
           Modifying or disabling such configuration might result in unexpected
           data exposure. For more information about Gradio Security, see:

           https://www.gradio.app/guides/sharing-your-app#security-and-file-access
    


In [25]:
# Shut down the Gradio server for the baseline-model UI once finished with it.
demo_og.close()

Closing server running on port: 8001
