In [1]:
!pip install panel transformers



In [2]:
!pip install ctransformers



In [12]:
import panel as pn  # Importing the Panel library for web app development.
from ctransformers import AutoConfig, AutoModelForCausalLM, Config  # Importing necessary classes from ctransformers for model configuration and loading.

In [13]:
# Initialize the Panel extension so Panel components (such as the chat
# interface built later in this notebook) can be rendered.
pn.extension()

In [14]:
# System prompt for the Mistral model; `apply_template` prepends it to the
# first message of every conversation.
SYSTEM_INSTRUCTIONS = "You are a coder assistant."
# Name of the GGUF model file passed to `from_pretrained` as `model_file`;
# replace it with another file name if you want a different model version.
model_file = "mistral-7b-instruct-v0.1.Q6_K.gguf"

In [15]:
def apply_template(history):
    """Render a chat history as one prompt string for the Mistral model.

    Messages whose ``user`` is ``"System"`` are dropped. The first remaining
    message is wrapped as ``<s>[INST]{SYSTEM_INSTRUCTIONS} {text}[/INST]``.
    Every later message is rendered as ``{text}</s>`` when its ``user`` is
    ``"Mistral"`` and as ``[INST]{text}[/INST]`` otherwise.

    Args:
        history: Iterable of message objects exposing ``user`` and ``object``
            attributes.

    Returns:
        The concatenated prompt, or an empty string when no non-system
        message is present.
    """
    turns = [entry for entry in history if entry.user != "System"]
    if not turns:
        return ""

    # The opening turn carries the system instructions; the rest alternate
    # between model replies and further user instructions.
    opening, *followups = turns
    pieces = [f"<s>[INST]{SYSTEM_INSTRUCTIONS} {opening.object}[/INST]"]
    for entry in followups:
        if entry.user == "Mistral":
            pieces.append(f"{entry.object}</s>")
        else:
            pieces.append(f"[INST]{entry.object}[/INST]")
    return "".join(pieces)

In [16]:
async def callback(contents: str, user: str, instance: pn.chat.ChatInterface):
    """Stream the Mistral model's reply for the current conversation.

    On the first call the model is loaded with
    ``AutoModelForCausalLM.from_pretrained`` and cached in the module-level
    ``llms`` dict under the key ``"mistral"``; later calls reuse the cached
    model. The prompt is built from the whole chat history held in
    ``instance.objects``, so ``contents`` and ``user`` are not read here.

    Args:
        contents: Text of the message that triggered the callback (unused).
        user: Name of the user who sent that message (unused).
        instance: Chat interface whose ``objects`` hold the chat history and
            whose ``placeholder_text`` is updated while the model loads.

    Yields:
        The reply accumulated so far, once per generated token, so the chat
        message can be updated as generation progresses.
    """
    if "mistral" not in llms:
        # Show the loading hint only while the model is being loaded, then put
        # the previous placeholder back; otherwise every later reply would
        # still display "Downloading model" while it is being generated.
        previous_placeholder = instance.placeholder_text
        instance.placeholder_text = "Downloading model; please wait..."
        try:
            # NOTE(review): max_new_tokens equals context_length, which leaves
            # no headroom for the prompt itself -- confirm this is intended.
            config = AutoConfig(
                config=Config(
                    temperature=0.9, max_new_tokens=2048, context_length=2048, gpu_layers=1
                ),
            )
            llms["mistral"] = AutoModelForCausalLM.from_pretrained(
                "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
                model_file=model_file,
                config=config,
            )
        finally:
            instance.placeholder_text = previous_placeholder

    llm = llms["mistral"]
    # Snapshot the history before formatting it into a single prompt string.
    prompt = apply_template(list(instance.objects))
    message = ""
    for token in llm(prompt, stream=True):
        message += token
        yield message  # Emit the growing reply for incremental display.
  

In [17]:
# Cache of loaded language models, keyed by name; `callback` fills it on
# first use.
llms = {}

# Chat widget: incoming messages are handed to `callback`, and the replies it
# produces are attributed to the "Mistral" user.
chat_interface = pn.chat.ChatInterface(callback=callback, callback_user="Mistral")

# Post an opening greeting as the "System" user without requesting a response.
chat_interface.send("Hello!  How can I help you today?", respond=False, user="System")

# Mark the interface as servable so it is displayed when the app is served.
chat_interface.servable()

In [18]:
# Start the Panel server for app.ipynb through the shell. The cell keeps
# running until the server process is stopped.
# NOTE(review): app.ipynb is presumably this notebook -- confirm the file name.
!panel serve app.ipynb


!pip install panel transformers

!pip install ctransformers

!panel serve app.ipynb

!pip uninstall ctransformers --yes

!CT_METAL=1 pip install ctransformers --no-binary ctransformers
2025-01-27 10:00:44,301 Starting Bokeh server version 3.6.2 (running on Tornado 6.4)
2025-01-27 10:00:44,303 User authentication hooks NOT provided (default user enabled)
2025-01-27 10:00:44,305 Bokeh app running at: http://localhost:5006/app
2025-01-27 10:00:44,305 Starting Bokeh server with process id: 12239
2025-01-27 10:00:46,678 WebSocket connection opened
2025-01-27 10:00:46,679 ServerConnection created
Fetching 1 files: 100%|█████████████████████████| 1/1 [00:00<00:00, 6492.73it/s]
ggml_metal_graph_compute: command buffer 2 failed with status 5
GGML_ASSERT: /private/var/folders/_c/d2qh9tqs4m78b_70mgslfgkr0000gn/T/pip-install-kj37rz3h/ctransformers_ed3b7c40f2a446bb89f90d42622cfe36/models/ggml/ggml-metal.m:1190: false
