In [None]:
!pip install --upgrade gradio


Collecting gradio
  Downloading gradio-5.2.1-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
INFO: pip is looking at multiple versions of gradio to determine which version is compatible with other requirements. This could take a while.
Collecting gradio
  Downloading gradio-5.1.0-py3-none-any.whl.metadata (15 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.25.2-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downlo

In [None]:
!pip install litellm

Collecting litellm
  Downloading litellm-1.49.6-py3-none-any.whl.metadata (32 kB)
Collecting openai>=1.51.0 (from litellm)
  Downloading openai-1.51.2-py3-none-any.whl.metadata (24 kB)
Collecting python-dotenv>=0.2.0 (from litellm)
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Collecting tiktoken>=0.7.0 (from litellm)
  Downloading tiktoken-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting jiter<1,>=0.4.0 (from openai>=1.51.0->litellm)
  Downloading jiter-0.6.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.2 kB)
Downloading litellm-1.49.6-py3-none-any.whl (6.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.3/6.3 MB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading openai-1.51.2-py3-none-any.whl (383 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m383.7/383.7 kB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading python_dotenv-1.0.1-py3-

In [None]:
import gradio as gr
from litellm import completion
from google.colab import userdata

In [None]:
class SecretKeyRetriever:
    """
    Retrieves secret keys based on the given model.

    The secret *name* for a model is resolved in two steps:

    1. an exact match on the full model id in ``MODEL_SECRET_MAPPING``;
    2. otherwise, the provider prefix of the model id (the text before the
       first ``/``, e.g. ``"gemini"`` in ``"gemini/gemini-1.5-pro"``) is looked
       up in ``PROVIDER_SECRET_MAPPING``.

    The fallback means a new model from an already-known provider works
    without having to add it to the exact-match table first.
    """

    # Exact model id -> name of the Colab secret holding its API key.
    MODEL_SECRET_MAPPING = {
        "gemini/gemini-pro": "GEMINI_API_KEY",
        "groq/llm-34b-chat": "GROQ_API_KEY",
        "groq/gemma-7b-it": "GROQ_API_KEY",
        "groq/llama3-70b-8192": "GROQ_API_KEY",
        "groq/llama3-8b-8192": "GROQ_API_KEY",
        "groq/mixtral-8x7b-32768": "GROQ_API_KEY"
        # Add more model-secret mappings as needed
    }

    # Provider prefix -> secret name, used when the model id has no exact entry.
    PROVIDER_SECRET_MAPPING = {
        "gemini": "GEMINI_API_KEY",
        "groq": "GROQ_API_KEY",
        # Add more provider-secret mappings as needed
    }

    @staticmethod
    def get_secret_name(model):
        """
        Returns the name of the secret that holds the API key for a model.

        This only consults the mapping tables; it never reads a secret.

        Args:
            model (str): The model id, e.g. "groq/llama3-8b-8192".

        Returns:
            str: The secret name, or None if ``model`` is not a string or
            neither the model id nor its provider prefix is mapped.
        """
        if not isinstance(model, str):
            return None

        exact_match = SecretKeyRetriever.MODEL_SECRET_MAPPING.get(model)
        if exact_match:
            return exact_match

        provider, separator, _ = model.partition("/")
        if not separator:
            # No "provider/" prefix to fall back on.
            return None
        return SecretKeyRetriever.PROVIDER_SECRET_MAPPING.get(provider)

    @staticmethod
    def get_secret_key(model):
        """
        Returns the secret key associated with the given model.

        Args:
            model (str): The name of the model.

        Returns:
            The value returned by ``userdata.get`` for the resolved secret
            name, or None if no secret name is configured for the model.
            NOTE(review): what ``userdata.get`` does when the secret itself is
            missing from Colab is not visible in this file - confirm.
        """
        secret_name = SecretKeyRetriever.get_secret_name(model)
        if secret_name:
            return userdata.get(secret_name)
        return None

In [None]:
def run_llm(prompt, model_name, top_p=0.9, top_k=40, temperature=0.7, max_output_tokens=50):
    """
    Receives prompt and model name, retrieves the API key from secrets,
    and calls the correct model using litellm.

    Args:
        prompt (str): User prompt, sent as a single "user" message.
        model_name (str): Model id passed to litellm, e.g. "groq/llama3-8b-8192".
        top_p: Forwarded to the completion call as ``top_p``.
        top_k: Forwarded as ``top_k`` only for models whose id starts with
            "gemini/"; it is not sent for any other model.
        temperature: Forwarded to the completion call as ``temperature``.
        max_output_tokens: Converted with ``int()`` and forwarded as ``max_tokens``.

    Returns:
        The message content of the first choice in the response, or the string
        "Error: <exception>" if anything fails (secret lookup, parameter
        conversion, or the completion call itself).
    """
    try:
        # Looked up inside the try so a secret-access failure is reported
        # through the same "Error: ..." string as every other failure.
        api_key = SecretKeyRetriever.get_secret_key(model_name)

        kwargs = {
            "model": model_name,
            "messages": [{"role": "user", "content": prompt}],
            "api_key": api_key,
            "top_p": top_p,
            "temperature": temperature,
            "max_tokens": int(max_output_tokens)
        }
        # Match on the provider prefix rather than one exact id, so every
        # Gemini model (e.g. "gemini/gemini-1.5-pro") receives top_k.
        if isinstance(model_name, str) and model_name.startswith("gemini/"):
            kwargs["top_k"] = top_k

        # Call litellm completion with specified model, prompt, and parameters
        response = completion(**kwargs)
        return response.choices[0].message.content  # Extract and return the generated text
    except Exception as e:
        return f"Error: {e}"


In [None]:
def llm_function(model, p_value, k_value, temperature, max_output_tokens, prompt, append_mode, current_output):
    """
    Runs the selected model on the prompt and formats the result for display.

    Args:
        model: Model id forwarded to ``run_llm``.
        p_value: Top-P value forwarded to ``run_llm``.
        k_value: Top-K value forwarded to ``run_llm``.
        temperature: Temperature forwarded to ``run_llm``.
        max_output_tokens: Forwarded to ``run_llm``; also used here as a cap on
            the number of whitespace-separated words shown. A value that
            cannot be converted with ``int()`` disables the local cap.
        prompt (str): The user prompt.
        append_mode (str): "Append" adds the new report after
            ``current_output``; any other value replaces it.
        current_output: Text currently shown in the output box (may be None
            or empty).

    Returns:
        str: A report with the request parameters, word-based token counts
        and the generated text.
    """
    output_text = f"Model: {model}\nPrompt: {prompt}\nP-Value: {p_value}\nK-Value: {k_value}\nTemperature: {temperature}\nMax Output Tokens: {max_output_tokens}\n"

    # run_llm may hand back None for an empty completion; normalise to "".
    generated_output = run_llm(prompt, model, p_value, k_value, temperature, max_output_tokens) or ""

    # The limit comes from a UI field, so it may not be a valid integer.
    try:
        word_limit = int(max_output_tokens)
    except (TypeError, ValueError):
        word_limit = None

    # Only rebuild the text when it actually exceeds the cap: re-joining the
    # words collapses newlines and indentation, which would break the
    # Markdown formatting of a response that needs no truncation.
    output_words = generated_output.split()
    if word_limit is not None and 0 <= word_limit < len(output_words):
        output_words = output_words[:word_limit]
        generated_output = " ".join(output_words)

    # Include token counts in the output (approximated by word counts)
    output_text += f"Input Tokens: {len(prompt.split())}, Output Tokens: {len(output_words)}\n\n"
    output_text += f"Generated Output: {generated_output}"

    # Append only when there is previous output to append to; on the first
    # run the output box value can be None or empty.
    if append_mode == "Append" and current_output:
        return current_output + "\n\n" + output_text
    return output_text

my_theme = gr.Theme.from_hub("gstaff/xkcd")

# Define the Gradio app: left column holds the inputs, right column the output.
with gr.Blocks(theme=my_theme) as demo:
    # Title of the app
    gr.Markdown("## LLM Playground", elem_id="title")

    with gr.Row():
        # First column: model choice, prompt, submit button and parameters
        with gr.Column(scale=1):

            # LLM model dropdown
            llm_dropdown = gr.Dropdown(
                ["gemini/gemini-1.5-pro","groq/gemma-7b-it", "groq/llama3-70b-8192", "groq/llama3-8b-8192", "groq/mixtral-8x7b-32768"],
                label="Select LLM Model",
                value="gemini/gemini-1.5-pro",
            )

            # Prompt input box (second artifact in left column)
            prompt_input = gr.Textbox(
                label="Enter Prompt",
                placeholder="Type your prompt here...",
                lines=10,
            )

            # Submit button; elem_id is a hook for custom CSS (none is defined in this cell)
            submit_button = gr.Button("Generate", elem_id="custom-button")

            gr.Markdown("### LLM Parameters")

            # First row: P-value and K-value sliders
            with gr.Row():
                p_value_slider = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.9, step=0.1,
                    label="Top-P",
                    info="Controls the randomness in responses: lower values (near 0.0) make output focused, while higher values (near 1.0) boost creativity."
                )
                k_value_slider = gr.Slider(
                    minimum=0, maximum=100, value=40, step=1,
                    label="Top-K",
                    info="Limits the number of words the AI considers: lower values make responses predictable, while higher values add variety."
                )

            # Second row: Temperature slider and Max Output Tokens field
            with gr.Row():
                temperature_slider = gr.Slider(
                    minimum=0.0, maximum=2.0, value=0.7, step=0.1,
                    label="Temperature",
                    info="Adjusts the AI's creativity: lower values make responses conservative, while higher values add more variety.")
                # A numeric field (integer, >= 1) instead of free text, so a
                # non-numeric value can never reach the int() conversions in
                # the callback. The variable name is kept for compatibility.
                max_output_tokens_textbox = gr.Number(
                    label="Max Output Tokens",
                    value=3000,
                    precision=0,
                    minimum=1,
                    info="Set the maximum number of tokens (words) in the generated response."
                )

            # Append or overwrite option using a dropdown
            append_option = gr.Dropdown(
                choices=["Append", "Erase and Add"],
                value="Erase and Add",
                label="Select Output Mode",
                info="Choose whether to append to the current output or replace it with new content."
            )

        # Second column for output display
        with gr.Column(scale=1):
            gr.Markdown("### Output")

            output_box = gr.Markdown(label="LLM Output")

    # Connect the button click to the function; output_box is also an input so
    # that "Append" mode can extend the text currently displayed.
    submit_button.click(
        fn=llm_function,
        inputs=[llm_dropdown, p_value_slider, k_value_slider, temperature_slider, max_output_tokens_textbox, prompt_input, append_option, output_box],
        outputs=output_box
    )

# Launch the Gradio app
demo.launch()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


themes/theme_schema@0.0.4.json:   0%|          | 0.00/12.6k [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9d250c3d02e02857aa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


