In [None]:
from huggingface_hub import login
# Authenticate this session with the Hugging Face Hub before any model is
# fetched. NOTE(review): presumably required because the meta-llama
# checkpoint referenced below is access-gated — confirm.
login()

# Load the Model and Tokenizer

In [None]:
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


# Target device handed to the transformers pipeline built in load_model.
device = "cpu"

# Selectable models: the label shown in the UI dropdown mapped to the model
# identifier (in `organisation/name` form) used to load it.
llama_models = {
    "Llama 3.2 3B Instruct": "meta-llama/Llama-3.2-3B-Instruct",
}

def load_model(model_name):
    """Build a text-generation pipeline for one model.

    Args:
        model_name: Identifier passed unchanged to both
            ``AutoTokenizer.from_pretrained`` and
            ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        The object returned by ``pipeline('text-generation', ...)``, wired to
        the loaded model and tokenizer and placed on the module-level
        ``device``.
    """
    # Tokenizer first, then weights — same order as before.
    tok = AutoTokenizer.from_pretrained(model_name)
    causal_lm = AutoModelForCausalLM.from_pretrained(model_name)
    return pipeline(
        'text-generation',
        model=causal_lm,
        tokenizer=tok,
        device=device,
    )

# Pipelines already loaded in this kernel, keyed by the display name used in
# `llama_models`. Filled lazily by generate_chat so each model is loaded once.
model_cache = {}

def generate_chat(user_input, history, model_choice):
    """Generate the assistant's reply and return the updated conversation.

    Args:
        user_input: Text of the new user message.
        history: Previous conversation as a list of
            ``{"role": ..., "content": ...}`` dicts, or ``None`` / an empty
            list to start a new conversation.
        model_choice: Key of ``llama_models`` selecting which model answers.

    Returns:
        A new list containing the previous messages (seeded with the system
        prompt when the conversation is new), followed by the user message
        and the generated assistant message. The ``history`` argument itself
        is not modified.

    Raises:
        KeyError: If ``model_choice`` is not a key of ``llama_models``.
    """
    # Load on first use: nothing else fills model_cache, so a plain lookup
    # would raise KeyError on the very first message.
    if model_choice not in model_cache:
        model_cache[model_choice] = load_model(llama_models[model_choice])
    generator = model_cache[model_choice]

    system_prompt = {"role": "system", "content": "You are a helpful assistant"}

    # A new conversation may arrive as None or as an empty list; both get the
    # system prompt. Otherwise work on a copy so the caller's list is left
    # untouched.
    history = list(history) if history else [system_prompt]

    history.append({"role": "user", "content": user_input})

    response = generator(
        history,
        # Limit only the reply. `max_length` also counts the prompt tokens,
        # so a growing conversation would leave no room for an answer.
        max_new_tokens=512,
        pad_token_id=generator.tokenizer.eos_token_id,
        do_sample=True,
        temperature=0.7,
        top_p=0.9
    )[-1]["generated_text"][-1]["content"]  # content of the last message in the last result

    history.append({"role": "assistant", "content": response})

    return history


# Gradio Block

In [None]:
# Build the chat UI: a model selector, the conversation view, a text box and
# a submit button. Both Enter in the text box and the button call `respond`.
with gr.Blocks() as demo:
    gr.Markdown("<h1><center>Chat with Llama Models</center></h1>")

    model_choice = gr.Dropdown(list(llama_models.keys()), label="Select Llama Model")

    chatbot = gr.Chatbot(label="Chatbot Interface", type="messages")
    txt_input = gr.Textbox(show_label=False, placeholder="Type your message here...")

    def respond(user_input, chat_history, model_choice):
        """Handle one submission from the UI.

        Args:
            user_input: Current content of the text box.
            chat_history: Current value of the chatbot component.
            model_choice: Selected dropdown entry, or None if nothing is
                selected.

        Returns:
            A ``(textbox_value, chatbot_value)`` pair: an empty string to
            clear the text box, and the conversation to display.
        """
        # Ignore blank submissions instead of sending an empty user turn to
        # the model; the conversation is returned unchanged.
        if not user_input or not user_input.strip():
            return "", chat_history
        # Fall back to the first configured model when none is selected.
        if model_choice is None:
            model_choice = list(llama_models.keys())[0]
        updated_history = generate_chat(user_input, chat_history, model_choice)
        return "", updated_history

    # Same handler, inputs and outputs for both triggers.
    respond_inputs = [txt_input, chatbot, model_choice]
    respond_outputs = [txt_input, chatbot]

    txt_input.submit(respond, respond_inputs, respond_outputs)

    submit_btn = gr.Button("Submit")
    submit_btn.click(respond, respond_inputs, respond_outputs)

# Running Inference

In [None]:
# Launch the `demo` Gradio app built in the Blocks cell above.
demo.launch()