In [None]:
# Install the packages imported by the cells below (torch, transformers, gradio)
# together with accelerate and the torchvision/torchaudio companions.
!pip install torch torchvision torchaudio

!pip install accelerate

!pip install transformers

!pip install gradio


In [None]:
from huggingface_hub import notebook_login

from transformers import AutoModelForCausalLM, AutoTokenizer, set_seed

import torch

import gradio as gr



# Run this cell once to log in to your Hugging Face account.
# NOTE(review): presumably only needed when the checkpoint loaded further down
# is gated or private — confirm whether authentication is actually required.

notebook_login()

In [None]:
# Hub id of the checkpoint that is downloaded and loaded below.
model_path = "ibm-granite/granite-3.2-2b-instruct"
# Prefer the GPU, but fall back to the CPU so this cell does not crash on
# machines without CUDA (generation will simply be slower there).
device = "cuda" if torch.cuda.is_available() else "cpu"
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    device_map=device,
    torch_dtype=torch.bfloat16,
)
# The tokenizer must come from the same checkpoint so its chat template and
# vocabulary match the model.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# --- Enhanced Prediction Function with History ---
def generate_response(user_question, history):
    """Answer ``user_question`` with the model, using ``history`` as context.

    Args:
        user_question: Text the user typed into the message box.
        history: Earlier turns as ``(user_message, bot_message)`` pairs.
            ``None`` is treated as an empty conversation.

    Returns:
        A ``(textbox_value, history)`` tuple. ``textbox_value`` is always the
        empty string (it is wired to the input box, which clears it) and
        ``history`` is a new list: the previous turns plus, for a non-blank
        question, the latest ``(question, answer)`` pair.
    """
    # Work on a copy so the caller's list is never mutated in place, and so a
    # missing history (None) behaves like an empty one instead of crashing.
    turns = list(history) if history else []

    # Nothing to ask: leave the conversation as it is.
    if not user_question or not user_question.strip():
        return "", turns

    # Replay the earlier turns in chat-template form, then add the new question.
    conv = []
    for user_msg, bot_msg in turns:
        conv.append({"role": "user", "content": user_msg})
        conv.append({"role": "assistant", "content": bot_msg})
    conv.append({"role": "user", "content": user_question})

    # return_dict=True yields a mapping of tensors (input_ids, ...), which is
    # unpacked into generate() below.
    model_inputs = tokenizer.apply_chat_template(
        conv,
        return_tensors="pt",
        thinking=True,
        return_dict=True,
        add_generation_prompt=True,
    ).to(device)

    # Re-seed before every generation so identical prompts give identical output.
    set_seed(42)
    output = model.generate(
        **model_inputs,
        max_new_tokens=8192,
    )

    # generate() returns prompt + continuation; decode only the new tokens.
    prompt_length = model_inputs["input_ids"].shape[1]
    prediction = tokenizer.decode(
        output[0, prompt_length:],
        skip_special_tokens=True,
    )

    turns.append((user_question, prediction))
    return "", turns

def clear_history():
    """Return a brand-new empty list, which resets the chat transcript."""
    empty_history = []
    return empty_history

# --- Enhanced Gradio Interface ---
# Layout: a wide column holding the chat transcript, the message box and the
# buttons, next to a narrow column of usage tips.
with gr.Blocks(
    title="Granite 3.2-2B-Instruct Chatbot",
    theme=gr.themes.Soft(),
) as interface:

    gr.Markdown(
        """
        # 🤖 Granite 3.2-2B-Instruct Chatbot
        Ask questions and have a conversation with the IBM Granite model. Your chat history is preserved!
        """
    )

    with gr.Row():
        with gr.Column(scale=4):
            # Chat interface with history
            chatbot = gr.Chatbot(
                label="Chat History",
                height=500,
                show_copy_button=True,
                bubble_full_width=False,
            )

            with gr.Row():
                user_input = gr.Textbox(
                    placeholder="Type your message here...",
                    label="Your Message",
                    lines=2,
                    scale=4,
                )
                send_btn = gr.Button("Send", variant="primary", scale=1)

            with gr.Row():
                clear_btn = gr.Button("Clear History", variant="secondary")

        with gr.Column(scale=1):
            # The message box is multi-line (lines=2): plain Enter inserts a
            # newline and Shift+Enter fires the submit event, so the last tip
            # must not promise that Enter alone sends the message.
            gr.Markdown(
                """
                ### 💡 Tips:
                - Chat history is maintained during your session
                - Use "Clear History" to start fresh
                - The model remembers previous messages in the conversation
                - Press Shift+Enter (or click Send) to send your message
                """
            )

    # Event handlers
    # The Send button and the textbox submit event share the same wiring: the
    # handler receives (message, transcript) and returns the cleared message
    # box together with the updated transcript.
    send_io = [user_input, chatbot]
    for trigger in (send_btn.click, user_input.submit):
        trigger(generate_response, inputs=send_io, outputs=send_io)

    # Clearing replaces the transcript with an empty history.
    clear_btn.click(clear_history, outputs=[chatbot])

# Launch the enhanced app.
# NOTE(review): share=True presumably publishes a temporary public URL and
# inbrowser=True presumably opens a local browser tab — confirm both are wanted.
interface.launch(share=True, inbrowser=True)