In [1]:
!pip install gradio==5.42.0 transformers==4.55.3 torch==2.8.0+cu126 accelerate==1.10.0

      Successfully uninstalled transformers-4.55.2
Successfully installed transformers-4.55.3


In [2]:
import gradio as gr
from transformers import pipeline
import torch

# Build the text-generation pipeline for the Vietnamese legal QA checkpoint.
# On a CUDA machine the weights are loaded as float16 with device_map="auto";
# otherwise float32 is used and no device map is passed.
print("Loading model...")
use_gpu = torch.cuda.is_available()
model_dtype = torch.float16 if use_gpu else torch.float32
model_device_map = "auto" if use_gpu else None
pipe = pipeline(
    "text-generation",
    model="Savoxism/Qwen-2.5-3B-Instruct-Vietnamese-Legal-QA",
    torch_dtype=model_dtype,
    device_map=model_device_map,
)
print("Model loaded successfully!")

def generate_response(user_input, max_length=512, temperature=0.7, top_p=0.9):
    """
    Generate a response from the Vietnamese Legal QA model.

    The question is sent to the module-level ``pipe`` as a single "user" chat
    message and sampled with the given settings.

    Args:
        user_input: The question text. Blank or missing input is rejected
            without calling the model.
        max_length: Upper bound on newly generated tokens; forwarded to the
            pipeline as ``max_new_tokens`` after conversion to ``int``.
        temperature: Sampling temperature forwarded to the pipeline.
        top_p: Nucleus-sampling threshold forwarded to the pipeline.

    Returns:
        str: The assistant's reply. If the pipeline returns a plain string it
        is returned unchanged. On failure (including a chat transcript that
        contains no assistant message) a string starting with
        "Error generating response:" is returned instead of raising, so the
        caller always has text to display.
    """
    # Nothing to ask: skip the expensive generation pass entirely.
    if user_input is None or not str(user_input).strip():
        return "Please enter a question."

    try:
        # Format the message
        messages = [
            {"role": "user", "content": user_input}
        ]

        # Generate response
        result = pipe(
            messages,
            # UI sliders may hand over a float; the token budget must be an integer.
            max_new_tokens=int(max_length),
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=pipe.tokenizer.eos_token_id
        )

        # Extract the generated text
        generated_text = result[0]['generated_text']

        if isinstance(generated_text, list):
            # Chat-style output: scan from the end so the newest assistant
            # turn is the one returned.
            for message in reversed(generated_text):
                if message.get('role') == 'assistant':
                    return message['content']
            # Previously this path fell through and returned None.
            return "Error generating response: the model returned no assistant message."

        # Plain-string output is returned as-is.
        return generated_text

    except Exception as e:
        return f"Error generating response: {str(e)}"

# Build the Gradio UI.
# Page styling: centers the app at a maximum width of 800px and centers the header.
PAGE_CSS = """
    .gradio-container {
        max-width: 800px;
        margin: auto;
    }
    .header {
        text-align: center;
        margin-bottom: 30px;
    }
    """

# Banner rendered at the top of the page.
HEADER_HTML = """
    <div class="header">
        <h1>🏛️ Vietnamese Legal QA Assistant</h1>
        <p>Powered by Qwen-2.5-3B-Instruct-Vietnamese-Legal-QA</p>
    </div>
    """

# Sample prompts offered to the user; each fills the question box.
EXAMPLE_QUESTIONS = (
    "What are the basic labor rights in Vietnam?",
    "How to register a business in Vietnam?",
    "What are the penalties for traffic violations?",
    "Explain property ownership laws in Vietnam",
    "What are the requirements for marriage registration?",
)

with gr.Blocks(
    title="Vietnamese Legal QA Assistant",
    theme=gr.themes.Soft(),
    css=PAGE_CSS,
) as demo:

    gr.HTML(HEADER_HTML)

    # Input area: question box, collapsible sampling controls, action buttons.
    with gr.Row(), gr.Column():
        user_input = gr.Textbox(
            label="Your Question",
            placeholder="Ask your Vietnamese legal question here...",
            lines=3,
            max_lines=10,
        )

        with gr.Accordion("Advanced Settings", open=False):
            max_length = gr.Slider(
                minimum=50, maximum=1024, value=512, step=50,
                label="Max Response Length",
            )
            temperature = gr.Slider(
                minimum=0.1, maximum=2.0, value=0.7, step=0.1,
                label="Temperature (Creativity)",
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.9, step=0.05,
                label="Top-p (Nucleus Sampling)",
            )

        submit_btn = gr.Button("Generate Response", variant="primary", size="lg")
        clear_btn = gr.Button("Clear", variant="secondary")

    # Output area: read-only box holding the model's answer.
    with gr.Row():
        output = gr.Textbox(
            label="Response",
            lines=10,
            max_lines=20,
            interactive=False,
        )

    # Clicking the button and pressing Enter in the question box both run the
    # same generation call with the same inputs.
    generation_inputs = [user_input, max_length, temperature, top_p]
    for trigger in (submit_btn.click, user_input.submit):
        trigger(
            fn=generate_response,
            inputs=generation_inputs,
            outputs=output,
        )

    # "Clear" blanks both the question and the response.
    clear_btn.click(
        fn=lambda: ("", ""),
        outputs=[user_input, output],
    )

    # Example questions (one single-column row per question).
    gr.Examples(
        examples=[[question] for question in EXAMPLE_QUESTIONS],
        inputs=user_input,
        label="Example Questions",
    )

# Launch the interface
if __name__ == "__main__":
    # Re-running this cell in a live kernel builds a fresh `demo` while the
    # server started by the previous run is still holding port 7860. Because
    # server_port is pinned below, the relaunch cannot fall back to another
    # port, so shut down any servers left over from earlier runs first.
    gr.close_all()

    # Launch with public link
    demo.launch(
        share=True,  # This creates a public link
        server_name="0.0.0.0",  # listen on all network interfaces
        server_port=7860,
        show_error=True,
        debug=False
    )

Loading model...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Device set to use cuda:0


Model loaded successfully!
Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://302463c1bd59d619a7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
