# Models

In [1]:
!pip install -q requests torch bitsandbytes transformers sentencepiece accelerate

In [2]:
!pip install gradio



In [3]:
from google.colab import userdata
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, BitsAndBytesConfig
import torch
import gradio as gr

In [4]:
hf_token = userdata.get('HF_TOKEN')
login(hf_token, add_to_git_credential=True)

In [5]:
# Model options

MODEL_OPTIONS = {
    "Meta-LLaMA 3.1": "meta-llama/Meta-Llama-3.1-8B-Instruct",
    "Phi-3 Mini": "microsoft/Phi-3-mini-4k-instruct",
    "Gemma 2B IT": "google/gemma-2-2b-it",
    "Qwen2 7B": "Qwen/Qwen2-7B-Instruct",
}

DEFAULT_MODEL = "Meta-LLaMA 3.1"

In [6]:
# Quantization Config

quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4"
)

In [7]:
# Function to generate synthethic data

def generate_synthetic_data(model_name, data_description, dataset_type, output_format):
    model_id = MODEL_OPTIONS[model_name]
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token

    messages = [
        {"role": "system", "content": f"You are a data generator AI specialized in generating synthetic {dataset_type} data."},
        {"role": "user", "content": f"Generate synthetic {dataset_type.lower()} data for: {data_description}. Output format: {output_format}."},
    ]

    input_ids = tokenizer.apply_chat_template(
        messages, return_tensors="pt", add_generation_prompt=True
    ).to("cuda")

    model = AutoModelForCausalLM.from_pretrained(
        model_id, device_map="auto", quantization_config=quant_config
    )

    output_ids = model.generate(input_ids, max_new_tokens=512)

    # Extract only the generated tokens (excluding the prompt)
    generated_ids = output_ids[:, input_ids.shape[1]:]

    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    # Clean up
    del tokenizer, model, input_ids, output_ids
    torch.cuda.empty_cache()

    return generated_text.strip()


In [9]:
# Gradio Interface

with gr.Blocks() as demo:
    gr.Markdown("# 🧪 Synthetic Data Generator")
    gr.Markdown("Select a model, dataset type, output format, and describe the data you want to generate.")

    model_dropdown = gr.Dropdown(
        choices=list(MODEL_OPTIONS.keys()), label="Select Model"
    )
    dataset_type_dropdown = gr.Dropdown(
        choices=["Tabular", "Time-series", "Text"], label="Dataset Modality"
    )
    output_format_dropdown = gr.Dropdown(
        choices=["JSON", "CSV", "Markdown"], label="Output Format"
    )
    data_input = gr.Textbox(
        lines=3, placeholder="Describe the data you need...", label="Data Description"
    )
    generate_button = gr.Button("Generate Data")
    output_box = gr.Textbox(label="Generated Synthetic Data")

    generate_button.click(
        fn=generate_synthetic_data,
        inputs=[model_dropdown, data_input, dataset_type_dropdown, output_format_dropdown],
        outputs=output_box,
    )

demo.launch(debug=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://40d31a097c5ac06f64.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://40d31a097c5ac06f64.gradio.live


