In [2]:
import json
import os
import re

import pandas as pd
from dotenv import load_dotenv
from openai import OpenAI

In [3]:
# Load variables through python-dotenv; override=True is passed so that the
# loaded values replace any variables already set in the process environment.
load_dotenv(override=True)

# Read the API key from the environment and build the client object that the
# later cells use (under the name `openai`) to call the chat completions API.
openai_api_key = os.environ.get('OPENAI_API_KEY')
openai = OpenAI(api_key=openai_api_key)

In [6]:
def _parse_json_rows(raw):
    """Decode the model's reply text into data suitable for ``pd.DataFrame``.

    Args:
        raw: The reply text, or ``None`` when the reply carried no content.

    Returns:
        The decoded JSON value. When the reply is an object whose only value
        is a non-empty list of objects (e.g. ``{"data": [...]}``), that inner
        list is returned instead of the wrapper.

    Raises:
        ValueError: If ``raw`` is ``None`` or blank.
        json.JSONDecodeError: If the text is not valid JSON.
    """
    if raw is None or not raw.strip():
        raise ValueError("The model returned no content to parse.")

    text = raw.strip()

    # Replies are sometimes wrapped in a Markdown code fence (```json ... ```)
    # even when plain JSON was requested; keep only the fenced payload.
    fenced = re.fullmatch(r"```[\w+-]*[ \t]*\r?\n(.*?)\s*```", text, flags=re.DOTALL)
    if fenced:
        text = fenced.group(1)

    data = json.loads(text)

    # Unwrap {"some_key": [{...}, {...}]}. Left wrapped, pandas would build a
    # single column holding dicts rather than one row per record.
    if isinstance(data, dict) and len(data) == 1:
        (only_value,) = data.values()
        if (
            isinstance(only_value, list)
            and only_value
            and all(isinstance(item, dict) for item in only_value)
        ):
            data = only_value

    return data


def generate_synthetic_data(prompt, rows, output_path='synthetic_dataset.csv', model="gpt-5-nano"):
    """Ask the chat model for fake tabular data and return it as a DataFrame.

    Args:
        prompt: Free-text description of the dataset to generate.
        rows: Number of rows to request. Any value accepted by ``int()`` is
            allowed (for example ``10`` or ``10.0``); it must be at least 1.
        output_path: Where to write the result as CSV (without the index).
            Pass ``None`` to skip writing a file.
        model: Name of the chat model to call.

    Returns:
        pandas.DataFrame built from the JSON the model returned.

    Raises:
        ValueError: If ``rows`` is not a whole number >= 1, or the model
            reply has no content.
        json.JSONDecodeError: If the model reply is not valid JSON.
    """
    # Normalise the row count so the prompt never reads "Create 10.0 rows" or
    # "Create None rows" (the latter when no number was supplied).
    try:
        row_count = int(rows)
    except (TypeError, ValueError, OverflowError) as exc:
        raise ValueError(f"rows must be a whole number, got {rows!r}") from exc
    if row_count < 1:
        raise ValueError(f"rows must be at least 1, got {row_count}")

    system_prompt = f"""
You are a synthetic dataset generator.

Create {row_count} rows of realistic but FAKE data.
Return ONLY valid JSON list (no explanations, no comments).
Rules:
- Make values realistic
- No real names, real companies, or identifiable data
- Keep structure consistent across all rows
- Avoid nulls unless they make sense
"""
    user_prompt = f"""
Generate a synthetic dataset based on the following description:
{prompt}
"""
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt}
        ]
    )

    data = _parse_json_rows(response.choices[0].message.content)
    df = pd.DataFrame(data)

    if output_path is not None:
        df.to_csv(output_path, index=False)
    return df

In [7]:
import gradio as gr

# Single-page UI: a description box and a row count feed
# generate_synthetic_data, and the DataFrame it returns is shown in a table.
with gr.Blocks() as demo:
    gr.Markdown("# Synthetic Dataset Generator")

    # Inputs collected from the user.
    prompt_input = gr.Textbox(
        label="Dataset Description",
        placeholder="Describe the dataset you want to generate...",
    )
    rows_input = gr.Number(
        label="Number of Rows",
        value=10,
        precision=0,
    )

    # Trigger and result area.
    generate_button = gr.Button("Generate Dataset")
    output_table = gr.Dataframe(label="Generated Synthetic Dataset")

    # Clicking the button calls the generator with (description, row count)
    # and routes its return value to the table.
    generate_button.click(
        fn=generate_synthetic_data,
        inputs=[prompt_input, rows_input],
        outputs=output_table,
    )

demo.launch()

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.


