In [1]:
!pip install transformers accelerate bitsandbytes gradio

Collecting bitsandbytes
  Downloading bitsandbytes-0.45.5-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting gradio
  Downloading gradio-5.25.2-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.

In [2]:
# Step 2: Import libraries
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr

In [3]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [4]:
# Step 3: Load the fine-tuned model from your Google Drive
model_dir = "/content/drive/MyDrive/fine_tuned_science_gemma3/fine_tuned_science_gemma3"

In [5]:
# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_dir)
model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    torch_dtype=torch.float32,
    device_map="auto"
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [12]:
def generate_segmentation(prompt):
    formatted_prompt = (
        "<start_of_turn>user\n" + prompt + "<end_of_turn>\n"
        "<start_of_turn>assistant\n"
    )

    inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
    input_length = inputs.input_ids.shape[1]  # Get length of input tokens

    output = model.generate(
        **inputs,
        max_new_tokens=850,
        do_sample=False,
        temperature=0.3,
        top_p=0.9,
        top_k=80,
        eos_token_id=tokenizer.eos_token_id
    )

    # Slice the output to exclude input tokens
    generated_tokens = output[0, input_length:]

    # Decode only the generated portion
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    return generated_text


In [11]:
# Cell 7: Create and launch Gradio interface
with gr.Blocks(theme="default") as demo:
    gr.Markdown("# Gemma Market Segmentation Generator")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Startup Idea",
                placeholder="Describe your startup idea here...",
                lines=3
            )
            generate_btn = gr.Button("Generate Market Segmentation")

    output_text = gr.Textbox(label="Generated Market Segmentation", lines=25)

    generate_btn.click(
        fn=generate_segmentation,
        inputs=input_text,
        outputs=output_text
    )

# Launch with share=True to get a public URL
demo.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://09e5c2e8442690f0fd.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [13]:
# user_prompt = "Give market segmentation for a startup idea: AI-based LeetCode platform"
prt = "Create a market segmentation table with 16 points for this startup idea: Startup Idea: An AI-based solution for Automated Lead Nurturing & CRM Integration that tracks interactions across email, social, and web to identify leads needing immediate follow-up, using AI-driven personalization to increase conversions and strengthen customer relationships."
formatted_prompt = (
    "<start_of_turn>user\n" + prt + "<end_of_turn>\n"
    "<start_of_turn>assistant\n"
)

inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)

# Step 4: Generate response
output = model.generate(
    **inputs,
    max_new_tokens=800,
    do_sample=False,
    temperature=0.2,
    top_p=1,
    eos_token_id=tokenizer.eos_token_id
)

# Step 5: Decode and print the result
response = tokenizer.decode(output[0], skip_special_tokens=True)
print(response)



user
Create a market segmentation table with 16 points for this startup idea: Startup Idea: An AI-based solution for Automated Lead Nurturing & CRM Integration that tracks interactions across email, social, and web to identify leads needing immediate follow-up, using AI-driven personalization to increase conversions and strengthen customer relationships.
assistant
[
  {
    "Title/Description": "End Users",
    "Small to Medium Enterprises (SMEs)": "Business owners, managers",
    "Marketing Teams in Large Corporations": "Marketing professionals",
    "Sales Teams in Various Industries": "Sales representatives",
    "Digital Marketing Agencies": "Agency owners and managers"
  },
  {
    "Title/Description": "Lead Nurturing Platforms",
    "Lead Management Software": "Users of lead management tools",
    "CRM Systems": "Companies using CRM for lead tracking",
    "Marketing Automation Tools": "Businesses integrating marketing automation"
  },
  {
    "Title/Description": "E-commerce Pla

In [10]:
import time

while True:
    time.sleep(60)
    print("Keeping session alive...")


Keeping session alive...
Keeping session alive...
Keeping session alive...
Keeping session alive...
Keeping session alive...
Keeping session alive...
Keeping session alive...
Keeping session alive...
Keeping session alive...


KeyboardInterrupt: 