/content/drive/MyDrive/AdProject/ads_dataset.csv


In [27]:
# =========================
# Gemini 2.5 Flash Ad Script Generator (Updated)
# =========================

# Set Gemini API key
# SECURITY: the key must never be written into the notebook (it ends up in the
# saved file, its outputs and version control). It is taken from the
# environment when already present; otherwise it is requested through a hidden
# prompt. Any key that was previously hardcoded here should be revoked/rotated.
import os
from getpass import getpass

if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Enter your Gemini API key: ").strip()

import os
import json
import re
import pandas as pd
import google.generativeai as genai
from datetime import datetime
import glob, shutil

# Configure Gemini API (key is read from the GEMINI_API_KEY environment variable)
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# ---------- Directories ----------
# Fixed root folders; every run writes into its own timestamped sub-folder.
BASE_OUTPUT_DIR = "/content/drive/MyDrive/AdProject/data/processed"
BASE_SCENE_DIR = "/content/drive/MyDrive/AdProject/data/scene_input"
AD_SCRIPT_BASE = os.path.join(BASE_OUTPUT_DIR, "ad_scripts")

# Same timestamp is shared by the script folder and the scene folder of a run.
timestamp = f"{datetime.now():%Y-%m-%d_%H-%M-%S}"
OUTPUT_DIR = os.path.join(AD_SCRIPT_BASE, timestamp)
SCENE_DIR = os.path.join(BASE_SCENE_DIR, timestamp)

# Create the base folder and both per-run folders (no error if they exist).
for run_dir in (AD_SCRIPT_BASE, OUTPUT_DIR, SCENE_DIR):
    os.makedirs(run_dir, exist_ok=True)

# ---------- Delete Previous Outputs ----------
# Only the exact answer "yes" (case-insensitive, surrounding spaces ignored)
# triggers deletion; anything else keeps earlier runs.
delete_prev = input("Do you want to delete previous ad script outputs? (yes/no): ").strip().lower()
if delete_prev != "yes":
    print("ℹ️ Previous outputs kept.")
else:
    # Each base folder is paired with the current run's folder, which is spared.
    for base_dir, keep_dir in ((AD_SCRIPT_BASE, OUTPUT_DIR), (BASE_SCENE_DIR, SCENE_DIR)):
        for folder in glob.glob(os.path.join(base_dir, "*")):
            if folder == keep_dir or not os.path.isdir(folder):
                continue  # plain files and the current run folder are left alone
            shutil.rmtree(folder)
    print("✅ Old ad script outputs deleted, preprocessing data preserved.")

# ---------- Script Parser ----------
def parse_ad_script(raw_script, product, ad_type, text_on_screen):
    """Parse a free-form ad script into a structured dictionary.

    Each line is matched case-insensitively against known labels *after*
    Markdown decoration has been removed (leading bullets/heading marks and
    ``**`` bold markers), because the model frequently answers with
    ``**Headline:** ...`` or ``*   **Location:** ...`` rather than bare labels.
    Lines that carry no label are treated as a continuation of the field that
    was filled most recently.

    Args:
        raw_script: Raw script text to parse.
        product: Value stored under ``"Product"``.
        ad_type: Value stored under ``"AdType"``.
        text_on_screen: Value stored under ``"TextOnScreen"``.

    Returns:
        dict with keys ``Product``, ``AdType``, ``Headline``, ``AdCopy``,
        ``Scenes`` (list of dicts with ``id``, ``description``, ``location``,
        ``emotion``, ``Voiceover``), ``Voiceover`` and ``TextOnScreen``.
    """
    structured = {
        "Product": product,
        "AdType": ad_type,
        "Headline": "",
        "AdCopy": "",
        "Scenes": [],
        "Voiceover": {"English": "", "Hindi": ""},
        "TextOnScreen": text_on_screen
    }
    current_section = None

    def value_of(text):
        """Return the text after the first colon, or "" when there is none."""
        return text.split(":", 1)[1].strip() if ":" in text else ""

    def joined(existing, extra):
        """Append a continuation line without leaving a leading space."""
        return f"{existing} {extra}".strip()

    for raw_line in raw_script.splitlines():
        # Drop bullets / heading marks / horizontal rules and bold markers so
        # that "**Headline:** X" is seen as "Headline: X" and "---" as empty.
        line = re.sub(r'^[\s*\-•#>]+', '', raw_line).replace("**", "").strip()
        if not line:
            continue
        lowered = line.lower()
        in_scene = current_section == "scenes" and bool(structured["Scenes"])

        if lowered.startswith("headline:"):
            structured["Headline"] = value_of(line)
            current_section = "headline"
        elif lowered.startswith("ad copy:"):
            structured["AdCopy"] = value_of(line)
            current_section = "adcopy"
        elif lowered.startswith("voiceover:"):
            structured["Voiceover"]["English"] = value_of(line)
            current_section = "voiceover"
        elif lowered.startswith("hindi voiceover:"):
            structured["Voiceover"]["Hindi"] = value_of(line)
            current_section = "voiceover_hindi"
        elif lowered.startswith("(hindi)"):
            # This marker is not necessarily followed by a colon, so take
            # whatever follows it instead of splitting on ":" (IndexError before).
            structured["Voiceover"]["Hindi"] = line[len("(hindi)"):].lstrip(" :-").strip()
            current_section = "voiceover_hindi"
        elif in_scene and re.match(r'description\b[^:]*:', lowered):
            # e.g. "Description of visuals and actions: ..."
            structured["Scenes"][-1]["description"] = value_of(line)
        elif in_scene and lowered.startswith("location:"):
            structured["Scenes"][-1]["location"] = value_of(line)
        elif in_scene and lowered.startswith("emotion:"):
            structured["Scenes"][-1]["emotion"] = value_of(line)
        else:
            scene_match = re.match(r'(scene\s*\d+|step\s*\d+|^\d+\.)[:\-]?\s*(.*)', line, re.I)
            if scene_match:
                # A bare "Scene 1:" keeps the label as provisional description
                # until a description line or continuation text arrives.
                desc = scene_match.group(2).strip() or line
                structured["Scenes"].append({
                    "id": len(structured["Scenes"]) + 1,
                    "description": desc,
                    "location": "",
                    "emotion": "",
                    "Voiceover": {"English": "", "Hindi": ""}
                })
                current_section = "scenes"
            elif current_section == "headline":
                structured["Headline"] = joined(structured["Headline"], line)
            elif current_section == "adcopy":
                structured["AdCopy"] = joined(structured["AdCopy"], line)
            elif current_section == "voiceover":
                structured["Voiceover"]["English"] = joined(structured["Voiceover"]["English"], line)
            elif current_section == "voiceover_hindi":
                structured["Voiceover"]["Hindi"] = joined(structured["Voiceover"]["Hindi"], line)
            elif in_scene:
                structured["Scenes"][-1]["description"] = joined(structured["Scenes"][-1]["description"], line)
    return structured

# ---------- Scene & Script Enrichment ----------
def enrich_ad_script(product, audience, emotion, ad_type="General", text_on_screen="", language="English"):
    """Request an ad script from Gemini and return ``(structured, raw_script)``.

    The prompt is sent to the ``gemini-2.5-flash`` model, the reply is printed
    and parsed with ``parse_ad_script``, and the scene list is padded with
    placeholder scenes until it holds at least three entries.

    If anything inside the request/parse step raises, a warning is printed and
    a placeholder script with three generic scenes is returned together with
    an empty raw string (best-effort: the exception is not re-raised).
    """
    def placeholder_scene(scene_id):
        # Generic stand-in used both for padding and for the failure fallback.
        return {
            "id": scene_id,
            "description": f"Creative scene for {product}",
            "location": "Generic location",
            "emotion": "Neutral",
            "Voiceover": {"English": "", "Hindi": ""},
        }

    model = genai.GenerativeModel("gemini-2.5-flash")
    prompt = f"""
Generate a full advertisement script for the product "{product}".
Target Audience: {audience}
Emotion/Tone: {emotion}
Ad Type: {ad_type}
Text on Screen / Tagline: {text_on_screen}
Language: {language}

Include the following clearly labeled sections:
- Headline: catchy and concise
- Ad Copy: 1-2 sentences, persuasive
- Scenes: At least 3, each with:
    - Scene X: Description of visuals and actions
    - Location: Where it takes place
    - Emotion: How characters feel
    - Voiceover: English & Hindi
- Main Voiceover: English & Hindi
- Text on Screen

Format each section with clear labels (e.g., 'Headline:', 'Ad Copy:', 'Scene 1:', 'Voiceover:', etc.)
"""
    try:
        raw_script = model.generate_content(prompt).text
        print(f"--- Raw script for {product} ---\n{raw_script}\n--- End ---\n")
        structured = parse_ad_script(raw_script, product, ad_type, text_on_screen)
        # Ensure at least 3 scenes: ids continue from the current scene count.
        scenes = structured["Scenes"]
        for scene_id in range(len(scenes) + 1, 4):
            scenes.append(placeholder_scene(scene_id))
        return structured, raw_script
    except Exception as err:
        print(f"⚠️ Failed to generate script for {product}: {err}")

    # Reached only after a failure above.
    fallback = {
        "Product": product,
        "AdType": ad_type,
        "Headline": "",
        "AdCopy": "",
        "Scenes": [placeholder_scene(scene_id) for scene_id in (1, 2, 3)],
        "Voiceover": {"English": "", "Hindi": ""},
        "TextOnScreen": text_on_screen,
    }
    return fallback, ""

# ---------- Generate & Save ----------
def generate_and_save(product, audience, emotion, ad_type="General", text_on_screen="", language="English"):
    """Generate an ad script for ``product`` and write it to the run folders.

    Three files are written: the raw text and the structured JSON go to
    ``OUTPUT_DIR``; the scene list alone goes to ``SCENE_DIR``.

    The file stem is derived from the product name (lower-cased, spaces turned
    into underscores). Characters that are path separators or otherwise
    invalid in file names are replaced with ``_`` so that a product such as
    "A/B Tool" cannot point outside the output folder or at a missing
    sub-folder, and non-string product values (e.g. numeric CSV cells) are
    converted with ``str`` first.

    Returns:
        The structured script dictionary produced by ``enrich_ad_script``.
    """
    structured, raw_script = enrich_ad_script(product, audience, emotion, ad_type, text_on_screen, language)

    unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
    # Fall back to a fixed stem when the product name is empty.
    stem = str(product).strip().lower().replace(' ', '_').translate(unsafe_chars) or "product"

    txt_path = os.path.join(OUTPUT_DIR, f"{stem}_script.txt")
    json_path = os.path.join(OUTPUT_DIR, f"{stem}_script.json")
    scene_path = os.path.join(SCENE_DIR, f"{stem}_scene.json")

    # Save raw txt
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(raw_script)
    # Save structured JSON (ensure_ascii=False keeps Hindi text readable)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(structured, f, indent=4, ensure_ascii=False)
    # Save scenes separately
    with open(scene_path, "w", encoding="utf-8") as f:
        json.dump(structured["Scenes"], f, indent=4, ensure_ascii=False)

    print(f"✅ Saved script and scenes for '{product}'")
    return structured

# ---------- Run Mode ----------
print(f"Outputs saved to:\n{OUTPUT_DIR}\n{SCENE_DIR}\n")
mode = input("Choose Mode:\n1 = Personalized Input\n2 = Bulk from Dataset\nEnter choice: ").strip()
all_scripts = []


def _cell_or_default(row, column, default):
    """Return ``row[column]``, or ``default`` when the column is absent or the cell is empty.

    ``row.get(column, default)`` alone is not enough: an existing column with
    an empty cell yields NaN, which would otherwise be sent to the model as
    the text "nan" or break string handling of the product name.
    """
    value = row.get(column, default)
    return default if pd.isna(value) else value


if mode=="1":
    product = input("Product Name: ")
    audience = input("Target Audience: ")
    emotion = input("Emotion/Tone: ")
    ad_type = input("Ad Type (General/Video/Print/etc.): ") or "General"
    text_on_screen = input("Text on Screen / Tagline: ") or ""
    language = input("Language (default English): ") or "English"
    generate_and_save(product, audience, emotion, ad_type, text_on_screen, language)

elif mode=="2":
    # Strip stray whitespace from the pasted path and accept ".CSV" as well.
    dataset_path = input("Dataset path (CSV or Excel): ").strip()
    df = pd.read_csv(dataset_path) if dataset_path.lower().endswith(".csv") else pd.read_excel(dataset_path)
    for idx, row in df.iterrows():
        product = _cell_or_default(row, "Product", f"Product_{idx}")
        audience = _cell_or_default(row, "TargetAudience", "General Audience")
        emotion = _cell_or_default(row, "EmotionTone", "Inspiring")
        ad_type = _cell_or_default(row, "AdType", "General")
        text_on_screen = _cell_or_default(row, "TextOnScreen", "")
        language = _cell_or_default(row, "Language", "English")
        try:
            structured = generate_and_save(product, audience, emotion, ad_type, text_on_screen, language)
            all_scripts.append(structured)
        except Exception as e:
            # Best-effort bulk run: report the row and continue with the next one.
            print(f"❌ Failed for {product}: {e}")
    # Save merged JSON for all scripts
    merged_path = os.path.join(SCENE_DIR, "all_scripts.json")
    with open(merged_path, "w", encoding="utf-8") as f:
        json.dump(all_scripts, f, indent=4, ensure_ascii=False)
    print(f"\n📦 Bulk generation completed! All scripts saved at {merged_path}")

else:
    # Previously an unrecognised choice ended the cell without any feedback.
    print(f"⚠️ Unknown mode '{mode}'. Enter 1 or 2.")


env: GEMINI_API_KEY=<redacted>
Do you want to delete previous ad script outputs? (yes/no): no
ℹ️ Previous outputs kept.
Outputs saved to:
/content/drive/MyDrive/AdProject/data/processed/ad_scripts/2025-09-02_12-06-45
/content/drive/MyDrive/AdProject/data/scene_input/2025-09-02_12-06-45

Choose Mode:
1 = Personalized Input
2 = Bulk from Dataset
Enter choice: 1
Product Name: Cadbury Dairy Milk
Target Audience: Kids and young adults
Emotion/Tone: Joyful, indulgent
Ad Type (General/Video/Print/etc.): TV Commercial
Text on Screen / Tagline: Share the Happiness
Language (default English): english


ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 6305.62ms
ERROR:tornado.access:503 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 2149.22ms


--- Raw script for Cadbury Dairy Milk ---
Here is the full advertisement script for Cadbury Dairy Milk:

---

**Headline:** Unwrap Pure Happiness

**Ad Copy:**
Experience the smooth, creamy taste of Cadbury Dairy Milk, a moment of pure bliss in every bite. It's more than just chocolate; it's a feeling of joy waiting to be shared.

---

**Scene 1:**
*   **Description of visuals and actions:** Two young kids (around 8-10 years old), a boy and a girl, are sitting on a park bench under a sunny sky. The boy unwraps a large Cadbury Dairy Milk bar, its iconic purple wrapper gleaming. He breaks off a square and offers it to the girl. They both take a bite simultaneously, their eyes widening in delight, followed by big, genuine smiles at each other.
*   **Location:** A vibrant, sunny park
*   **Emotion:** Joy, friendship, contentment, innocent delight
*   **Voiceover:**
    *   **English:** (Warm, friendly) "Looks like someone's found the secret to happiness. And it's even better when shared!"


In [29]:
# =========================
# Gemini 2.5 Flash Ad Script Generator (Full Updated)
# =========================

# Set Gemini API key
# SECURITY: do not hardcode the key in the notebook; saved cells and outputs
# expose it to anyone with access to the file. Reuse the value from the
# environment if present, otherwise ask for it with a hidden prompt.
# A key that has already been committed here should be revoked and rotated.
import os
from getpass import getpass

if not os.environ.get("GEMINI_API_KEY"):
    os.environ["GEMINI_API_KEY"] = getpass("Enter your Gemini API key: ").strip()

import os
import json
import re
import pandas as pd
import google.generativeai as genai
from datetime import datetime
import glob, shutil

# Configure Gemini API using the key held in the GEMINI_API_KEY environment variable
genai.configure(api_key=os.environ.get("GEMINI_API_KEY"))

# ---------- Directories ----------
# Root folders are fixed; the per-run folders below are named after the start time.
BASE_OUTPUT_DIR = "/content/drive/MyDrive/AdProject/data/processed"
BASE_SCENE_DIR = "/content/drive/MyDrive/AdProject/data/scene_input"
AD_SCRIPT_BASE = os.path.join(BASE_OUTPUT_DIR, "ad_scripts")

timestamp = f"{datetime.now():%Y-%m-%d_%H-%M-%S}"
OUTPUT_DIR, SCENE_DIR = (
    os.path.join(parent, timestamp) for parent in (AD_SCRIPT_BASE, BASE_SCENE_DIR)
)

# Make sure the base folder and both run folders exist before anything is written.
for run_dir in (AD_SCRIPT_BASE, OUTPUT_DIR, SCENE_DIR):
    os.makedirs(run_dir, exist_ok=True)

# ---------- Delete Previous Outputs ----------
# Deletion happens only for the exact answer "yes" (after trimming/lower-casing).
delete_prev = input("Do you want to delete previous ad script outputs? (yes/no): ").strip().lower()
if delete_prev != "yes":
    print("ℹ️ Previous outputs kept.")
else:
    # (base folder, folder of the current run that must be kept)
    cleanup_plan = ((AD_SCRIPT_BASE, OUTPUT_DIR), (BASE_SCENE_DIR, SCENE_DIR))
    for base_dir, keep_dir in cleanup_plan:
        for folder in glob.glob(os.path.join(base_dir, "*")):
            # Remove sub-folders of earlier runs; skip files and the current run.
            if os.path.isdir(folder) and folder != keep_dir:
                shutil.rmtree(folder)
    print("✅ Old ad script outputs deleted, preprocessing data preserved.")

# ---------- Script Parser ----------
def parse_ad_script(raw_script, product, ad_type, text_on_screen):
    """Parse a labelled ad script into a structured dictionary.

    Recognised labels (case-insensitive): ``Headline:``, ``Ad Copy:``,
    ``Scene N:``, and — once a scene has started — ``Description:``,
    ``Location:``, ``Emotion:``, ``Voiceover English:``, ``Voiceover Hindi:``;
    plus ``Main Voiceover English:``, ``Main Voiceover Hindi:`` and
    ``Text on Screen:``.

    Compared with a plain prefix match this parser also:
      * removes Markdown decoration (leading bullets / ``#`` and ``**`` bold
        markers) before matching, so ``**Headline:** X`` is recognised;
      * keeps text written on the ``Scene N:`` line itself as the scene's
        initial description;
      * attaches unlabelled continuation lines to the field that was filled
        most recently, instead of always to the last scene's description.

    Args:
        raw_script: Raw script text to parse.
        product: Value stored under ``"Product"``.
        ad_type: Value stored under ``"AdType"``.
        text_on_screen: Initial ``"TextOnScreen"`` value; replaced when the
            script contains a ``Text on Screen:`` line.

    Returns:
        dict with keys ``Product``, ``AdType``, ``Headline``, ``AdCopy``,
        ``Scenes``, ``Voiceover`` and ``TextOnScreen``.
    """
    structured = {
        "Product": product,
        "AdType": ad_type,
        "Headline": "",
        "AdCopy": "",
        "Scenes": [],
        "Voiceover": {"English": "", "Hindi": ""},
        "TextOnScreen": text_on_screen
    }

    # Label prefix -> key path, relative to the script dict ...
    script_fields = (
        ("headline:", ("Headline",)),
        ("ad copy:", ("AdCopy",)),
        ("main voiceover english:", ("Voiceover", "English")),
        ("main voiceover hindi:", ("Voiceover", "Hindi")),
        ("text on screen:", ("TextOnScreen",)),
    )
    # ... and relative to the scene currently being filled.
    scene_fields = (
        ("description:", ("description",)),
        ("location:", ("location",)),
        ("emotion:", ("emotion",)),
        ("voiceover english:", ("Voiceover", "English")),
        ("voiceover hindi:", ("Voiceover", "Hindi")),
    )

    def match_label(lowered, table):
        """Return the key path of the first label that prefixes the line, else None."""
        for prefix, path in table:
            if lowered.startswith(prefix):
                return path
        return None

    def resolve(root, path):
        """Walk ``path`` from ``root`` and return (container dict, final key)."""
        for key in path[:-1]:
            root = root[key]
        return root, path[-1]

    target = None  # (container, key) that unlabelled continuation lines extend
    for raw_line in raw_script.splitlines():
        # Strip bullets / heading marks / rules and bold markers before matching.
        line = re.sub(r'^[\s*\-•#>]+', '', raw_line).replace("**", "").strip()
        if not line:
            continue
        lowered = line.lower()
        value = line.split(":", 1)[1].strip() if ":" in line else ""

        if re.match(r'scene\s*\d+:', lowered):
            scene = {
                "id": len(structured["Scenes"]) + 1,
                "description": value,  # inline text after "Scene N:", if any
                "location": "",
                "emotion": "",
                "Voiceover": {"English": "", "Hindi": ""}
            }
            structured["Scenes"].append(scene)
            target = (scene, "description")
            continue

        root = structured
        path = match_label(lowered, script_fields)
        if path is None and structured["Scenes"]:
            # Scene-level labels only count once a scene exists.
            path = match_label(lowered, scene_fields)
            root = structured["Scenes"][-1]

        if path is not None:
            container, key = resolve(root, path)
            container[key] = value
            target = (container, key)
        elif target is not None:
            # Append continuation lines to the most recently labelled field.
            container, key = target
            container[key] = f"{container[key]} {line}".strip()
    return structured

# ---------- Scene & Script Enrichment ----------
def enrich_ad_script(product, audience, emotion, ad_type="General", text_on_screen="", language="English"):
    """Generate an ad script with Gemini and return ``(structured, raw_script)``.

    A strictly formatted prompt is sent to the ``gemini-2.5-flash`` model; the
    reply is printed, parsed by ``parse_ad_script`` and padded with default
    scenes so that at least three scenes are present.

    When the request or the parsing raises, a warning is printed and a default
    script (three generic scenes, empty headline/copy/voiceover) is returned
    with an empty raw string; the exception is not propagated.
    """
    def default_scene(scene_id):
        # Shared by the padding step and by the failure fallback.
        return {
            "id": scene_id,
            "description": f"Creative scene for {product}",
            "location": "Generic location",
            "emotion": "Neutral",
            "Voiceover": {"English": "", "Hindi": ""},
        }

    model = genai.GenerativeModel("gemini-2.5-flash")
    prompt = f"""
Generate a full advertisement script for the product "{product}".
Target Audience: {audience}
Emotion/Tone: {emotion}
Ad Type: {ad_type}
Text on Screen / Tagline: {text_on_screen}
Language: {language}

Format the output EXACTLY as:

Headline: <Catchy headline>
Ad Copy: <1-2 persuasive sentences>
Scene 1:
  Description: <Visuals and actions>
  Location: <Location>
  Emotion: <Character emotions>
  Voiceover English: <English voiceover>
  Voiceover Hindi: <Hindi voiceover>
Scene 2:
  Description: ...
  Location: ...
  Emotion: ...
  Voiceover English: ...
  Voiceover Hindi: ...
Scene 3:
  Description: ...
  Location: ...
  Emotion: ...
  Voiceover English: ...
  Voiceover Hindi: ...
Main Voiceover English: <English>
Main Voiceover Hindi: <Hindi>
Text on Screen: <Text displayed>
"""
    try:
        raw_script = model.generate_content(prompt).text
        print(f"--- Raw script for {product} ---\n{raw_script}\n--- End ---\n")
        structured = parse_ad_script(raw_script, product, ad_type, text_on_screen)
        # Ensure at least 3 scenes with default values if missing; new ids
        # continue from the number of scenes already parsed.
        scenes = structured["Scenes"]
        scenes.extend(default_scene(scene_id) for scene_id in range(len(scenes) + 1, 4))
        return structured, raw_script
    except Exception as err:
        print(f"⚠️ Failed to generate script for {product}: {err}")

    # Only reached when the attempt above failed.
    fallback = {
        "Product": product,
        "AdType": ad_type,
        "Headline": "",
        "AdCopy": "",
        "Scenes": [default_scene(scene_id) for scene_id in (1, 2, 3)],
        "Voiceover": {"English": "", "Hindi": ""},
        "TextOnScreen": text_on_screen,
    }
    return fallback, ""

# ---------- Generate & Save ----------
def generate_and_save(product, audience, emotion, ad_type="General", text_on_screen="", language="English"):
    """Generate an ad script for ``product`` and persist it in the run folders.

    Writes the raw text and the structured JSON into ``OUTPUT_DIR`` and the
    scene list on its own into ``SCENE_DIR``.

    File names are built from the product name (lower-cased, spaces replaced
    by underscores). Path separators and other characters that are invalid in
    file names are replaced by ``_`` so a name like "A/B Tool" neither escapes
    the output folder nor targets a non-existent sub-folder; non-string
    products (e.g. numeric CSV cells) are converted with ``str`` first.

    Returns:
        The structured script dictionary produced by ``enrich_ad_script``.
    """
    structured, raw_script = enrich_ad_script(product, audience, emotion, ad_type, text_on_screen, language)

    unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})
    # Use a fixed stem when the product name is empty.
    stem = str(product).strip().lower().replace(' ', '_').translate(unsafe_chars) or "product"

    txt_path = os.path.join(OUTPUT_DIR, f"{stem}_script.txt")
    json_path = os.path.join(OUTPUT_DIR, f"{stem}_script.json")
    scene_path = os.path.join(SCENE_DIR, f"{stem}_scene.json")

    # Save raw txt
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(raw_script)
    # Save structured JSON (ensure_ascii=False keeps Hindi text readable)
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(structured, f, indent=4, ensure_ascii=False)
    # Save scenes separately
    with open(scene_path, "w", encoding="utf-8") as f:
        json.dump(structured["Scenes"], f, indent=4, ensure_ascii=False)

    print(f"✅ Saved script and scenes for '{product}'")
    return structured

# ---------- Run Mode ----------
print(f"Outputs saved to:\n{OUTPUT_DIR}\n{SCENE_DIR}\n")
mode = input("Choose Mode:\n1 = Personalized Input\n2 = Bulk from Dataset\nEnter choice: ").strip()
all_scripts = []


def _cell_or_default(row, column, default):
    """Return ``row[column]``, or ``default`` when the column is absent or the cell is empty.

    An existing column with an empty cell yields NaN, for which
    ``row.get(column, default)`` does not fall back to the default; without
    this check "nan" would reach the prompt and a NaN product name would fail
    during file-name construction.
    """
    value = row.get(column, default)
    return default if pd.isna(value) else value


if mode=="1":
    product = input("Product Name: ")
    audience = input("Target Audience: ")
    emotion = input("Emotion/Tone: ")
    ad_type = input("Ad Type (General/TV/Video/Print/etc.): ") or "General"
    text_on_screen = input("Text on Screen / Tagline: ") or ""
    language = input("Language (default English): ") or "English"
    generate_and_save(product, audience, emotion, ad_type, text_on_screen, language)

elif mode=="2":
    # Trim whitespace around the pasted path and treat ".CSV" like ".csv".
    dataset_path = input("Dataset path (CSV or Excel): ").strip()
    df = pd.read_csv(dataset_path) if dataset_path.lower().endswith(".csv") else pd.read_excel(dataset_path)
    for idx, row in df.iterrows():
        product = _cell_or_default(row, "Product", f"Product_{idx}")
        audience = _cell_or_default(row, "TargetAudience", "General Audience")
        emotion = _cell_or_default(row, "EmotionTone", "Inspiring")
        ad_type = _cell_or_default(row, "AdType", "General")
        text_on_screen = _cell_or_default(row, "TextOnScreen", "")
        language = _cell_or_default(row, "Language", "English")
        try:
            structured = generate_and_save(product, audience, emotion, ad_type, text_on_screen, language)
            all_scripts.append(structured)
        except Exception as e:
            # Best-effort bulk run: report the failing row and move on.
            print(f"❌ Failed for {product}: {e}")
    # Save merged JSON for all scripts
    merged_path = os.path.join(SCENE_DIR, "all_scripts.json")
    with open(merged_path, "w", encoding="utf-8") as f:
        json.dump(all_scripts, f, indent=4, ensure_ascii=False)
    print(f"\n📦 Bulk generation completed! All scripts saved at {merged_path}")

else:
    # An unrecognised choice used to end the cell without any message.
    print(f"⚠️ Unknown mode '{mode}'. Enter 1 or 2.")


env: GEMINI_API_KEY=<redacted>
Do you want to delete previous ad script outputs? (yes/no): no
ℹ️ Previous outputs kept.
Outputs saved to:
/content/drive/MyDrive/AdProject/data/processed/ad_scripts/2025-09-02_12-22-52
/content/drive/MyDrive/AdProject/data/scene_input/2025-09-02_12-22-52

Choose Mode:
1 = Personalized Input
2 = Bulk from Dataset
Enter choice: 2
Dataset path (CSV or Excel): /content/drive/MyDrive/AdProject/ads_dataset.csv
--- Raw script for Skybag ---
Headline: Where Dreams Take Flight.
Ad Copy: Every journey begins with a dream. Skybag is more than just luggage; it's your partner in every aspiration, built for every path you choose.

Scene 1:
  Description: A young woman, brightly dressed, confidently pulls her sleek Skybag trolley through a bustling, sunlit airport terminal. She pauses, looking out a large window at planes taking off, a hopeful smile on her face. She checks her boarding pass, then looks forward with determination.
  Location

In [None]:
# [REDACTED: an API key was pasted into this cell. Revoke it and keep credentials out of the notebook.]