In [None]:
# Install the necessary libraries
!pip install -q -U google-generativeai pydantic pandas xgboost

import google.generativeai as genai
import re
import sqlite3
import pandas as pd
import json
from datetime import datetime
from pydantic import BaseModel, Field, field_validator, ValidationError
from google.colab import userdata

# Configure the Gemini client. The API key is read from Colab's secret store
# (save it as GOOGLE_API_KEY via the "Key" menu in the Colab sidebar).
try:
    GOOGLE_API_KEY = userdata.get('GOOGLE_API_KEY')
    genai.configure(api_key=GOOGLE_API_KEY)

    # DEV-23 note: the task list mentions Bedrock, but this notebook uses Gemini.
    # Toxicity handling is done with Gemini's own safety settings.
    model = genai.GenerativeModel('gemini-2.5-flash')
except Exception as e:
    # Best-effort setup: report the problem instead of aborting the cell.
    # `model` is left undefined in this case.
    print(f"‚ùå Error setting up Gemini: {e}")
else:
    print("‚úÖ Setup complete! Gemini is locked and loaded.")

‚úÖ Setup complete! Gemini is locked and loaded.


In [None]:
# --- DEV-20: Policy configuration via JSON (tone, structure) ---

# Stand-in for a JSON policy document that would normally be pulled from the backend.
policy_json_string = """
{
    "tone": "Persuasive yet Professional",
    "forbidden_words": ["winner", "lottery", "urgent", "cash"],
    "min_length": 50,
    "max_length": 300,
    "required_structure": ["Greeting", "Value Proposition", "Call to Action"]
}
"""

# Parse the policy into a dict, then echo the key settings as a sanity check.
# Only malformed JSON is handled here; POLICY_CONFIG stays undefined in that case.
try:
    POLICY_CONFIG = json.loads(policy_json_string)
except json.JSONDecodeError as e:
    print(f"‚ùå Error loading JSON policy: {e}")
else:
    print("‚úÖ DEV-20: Policy Configuration Loaded Successfully!")
    print(f"Current Tone: {POLICY_CONFIG['tone']}")
    print("Forbidden Words: " + ', '.join(POLICY_CONFIG['forbidden_words']))

‚úÖ DEV-20: Policy Configuration Loaded Successfully!
Current Tone: Persuasive yet Professional
Forbidden Words: winner, lottery, urgent, cash


In [None]:
# --- DEV-21: Post-generation validation (Pydantic + Regex) ---

class CampaignMessage(BaseModel):
    """A campaign message together with the customer segment it targets.

    ``message_content`` is checked against the module-level ``POLICY_CONFIG``
    whenever an instance is constructed. A violation raised by the validator
    surfaces to the caller as a pydantic ``ValidationError``.
    """

    target_cluster: str
    message_content: str

    @field_validator('message_content')
    @classmethod
    def enforce_policy(cls, value: str) -> str:
        """Validate the message text against the POLICY_CONFIG rules.

        Checks, in order: length bounds, forbidden words, and the presence of
        a call-to-action keyword.

        Args:
            value: The message text to validate.

        Returns:
            The unchanged text when every check passes.

        Raises:
            ValueError: On the first rule that is violated.
        """
        # 1. Length bounds (defaults apply when the policy omits a key).
        min_len = POLICY_CONFIG.get("min_length", 0)
        max_len = POLICY_CONFIG.get("max_length", 500)

        if len(value) < min_len:
            raise ValueError(f"Message too short ({len(value)} chars). Minimum is {min_len}.")
        if len(value) > max_len:
            raise ValueError(f"Message too long ({len(value)} chars). Maximum is {max_len}.")

        # 2. Forbidden words: whole-word, case-insensitive match.
        forbidden_words = POLICY_CONFIG.get("forbidden_words", [])
        if forbidden_words:
            pattern = re.compile(
                r'\b(?:' + '|'.join(map(re.escape, forbidden_words)) + r')\b',
                re.IGNORECASE,
            )
            found_words = pattern.findall(value)

            if found_words:
                # Deduplicate and sort so the error text is the same on every
                # run (plain set iteration order is not stable for strings).
                unique_found = sorted({w.lower() for w in found_words})
                raise ValueError(f"Policy violation! Forbidden words detected: {', '.join(unique_found)}")

        # 3. Basic structure check: at least one call-to-action keyword.
        # This is a simple substring regex; richer structure checks would
        # need something like LLM evaluation.
        if not re.search(r'(?i)(click|buy|shop|visit|subscribe|discover|learn more)', value):
            raise ValueError("Message lacks a clear Call to Action (CTA).")

        return value

# --- Testing the Validation ---

print("üß™ Running Validation Tests...")

# Test 1: a policy-compliant message must be accepted.
try:
    good_msg = CampaignMessage(
        target_cluster="Cluster 1: High-Value Loyalists",
        message_content="Hello there! We wanted to thank you for your continued support. Discover our new premium tier designed just for you."
    )
    print("‚úÖ Test 1 Passed: Valid message accepted.")
except ValidationError as e:
    print(f"‚ùå Test 1 Failed: {e}")

# Test 2: a message containing the forbidden word "urgent" must be rejected.
try:
    bad_msg = CampaignMessage(
        target_cluster="Cluster 2: Inactive Users",
        message_content="Urgent! Come back and shop with us right now to claim your prize. Click here to learn more."
    )
    # Reaching this line means validation let the message through, which is
    # a test failure, so it is reported with the failure marker.
    print("‚ùå Test 2 Failed (Unexpectedly passed)")
except ValidationError as e:
    print(f"‚úÖ Test 2 Passed: Caught forbidden word correctly.\n   Error: {e.errors()[0]['msg']}")

üß™ Running Validation Tests...
‚úÖ Test 1 Passed: Valid message accepted.
‚úÖ Test 2 Passed: Caught forbidden word correctly.
   Error: Value error, Policy violation! Forbidden words detected: urgent


In [None]:
# --- DEV-22: Compliance & Legal Rules Engine ---
def apply_compliance_rules(message: str, channel: str) -> str:
    """Append the legally required opt-out text for the marketing channel.

    The channel is matched case-insensitively after trimming whitespace.
    Both full names ("SMS", "EMAIL", "WHATSAPP") and the one-letter codes
    used in the campaign dataset ("S", "E", "W") are recognised.

    Args:
        message: The message text to finalise.
        channel: Channel name or code. Non-string values are converted with
            ``str()`` and therefore fall through to "no footer".

    Returns:
        ``message`` with the channel's footer appended, or ``message``
        unchanged when the channel is not recognised.
    """
    channel_code = str(channel).strip().upper()

    if channel_code in ("SMS", "S"):
        legal_footer = "\nReply STOP to opt out."
    elif channel_code in ("EMAIL", "E"):
        legal_footer = "\n\nTo unsubscribe, visit our preference center."
    elif channel_code in ("WHATSAPP", "W"):
        legal_footer = "\n\nReply STOP to end WhatsApp messages."
    else:
        legal_footer = ""

    return message + legal_footer


# --- DEV-23: Toxicity & Guardrails (Using Gemini Safety Settings) ---
from google.generativeai.types import HarmCategory, HarmBlockThreshold

# One shared threshold (BLOCK_LOW_AND_ABOVE) for each of the four harm
# categories listed below; passed to every generate_content call.
safety_settings = {
    harm_category: HarmBlockThreshold.BLOCK_LOW_AND_ABOVE
    for harm_category in (
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
}


# --- The Main Generation Service ---
def generate_cluster_message(cluster_name: str, cluster_description: str, channel: str = "SMS"):
    """Generate, validate and legally finalise one campaign message for a segment.

    The generated copy is validated on its own, against the same limits the
    prompt gives the model, and only then is the channel's legal footer
    appended. Validating after the footer was added let the footer influence
    the result: the e-mail footer contains "visit", which satisfies the
    call-to-action check by itself, and the footer length counted toward the
    length limits.

    Args:
        cluster_name: Segment label; stored as ``target_cluster``.
        cluster_description: Short description of the segment for the prompt.
        channel: Marketing channel, used in the prompt and to pick the footer.

    Returns:
        A ``CampaignMessage`` whose ``message_content`` is the validated copy
        plus the legal footer, or ``None`` when generation or validation
        failed (the error is printed).
    """
    print(f"üöÄ Generating message for: {cluster_name}...")

    # 1. Construct the prompt from the DEV-20 policy.
    prompt = f"""
    You are an expert marketing copywriter. Write a short campaign message for a customer segment.

    Target Audience (Cluster): {cluster_name} - {cluster_description}
    Channel: {channel}

    Strict Rules:
    1. Tone must be: {POLICY_CONFIG['tone']}
    2. Do NOT use these words: {', '.join(POLICY_CONFIG['forbidden_words'])}
    3. Include a clear Call to Action (like "Shop", "Discover", "Click").
    4. Keep it between {POLICY_CONFIG['min_length']} and {POLICY_CONFIG['max_length']} characters.

    Output ONLY the message text.
    """

    try:
        # 2. Call Gemini.
        response = model.generate_content(prompt, safety_settings=safety_settings)
        raw_message = response.text.strip()

        # 3. DEV-21: validate the generated copy alone.
        validated_msg = CampaignMessage(
            target_cluster=cluster_name,
            message_content=raw_message
        )

        # 4. DEV-22: append the legal footer. model_copy(update=...) does not
        # re-run validation, so the footer is not policy-checked.
        compliant_message = apply_compliance_rules(validated_msg.message_content, channel)
        final_msg = validated_msg.model_copy(update={"message_content": compliant_message})

        print(f"‚úÖ Success!\nGenerated Message:\n{final_msg.message_content}\n" + "-"*40)
        return final_msg

    except ValidationError as e:
        print(f"‚ùå Validation Error for {cluster_name}:\n{e}\n")
    except Exception as e:
        # Best-effort: API failures and blocked responses are reported, not raised.
        print(f"‚ùå API/Generation Error: {e}\n")

    return None


# --- Smoke test: run the generator over a few sample K-Means clusters ---
# Keys are cluster labels; values are the segment descriptions fed to the prompt.
sample_clusters = {
    "Cluster 0": "High-Value Loyalists - Spends frequently, high average order value.",
    "Cluster 1": "Bargain Hunters - Only buys during sales, highly price-sensitive.",
    "Cluster 2": "Churn Risk - Hasn't made a purchase in 90 days.",
}

# Every sample cluster is sent through the SMS channel.
for name in sample_clusters:
    desc = sample_clusters[name]
    generate_cluster_message(name, desc, channel="SMS")

üöÄ Generating message for: Cluster 0...
‚úÖ Success!
Generated Message:
Thank you for your continued loyalty. Enjoy a special offer tailored just for you. Click here to unlock your exclusive benefit today! [Link]
Reply STOP to opt out.
----------------------------------------
üöÄ Generating message for: Cluster 1...
‚úÖ Success!
Generated Message:
Smart shoppers, rejoice! Our highly anticipated sale has begun. Secure exceptional value across all categories. Discover your savings: [Link]
Reply STOP to opt out.
----------------------------------------
üöÄ Generating message for: Cluster 2...
‚úÖ Success!
Generated Message:
Hello! We've missed you. It's been 90 days since your last purchase, and we have new arrivals waiting. Rediscover your favorites today! Shop now: [Link]
Reply STOP to opt out.
----------------------------------------


In [None]:
# --- DEV-22/23: Hyper-Personalized Campaign Generation ---

def generate_personalized_msg(row):
    """Generate one personalised campaign message for a dataset row.

    Args:
        row: A mapping-like object with ``.get`` (e.g. a pandas Series from
            ``iterrows``). Reads ``item``, ``item_category``,
            ``discount_given``, ``preferred_channel`` and, if present,
            ``Cluster``; missing keys fall back to the defaults below.

    Returns:
        The validated message text with the channel's legal footer appended,
        or the string ``"Error: <details>"`` when generation or validation
        fails (best-effort, never raises for those failures).
    """
    item = row.get('item', 'our collection')
    category = row.get('item_category', 'general')
    # NOTE(review): the value is interpolated as-is in front of "%". Sample
    # data shows values such as 0.26; confirm whether the column holds a
    # fraction or an already-scaled percentage.
    discount = row.get('discount_given', 0)

    # The dataset stores the channel as a one-letter code (S / E / W). Expand
    # it to the full name so the prompt is meaningful to the model and the
    # compliance engine can match it. Values that are not a known code are
    # passed through unchanged.
    raw_channel = row.get('preferred_channel', 'SMS')
    channel = {"S": "SMS", "E": "EMAIL", "W": "WHATSAPP"}.get(
        str(raw_channel).strip().upper(), raw_channel
    )

    # Optional: use the 'Cluster' column from the K-Means notebook if it exists;
    # otherwise a generic segment label is used.
    cluster = row.get('Cluster', 'Valued Customer')

    prompt = f"""
    You are an AI Marketing Strategist for a premium brand.
    Write a hyper-personalized {channel} campaign message.

    CUSTOMER DATA:
    - Recent Purchase: {item} ({category})
    - Past Discount Received: {discount}%
    - Segment: {cluster}

    TASK:
    1. Acknowledge their recent interest in {item}.
    2. Since they've used a {discount}% discount before, offer a similar relevant incentive.
    3. Maintain a {POLICY_CONFIG['tone']} tone.
    4. NO forbidden words: {', '.join(POLICY_CONFIG['forbidden_words'])}.
    5. Length must be between {POLICY_CONFIG['min_length']} and {POLICY_CONFIG['max_length']} chars.
    6. End with a clear Call to Action (CTA).

    Output ONLY the message text.
    """

    try:
        response = model.generate_content(prompt, safety_settings=safety_settings)
        raw_text = response.text.strip()

        # DEV-21: validate the generated copy on its own, so the legal footer
        # cannot satisfy the CTA check or count toward the length limits.
        validated = CampaignMessage(
            target_cluster=str(cluster),
            message_content=raw_text
        )

        # DEV-22: append the channel's legal footer to the validated copy.
        return apply_compliance_rules(validated.message_content, str(channel))

    except Exception as e:
        return f"Error: {e}"

# --- EXECUTION: demo run for the mentor walkthrough ---
# Expects 'campaign_dataset_refined.csv' in the working directory.
df = pd.read_csv('campaign_dataset_refined.csv')

# Only the first three rows are used for the demo.
preview_rows = df.head(3)

print(f"‚ú® Generating {len(preview_rows)} Sample Personalized Messages...\n")

for i, row in preview_rows.iterrows():
    result = generate_personalized_msg(row)
    print(
        f"Row {i} | Channel: {row.get('preferred_channel')}",
        f"Message: {result}",
        "-" * 50,
        sep="\n",
    )

‚ú® Generating 3 Sample Personalized Messages...

Row 0 | Channel: S
Message: We appreciate your recent interest in our Hair-Oil. As a valued customer, enjoy complimentary shipping on your next order. Discover more premium hair care solutions today.
--------------------------------------------------


ERROR:tornado.access:500 POST /v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint (::1) 97244.54ms


Row 1 | Channel: W
Message: Error: 500 POST https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?%24alt=json%3Benum-encoding%3Dint: TypeError: Failed to fetch
--------------------------------------------------
Row 2 | Channel: E
Message: We trust you're enjoying your recent jeans acquisition. As a valued customer, we'd like to extend a special 2.31% saving on your next fashion exploration. Refine your wardrobe. Shop New Arrivals now.
--------------------------------------------------


In [None]:
import time
import pandas as pd
import re
from pydantic import BaseModel, field_validator, ValidationError

# --- 1. Policy & Validation (DEV-20 & DEV-21) ---
class CampaignMessage(BaseModel):
    """A campaign message together with the customer segment it targets.

    NOTE: this redefines (and, once the cell runs, shadows) the
    ``CampaignMessage`` declared in the DEV-21 cell. Unlike that version, the
    limits and forbidden words are hard-coded copies of the DEV-20 policy
    values rather than read from ``POLICY_CONFIG``, and there is no
    call-to-action check.
    """

    target_cluster: str
    message_content: str

    @field_validator('message_content')
    @classmethod
    def enforce_policy(cls, value: str) -> str:
        """Check length bounds and forbidden words.

        Args:
            value: The message text to validate.

        Returns:
            The unchanged text when both checks pass.

        Raises:
            ValueError: When the text is shorter than 50 or longer than 300
                characters, or contains a forbidden word.
        """
        # Hard-coded copies of the DEV-20 policy values.
        min_len, max_len = 50, 300
        forbidden_words = ["winner", "lottery", "urgent", "cash"]

        if len(value) < min_len:
            raise ValueError(f"Message too short ({len(value)} chars). Minimum is {min_len}.")
        if len(value) > max_len:
            raise ValueError(f"Message too long ({len(value)} chars). Maximum is {max_len}.")

        # Whole-word, case-insensitive match on any forbidden word.
        pattern = re.compile(r'\b(?:' + '|'.join(map(re.escape, forbidden_words)) + r')\b', re.IGNORECASE)
        found_words = pattern.findall(value)
        if found_words:
            # Deduplicate and sort so the error text is the same on every run
            # (plain set iteration order is not stable for strings).
            unique_found = sorted({w.lower() for w in found_words})
            raise ValueError(f"Forbidden words detected: {', '.join(unique_found)}")

        return value

# --- 2. Compliance Engine (DEV-22) ---
def apply_compliance_rules(message: str, channel: str) -> str:
    """Return ``message`` with the channel's opt-out footer appended.

    ``channel`` is converted to a string, trimmed and upper-cased before the
    lookup, so full names and one-letter codes match in any letter case.
    Unrecognised channels get no footer.
    """
    sms_footer = "\nReply STOP to opt out."
    email_footer = "\n\nTo unsubscribe, visit our preference center."
    whatsapp_footer = "\n\nReply STOP to end WhatsApp messages."

    footer_by_code = {
        "SMS": sms_footer,
        "S": sms_footer,
        "EMAIL": email_footer,
        "E": email_footer,
        "WHATSAPP": whatsapp_footer,
        "W": whatsapp_footer,
    }

    normalized = str(channel).strip().upper()
    return message + footer_by_code.get(normalized, "")

# --- 3. Hyper-Personalized Generator (DEV-23) ---
def generate_personalized_msg(row, max_retries=3):
    """Generate one personalised campaign message for a dataset row, with retries.

    Args:
        row: A mapping-like object with ``.get`` (e.g. a pandas Series).
            Reads ``item``, ``item_category``, ``discount_given``,
            ``preferred_channel`` and, if present, ``Cluster``.
        max_retries: Total number of attempts. Values below 1 are treated as
            1, so the function always tries once and always returns a string.

    Returns:
        The validated message text with the channel's legal footer appended,
        or a failure string describing the last error once every attempt has
        failed. API errors and validation errors both trigger a retry.
    """
    item = row.get('item', 'our collection')
    category = row.get('item_category', 'general')
    channel = row.get('preferred_channel', 'S')
    cluster = row.get('Cluster', 'Valued Customer')

    # A missing or unparsable discount falls back to 0.0 instead of crashing
    # before the retry loop or putting "nan%" into the prompt.
    # NOTE(review): sample data shows values such as 0.26; confirm whether the
    # column holds a fraction or an already-scaled percentage.
    try:
        discount = float(row.get('discount_given', 0))
    except (TypeError, ValueError):
        discount = 0.0
    if pd.isna(discount):
        discount = 0.0
    discount = round(discount, 1)

    # The dataset stores the channel as a one-letter code; the prompt gets a
    # readable name so the model knows which medium it is writing for.
    # Anything that is not a known code is used as-is.
    channel_label = {"S": "SMS", "E": "Email", "W": "WhatsApp"}.get(
        str(channel).strip().upper(), channel
    )

    prompt = f"""
    You are an AI Marketing Strategist. Write a short, hyper-personalized {channel_label} campaign message.

    CUSTOMER DATA:
    - Recent Purchase: {item} ({category})
    - Past Discount Received: {discount}%

    TASK:
    1. Acknowledge their recent interest in the {item}.
    2. Offer a similar relevant incentive based on their past {discount}% discount.
    3. Maintain a Persuasive yet Professional tone.
    4. NO forbidden words: winner, lottery, urgent, cash.
    5. Length: 50 to 300 chars.
    6. End with a clear Call to Action (CTA).

    Output ONLY the message text.
    """

    attempts = max(1, int(max_retries))
    last_error = None

    for attempt in range(attempts):
        try:
            # Relies on 'model' and 'safety_settings' from the earlier setup cells.
            response = model.generate_content(prompt, safety_settings=safety_settings)
            raw_text = response.text.strip()

            # Validate the generated copy alone, against the limits the prompt
            # gave the model, so the fixed legal footer cannot push an
            # otherwise valid message over the length limit.
            validated = CampaignMessage(
                target_cluster=str(cluster),
                message_content=raw_text
            )
            return apply_compliance_rules(validated.message_content, channel)

        except Exception as e:
            last_error = e
            # Short pause between attempts; no pause after the final one.
            if attempt < attempts - 1:
                time.sleep(2)

    return f"‚ùå Failed after {attempts} attempts. Error: {last_error}"

# --- 4. EXECUTION: Random Sampling & Data Verification ---
try:
    # Expects 'campaign_dataset_refined.csv' in the working directory; update
    # the filename here if the uploaded file is named differently.
    df = pd.read_csv('campaign_dataset_refined.csv')

    print(f"‚ú® Pulling 3 Random Customers for the Autonomous Workspace...\n")
    print("=" * 60)

    # DataFrame.sample raises ValueError when n exceeds the number of rows,
    # so cap the sample size for very small datasets.
    sample_size = min(3, len(df))

    for i, row in df.sample(n=sample_size).iterrows():
        print(f"üìç ROW {i} - RAW DATA FOR VERIFICATION:")

        # Print every column name and value for this customer so the generated
        # message can be checked against the raw data.
        for col_name, val in row.items():
            print(f"   - {col_name}: {val}")

        print("\nü§ñ AUTONOMOUS GENERATION:")
        result = generate_personalized_msg(row)
        print(f"   Message: {result}")
        print("=" * 60)

except FileNotFoundError:
    print("‚ùå CSV not found! Upload it using the Colab folder icon and update the filename in the code.")

‚ú® Pulling 3 Random Customers for the Autonomous Workspace...

üìç ROW 420 - RAW DATA FOR VERIFICATION:
   - customer_id: 3fabe27a-e976-852a-276a-b860bd98b0f1
   - customer_loyalty_status: Enrolled
   - customer_purchase_frequency: 6.442349452
   - item_price: 943.0
   - discount_given: 0.26252406
   - item_category: Personal care
   - no_of_opens: 5
   - no_of_clicks: 5
   - replies: 1
   - preferred_channel: W
   - preferred_time: night
   - email_consent: 1
   - sms_consent: 1
   - whatsapp_consent: 1
   - message_opened: 1
   - message_clicked: 1
   - message_failed: 0
   - delivery_status: 1
   - send_date: 1/18/2026
   - send_time: 19:40
   - send_timestamp: 01-18-2026 19:40
   - item: Hair-Oil
   - quantity : 4
   - payment_method: Card

ü§ñ AUTONOMOUS GENERATION:
   Message: Hope you're loving your new Hair-Oil! To enhance your haircare, enjoy 3% off our nourishing hair masks for a limited time. Elevate your routine now!

Reply STOP to end WhatsApp messages.
üìç ROW 177 - R