In [6]:
import numpy as np
import pandas as pd

# ------------------------------------------------------------
# Synthetic Data Generator Function
# ------------------------------------------------------------
def generate_synthetic_ads(
    n_samples=8000,
    seed=2025,
    age_range=(18, 60),
    cities=None,
    genders=("Male", "Female", "Other"),
    interest_categories=None
):
    np.random.seed(seed)

    if cities is None:
        cities = ["Mumbai", "Delhi", "Bengaluru", "Chennai", "Hyderabad",
                  "Kolkata", "Pune", "Ahmedabad"]

    if interest_categories is None:
        interest_categories = ["Technology", "Fashion", "Finance", "Travel",
                               "Food", "Automobile", "Entertainment", "Healthcare"]

    # ------------------------ #
    # Demographics
    # ------------------------ #
    age = np.random.randint(age_range[0], age_range[1], n_samples)
    gender = np.random.choice(genders, n_samples)
    city = np.random.choice(cities, n_samples)
    interest = np.random.choice(interest_categories, n_samples)

    # ------------------------ #
    # Media & Ad Exposure
    # ------------------------ #
    media_spend = np.random.normal(500000, 100000, n_samples).clip(150000, 800000)
    impressions = np.random.normal(1_000_000, 200_000, n_samples).clip(200000, 2000000)

    # ------------------------ #
    # Engagement Metrics
    # ------------------------ #
    click_through_rate = np.random.uniform(0.2, 5.0, n_samples)
    view_time = np.random.normal(4, 1.2, n_samples).clip(1, 10)   # seconds

    # Creative attributes
    ad_creativity_score = np.random.uniform(1, 10, n_samples)
    brand_familiarity = np.random.uniform(1, 10, n_samples)

    # Derived purchase intent
    purchase_intent = (
        0.35 * ad_creativity_score +
        0.30 * brand_familiarity +
        0.15 * (click_through_rate / 5) +
        np.random.normal(0, 1, n_samples)
    )

    # ------------------------ #
    # Target variable - Ad Performance
    # ------------------------ #
    performance_score = (
        0.4 * purchase_intent +
        0.25 * (view_time / 10) +
        0.25 * (media_spend / 800000) +
        np.random.normal(0, 0.5, n_samples)
    )

    labels = pd.cut(
        performance_score,
        bins=[-np.inf, 2.2, 3.8, np.inf],
        labels=["Low", "Medium", "High"]
    )

    # ------------------------ #
    # Final DataFrame
    # ------------------------ #
    df = pd.DataFrame({
        "age": age,
        "gender": gender,
        "city": city,
        "interest_category": interest,
        "media_spend": media_spend.astype(int),
        "impressions": impressions.astype(int),
        "click_through_rate": click_through_rate,
        "view_time": view_time,
        "ad_creativity_score": ad_creativity_score,
        "brand_familiarity": brand_familiarity,
        "purchase_intent": purchase_intent,
        "ad_performance": labels
    })

    return df

# ------------------------------------------------------------
# Generate dataset
# ------------------------------------------------------------

df = generate_synthetic_ads(n_samples=8000)

print("Sample Rows:")
print(df.head())
print("\nDataset Shape:", df.shape)

# ------------------------------------------------------------
# Save CSV to Colab
# ------------------------------------------------------------
df.to_csv("synthetic_ad_campaign_data.csv", index=False)
print("\n✔ File saved: synthetic_ad_campaign_data.csv")

Sample Rows:
   age  gender       city interest_category  media_spend  impressions  \
0   48    Male  Hyderabad            Travel       365783      1309027   
1   36  Female  Bengaluru           Fashion       654095       523598   
2   48    Male    Chennai              Food       517132       944695   
3   30   Other  Hyderabad              Food       642238      1109337   
4   21   Other  Ahmedabad        Technology       596696      1112572   

   click_through_rate  view_time  ad_creativity_score  brand_familiarity  \
0            1.199488   4.641204             3.411143           7.946261   
1            0.854681   5.905751             5.594464           4.636649   
2            3.964856   5.230709             1.482793           9.944561   
3            4.932087   4.291977             2.780973           4.949981   
4            4.009535   4.500102             8.249004           4.800976   

   purchase_intent ad_performance  
0         3.987898            Low  
1         2.689103 