In [3]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# CONFIGURATION
NUM_CAMPAIGNS = 50   # We are scaling up to 50 campaigns
DAYS_HISTORY = 365   # 1 full year of data
FILENAME = 'big_google_ads_data.csv'

# Setup dates
end_date = datetime.now()
start_date = end_date - timedelta(days=DAYS_HISTORY)
date_range = pd.date_range(start=start_date, end=end_date)

data = []

# Define Campaign Personalities
types = ['Star', 'Zombie', 'Zero_Hope', 'Decayer', 'Average']
weights = [0.1, 0.3, 0.2, 0.1, 0.3] # 30% Zombies, 20% Zero Hope (Lots of errors to find)

print(f"Generating {DAYS_HISTORY} days of data for {NUM_CAMPAIGNS} campaigns...")

# Generate Campaign Names & Types
campaign_registry = []
for i in range(1, NUM_CAMPAIGNS + 1):
    c_type = np.random.choice(types, p=weights)
    c_name = f"Campaign_{i:03d}_{c_type.upper()}" # e.g., Campaign_001_ZOMBIE
    campaign_registry.append({'name': c_name, 'type': c_type})

# Generate Daily Data
for date in date_range:
    for camp in campaign_registry:
        c_name = camp['name']
        c_type = camp['type']
        
        # Volatility
        daily_noise = np.random.uniform(0.7, 1.3)
        
        # Default Values
        spend = 0
        clicks = 0
        conversions = 0
        revenue = 0
        
        # Logic per Type
        if c_type == 'Star':
            spend = 100 * daily_noise
            cpc = 1.0
            cvr = 0.20
            roas_target = 4.0
            
        elif c_type == 'Zombie':
            # High spend, ROAS < 1
            spend = 150 * daily_noise
            cpc = 3.0
            cvr = 0.05
            roas_target = 0.5 # Losing money
            
        elif c_type == 'Zero_Hope':
            # High spend, NO conversions
            spend = 80 * daily_noise
            cpc = 5.0
            cvr = 0.0
            roas_target = 0
            
        elif c_type == 'Average':
            spend = 50 * daily_noise
            cpc = 2.0
            cvr = 0.10
            roas_target = 1.5
            
        elif c_type == 'Decayer':
            # Gets worse over time
            days_passed = (date - start_date).days
            decay = max(0.1, 1 - (days_passed / 300))
            spend = 100 * daily_noise
            cpc = 2.0
            cvr = 0.10 * decay
            roas_target = 2.0 * decay

        # Calculate Metrics
        clicks = int(spend / cpc) if cpc > 0 else 0
        impressions = int(clicks * np.random.uniform(10, 20))
        conversions = int(clicks * cvr)
        
        # Calculate Revenue (derived from ROAS target to be consistent)
        if c_type == 'Zero_Hope':
            revenue = 0
        else:
            revenue = spend * roas_target * np.random.uniform(0.9, 1.1)

        data.append({
            'Day': date.strftime('%Y-%m-%d'),
            'Campaign': c_name,
            'Cost': round(spend, 2),
            'Impressions': impressions,
            'Clicks': clicks,
            'Conversions': conversions,
            'Total conv. value': round(revenue, 2)
        })

# Export
df = pd.DataFrame(data)
df.to_csv(FILENAME, index=False)
print(f"✓ DONE. Generated {len(df)} rows. Saved to {FILENAME}")

Generating 365 days of data for 50 campaigns...
✓ DONE. Generated 18300 rows. Saved to big_google_ads_data.csv
