# Marketing KPI Data Cleaning & Feature Engineering
This notebook performs:
- Data loading
- Cleaning & preprocessing
- KPI calculations (CAC, ROAS, Conversion Rate)
- Synthetic region generation (random assignment to North/South/East/West)
- Anomaly injection (spend tripled on a 2% sample of rows)
- Export of processed dataset

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the raw campaign CSV into a DataFrame and preview the first rows.
df = pd.read_csv(
    filepath_or_buffer="/mnt/data/marketing_campaign_dataset.csv",
)
df.head()

In [None]:
# Standardize column names: trim surrounding whitespace, lowercase, and
# replace spaces with underscores.
# Trimming first keeps a header such as " Spend" from turning into "_spend",
# which the later `'spend' in df.columns` checks would silently miss.
# regex=False makes the literal-space replacement explicit regardless of the
# pandas default for `regex`.
df.columns = (
    df.columns.str.strip()
    .str.lower()
    .str.replace(' ', '_', regex=False)
)
df.columns

In [None]:
# Parse the 'date' column to datetimes when the dataset has one.
# errors='coerce' turns unparseable values into NaT instead of raising.
has_date_column = 'date' in df.columns
if has_date_column:
    parsed_dates = pd.to_datetime(df['date'], errors='coerce')
    df['date'] = parsed_dates
df.info()

In [None]:
# Discard every row that has at least one missing value in any column.
df = df.dropna(axis=0, how='any')

In [None]:
# Remove invalid rows for safe KPI calculation.
# Only rows where every present metric column (conversions, clicks, spend) is
# strictly positive are kept, so the KPI ratios never divide by zero.
# Columns that are absent from the dataset are simply not checked.
valid_rows = pd.Series(True, index=df.index)
for metric in ('conversions', 'clicks', 'spend'):
    if metric in df.columns:
        valid_rows &= df[metric] > 0

# Take an explicit copy: the filtered frame gets new columns assigned later,
# and assigning into a frame derived by boolean indexing can raise
# SettingWithCopyWarning on pandas versions without copy-on-write.
df = df[valid_rows].copy()

In [None]:
# KPI calculations, expressed as (new column, numerator, denominator).
# Each KPI is added only when both of its input columns are present.
kpi_definitions = (
    ('cac', 'spend', 'conversions'),
    ('roas', 'revenue', 'spend'),
    ('conversion_rate', 'conversions', 'clicks'),
)
for kpi_name, numerator, denominator in kpi_definitions:
    if numerator in df.columns and denominator in df.columns:
        df[kpi_name] = df[numerator] / df[denominator]
df.head()

In [None]:
# Add synthetic region column.
# Each row gets one of the four regions drawn uniformly at random. A seeded
# generator is used so the assignment (and therefore the exported dataset) is
# reproducible across runs, matching the fixed random_state used when sampling
# anomaly rows in this notebook.
regions = ['North', 'South', 'East', 'West']
region_rng = np.random.default_rng(42)
df['region'] = region_rng.choice(regions, size=len(df))

In [None]:
# Inject 2% anomaly spikes in spend: a reproducible 2% sample of rows
# (random_state=42) has its spend multiplied by 3.
# Guarded on column presence like the other cells, so a dataset without a
# 'spend' column is left untouched instead of raising KeyError.
# NOTE(review): any KPI columns derived from spend (cac, roas) were computed
# before this step and are not refreshed here, so on spiked rows they no
# longer match the stored spend - confirm this is intended.
if 'spend' in df.columns:
    anomaly_indices = df.sample(frac=0.02, random_state=42).index
    df.loc[anomaly_indices, 'spend'] *= 3

In [None]:
# Write the processed frame to CSV (row index omitted) and report its shape.
df.to_csv(
    path_or_buf='/mnt/data/processed_marketing_data.csv',
    index=False,
)
df.shape