In [None]:
# Meta advertising data 
# Anonymous organization's social media ad campaign
# Calculates CAC 
# https://www.kaggle.com/code/chrisbow/an-introduction-to-facebook-ad-analysis-using-r
# https://www.kaggle.com/datasets/loveall/clicks-conversion-tracking?resource=download 

# Features & Label
# Gender. Male or female. 
# Interest. A code specifying the category to which the person’s interest belongs (interests are as mentioned in the person’s Facebook public profile).
# Spent. Amount paid by company xyz to Facebook, to show that ad as CPM (cost per 1,000 impressions), CPC (cost per click), and CPA (cost per approved conversion).
# Impressions. The number of times the ad was shown.
# Clicks. Number of clicks on that ad.
# Total conversion. Total number of people who enquired about the product after seeing the ad.
# Approved conversion. Total number of people who bought the product after seeing the ad.

# Requirements
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

In [None]:
# Load the ad-campaign CSV into a DataFrame. The path is relative to the
# notebook's working directory.
df = pd.read_csv(filepath_or_buffer="sales_conversion.csv")

In [None]:
# Customer acquisition cost (CAC):
#   CAC = total marketing & sales spend / number of new customers acquired
# Spend is the summed `Spent` column; new customers is the summed
# `Approved_Conversion` column.
ad_spend = df['Spent'].sum()
new_customers = df['Approved_Conversion'].sum()
cac = ad_spend / new_customers

# Report the two inputs first, then the resulting ratio.
print("Ad spend:", f"${ad_spend:,.2f}")
print("New customers:", f"{new_customers:,}")
print("Customer acquisition costs (CAC):", f"${cac:,.2f}")


In [None]:
# Lead metrics, counting the summed `Total_Conversion` column as leads:
#   CPL (cost per lead)        = total ad spend / leads
#   LCR (lead conversion rate) = new customers / leads, shown as a percentage
# `ad_spend` and `new_customers` are defined in an earlier cell.
leads = df['Total_Conversion'].sum()
cpl = ad_spend / leads
lcr = 100 * (new_customers / leads)

print("Leads:", f"{leads:,}")
print("Cost per lead (CPL):", f"${cpl:,.2f}")
print("Lead conversion rate:", f"{lcr:,.2f}%")

In [None]:
# CPM = cost per 1,000 ad impressions, computed per ad and then averaged.
# Only ads with 0 < Spent < 10 are included.
# NOTE(review): the upper cutoff of 10 is not explained in this notebook;
# confirm it is intentional.
low_spend_mask = df['Spent'].gt(0) & df['Spent'].lt(10)
low_spend_ads = df.loc[low_spend_mask]
cpm = low_spend_ads['Spent'].div(low_spend_ads['Impressions']).mul(1000)
print("Average cost per 1,000 impressions (CPM):", f"${cpm.mean():.2f}")

In [None]:
# CTR (%) = clicks / impressions x 100
#
# Meta ad CTRs range from 0.5% to 2%.
# A low CTR suggests:
# - The ads were not engaging (low-quality creative).
# - Targeting was too broad or mismatched.
# - Dataset might be synthetic or incorrect.
#
# The ratio is taken over the column totals (all clicks / all impressions),
# not as a mean of per-ad ratios.
clicks = df['Clicks'].sum()
print("Clicks:", f"{clicks:,}")
impressions = df['Impressions'].sum()
print("Impressions:", f"{impressions:,}")
ctr = (clicks / impressions) * 100
print("Click through rate (CTR):", f"{ctr:,.3f}%")

In [None]:
# List the column labels available in the loaded frame before choosing
# features and the target.
available_columns = df.columns
print(available_columns)

In [None]:
# Target: approved conversions per ad.
y = df['Approved_Conversion']

# Features: every remaining column except the identifier columns, `age`,
# and the target itself.
X = df.drop(
    ['ad_id', 'xyz_campaign_id', 'fb_campaign_id', 'age', 'Approved_Conversion'],
    axis='columns',
)

In [None]:
# Column groups consumed by the preprocessing step.
#
# `interest` is a category code (see the feature notes at the top of the
# notebook), not a quantity: code 20 is not "twice" code 10. It is therefore
# one-hot encoded with `gender` rather than passed to the linear model as a
# continuous number, which would impose a meaningless ordering on the codes.
categorical = ['gender', 'interest']
numeric = ['Spent', 'Impressions', 'Clicks', 'Total_Conversion']

In [None]:
# Column-wise preprocessing:
#   - categorical columns are one-hot encoded; with handle_unknown='ignore',
#     a category not seen during fit is encoded as all zeros instead of raising
#   - numeric columns are passed through unchanged
one_hot = OneHotEncoder(handle_unknown='ignore')
preprocessor = ColumnTransformer(transformers=[
    ('cat', one_hot, categorical),
    ('num', 'passthrough', numeric),
])

In [None]:
# Multiple linear regression pipeline: preprocessing first, then an ordinary
# least-squares regressor fitted on the transformed columns.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
mlr = Pipeline(pipeline_steps)

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=32,
)

In [None]:
# Fit the preprocessing and regression steps on the training split only.
# The call is left as the cell's last expression so the fitted pipeline is displayed.
mlr.fit(X=X_train, y=y_train)

In [None]:
# Evaluate with the coefficient of determination (R²) on both splits.
# `score` on a regression pipeline returns R², a goodness-of-fit measure and
# not a classification accuracy. It can be negative on held-out data, so it
# is reported as a plain coefficient rather than as an "accuracy" percentage.
train_r2 = mlr.score(X_train, y_train)
test_r2 = mlr.score(X_test, y_test)
print("Training R²:", f"{train_r2:.4f}")
print("Test R²:", f"{test_r2:.4f}")

In [None]:
# Coefficients, labelled with the feature names produced by the preprocessor.
# The preprocessor expands categorical columns into one-hot columns and emits
# them ahead of the passthrough columns, so the raw `coef_` array alone does
# not show which weight belongs to which feature.
feature_names = mlr.named_steps['preprocessor'].get_feature_names_out()
coefficients = pd.Series(mlr.named_steps['regressor'].coef_, index=feature_names)
# to_string() prints every row, avoiding pandas' truncation of long Series.
print(coefficients.to_string())

In [None]:
# Predict approved conversions (new customers) for one hypothetical ad.
# The keys must match the feature columns the pipeline was trained on.
conversion = pd.DataFrame({
    'gender': ["M"],
    'interest': [16],
    'Spent': [150],
    'Impressions': [1000000],
    'Clicks': [95],
    'Total_Conversion': [26],
})

# predict() returns one value per input row; there is a single row here.
predicted_values = mlr.predict(conversion)
prediction = predicted_values[0]
print("Predicted new customers:", prediction)

In [None]:
# Next steps: 
# Optimize with Lasso, Ridge, SGDRegressor, or another ML model