# 📧 Email Marketing Campaign Optimization using Machine Learning
---
### 🎯 Objective:
- Analyze email campaign performance (open and click rates)
- Predict which users are likely to click the link in future campaigns
- Provide actionable insights for optimizing future email campaigns

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

## 📥 Load Datasets

In [None]:
# Read the three campaign CSVs from the working directory in one pass:
# the email table, the open-event table and the click-event table.
email_df, opened_df, clicked_df = (
    pd.read_csv(csv_name)
    for csv_name in ('email_table.csv', 'email_opened_table.csv', 'link_clicked_table.csv')
)

## 🔗 Merge Data and Add Flags

In [None]:
# Flag each email as opened / clicked (1 or 0) depending on whether its
# email_id appears in the corresponding event table.
sent_ids = email_df['email_id']
was_opened = sent_ids.isin(opened_df['email_id'])
was_clicked = sent_ids.isin(clicked_df['email_id'])
email_df['opened'] = was_opened.astype(int)
email_df['clicked'] = was_clicked.astype(int)

## 📊 Performance Metrics

In [None]:
# The mean of a 0/1 flag column is the share of emails with that outcome.
opened_flags = email_df['opened']
clicked_flags = email_df['clicked']
open_rate, click_rate = opened_flags.mean(), clicked_flags.mean()

# Report both rates as percentages with two decimals.
print(f"Open Rate: {open_rate:.2%}")
print(f"Click Rate: {click_rate:.2%}")

## 🔍 Exploratory Data Analysis

In [None]:
# Bucket emails by the recipient's past purchase count so the click rate can
# be compared across buckets.
# The last bin edge is open-ended (np.inf) so the '10+' label really covers
# every count above 10; with a finite cap, larger counts would silently become
# NaN and drop out of the chart.
purchase_bins = [-1, 0, 1, 3, 5, 10, np.inf]
purchase_labels = ['0', '1', '2-3', '4-5', '6-10', '10+']
email_df['purchase_bucket'] = pd.cut(email_df['user_past_purchases'],
                                     bins=purchase_bins,
                                     labels=purchase_labels)

# observed=False keeps every bucket on the x-axis (even an empty one) and makes
# the categorical grouping behaviour explicit instead of relying on the pandas
# default, which differs between pandas versions.
ctr_by_bucket = email_df.groupby('purchase_bucket', observed=False)['clicked'].mean()
ctr_by_bucket.plot(kind='bar', title='CTR by Past Purchases')
plt.ylabel('Click Rate')
plt.show()

## 🧪 Train Machine Learning Model

In [None]:
# One-hot encode the categorical columns; drop_first=True removes one level
# per column so the dummies are not redundant.
data = pd.get_dummies(email_df,
                      columns=['email_text', 'email_version', 'weekday', 'user_country'],
                      drop_first=True)

# Features: everything except the identifier, the target, and 'opened'
# (an outcome flag, not something available before sending).
X = data.drop(columns=['email_id', 'clicked', 'opened'])
# 'purchase_bucket' is a string-labelled categorical created only for the EDA
# chart. It is not one-hot encoded above, so leaving it in X makes the model
# fit fail on labels such as '2-3'. errors='ignore' keeps this cell runnable
# when the EDA cell was skipped and the column does not exist.
X = X.drop(columns=['purchase_bucket'], errors='ignore')
y = data['clicked']

# Stratified 80/20 split keeps the click ratio the same in train and test;
# fixed seeds make the split and the forest reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

## ✅ Model Evaluation

In [None]:
# Per-class precision / recall / F1 on the held-out test set.
per_class_report = classification_report(y_test, y_pred)
print(per_class_report)

# Overall share of correct predictions, shown as a percentage.
test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy Score: {test_accuracy:.2%}")

## 🔍 Feature Importance

In [None]:
# Rank features by the fitted forest's importance scores, highest first.
importances = model.feature_importances_
features = X.columns
indices = np.argsort(importances)[::-1]

# Plot at most 15 bars. Clamping to the number of available features avoids a
# length mismatch in plt.bar when the model was trained on fewer than 15.
top_n = min(15, len(indices))
top_indices = indices[:top_n]

plt.figure(figsize=(12, 6))
plt.title(f"Top {top_n} Feature Importances")
plt.bar(range(top_n), importances[top_indices])
plt.xticks(range(top_n), [features[i] for i in top_indices], rotation=90)
plt.tight_layout()
plt.show()

## 📈 Simulate Targeted Campaign

In [None]:
# Attach the predicted click probability (class 1) and the true label to a
# copy of the test features, leaving X_test itself untouched.
click_probability = model.predict_proba(X_test)[:, 1]
X_test_copy = X_test.copy()
X_test_copy['pred_prob'] = click_probability
X_test_copy['actual'] = y_test.values

# Keep the 30% of test rows with the highest predicted probability and measure
# how often those rows were actually clicked.
n_targeted = int(0.3 * len(X_test_copy))
ranked_by_prob = X_test_copy.sort_values(by='pred_prob', ascending=False)
top_30_percent = ranked_by_prob.head(n_targeted)
simulated_ctr = top_30_percent['actual'].mean()
print(f"Simulated CTR (top 30% users): {simulated_ctr:.2%}")