# Import Necessary Libraries


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


# Loading  data

In [3]:
email = pd.read_csv("data/email_table.csv")
opened = pd.read_csv("data/email_opened_table.csv")
clicked = pd.read_csv("data/link_clicked_table.csv")


# Open and Click Rates

In [4]:
total_emails = len(email)
open_rate = len(opened) / total_emails * 100
click_rate = len(clicked) / total_emails * 100
print(f" Open Rate: {open_rate:.2f}%")
print(f" Click Rate: {click_rate:.2f}%")

 Open Rate: 10.35%
 Click Rate: 2.12%


# Label emails as opened or clicked

In [5]:
email["opened"] = email["email_id"].isin(opened["email_id"]).astype(int)
email["clicked"] = email["email_id"].isin(clicked["email_id"]).astype(int)

# One-hot encode categorical features

In [6]:
df = pd.get_dummies(email, columns=["email_text", "email_version", "weekday", "user_country"])


# Train-test split

In [7]:
X = df.drop(columns=["email_id", "clicked"])
y = df["clicked"]
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Train model

In [8]:
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)


In [9]:
print("\n📊 Model Evaluation:")
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))



📊 Model Evaluation:
Accuracy: 0.97156
              precision    recall  f1-score   support

           0       0.98      0.99      0.99     24443
           1       0.28      0.17      0.21       557

    accuracy                           0.97     25000
   macro avg       0.63      0.58      0.60     25000
weighted avg       0.97      0.97      0.97     25000



In [12]:
print("\n Click Rates by Email Text Type:")
print(email.groupby("email_text")["clicked"].mean())

print("\n Click Rates by Country:")
print(email.groupby("user_country")["clicked"].mean().sort_values(ascending=False))


 Click Rates by Email Text Type:
email_text
long_email     0.018538
short_email    0.023872
Name: clicked, dtype: float64

 Click Rates by Country:
user_country
UK    0.024675
US    0.024360
ES    0.008327
FR    0.008004
Name: clicked, dtype: float64
