In [None]:
import pandas as pd

# Read the preprocessed ads dataset (path is relative to the working directory)
df = pd.read_csv("../data/facebook_ads_final_preprocessed.csv")

# Bucket CTR into three quantile-based classes, ordered from lowest to highest
ctr_labels = ['Low', 'Medium', 'High']
df['CTR_Category'] = pd.qcut(df['CTR'], q=3, labels=ctr_labels)

# Discard 'reporting_start' when present; errors='ignore' makes this a no-op otherwise
df = df.drop(columns=['reporting_start'], errors='ignore')

In [None]:
# Feature candidates: every numeric column except the two target columns
target_columns = ['CTR', 'CTR_Category']
features_df = df.drop(columns=target_columns).select_dtypes(include='number')

# Keep only rows that have a defined label AND complete features.
# pd.qcut leaves CTR_Category as NaN wherever CTR is NaN, so filtering on the
# features alone could let unlabeled rows end up in y.
has_label = df['CTR_Category'].notna()
features_df = features_df.loc[has_label].dropna()

# NOTE(review): the stored classification reports show a test support of 1 for
# 'High' versus 76 for 'Low' and 'Medium', which suggests dropna() removes
# almost every 'High' row — confirm, and consider imputing instead of dropping.

# Final features and labels (y is looked up by X's index so both stay aligned)
X = features_df
y = df.loc[X.index, 'CTR_Category']

print("✅ NaNs in X:", X.isna().sum().sum())
print("✅ NaNs in y:", y.isna().sum())

✅ NaNs in X: 0
✅ NaNs in y: 0


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions approximately equal in both parts; the fixed seed makes the
# split repeatable.
split_options = {'test_size': 0.2, 'stratify': y, 'random_state': 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report

# Baseline linear classifier; max_iter is raised above scikit-learn's default of 100
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print("📊 Logistic Regression Report:")
# zero_division=0 reports 0.0 for a class that is never predicted without
# emitting an UndefinedMetricWarning for each affected metric.
print(classification_report(y_test, y_pred, zero_division=0))

📊 Logistic Regression Report:
              precision    recall  f1-score   support

        High       0.00      0.00      0.00         1
         Low       0.65      0.47      0.55        76
      Medium       0.59      0.76      0.67        76

    accuracy                           0.61       153
   macro avg       0.42      0.41      0.41       153
weighted avg       0.62      0.61      0.60       153



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a 100-tree Random Forest; the fixed seed makes the fit repeatable
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

# Predict on the held-out rows
y_pred_rf = rf.predict(X_test)

# Evaluate. zero_division=0 reports 0.0 for a class that is never predicted
# without emitting an UndefinedMetricWarning for each affected metric.
print("📊 Random Forest Classification Report:")
print(classification_report(y_test, y_pred_rf, zero_division=0))


📊 Random Forest Classification Report:
              precision    recall  f1-score   support

        High       0.00      0.00      0.00         1
         Low       0.94      0.87      0.90        76
      Medium       0.87      0.95      0.91        76

    accuracy                           0.90       153
   macro avg       0.60      0.61      0.60       153
weighted avg       0.90      0.90      0.90       153



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
