In [2]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

seed = 42

campaigns = pd.read_csv("campaigns.csv")
users = pd.read_csv("users.csv")
ads = pd.read_csv("ads.csv")
ad_events = pd.read_csv("ad_events.csv")

df = pd.merge(ad_events, ads, on="ad_id", how="left")
df = pd.merge(df, users, on="user_id", how="left")
df = pd.merge(df, campaigns, on="campaign_id", how="left")

if "user_gender" in df.columns and "target_gender" in df.columns:
    df["gender_match"] = (
        (df["user_gender"] == df["target_gender"]) |
        (df["target_gender"].astype(str).str.lower() == "all")
    ).astype(int)
else:
    df["gender_match"] = 0

if "age_group" in df.columns and "target_age_group" in df.columns:
    df["age_match"] = (
        (df["age_group"] == df["target_age_group"]) |
        (df["target_age_group"].astype(str).str.lower() == "all")
    ).astype(int)
else:
    df["age_match"] = 0

if "timestamp" in df.columns:
    df["hour"] = pd.to_datetime(df["timestamp"], errors="coerce").dt.hour.fillna(0).astype(int)
else:
    df["hour"] = 0

df["hour_bucket"] = pd.cut(
    df["hour"],
    bins=[0, 6, 12, 18, 24],
    labels=["Night", "Morning", "Afternoon", "Evening"],
    include_lowest=True
)

if "event_type" in df.columns:
    df["interaction_score"] = df["event_type"].map({
        "Impression": 1,
        "View": 2,
        "Like": 3,
        "Click": 4
    }).fillna(0).astype(int)
else:
    df["interaction_score"] = 0

desired_features = [
    "user_gender",
    "age_group",
    "ad_platform",
    "ad_type",
    "hour_bucket",
    "gender_match",
    "age_match",
    "interaction_score"
]

existing_features = [c for c in desired_features if c in df.columns]

X = df[existing_features].copy()
y = df["event_type"] if "event_type" in df.columns else df.iloc[:, -1]

X = pd.get_dummies(X, dtype=int)

feature_cols = X.columns.tolist()
joblib.dump(feature_cols, "svm_model.pkl")

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=seed, stratify=y
)

clf = SVC(kernel="rbf", random_state=seed)
clf.fit(X_train, y_train)

pred = clf.predict(X_test)
acc = accuracy_score(y_test, pred)
print("Accuracy:", acc)

joblib.dump(clf, "svm_model.sav")
print("Model saved as svm_model.sav")
print("Feature list saved as svm_model.pkl")


Accuracy: 0.990032263121849
Model saved as svm_model.sav
Feature list saved as svm_model.pkl
