In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    classification_report
)

from xgboost import XGBClassifier
import joblib


In [None]:
# Load the modelling table into a DataFrame.
# NOTE(review): the absolute "/content/..." path looks environment-specific
# (presumably Google Colab) — confirm before running elsewhere.
model_data = pd.read_csv(
    filepath_or_buffer="/content/model_data.csv",
)



In [None]:
# Target: the "will_purchase_30d" column.
y = model_data["will_purchase_30d"]

# Features: every remaining column except the "CustomerID" column and the target.
X = model_data.drop(columns=["CustomerID", "will_purchase_30d"])

# Sanity check shown as the cell output: feature-matrix shape and class balance.
(X.shape, y.value_counts())



((5000, 11),
 will_purchase_30d
 0    2965
 1    2035
 Name: count, dtype: int64)

In [None]:
# Column groups consumed by the preprocessing ColumnTransformer:
# numeric columns go through StandardScaler, categorical ones through OneHotEncoder.
numerical_features = [
    "Age",
    "total_transactions", "total_revenue", "avg_order_value",
    "total_sessions", "avg_session_duration",
    "avg_rating", "review_count",
]

categorical_features = ["Gender", "Location", "LoyaltyStatus"]


In [None]:
# Per-column preprocessing:
#   "num" — standardise the numeric columns;
#   "cat" — one-hot encode the categorical columns; handle_unknown="ignore"
#           keeps transform from failing on categories not seen during fit.
# No `remainder` is passed, so the ColumnTransformer default applies to any
# column that is not listed in either group.
preprocessor = ColumnTransformer(
    [
        (
            "num",
            StandardScaler(),
            numerical_features,
        ),
        (
            "cat",
            OneHotEncoder(handle_unknown="ignore"),
            categorical_features,
        ),
    ]
)


In [None]:
# Hold out 25% of the rows for evaluation. The split is stratified on the
# target and uses a fixed seed so it is repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)


In [None]:
# End-to-end model: preprocessing followed by a gradient-boosted classifier.
# Keeping both in one Pipeline means the fitted object accepts raw feature frames.
xgb_model = Pipeline(
    [
        ("preprocessor", preprocessor),
        (
            "classifier",
            XGBClassifier(
                # Ensemble size, tree depth and step size.
                n_estimators=100,
                max_depth=3,
                learning_rate=0.1,
                # Row / column sub-sampling fractions.
                subsample=0.7,
                colsample_bytree=0.7,
                # Weight for the positive class: count of training labels equal
                # to 0 divided by the count equal to 1.
                scale_pos_weight=(
                    int((y_train == 0).sum()) / int((y_train == 1).sum())
                ),
                eval_metric="logloss",
                random_state=42,
            ),
        ),
    ]
)


In [None]:
# Train the full pipeline (preprocessing + classifier) on the training split.
xgb_model.fit(X=X_train, y=y_train)

# Score the held-out split: column 1 of predict_proba is used as the score
# for ROC-AUC (the label-1 probability, given the 0/1 target).
y_pred_proba = xgb_model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_true=y_test, y_score=y_pred_proba)

print("XGBoost ROC-AUC:", roc_auc)


XGBoost ROC-AUC: 0.5957965792522715


In [None]:
# Persist the fitted pipeline to disk with joblib so it can be reloaded later.
joblib.dump(
    value=xgb_model,
    filename="purchase_model_dynamic.pkl",
)
print("Model saved as 'purchase_model_dynamic.pkl'")

Model saved as 'purchase_model_dynamic.pkl'


In [None]:
# Print the installed scikit-learn version.
# NOTE(review): presumably recorded so the pickled pipeline can be reloaded
# under a matching scikit-learn version — confirm intent.
import sklearn

print(sklearn.__version__)


1.6.1
