# Model Training

In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import xgboost as xgb

In [None]:
# Load the CSV and separate the 'Class' target column from the remaining feature columns.
df = pd.read_csv('../data/creditcard.csv')
y = df['Class']
X = df.drop(columns='Class')

# Hold out 20% of the rows as a test set. stratify=y keeps the class
# proportions the same in both partitions; the fixed seed makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [None]:
# Random Forest: 100 trees capped at depth 10, seeded for repeatability.
# class_weight gives label 1 twice the weight of label 0.
# NOTE(review): the XGBoost cell in this notebook weights the positive class
# by the full negative/positive count ratio, while this model uses a fixed
# 2x weight. Confirm the difference is intentional, since the two models'
# probabilities are later averaged together.
rf_params = {
    'n_estimators': 100,
    'max_depth': 10,
    'class_weight': {0: 1.0, 1: 2.0},
    'random_state': 42,
}
rf = RandomForestClassifier(**rf_params)
rf.fit(X_train, y_train)

In [None]:
# XGBoost: weight the positive class by the ratio of label-0 rows to
# label-1 rows counted in the training labels.
neg_count = (y_train == 0).sum()
pos_count = (y_train == 1).sum()
scale_pos = neg_count / pos_count

# NOTE(review): reg_lambda=0.01 is far weaker L2 regularisation than
# XGBoost's documented default; combined with max_depth=10 this is a
# fairly flexible model. Confirm the value was tuned deliberately.
xgb_model = xgb.XGBClassifier(
    n_estimators=100,
    max_depth=10,
    scale_pos_weight=scale_pos,
    reg_lambda=0.01,
    random_state=42,
)
xgb_model.fit(X_train, y_train)

In [None]:
# Score the held-out rows with each fitted model.
rf_class_proba = rf.predict_proba(X_test)
xgb_class_proba = xgb_model.predict_proba(X_test)

# Keep column index 1 of each probability matrix (the second class column,
# i.e. label 1 when the labels are {0, 1}).
rf_proba = rf_class_proba[:, 1]
xgb_proba = xgb_class_proba[:, 1]

# Blend the two models with equal weight (unweighted mean of their scores).
ensemble_proba = 0.5 * (rf_proba + xgb_proba)