# 🚀 Notebook 03 — Proposed Model Development

Implements the proposed enhancements beyond the original paper:
1. **CNN-BiGRU** hybrid deep-learning model
2. **BERT-based** classifier (DistilBERT on tabular→text)
3. **Stacking Ensemble** (RF + XGB + CNN-BiGRU → Logistic Regression)
4. **Hyperparameter Tuning** for RF & XGB
5. **Explainability** (SHAP, LIME, Feature Importance)
6. **2FA / MFA Simulation** framework

---

In [None]:
# Notebook bootstrap: path setup, shared imports, RNG seeding.
import sys, os, warnings
# Put the parent of the working directory first on sys.path; the `src.*`
# imports below presumably rely on this when the notebook runs from a
# sub-folder of the repository — confirm the expected launch directory.
sys.path.insert(0, os.path.abspath('..'))
# NOTE(review): this silences every warning category for the whole session.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import joblib

from src.utils.config import (
    RANDOM_SEED, MODELS_DIR, FIGURES_DIR, N_SPLITS,
    DS_EUROPEAN, DS_SPARKOV,
)
from src.utils.metrics import evaluate_model, results_to_dataframe
from src.data.preprocessing import load_processed
from src.data.balancing_strategies import get_balanced_datasets

# Seed NumPy's global RNG so later np.random.* draws are repeatable.
np.random.seed(RANDOM_SEED)
# IPython magic (not plain Python): render matplotlib figures inline.
%matplotlib inline
print('Setup complete.')

In [None]:
# Load the processed European dataset (the primary one) and pull out the
# 'smote' entry of the balanced training sets.
eu_data = load_processed(DS_EUROPEAN)
balanced = get_balanced_datasets(eu_data['X_train'], eu_data['y_train'])
X_smote, y_smote = balanced['smote']

# Validation and test splits, grouped by features then labels.
X_val, X_test = eu_data['X_val'], eu_data['X_test']
y_val, y_test = eu_data['y_val'], eu_data['y_test']

# One print call with a newline separator emits the same three lines.
print(
    f'SMOTE training: {len(X_smote):,} samples',
    f'Validation:     {len(X_val):,} samples',
    f'Test:           {len(X_test):,} samples',
    sep='\n',
)

## 1. CNN-BiGRU Model

In [None]:
from src.models.deep_learning_models import (
    build_cnn_bigru, compile_cnn_bigru, train_cnn_bigru, reshape_for_cnn
)
from src.visualization.plot_utils import plot_training_history

# Run every split through the same Conv1D reshaping helper, in the order
# train / validation / test.
X_tr_3d, X_val_3d, X_te_3d = (
    reshape_for_cnn(split) for split in (X_smote, X_val, X_test)
)

print(f'Input shape: {X_tr_3d.shape[1:]}')

# Build and compile in one expression; the first input dimension is read
# from the reshaped training array and the second is fixed at 1.
cnn_model = compile_cnn_bigru(
    build_cnn_bigru(input_shape=(X_tr_3d.shape[1], 1))
)
cnn_model.summary()

In [None]:
# Train the CNN-BiGRU on the reshaped 'smote' training arrays. The validation
# arrays are handed to the project trainer as well (presumably for per-epoch
# monitoring — confirm in train_cnn_bigru). The returned history is plotted.
history = train_cnn_bigru(cnn_model, X_tr_3d, y_smote, X_val_3d, y_val)
plot_training_history(history, model_name='CNN-BiGRU')
plt.show()

In [None]:
# Evaluate the CNN-BiGRU on the reshaped test features.
# predict() output is flattened to 1-D so it lines up with y_test.
y_prob_cnn = cnn_model.predict(X_te_3d).flatten()
# Hard labels use a fixed 0.5 probability cut-off.
y_pred_cnn = (y_prob_cnn >= 0.5).astype(int)
cnn_metrics = evaluate_model(y_test, y_pred_cnn, y_prob_cnn)
# The separator was mis-encoded ("â€”"); print a real em dash.
print(f"CNN-BiGRU — F1: {cnn_metrics['f1']:.4f}  AUC: {cnn_metrics['roc_auc']:.4f}")

# Persist the trained network; the path is stringified before being passed
# to save().
cnn_model.save(str(MODELS_DIR / 'european_smote_cnn_bigru.h5'))

## 2. BERT-based Model

In [None]:
from src.models.deep_learning_models import BertFraudClassifier


def _take_rows(data, selector):
    """Select rows by position from a pandas object or a NumPy array."""
    return data.iloc[selector] if hasattr(data, 'iloc') else data[selector]


# Sub-sample for feasibility
N_BERT = 3000
# np.random.choice(..., replace=False) raises ValueError when asked for more
# items than exist, so cap the draw at the size of the training set.
n_bert = min(N_BERT, len(X_smote))
idx = np.random.choice(len(X_smote), n_bert, replace=False)
X_bert_tr = _take_rows(X_smote, idx)
y_bert_tr = _take_rows(y_smote, idx)

# NOTE(review): the evaluation subset is simply the first 1000 test rows as
# stored (no shuffling or stratification) — confirm it contains enough fraud
# cases for F1 / AUC to be meaningful.
X_bert_te = _take_rows(X_test, slice(1000))
y_bert_te = _take_rows(y_test, slice(1000))

bert_clf = BertFraudClassifier()
bert_clf.prepare_data(X_bert_tr, y_bert_tr)
bert_clf.train()

In [None]:
# Score the BERT classifier on the BERT evaluation subset.
# NOTE(review): predict_proba's output is used directly as a 1-D score per
# row (no [:, 1] column selection) — confirm against BertFraudClassifier.
y_prob_bert = bert_clf.predict_proba(X_bert_te)
# Hard labels use a fixed 0.5 probability cut-off.
y_pred_bert = (y_prob_bert >= 0.5).astype(int)
bert_metrics = evaluate_model(y_bert_te, y_pred_bert, y_prob_bert)
# The separator was mis-encoded ("â€”"); print a real em dash.
print(f"BERT — F1: {bert_metrics['f1']:.4f}  AUC: {bert_metrics['roc_auc']:.4f}")

## 3. Stacking Ensemble

In [None]:
from src.models.ensemble_models import get_stacking_ensemble

# Fit the project's stacking ensemble on the 'smote' training set.
stacking = get_stacking_ensemble()
stacking.fit(X_smote, y_smote)

# Hard labels come from predict(); column 1 of predict_proba() is used as
# the score passed to the metrics helper.
y_pred_stack = stacking.predict(X_test)
y_prob_stack = stacking.predict_proba(X_test)[:, 1]
stack_metrics = evaluate_model(y_test, y_pred_stack, y_prob_stack)
# The separator was mis-encoded ("â€”"); print a real em dash.
print(f"Stacking — F1: {stack_metrics['f1']:.4f}  AUC: {stack_metrics['roc_auc']:.4f}")

# Persist the fitted ensemble.
joblib.dump(stacking, MODELS_DIR / 'european_smote_stacking.joblib')

## 4. Hyperparameter Tuning

In [None]:
from src.models.ensemble_models import tune_random_forest, tune_xgboost

# Search Random Forest hyperparameters on the 'smote' training set. The
# helper returns the best estimator and the search object; n_iter=30 is
# forwarded to it (presumably the number of sampled candidates — confirm).
best_rf, rf_search = tune_random_forest(X_smote, y_smote, n_iter=30)
# Saved before evaluation, so the model is on disk even if scoring fails.
joblib.dump(best_rf, MODELS_DIR / 'tuned_rf.joblib')

# Column 1 of predict_proba() is used as the score for the metrics helper.
y_pred_trf = best_rf.predict(X_test)
y_prob_trf = best_rf.predict_proba(X_test)[:, 1]
trf_metrics = evaluate_model(y_test, y_pred_trf, y_prob_trf)
# The separator was mis-encoded ("â€”"); print a real em dash.
print(f"Tuned RF — F1: {trf_metrics['f1']:.4f}  AUC: {trf_metrics['roc_auc']:.4f}")

In [None]:
# Search XGBoost hyperparameters on the 'smote' training set. The helper
# returns the best estimator and the search object; n_iter=30 is forwarded
# to it (presumably the number of sampled candidates — confirm).
best_xgb, xgb_search = tune_xgboost(X_smote, y_smote, n_iter=30)
# Saved before evaluation, so the model is on disk even if scoring fails.
joblib.dump(best_xgb, MODELS_DIR / 'tuned_xgb.joblib')

# Column 1 of predict_proba() is used as the score for the metrics helper.
y_pred_txgb = best_xgb.predict(X_test)
y_prob_txgb = best_xgb.predict_proba(X_test)[:, 1]
txgb_metrics = evaluate_model(y_test, y_pred_txgb, y_prob_txgb)
# The separator was mis-encoded ("â€”"); print a real em dash.
print(f"Tuned XGB — F1: {txgb_metrics['f1']:.4f}  AUC: {txgb_metrics['roc_auc']:.4f}")

## 5. Explainability (SHAP & LIME)

In [None]:
from src.models.explainability import shap_analysis, shap_force_plot, lime_analysis, feature_importance_report

# SHAP on tuned XGB
# The explanation is computed over the test features. model_type='tree' is
# forwarded to the project helper (presumably selecting a tree-specific
# explainer — confirm in src.models.explainability).
shap_vals = shap_analysis(best_xgb, X_test, model_type='tree')
plt.show()

In [None]:
# SHAP force plot for a fraudulent instance
# Locate fraud rows by position rather than by index label. This works for
# pandas and NumPy labels alike and never falls back to row 0, which is not
# necessarily a fraud case. It relies on X_test and y_test being row-aligned,
# as the evaluation cells already assume.
fraud_positions = np.flatnonzero(np.asarray(y_test) == 1)
if fraud_positions.size:
    # First fraudulent row, as a plain int position into X_test.
    fraud_pos = int(fraud_positions[0])
    shap_force_plot(best_xgb, X_test, instance_idx=fraud_pos)
    plt.show()

In [None]:
# LIME explanation
# Explains the tuned XGB model for instance_idx=0 (presumably the first row
# of X_test). X_smote is passed alongside it, presumably as the reference
# training data for the explainer — confirm in lime_analysis.
exp = lime_analysis(best_xgb, X_smote, X_test, instance_idx=0)
plt.show()

In [None]:
# Feature importance
# Build the project's importance report for the tuned XGB model from the
# test features and labels.
fi_report = feature_importance_report(best_xgb, X_test, y_test)
# Kept as the last expression so the notebook displays the first 15 rows.
fi_report.head(15)

## 6. 2FA / MFA Simulation

In [None]:
def simulate_2fa(risk_scores, threshold_2fa=0.5, threshold_mfa=0.8):
    """
    Multi-layer authentication simulation.

    Maps every risk score to the authentication layer it would trigger:

    Layer 1: 'Standard' — standard auth (risk < threshold_2fa)
    Layer 2: '2FA' — SMS/Email (threshold_2fa <= risk < threshold_mfa)
    Layer 3: 'MFA' — Biometric (risk >= threshold_mfa)

    Args:
        risk_scores: Scalar or array-like of numeric risk scores (list,
            tuple, NumPy array, pandas Series, ...).
        threshold_2fa: Scores below this value get 'Standard'.
        threshold_mfa: Scores at or above this value get 'MFA'.

    Returns:
        NumPy array of layer names with the same shape as the input. NaN
        scores fail both comparisons and therefore land in 'MFA'.
    """
    # Plain lists and tuples do not support `< float`; normalise to an
    # ndarray first. The dtype is left untouched so float32 model outputs
    # compare exactly as before.
    scores = np.asarray(risk_scores)
    layers = np.where(
        scores < threshold_2fa, 'Standard',
        np.where(scores < threshold_mfa, '2FA', 'MFA')
    )
    return layers

# Use model predictions as risk scores
risk = y_prob_txgb  # tuned XGB probabilities
auth_layers = simulate_2fa(risk)

sim_df = pd.DataFrame({
    'risk_score': risk,
    # np.asarray accepts both pandas and NumPy labels (y_test.values needs a
    # pandas object) and drops any index so rows align by position.
    'true_label': np.asarray(y_test),
    'auth_layer': auth_layers,
})

print('=== Authentication Layer Distribution ===')
print(sim_df['auth_layer'].value_counts())

# NOTE(review): this counts true fraud rows in every layer, including
# 'Standard', where no extra authentication step is triggered.
print('\n=== Fraud caught per layer ===')
caught = sim_df[sim_df['true_label'] == 1].groupby('auth_layer').size()
print(caught)

In [None]:
# Visualize
# One fixed layer -> colour mapping shared by both panels.
layer_order = ['Standard', '2FA', 'MFA']
layer_colors = ['#55A868', '#F0AD4E', '#C44E52']

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# value_counts() sorts by frequency, so a positional colour list would follow
# count rank rather than layer. Reindex to the fixed order (0 for layers with
# no transactions) so each bar matches its histogram colour.
layer_counts = (
    sim_df['auth_layer'].value_counts().reindex(layer_order, fill_value=0)
)
layer_counts.plot(kind='bar', ax=axes[0],
    color=layer_colors, edgecolor='black')
axes[0].set_title('Transactions per Auth Layer')

# Overlay one semi-transparent risk-score histogram per layer.
for layer, color in zip(layer_order, layer_colors):
    subset = sim_df.loc[sim_df['auth_layer'] == layer, 'risk_score']
    axes[1].hist(subset, bins=30, alpha=0.5, color=color, label=layer)
axes[1].set_title('Risk Score Distribution by Auth Layer')
axes[1].legend()

plt.tight_layout()
plt.savefig(FIGURES_DIR / '2fa_simulation.png', dpi=150)
plt.show()

## 7. Feature Engineering (Proposed)

Demonstrates new features that could improve detection when raw transaction data is available.

In [None]:
def engineer_features(df, early_hour=6, late_hour=22):
    """Create new features from raw transaction data.

    Columns are added only when their source column is present:

    * ``amount_deviation`` — z-score of the amount column (``amt`` is
      preferred, otherwise ``Amount``).
    * ``time_anomaly`` — 1 when ``trans_hour`` is before ``early_hour`` or
      after ``late_hour``, else 0.

    Args:
        df: DataFrame of transactions. It is not modified.
        early_hour: Hours strictly below this value are flagged.
        late_hour: Hours strictly above this value are flagged.

    Returns:
        A copy of ``df`` with the applicable feature columns appended.
    """
    result = df.copy()

    # Amount deviation from mean, in units of standard deviation
    amt_col = next(
        (col for col in ('amt', 'Amount') if col in result.columns), None
    )
    if amt_col is not None:
        amounts = result[amt_col]
        spread = amounts.std()
        # The sample std is NaN with fewer than two values; treat it as zero
        # spread so the feature is 0.0 rather than NaN.
        if pd.isna(spread):
            spread = 0.0
        # The epsilon guards against division by zero for constant amounts.
        result['amount_deviation'] = (
            amounts - amounts.mean()
        ) / (spread + 1e-8)

    # Time anomaly: transactions outside the early_hour..late_hour window
    # (by default before 06:00 or after 22:00)
    if 'trans_hour' in result.columns:
        result['time_anomaly'] = (
            (result['trans_hour'] < early_hour)
            | (result['trans_hour'] > late_hour)
        ).astype(int)

    return result

# Status messages only: this cell defines the helper but does not apply it to
# any dataset.
print('Feature engineering functions defined.')
print('These would be applied to raw data before preprocessing.')

---
*Proceed to Notebook 04 for comprehensive model comparison and statistical analysis.*