# Model Interpretability & Risk ML Dashboard

This notebook provides interpretability analysis and a risk-monitoring dashboard:
- Feature importance analysis
- SHAP values for model explanations
- Risk monitoring and alerting
- Dashboard metrics

These analyses are essential for the Risk ML pipeline and for fraud detection systems.


In [None]:
%matplotlib inlineimport sysfrom pathlib import Pathfrom datetime import datetime, timedelta# add project root to pathproject_root = Path().resolve().parentsys.path.insert(0, str(project_root))# create output directory for plotsoutput_dir = project_root / "outputs" / "figures"output_dir.mkdir(parents = True, exist_ok = True)import pandas as pdimport numpy as npimport matplotlib.pyplot as pltimport seaborn as snsimport warningswarnings.filterwarnings('ignore')# set plotting styletry: plt.style.use('seaborn-v0_8-darkgrid')except OSError: try: plt.style.use('seaborn-darkgrid') except OSError: plt.style.use('default')sns.set_palette("husl")plt.rcParams['figure.figsize'] = (14, 8)plt.rcParams['font.size'] = 10plt.rcParams['axes.labelsize'] = 12plt.rcParams['axes.titlesize'] = 14plt.rcParams['figure.dpi'] = 100plt.rcParams['savefig.dpi'] = 150plt.rcParams['savefig.bbox'] = 'tight'# import project modulesfrom src.data.load_data import load_datafrom src.features.temporal_features import extract_temporal_featuresfrom src.interpretability.explain import(compute_feature_importance, compute_shap_values, explain_prediction, plot_feature_importance)from src.monitoring.risk_dashboard import RiskMonitorfrom sklearn.ensemble import RandomForestClassif ierfrom sklearn.model_selection import train_test_splitfrom sklearn.preprocessing import StandardScalerfrom sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score# import IPython displaytry: from IPython.display import Image, display HAS_IPYTHON = Trueexcept ImportError: HAS_IPYTHON = False

## 1. Load Data and Train Model for Interpretability

Load data, extract features, and train a model for interpretability analysis.


In [None]:
# ---------------------------------------------------------------------------
# Load the raw engagement data, derive per-id temporal features and fit the
# Random Forest that the following cells explain and monitor.
# Defines: df, features_df, feature_cols, X, y, X_train, X_test, y_train,
# y_test, scaler, X_train_scaled, X_test_scaled, rf_model.
# ---------------------------------------------------------------------------

# load dataset
data_path = project_root / "data" / "raw" / "engagement.parquet"
df = load_data(data_path)

# adapt column names: the rest of this cell works with 'id' and 'label'
if 'user_id' in df.columns and 'id' not in df.columns:
    df['id'] = df['user_id']
if 'is_fake_series' in df.columns and 'label' not in df.columns:
    df['label'] = df['is_fake_series'].map({True: 'fake', False: 'normal'})

print(f"Dataset shape: {df.shape}")
print(f"Label distribution:")
print(df['label'].value_counts())

# extract features
print("\nExtracting temporal features...")
features_df = extract_temporal_features(
    df,
    id_column="id",
    timestamp_column="timestamp",
    window_sizes=[6, 12, 24],
    autocorr_lags=[1, 6, 12, 24],
    aggregate_per_id=True,
)
print(f"Features extracted: {features_df.shape}")

# prepare for modeling
# 'user_id' and 'is_fake_series' are the source columns that 'id' and 'label'
# were copied from above. If they are carried through feature extraction they
# would leak the identifier / target into the model, so they are excluded
# together with 'id' and 'label'.
non_feature_cols = {'id', 'label', 'user_id', 'is_fake_series'}
feature_cols = [c for c in features_df.columns if c not in non_feature_cols]
X = features_df[feature_cols].fillna(0)

# Encode the target. Any value other than 'normal' / 'fake' maps to NaN, which
# would otherwise surface later as an obscure error inside the split or fit;
# fail here with a clear message instead.
label_codes = features_df['label'].map({'normal': 0, 'fake': 1})
if label_codes.isna().any():
    unexpected = sorted(
        features_df.loc[label_codes.isna(), 'label'].astype(str).unique()
    )
    raise ValueError(
        f"Unexpected label values (expected 'normal' or 'fake'): {unexpected}"
    )
y = label_codes.astype('int64').values

# train / test split (stratified so both splits keep the class balance)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# standardize: the scaler is fitted on the training split only
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# train Random Forest for interpretability
print("\nTraining Random Forest model...")
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(X_train_scaled, y_train)

print(f"Training accuracy: {rf_model.score(X_train_scaled, y_train):.4f}")
print(f"Test accuracy: {rf_model.score(X_test_scaled, y_test):.4f}")

## 2. Feature Importance Analysis

Identify which features are most important for detecting fake engagement.


In [None]:
# ---------------------------------------------------------------------------
# Feature importance: compute permutation importances for the fitted model,
# save + show the top-20 bar chart, then list the ten highest-scoring features.
# ---------------------------------------------------------------------------

# The model was trained on the scaled array, so wrap it back into a DataFrame
# carrying the feature names before handing it to the importance helper.
X_train_frame = pd.DataFrame(X_train_scaled, columns=feature_cols)
importance_dict = compute_feature_importance(
    rf_model,
    X_train_frame,
    y_train,
    feature_names=feature_cols,
    method="permutation",
)

# plot feature importance and persist the figure
importance_png = output_dir / "05_feature_importance.png"
fig, ax = plot_feature_importance(
    importance_dict,
    top_k=20,
    title="Top 20 Most Important Features for Fake Engagement Detection",
)
plt.savefig(importance_png, dpi=150, bbox_inches='tight')
plt.show()

# Also embed the saved PNG when IPython's display helpers are available.
if HAS_IPYTHON and importance_png.exists():
    display(Image(str(importance_png)))

# print top features, highest importance first
print("\nTop 10 Most Important Features:")
ranked_features = sorted(
    importance_dict.items(), key=lambda item: item[1], reverse=True
)
top_features = ranked_features[:10]
for idx, (feature, importance) in enumerate(top_features, start=1):
    print(f" {idx}. {feature}: {importance:.4f}")

## 3. Risk Monitoring Dashboard

Monitor alerts, track metrics, and generate risk reports.


In [None]:
# ---------------------------------------------------------------------------
# Risk monitoring: replay the test-set scores through RiskMonitor as simulated
# hourly events, record model metrics, print the risk report and plot a
# two-panel alert summary (severity distribution, most-alerted users).
# ---------------------------------------------------------------------------

# initialize risk monitor
risk_monitor = RiskMonitor(alert_threshold=0.7, alert_window_hours=24)

# simulate predictions; class-1 ('fake') probability is the risk score
y_proba = rf_model.predict_proba(X_test_scaled)[:, 1]
# hard predictions are computed once and reused for all threshold metrics
y_pred = rf_model.predict(X_test_scaled)

# X_test.index holds index *labels* of features_df, so ids must be looked up
# by label (.loc), not by position (.iloc).
id_by_label = features_df['id']
n_test = len(X_test)
# Take the reference time once so simulated events are exactly one hour apart.
# NOTE(review): events are spread over the last n_test hours, so only the most
# recent ones can fall inside a 24-hour window -- confirm this is intended.
now = datetime.now()

# generate alerts for high-risk predictions
alerts_generated = 0
for idx, (row_label, score) in enumerate(zip(X_test.index, y_proba)):
    if row_label in id_by_label.index:
        user_id = str(id_by_label.loc[row_label])
    else:
        # fall back to a synthetic id when the row cannot be resolved
        user_id = f"user_{idx}"
    timestamp = now - timedelta(hours=n_test - idx)
    if risk_monitor.check_alert(user_id, score, timestamp, {"true_label": int(y_test[idx])}):
        alerts_generated += 1

print(f"Generated {alerts_generated} alerts from test set")

# record metrics
risk_monitor.record_metrics({
    "auc": roc_auc_score(y_test, y_proba),
    "precision": precision_score(y_test, y_pred),
    "recall": recall_score(y_test, y_pred),
    "f1": f1_score(y_test, y_pred),
})

# generate risk report
report = risk_monitor.generate_risk_report()
print(report)

# plot alert summary
summary = risk_monitor.get_alert_summary(hours=24)
fig, axes = plt.subplots(1, 2, figsize=(14, 6))

# alert severity distribution (panel is left empty when there are no alerts)
severity_colors = {'CRITICAL': 'darkred', 'HIGH': 'red', 'MEDIUM': 'orange'}
if summary['by_severity']:
    severities = list(summary['by_severity'].keys())
    counts = list(summary['by_severity'].values())
    # any severity not listed above is drawn in yellow
    colors = [severity_colors.get(s, 'yellow') for s in severities]
    axes[0].bar(severities, counts, color=colors, alpha=0.8, edgecolor='black', linewidth=2)
    axes[0].set_ylabel('Number of Alerts', fontsize=12, fontweight='bold')
    axes[0].set_title('Alert Distribution by Severity', fontsize=14, fontweight='bold')
    axes[0].grid(True, alpha=0.3, axis='y')

# top alerted users (at most ten, in the order returned by the monitor)
if summary['top_users']:
    top_users = summary['top_users'][:10]
    user_ids = [u['user_id'] for u in top_users]
    alert_counts = [u['alert_count'] for u in top_users]
    axes[1].barh(range(len(user_ids)), alert_counts, color='red', alpha=0.7)
    axes[1].set_yticks(range(len(user_ids)))
    axes[1].set_yticklabels([f"User {uid}" for uid in user_ids], fontsize=9)
    axes[1].set_xlabel('Number of Alerts', fontsize=12, fontweight='bold')
    axes[1].set_title('Top 10 Alerted Users', fontsize=14, fontweight='bold')
    axes[1].grid(True, alpha=0.3, axis='x')

dashboard_png = output_dir / "05_risk_dashboard.png"
fig.tight_layout()
fig.savefig(dashboard_png, dpi=150, bbox_inches='tight')
plt.show()

# Also embed the saved PNG when IPython's display helpers are available.
if HAS_IPYTHON and dashboard_png.exists():
    display(Image(str(dashboard_png)))