# Model Training, Evaluation and Comparison

This notebook trains and compares baseline models (Logistic Regression, Random Forest, Isolation Forest, LOF) with sequential deep learning models (LSTM, TCN, Autoencoder) for fake engagement detection.


In [None]:
%matplotlib inlineimport sysfrom pathlib import Path# add project root to pathproject_root = Path().resolve().parentsys.path.insert(0, str(project_root))# create output directory for plotsoutput_dir = project_root / "outputs" / "figures"output_dir.mkdir(parents=True, exist_ok=True)import pandas as pdimport numpy as npimport matplotlib.pyplot as pltimport seaborn as snsimport torchimport torch.nn as nnfrom torch.utils.data import DataLoaderimport warningswarnings.filterwarnings('ignore')# set plotting styletry:    plt.style.use('seaborn-v0_8-darkgrid')except OSError:    try:        plt.style.use('seaborn-darkgrid')    except OSError:        plt.style.use('default')sns.set_palette("husl")plt.rcParams['figure.figsize'] = (14, 8)plt.rcParams['font.size'] = 10plt.rcParams['axes.labelsize'] = 12plt.rcParams['axes.titlesize'] = 14plt.rcParams['figure.dpi'] = 100plt.rcParams['savefig.dpi'] = 150plt.rcParams['savefig.bbox'] = 'tight'# import project modulesfrom src.data.load_data import load_datafrom src.data.sequence_preparation import prepare_sequences_for_trainingfrom src.data.dataset import create_dataloaders_from_dictfrom src.features.temporal_features import extract_temporal_featuresfrom src.training.train import (    train_multiple_baselines,    train_model_from_config,)from src.training.evaluate import (    compare_models,    compare_all_models,    evaluate_sequential_model,    compute_metrics,    plot_roc_curve,)from src.utils.config import load_config, update_config_with_datafrom src.visualization.plots import plot_reconstruction# import IPython display for showing saved imagestry:    from IPython.display import Image, display    HAS_IPYTHON = Trueexcept ImportError:    HAS_IPYTHON = False# set random seedstorch.manual_seed(42)np.random.seed(42)

## 1. Load Configuration and Data

Load configuration and prepare data for both baseline and sequential models.


In [None]:
# Read the project configuration.
config = load_config()
print("Configuration loaded successfully")

# Read the raw engagement dataset from the project's data directory.
data_path = project_root / "data" / "raw" / "engagement.parquet"
df = load_data(data_path)

# Harmonise the schema: later cells read the `id` and `label` columns, so
# derive them from `user_id` / `is_fake_series` when only those are present.
columns_at_load = set(df.columns)
if 'id' not in columns_at_load and 'user_id' in columns_at_load:
    df['id'] = df['user_id']
if 'label' not in columns_at_load and 'is_fake_series' in columns_at_load:
    df['label'] = df['is_fake_series'].map({True: 'fake', False: 'normal'})

print(f"\nTime series data shape: {df.shape}")
print(f"Number of users: {df['id'].nunique()}")

# Print the value counts of each class-indicator column that exists.
for indicator_column, heading in (
    ('is_fake_series', "Fake series distribution:"),
    ('label', "Label distribution:"),
):
    if indicator_column in df.columns:
        print(heading)
        print(df[indicator_column].value_counts())

## 2. Training Data Overview

Visualize the training data distribution and characteristics.


In [None]:
# visualize training data distribution
#
# This cell sits before the cells that create `features_df` and the prepared
# sequences, so on a fresh kernel those names do not exist yet. Each panel
# therefore looks its inputs up defensively and renders a placeholder message
# instead of raising NameError.


def _placeholder_panel(ax, title, message):
    """Show a centred message on a panel whose data is not available."""
    ax.text(0.5, 0.5, message, ha='center', va='center',
            transform=ax.transAxes, fontsize=12)
    ax.set_title(title, fontsize=14, fontweight='bold')


def _first_available(mapping, keys):
    """Return the first non-None value stored in `mapping` under one of `keys`, else None."""
    if mapping is None:
        return None
    for key in keys:
        try:
            value = mapping[key]
        except (KeyError, TypeError, IndexError):
            continue
        if value is not None:
            return value
    return None


_notebook_vars = globals()
overview_features = _notebook_vars.get('features_df')
overview_feature_cols = _notebook_vars.get('feature_cols')

# The original cell looked for `sequences_dict` with keys 'X' / 'y'; the
# preparation cell of this notebook stores its result as `sequence_data` with
# split-suffixed keys ('X_train', ...), so both spellings are accepted.
overview_sequences = _notebook_vars.get('sequences_dict')
if overview_sequences is None:
    overview_sequences = _notebook_vars.get('sequence_data')
seq_inputs = _first_available(overview_sequences, ('X', 'X_train'))
# NOTE(review): 'y_train' is assumed to sit alongside 'X_train' — confirm
# against prepare_sequences_for_training.
seq_labels = _first_available(overview_sequences, ('y', 'y_train'))

has_labelled_features = (
    overview_features is not None and 'label' in overview_features.columns
)

fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# --- class distribution ------------------------------------------------------
ax = axes[0, 0]
if has_labelled_features:
    label_counts = overview_features['label'].value_counts()
    # String tick labels give categorical bar positions 0..n-1, which is where
    # the count annotations below are written.
    ax.bar(label_counts.index.astype(str), label_counts.values, color=['blue', 'red'],
           alpha=0.7, edgecolor='black', linewidth=1.5)
    ax.set_xlabel('Label', fontsize=12)
    ax.set_ylabel('Count', fontsize=12)
    ax.set_title('Class Distribution in Training Data', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')
    for position, count in enumerate(label_counts.values):
        ax.text(position, count, str(count), ha='center', va='bottom',
                fontsize=12, fontweight='bold')
else:
    _placeholder_panel(ax, 'Class Distribution in Training Data',
                       'Features not extracted yet')

# --- feature distribution heatmap (sample) -----------------------------------
ax = axes[0, 1]
sample_features = []
if has_labelled_features:
    if overview_feature_cols is None:
        # No explicit feature list is defined in the notebook: fall back to
        # every numeric column except the identifier and the label.
        numeric_columns = overview_features.select_dtypes(include='number').columns
        overview_feature_cols = [c for c in numeric_columns if c not in ('id', 'label')]
    # Only the first 20 features are shown to keep the heatmap readable.
    sample_features = [c for c in overview_feature_cols
                       if c in overview_features.columns][:20]
if sample_features:
    sample_data = overview_features.groupby('label')[sample_features].mean().T
    sns.heatmap(sample_data, annot=False, fmt='.1f', cmap='viridis', ax=ax,
                cbar_kws={'label': 'Mean Value'})
    ax.set_title('Feature Mean Values by Label (Sample)', fontsize=14, fontweight='bold')
    ax.set_xlabel('Label', fontsize=12)
    ax.set_ylabel('Feature', fontsize=10)
else:
    _placeholder_panel(ax, 'Feature Mean Values by Label (Sample)',
                       'Features not extracted yet')

# --- sequence length distribution --------------------------------------------
ax = axes[1, 0]
if seq_inputs is not None and len(seq_inputs) > 0:
    seq_lengths = [len(seq) for seq in seq_inputs]
    ax.hist(seq_lengths, bins=20, color='green', alpha=0.7,
            edgecolor='black', linewidth=1.5)
    ax.set_xlabel('Sequence Length', fontsize=12)
    ax.set_ylabel('Frequency', fontsize=12)
    ax.set_title('Sequence Length Distribution', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')
else:
    _placeholder_panel(ax, 'Sequence Length Distribution', 'Sequences not prepared yet')

# --- label distribution in sequences -----------------------------------------
ax = axes[1, 1]
if seq_labels is not None and len(seq_labels) > 0:
    label_counts_seq = pd.Series(np.asarray(seq_labels).ravel()).value_counts()
    seq_class_counts = [label_counts_seq.get(0, 0), label_counts_seq.get(1, 0)]
    ax.bar(['Normal', 'Fake'], seq_class_counts, color=['blue', 'red'],
           alpha=0.7, edgecolor='black', linewidth=1.5)
    ax.set_xlabel('Label', fontsize=12)
    ax.set_ylabel('Count', fontsize=12)
    ax.set_title('Class Distribution in Sequences', fontsize=14, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')
    for position, count in enumerate(seq_class_counts):
        ax.text(position, count, str(count), ha='center', va='bottom',
                fontsize=12, fontweight='bold')
else:
    _placeholder_panel(ax, 'Class Distribution in Sequences', 'Sequences not prepared yet')

plt.tight_layout()
overview_path = output_dir / "03_modeling_01_plot.png"
plt.savefig(overview_path, dpi=150, bbox_inches='tight')
plt.show()
if HAS_IPYTHON and overview_path.exists():
    display(Image(str(overview_path)))

## 2. Train Baseline Models

Train baseline models on temporal features.


In [None]:
# Build the temporal feature table consumed by the baseline models.
print("Extracting temporal features...")
temporal_feature_options = {
    "id_column": "id",
    "timestamp_column": "timestamp",
    "window_sizes": [6, 12, 24],
    "autocorr_lags": [1, 6, 12, 24],
    "aggregate_per_id": True,
}
features_df = extract_temporal_features(df, **temporal_feature_options)
print(f"Features extracted: {features_df.shape}")

# Train all four baseline model types in one call; the fitted artefacts are
# written under <project_root>/models/baselines.
baseline_model_types = ['logistic_regression', 'random_forest', 'isolation_forest', 'lof']
baseline_save_dir = project_root / "models" / "baselines"
baseline_results = train_multiple_baselines(
    features_df,
    model_types=baseline_model_types,
    test_size=0.2,
    random_state=42,
    save_dir=str(baseline_save_dir),
)
print(f"\nBaseline models trained: {len(baseline_results)}")

## 3. Prepare Sequences for Sequential Models

Prepare time series sequences for LSTM, TCN, and Autoencoder.


In [None]:
# Sequence-related settings live in the "data" section of the config; every
# lookup below carries the fallback used when the key is absent.
data_config = config.get("data", {})
seq_len = data_config.get("seq_len", 48)

print("Preparing sequences for sequential models...")
sequence_options = {
    "seq_len": seq_len,
    "stride": data_config.get("stride", 1),
    "normalize": data_config.get("normalize", True),
    "normalization_method": data_config.get("normalization_method", "standardize"),
    "normalize_per_series": data_config.get("normalize_per_series", False),
    "test_size": data_config.get("test_size", 0.2),
    "val_size": data_config.get("val_size", 0.1),
    "random_state": config.get("training", {}).get("random_seed", 42),
}
sequence_data = prepare_sequences_for_training(df, **sequence_options)

print(f"Sequences prepared:")
for split_label, split_key in (("Train", "X_train"), ("Val", "X_val"), ("Test", "X_test")):
    print(f"  {split_label}: {sequence_data[split_key].shape}")

# Record the data dimensions in the config so the models can be sized from it.
input_size = len(sequence_data['feature_names'])
config = update_config_with_data(config, input_size=input_size, seq_len=seq_len)

# Wrap the prepared splits in DataLoaders.
loader_options = {
    "batch_size": data_config.get("batch_size", 32),
    "shuffle_train": True,
    "num_workers": data_config.get("num_workers", 0),
    "pin_memory": data_config.get("pin_memory", False),
}
dataloaders = create_dataloaders_from_dict(sequence_data, **loader_options)
print(f"\nDataLoaders created: {list(dataloaders.keys())}")

## 4. Train Sequential Models

Train LSTM, TCN, and Autoencoder models with early stopping.


In [None]:
# Use the GPU when one is visible to torch, otherwise the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

# Train each sequential architecture in turn, keeping for every model
#   * the tuple the evaluation cells unpack: (model, test loader, device, type)
#   * its training history
sequential_models = ['lstm', 'tcn', 'autoencoder']
sequential_results = {}
training_histories = {}

sequential_save_dir = str(project_root / "models" / "sequential")
banner = '=' * 60

for model_type in sequential_models:
    display_name = model_type.upper()
    print(f"\n{banner}")
    print(f"Training {display_name}")
    print(banner)

    model, history = train_model_from_config(
        model_type=model_type,
        dataloaders=dataloaders,
        config=config,
        device=device,
        save_dir=sequential_save_dir,
    )

    sequential_results[model_type] = (model, dataloaders['test'], device, model_type)
    training_histories[model_type] = history

    print(f"{display_name} training completed")

print(f"\nSequential models trained: {len(sequential_results)}")

## 5. Training Curves Visualization

Visualize training and validation curves for sequential models.


In [None]:
# plot training curves
#
# One panel per trained model: loss on the left axis and, when the history
# contains non-zero accuracies, accuracy on a twin right axis.


def _history_series(history, key):
    """Return `history[key]` as a list, or an empty list when the key is absent."""
    values = history.get(key) if hasattr(history, 'get') else None
    return [] if values is None else list(values)


def _plot_against_epochs(ax, values, **plot_kwargs):
    """Plot `values` against 1-based epoch numbers; do nothing for an empty series."""
    if len(values) > 0:
        ax.plot(range(1, len(values) + 1), values, **plot_kwargs)


history_items = list(training_histories.items())
n_panels = len(history_items)
n_cols = 2
# Ceil-divide so any number of models fits; never smaller than the 2x2 grid
# used for the usual three models.
n_rows = max(2, -(-n_panels // n_cols))
fig, axes = plt.subplots(n_rows, n_cols, figsize=(16, 6 * n_rows))
panel_axes = axes.ravel()

for ax, (model_type, history) in zip(panel_axes, history_items):
    _plot_against_epochs(ax, _history_series(history, 'train_loss'),
                         label='Train Loss', linewidth=2)
    _plot_against_epochs(ax, _history_series(history, 'val_loss'),
                         label='Val Loss', linewidth=2)

    # Not every history is guaranteed to record accuracy, so the keys are read
    # defensively (the LSTM-vs-TCN cell guards the same keys) and the twin
    # axis is added only when there is something non-zero to draw.
    train_accuracy = _history_series(history, 'train_accuracy')
    val_accuracy = _history_series(history, 'val_accuracy')
    if any(train_accuracy):
        ax2 = ax.twinx()
        _plot_against_epochs(ax2, train_accuracy, label='Train Acc',
                             linewidth=2, linestyle='--', color='green')
        _plot_against_epochs(ax2, val_accuracy, label='Val Acc',
                             linewidth=2, linestyle='--', color='orange')
        ax2.set_ylabel('Accuracy', fontsize=10)
        ax2.legend(loc='upper right')

    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel('Loss', fontsize=12)
    ax.set_title(f'{model_type.upper()} Training Curves', fontsize=14, fontweight='bold')
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)

# hide every panel that did not receive a model
for unused_ax in panel_axes[n_panels:]:
    unused_ax.axis('off')

plt.tight_layout()
curves_path = output_dir / "03_modeling_02_plot.png"
plt.savefig(curves_path, dpi=150, bbox_inches='tight')
plt.show()
if HAS_IPYTHON and curves_path.exists():
    display(Image(str(curves_path)))

## 6. Evaluate Sequential Models

Evaluate the sequential models on the held-out test set.


## 6.1. Autoencoder Reconstruction Visualization

Visualize original vs reconstructed series for the autoencoder model.


In [None]:
# Autoencoder reconstruction plots with high-error zones highlighted in red.
if 'autoencoder' not in sequential_results:
    print("Autoencoder model not available. Train it first.")
else:
    ae_model, ae_loader, ae_device, _ = sequential_results['autoencoder']
    ae_model.eval()

    # Only the first batch of the loader is used for the examples.
    X_sample, y_sample, _ = next(iter(ae_loader))
    X_sample = X_sample.to(ae_device)

    with torch.no_grad():
        reconstructed = ae_model(X_sample)

    # Move both tensors to host memory for numpy / matplotlib.
    original_np = X_sample.cpu().numpy()
    reconstructed_np = reconstructed.cpu().numpy()

    # Plot at most three examples from the batch.
    for idx in range(min(3, len(X_sample))):
        sample_number = idx + 1
        # Feature index 0 only (labelled "views" by the original author).
        original_seq = original_np[idx, :, 0]
        reconstructed_seq = reconstructed_np[idx, :, 0]
        is_fake = y_sample[idx].item()
        label_text = 'FAKE' if is_fake else 'NORMAL'

        # Time steps whose absolute error exceeds the sample's own 90th
        # percentile are flagged as anomalous.
        error = np.abs(original_seq - reconstructed_seq)
        error_threshold = np.percentile(error, 90)
        anomaly_mask = error > error_threshold

        fig, axes = plot_reconstruction(
            original_seq,
            reconstructed_seq,
            anomaly_mask,
            title=f"Autoencoder Reconstruction - Sample {sample_number} ({label_text}) - Red Zones = High Error",
            error_threshold=error_threshold,
            show_error_zones=True
        )

        figure_path = output_dir / f"03_spectacular_ae_reconstruction_{sample_number}.png"
        plt.savefig(figure_path, dpi=150, bbox_inches='tight')
        plt.show()
        if HAS_IPYTHON and figure_path.exists():
            display(Image(str(figure_path)))
        plt.close(fig)

    # Aggregate reconstruction error over the whole batch and all features.
    residual = original_np - reconstructed_np
    mse = np.mean(residual ** 2)
    mae = np.mean(np.abs(residual))
    print(f"\nAutoencoder Reconstruction Statistics:")
    print(f"  Mean Squared Error: {mse:.4f}")
    print(f"  Mean Absolute Error: {mae:.4f}")

In [None]:
# Score every trained sequential model on its test loader and collect the
# metrics into one table (one row per model).
sequential_metrics = {}
headline_metrics = (("AUC", "auc"), ("Precision", "precision"), ("Recall", "recall"), ("F1", "f1"))

for model_type, result_bundle in sequential_results.items():
    model, test_loader, device, _ = result_bundle
    print(f"\nEvaluating {model_type.upper()}...")

    y_true, y_pred, y_proba = evaluate_sequential_model(model, test_loader, device, model_type)
    metrics = compute_metrics(y_true, y_pred, y_proba)
    sequential_metrics[model_type] = metrics

    for metric_label, metric_key in headline_metrics:
        print(f"  {metric_label}: {metrics[metric_key]:.4f}")

sequential_metrics_df = pd.DataFrame(sequential_metrics).T
print("\nSequential Models Metrics:")
summary_columns = ['auc', 'precision', 'recall', 'f1', 'false_positive_rate']
print(sequential_metrics_df[summary_columns])

## 7. LSTM vs TCN Comparison

Compare the performance and training characteristics of the LSTM and TCN models.


## 9. Model Performance Heatmap

Create a comprehensive heatmap comparing all baseline models.


In [None]:
# create baseline models performance heatmap
#
# The save / show / display steps live inside the `if` branch so that
#   * nothing is saved when there is no figure, and
#   * the "No baseline models loaded." message is tied to the emptiness check
#     rather than to whether the saved image could be displayed.
if len(baseline_results) > 0:
    # Each entry of baseline_results is unpacked as
    # (model, X_test, y_test, y_pred, y_proba); only the last three are needed.
    baseline_metrics_dict = {}
    for model_name, (model, X_test, y_test, y_pred, y_proba) in baseline_results.items():
        baseline_metrics_dict[model_name] = compute_metrics(y_test, y_pred, y_proba)

    baseline_metrics_df = pd.DataFrame(baseline_metrics_dict).T
    # Plot only the metrics that compute_metrics actually returned.
    metrics_to_plot = [
        metric_name
        for metric_name in ['auc', 'precision', 'recall', 'f1', 'false_positive_rate']
        if metric_name in baseline_metrics_df.columns
    ]

    # Widen the figure with the number of models so tick labels stay legible.
    fig, ax = plt.subplots(1, 1, figsize=(max(8, len(baseline_results) * 1.5), 6))
    sns.heatmap(baseline_metrics_df[metrics_to_plot].T, annot=True, fmt='.3f',
                cmap='YlOrRd', cbar_kws={'label': 'Score'}, ax=ax, linewidths=0.5)
    ax.set_title('Baseline Models Performance Heatmap', fontsize=14, fontweight='bold')
    ax.set_xlabel('Model', fontsize=12)
    ax.set_ylabel('Metric', fontsize=12)
    ax.set_xticklabels(ax.get_xticklabels(), rotation=45, ha='right')
    plt.tight_layout()

    heatmap_path = output_dir / "03_modeling_03_plot.png"
    plt.savefig(heatmap_path, dpi=150, bbox_inches='tight')
    plt.show()
    if HAS_IPYTHON and heatmap_path.exists():
        display(Image(str(heatmap_path)))
else:
    print("No baseline models loaded.")

In [None]:
# compare LSTM vs TCN
#
# Four panels: loss curves, accuracy curves, headline metrics, ROC curves.
# Saving, displaying and the printed summary all belong to the branch where
# both models exist; the `else` message is tied to that same check.


def _plot_history_pair(ax, history, train_key, val_key, model_label, color,
                       skip_if_all_zero=False):
    """Draw one model's train (solid) and validation (dashed) curves, when recorded."""
    if not history or train_key not in history:
        return
    train_values = history[train_key]
    if skip_if_all_zero and not any(train_values):
        return
    epochs = range(1, len(train_values) + 1)
    ax.plot(epochs, train_values, label=f'{model_label} Train', linewidth=2, color=color)
    val_values = history.get(val_key)
    # The validation curve is drawn only when it lines up with the epochs axis.
    if val_values is not None and len(val_values) == len(train_values):
        ax.plot(epochs, val_values, label=f'{model_label} Val', linewidth=2,
                color=color, linestyle='--')


if 'lstm' in sequential_results and 'tcn' in sequential_results:
    lstm_history = training_histories.get('lstm', {})
    tcn_history = training_histories.get('tcn', {})

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))

    # loss comparison
    ax = axes[0, 0]
    _plot_history_pair(ax, lstm_history, 'train_loss', 'val_loss', 'LSTM', 'blue')
    _plot_history_pair(ax, tcn_history, 'train_loss', 'val_loss', 'TCN', 'red')
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel('Loss', fontsize=12)
    ax.set_title('Loss Comparison: LSTM vs TCN', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # accuracy comparison (skipped per model when no non-zero accuracy exists)
    ax = axes[0, 1]
    _plot_history_pair(ax, lstm_history, 'train_accuracy', 'val_accuracy', 'LSTM', 'blue',
                       skip_if_all_zero=True)
    _plot_history_pair(ax, tcn_history, 'train_accuracy', 'val_accuracy', 'TCN', 'red',
                       skip_if_all_zero=True)
    ax.set_xlabel('Epoch', fontsize=12)
    ax.set_ylabel('Accuracy', fontsize=12)
    ax.set_title('Accuracy Comparison: LSTM vs TCN', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # evaluate both models on their test loaders
    lstm_model, lstm_loader, lstm_device, _ = sequential_results['lstm']
    tcn_model, tcn_loader, tcn_device, _ = sequential_results['tcn']

    lstm_y_true, lstm_y_pred, lstm_y_proba = evaluate_sequential_model(
        lstm_model, lstm_loader, lstm_device, 'lstm')
    tcn_y_true, tcn_y_pred, tcn_y_proba = evaluate_sequential_model(
        tcn_model, tcn_loader, tcn_device, 'tcn')

    lstm_metrics = compute_metrics(lstm_y_true, lstm_y_pred, lstm_y_proba)
    tcn_metrics = compute_metrics(tcn_y_true, tcn_y_pred, tcn_y_proba)

    # metrics bar chart
    ax = axes[1, 0]
    metric_keys = ['auc', 'precision', 'recall', 'f1']
    metrics_names = ['AUC', 'Precision', 'Recall', 'F1']
    lstm_values = [lstm_metrics[key] for key in metric_keys]
    tcn_values = [tcn_metrics[key] for key in metric_keys]

    x = np.arange(len(metrics_names))
    width = 0.35
    ax.bar(x - width / 2, lstm_values, width, label='LSTM', color='blue', alpha=0.7)
    ax.bar(x + width / 2, tcn_values, width, label='TCN', color='red', alpha=0.7)
    ax.set_xlabel('Metric', fontsize=12)
    ax.set_ylabel('Score', fontsize=12)
    ax.set_title('Performance Metrics: LSTM vs TCN', fontsize=14, fontweight='bold')
    ax.set_xticks(x)
    ax.set_xticklabels(metrics_names)
    ax.legend()
    ax.grid(True, alpha=0.3, axis='y')

    # ROC curves comparison
    ax = axes[1, 1]
    plot_roc_curve(lstm_y_true, lstm_y_proba, model_name='LSTM', ax=ax)
    plot_roc_curve(tcn_y_true, tcn_y_proba, model_name='TCN', ax=ax)
    ax.set_title('ROC Curves: LSTM vs TCN', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    comparison_path = output_dir / "03_modeling_04_plot.png"
    plt.savefig(comparison_path, dpi=150, bbox_inches='tight')
    plt.show()
    if HAS_IPYTHON and comparison_path.exists():
        display(Image(str(comparison_path)))

    # print comparison (independent of whether the image could be displayed)
    print("=" * 60)
    print("LSTM vs TCN Comparison")
    print("=" * 60)
    for model_label, model_metrics in (("LSTM", lstm_metrics), ("TCN", tcn_metrics)):
        print(f"\n{model_label} Metrics:")
        print(f"  AUC: {model_metrics['auc']:.4f}")
        print(f"  Precision: {model_metrics['precision']:.4f}")
        print(f"  Recall: {model_metrics['recall']:.4f}")
        print(f"  F1: {model_metrics['f1']:.4f}")
    print("=" * 60)
else:
    print("Both LSTM and TCN models are required for comparison. Train them first.")

## 8. Evaluate Sequential Models

Evaluate the sequential models on the held-out test set.


In [None]:
# Evaluate the sequential models on the test set.
# NOTE: this repeats the evaluation performed by an identical cell earlier in
# the notebook and rebuilds `sequential_metrics` / `sequential_metrics_df`
# from scratch.
sequential_metrics = {}

for model_type in sequential_results:
    model, test_loader, device, _ = sequential_results[model_type]
    print(f"\nEvaluating {model_type.upper()}...")

    y_true, y_pred, y_proba = evaluate_sequential_model(model, test_loader, device, model_type)
    metrics = compute_metrics(y_true, y_pred, y_proba)
    sequential_metrics[model_type] = metrics

    auc_value = metrics['auc']
    precision_value = metrics['precision']
    recall_value = metrics['recall']
    f1_value = metrics['f1']
    print(f"  AUC: {auc_value:.4f}")
    print(f"  Precision: {precision_value:.4f}")
    print(f"  Recall: {recall_value:.4f}")
    print(f"  F1: {f1_value:.4f}")

# One row per model, one column per metric.
sequential_metrics_df = pd.DataFrame(sequential_metrics).transpose()

print("\nSequential Models Metrics:")
print(sequential_metrics_df[['auc', 'precision', 'recall', 'f1', 'false_positive_rate']])

## 7. Compare Baseline Models

Compare the performance of the baseline models.


In [None]:
# Build the metric comparison table for the trained baselines; plot=True asks
# the helper to draw its comparison plot as well.
baseline_metrics_df = compare_models(
    baseline_results,
    plot=True,
)

## 8. Comprehensive Model Comparison

Compare all models (baselines vs sequential) to identify the best model.


## 9.1. Score Comparison by Attack Type

Compare model scores across different attack types.


In [None]:
# compare scores by attack type
#
# NOTE(review): test sequences are NOT mapped back to the attack type of the
# series they came from. As in the original cell, the fake-sample scores are
# simply cut into equal consecutive slices, one per attack type, so the
# per-type numbers are illustrative only until a real sequence -> attack-type
# mapping is available.
if 'attack_type' not in df.columns:
    print("Attack type column not found in dataset.")
else:
    # `best_model_name` is assigned by a later cell. When it does not exist
    # yet (fresh kernel, top-to-bottom run) fall back to the sequential model
    # with the highest AUC among the already computed `sequential_metrics`.
    attack_model_name = globals().get('best_model_name')
    if attack_model_name is None:
        known_metrics = globals().get('sequential_metrics') or {}
        auc_by_model = {}
        for candidate_name, candidate_metrics in known_metrics.items():
            candidate_auc = candidate_metrics.get('auc')
            # `candidate_auc == candidate_auc` filters out NaN.
            if (candidate_name in sequential_results
                    and candidate_auc is not None
                    and candidate_auc == candidate_auc):
                auc_by_model[candidate_name] = candidate_auc
        if auc_by_model:
            attack_model_name = max(auc_by_model, key=auc_by_model.get)

    if attack_model_name is None or attack_model_name not in sequential_results:
        print("Best model not available. Train models first.")
    else:
        model, test_loader, device, model_type = sequential_results[attack_model_name]
        y_true, y_pred, y_proba = evaluate_sequential_model(model, test_loader, device, model_type)
        y_true = np.asarray(y_true).ravel()
        y_proba = np.asarray(y_proba)

        # Rows that belong to fake series, as an explicit boolean mask.
        if 'is_fake_series' in df.columns:
            fake_rows = df['is_fake_series'].astype(bool)
        elif 'label' in df.columns:
            fake_rows = df['label'] == 'fake'
        else:
            fake_rows = pd.Series(False, index=df.index)
        attack_types = df.loc[fake_rows, 'attack_type'].dropna().unique()

        # Positive-class scores of the samples whose true label is fake (1).
        fake_mask = y_true == 1
        if y_proba.ndim == 1:
            fake_scores = y_proba[fake_mask]
        elif y_proba.shape[1] > 1:
            fake_scores = y_proba[fake_mask, 1]
        else:
            fake_scores = y_proba[fake_mask].flatten()

        n_fake = len(fake_scores)
        n_attack_types = len(attack_types)

        if n_attack_types == 0:
            print("No attack types found in dataset.")
        elif n_fake < n_attack_types:
            # Equal slicing would leave empty groups (NaN means, empty boxes).
            print("Not enough fake samples in the test set to split by attack type.")
        else:
            fig, axes = plt.subplots(1, 2, figsize=(16, 6))

            # Equal consecutive slices; the last type absorbs the remainder.
            samples_per_type = n_fake // n_attack_types
            attack_type_scores = {}
            for idx, attack_type in enumerate(attack_types):
                start_idx = idx * samples_per_type
                end_idx = start_idx + samples_per_type if idx < n_attack_types - 1 else n_fake
                attack_type_scores[attack_type] = fake_scores[start_idx:end_idx]

            # box plot — tick labels are set separately instead of through the
            # `labels=` keyword, which newer matplotlib releases deprecate.
            axes[0].boxplot([attack_type_scores[at] for at in attack_types])
            axes[0].set_xticklabels(attack_types)
            axes[0].set_ylabel('Prediction Score', fontsize=12)
            axes[0].set_title('Score Distribution by Attack Type', fontsize=14, fontweight='bold')
            axes[0].tick_params(axis='x', rotation=45)
            axes[0].grid(True, alpha=0.3, axis='y')

            # bar chart of mean scores
            mean_scores = [np.mean(attack_type_scores[at]) for at in attack_types]
            axes[1].bar(range(len(attack_types)), mean_scores, color='red', alpha=0.7)
            axes[1].set_xticks(range(len(attack_types)))
            axes[1].set_xticklabels(attack_types, rotation=45, ha='right')
            axes[1].set_ylabel('Mean Prediction Score', fontsize=12)
            axes[1].set_title('Mean Score by Attack Type', fontsize=14, fontweight='bold')
            axes[1].grid(True, alpha=0.3, axis='y')

            plt.tight_layout()
            attack_plot_path = output_dir / "03_scores_by_attack_type.png"
            plt.savefig(attack_plot_path, dpi=150, bbox_inches='tight')
            plt.show()
            if HAS_IPYTHON and attack_plot_path.exists():
                display(Image(str(attack_plot_path)))

            # print statistics
            print("\nScore Statistics by Attack Type:")
            for attack_type in attack_types:
                scores = attack_type_scores[attack_type]
                print(f"  {attack_type}: mean={np.mean(scores):.4f}, std={np.std(scores):.4f}")

In [None]:
# Run the combined baseline-vs-sequential comparison; plot=True asks the
# helper to draw its comparison plot as well.
all_metrics_df = compare_all_models(
    baseline_results,
    sequential_results,
    plot=True,
)

# Persist the comparison table as CSV under <project_root>/models.
results_path = project_root.joinpath("models", "model_comparison_results.csv")
all_metrics_df.to_csv(results_path)
print(f"\nResults saved to: {results_path}")

## 9. Error Analysis

Analyze misclassifications and identify patterns in errors.


In [None]:
# Error analysis for the model with the highest AUC in the comparison table:
# confusion matrix plus the score distribution of the misclassified samples.
from sklearn.metrics import confusion_matrix

best_model_name = all_metrics_df['auc'].idxmax()
print(f"Analyzing errors for best model: {best_model_name}")

if best_model_name in sequential_results:
    # sequential model: predictions are recomputed on its test loader
    model, test_loader, device, model_type = sequential_results[best_model_name]
    y_true, y_pred, y_proba = evaluate_sequential_model(model, test_loader, device, model_type)
else:
    # baseline model: predictions were stored at training time
    model, X_test, y_test, y_pred, y_proba = baseline_results[best_model_name]
    y_true = y_test

# Work on flat numpy arrays so element-wise comparison and boolean masking
# behave the same whether the source was a list, a Series or an (n, 1) array.
y_true = np.asarray(y_true).ravel()
y_pred = np.asarray(y_pred).ravel()

# labels=[0, 1] keeps the matrix 2x2 (Normal, Fake) even when one class is
# missing from the predictions, so it always matches the tick labels below.
cm = confusion_matrix(y_true, y_pred, labels=[0, 1])

fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# confusion matrix heatmap
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=axes[0],
            xticklabels=['Normal', 'Fake'], yticklabels=['Normal', 'Fake'])
axes[0].set_xlabel('Predicted', fontsize=12)
axes[0].set_ylabel('True', fontsize=12)
axes[0].set_title(f'Confusion Matrix - {best_model_name}', fontsize=14, fontweight='bold')

# error masks
errors = y_true != y_pred
false_positive_mask = errors & (y_true == 0)
false_negative_mask = errors & (y_true == 1)

# Positive-class score per sample: column 1 of a two-column probability
# array, otherwise the flattened scores. None when no scores were stored.
if y_proba is None:
    y_proba_positive = None
else:
    y_proba = np.asarray(y_proba)
    if y_proba.ndim > 1:
        y_proba_positive = y_proba[:, 1] if y_proba.shape[1] > 1 else y_proba.flatten()
    else:
        y_proba_positive = y_proba

# error distribution — empty groups are skipped because a density histogram
# of zero samples is undefined.
if y_proba_positive is not None:
    for error_mask, error_label, error_color in (
        (false_positive_mask, 'False Positives', 'red'),
        (false_negative_mask, 'False Negatives', 'orange'),
    ):
        if error_mask.any():
            axes[1].hist(y_proba_positive[error_mask], bins=20,
                         alpha=0.6, label=error_label, color=error_color, density=True)
axes[1].set_xlabel('Prediction Score', fontsize=12)
axes[1].set_ylabel('Density', fontsize=12)
axes[1].set_title('Error Distribution', fontsize=14, fontweight='bold')
if axes[1].get_legend_handles_labels()[0]:
    axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
error_plot_path = output_dir / "03_modeling_05_plot.png"
plt.savefig(error_plot_path, dpi=150, bbox_inches='tight')
plt.show()
if HAS_IPYTHON and error_plot_path.exists():
    display(Image(str(error_plot_path)))

# print error statistics
print(f"\nError Statistics for {best_model_name}:")
print(f"  Total errors: {errors.sum()}")
print(f"  False Positives: {false_positive_mask.sum()}")
print(f"  False Negatives: {false_negative_mask.sum()}")
print(f"  Error rate: {errors.mean():.2%}")

In [None]:
# Text summary of the model comparison: champion model, average AUC of each
# model family, and the three best models by AUC.
rule = "=" * 80
print(rule)
print("MODEL SELECTION SUMMARY")
print(rule)

# Champion = row with the highest AUC in the combined comparison table.
best_model_name = all_metrics_df['auc'].idxmax()
best_metrics = all_metrics_df.loc[best_model_name]
print(f"\nChampion Model: {best_model_name}")
print(f"  Model Type: {best_metrics['model_type']}")
for metric_label, metric_key in (
    ("AUC", "auc"),
    ("Precision", "precision"),
    ("Recall", "recall"),
    ("F1-Score", "f1"),
    ("False Positive Rate", "false_positive_rate"),
):
    print(f"  {metric_label}: {best_metrics[metric_key]:.4f}")

# Mean AUC per model family, and the absolute / relative gap between them.
family = all_metrics_df['model_type']
baseline_avg_auc = all_metrics_df.loc[family == 'baseline', 'auc'].mean()
sequential_avg_auc = all_metrics_df.loc[family == 'sequential', 'auc'].mean()
auc_gain = sequential_avg_auc - baseline_avg_auc
print(f"\nAverage Performance:")
print(f"  Baseline models: {baseline_avg_auc:.4f}")
print(f"  Sequential models: {sequential_avg_auc:.4f}")
print(f"  Improvement: {auc_gain:.4f} ({auc_gain / baseline_avg_auc * 100:.1f}%)")

# Leaderboard of the three highest-AUC models.
print(f"\nTop 3 Models (by AUC):")
top_3 = all_metrics_df.nlargest(3, 'auc')
rank = 0
for model_name, row in top_3.iterrows():
    rank += 1
    print(f"  {rank}. {model_name}: AUC={row['auc']:.4f}, F1={row['f1']:.4f}")

print("\n" + rule)