In [2]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.patches as mpatches
from math import pi
from collections import defaultdict

In [3]:
current_dir = Path.cwd()
project_root = current_dir.parent.parent

sns.set(style='whitegrid')

In [4]:
# --- Features Parameters ---
# Available Datasets = ['FakeRealMusicOriginal/', 'FakeRealMusicOriginalNormalized/']
Dataset = 'FakeRealMusicOriginal/'

# Available Sources = ['mix', 'vocals0', 'drums0', 'bass0', 'other0']
separated_source = 'mix'

# Available LUFS = ['', 'minus14/', 'minus23/']
LUFS = ''

# Available Perturbations = ['', 'base/', 'mp3_192/', 'noise_snr30/', 'resample22k/', 'reverb_room/']
perturbation = ''

test_name = 'Extended_full_track_features/'

# --- Preds Parameters ---
preds_test_name = 'FULL_TEST/'

In [5]:
features_path = f'../../results/Features/{Dataset}{LUFS}{perturbation}{test_name}full_track/features_full_track.json'
preds_path = f'../../results/AudioLIME/{Dataset}{preds_test_name}full_track/explanations.json'

In [None]:
def load_and_prepare_data_full(json_file, separated_source):
    """
    Load JSON data and preserve ALL sub-features from nested structure.
    
    {
        model_name: {
            track_id: {
                "type": "full_track" | "segment",
                "segment_id": null | value,
                "features": {
                    "mix": {
                        "duration": 120.0,
                        "rms_wave": {"min": ..., "mean": ..., "std": ..., "max": ...},
                        "jitter": {"jitter_local": ..., "jitter_rap": ..., ...},
                        ...
                    },
                    "vocals0": {...},
                    ...
                }
            },
            ...
        },
        ...
    }
    
    Output:
    - DataFrame with collumns: model, track, data_type, source, segment_id, [all_features]
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    all_rows = []
    
    type_mapping = {
        'ElevenLabs': 'GENERATED',       
        'REAL': 'REAL',
        'SUNO': 'GENERATED',
        'SUNO_PRO': 'GENERATED',
        'UDIO': 'GENERATED',
    }
    
    for model_name, tracks_dict in data.items():
        for track_key, track_data in tracks_dict.items():
            
            if not isinstance(track_data, dict) or 'features' not in track_data:
                continue
            
            features_root = track_data.get('features', {})
            track_type = track_data.get('type', 'unknown')
            segment_id = track_data.get('segment_id', None)
            
            sources_to_process = [separated_source]
            
            for source in sources_to_process:
                if source not in features_root:
                    continue
                
                features = features_root[source]
                row = {
                    'model': model_name,
                    'track': track_key,
                    'source': source,
                    'data_type': type_mapping.get(model_name, model_name),
                    'segment_id': segment_id,
                }
                
                def flatten_feature(feat_dict, prefix=''):
                    result = {}
                    
                    for key, val in feat_dict.items():
                        col_name = f"{prefix}_{key}" if prefix else key
                        
                        if isinstance(val, dict):
                            stats_keys = {'min', 'mean', 'std', 'max'}
                            
                            if stats_keys.intersection(val.keys()):
                                for stat_name, stat_val in val.items():
                                    result[f"{col_name}_{stat_name}"] = float(stat_val) if isinstance(stat_val, (int, float)) else np.nan
                            else:
                                nested = flatten_feature(val, prefix=col_name)
                                result.update(nested)
                        
                        elif isinstance(val, list):
                            if len(val) > 0 and all(isinstance(x, (int, float)) for x in val):
                                result[f"{col_name}_mean"] = float(np.mean(val))
                                result[f"{col_name}_min"] = float(np.min(val))
                                result[f"{col_name}_max"] = float(np.max(val))
                                result[f"{col_name}_std"] = float(np.std(val)) if len(val) > 1 else 0.0
                            else:
                                pass
                        
                        elif isinstance(val, (int, float)):
                            result[col_name] = float(val)
                        elif isinstance(val, bool):
                            result[col_name] = val
                        elif isinstance(val, str):
                            try:
                                result[col_name] = float(val)
                            except (ValueError, TypeError):
                                pass
                    
                    return result
                
                flattened = flatten_feature(features)
                row.update(flattened)
                
                all_rows.append(row)
    
    features_df = pd.DataFrame(all_rows)
    
    if features_df.empty:
        print("⚠️ Warning: No data loaded from JSON file!")
        return features_df, []
    
    exclude_cols = {'model', 'track', 'source', 'data_type', 'segment_id'}
    feature_cols = [col for col in features_df.columns if col not in exclude_cols]
    
    print(f"\n{'='*80}")
    print(f"✅ Data loaded successfully!")
    print(f"   • Models: {features_df['model'].unique().tolist()}")
    print(f"   • Total records: {len(features_df)}")
    print(f"   • Total features: {len(feature_cols)}")
    print(f"   • Sample features: {feature_cols[:10]}")
    print(f"{'='*80}\n")
    
    return features_df, feature_cols


In [7]:
features_df, features_to_analyze = load_and_prepare_data_full(features_path, separated_source)
print(f"\n✓ Data loaded: {len(features_df)} samples, {len(features_to_analyze)} features")
print(f"✓ Models: {features_df['model'].value_counts().to_dict()}\n")


✅ Data loaded successfully!
   • Models: ['ElevenLabs', 'REAL', 'SUNO', 'SUNO_PRO', 'UDIO']
   • Total records: 50
   • Total features: 65
   • Sample features: ['duration', 'rms_wave_min', 'rms_wave_mean', 'rms_wave_std', 'rms_wave_max', 'rms_spec_min', 'rms_spec_mean', 'rms_spec_std', 'rms_spec_max', 'zero_crossing_rate']


✓ Data loaded: 50 samples, 65 features
✓ Models: {'ElevenLabs': 10, 'REAL': 10, 'SUNO': 10, 'SUNO_PRO': 10, 'UDIO': 10}



In [None]:
def load_model_predictions(json_file):
    """
    Load model predictions and AudioLIME component influences from JSON file.
    
    Expected structure:
    {
        model_name: {
            track_id: {
                "type": "full_track" | "segment",
                "segment_id": null | value,
                "explanations": {
                    "file_path": "...",
                    "model_prediction": 0.xyz,
                    "predicted_class": "Real" | "Fake",
                    "component_influences": {
                        "vocals0": 0.5378...,
                        "drums0": -0.0084...,
                        "bass0": -0.0116...,
                        "other0": -0.1878...
                    }
                }
            },
            ...
        },
        ...
    }
    
    Output:
    - DataFrame with collumns:
      * model, track, data_type, segment_id
      * model_prediction, predicted_class
      * component_influence_vocals0, component_influence_drums0, ...
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    type_mapping = {
        'ElevenLabs': 'GENERATED',       
        'REAL': 'REAL',
        'SUNO': 'GENERATED',
        'SUNO_PRO': 'GENERATED',
        'UDIO': 'GENERATED',
    }
    
    all_rows = []
    
    for model_name, tracks_dict in data.items():
        for track_key, track_data in tracks_dict.items():
            
            if not isinstance(track_data, dict) or 'explanations' not in track_data:
                continue
            
            explanations = track_data.get('explanations', {})
            track_type = track_data.get('type', 'unknown')
            segment_id = track_data.get('segment_id', None)
            
            row = {
                'model': model_name,
                'track': track_key,
                'data_type': type_mapping.get(model_name, model_name),
                'segment_id': segment_id,
                'file_path': explanations.get('file_path', ''),
                'model_prediction': float(explanations.get('model_prediction', np.nan)),
                'predicted_class': explanations.get('predicted_class', 'Unknown'),
            }
            
            component_influences = explanations.get('component_influences', {})
            for component_name, influence_value in component_influences.items():
                col_name = f"component_influence_{component_name}"
                row[col_name] = float(influence_value) if isinstance(influence_value, (int, float)) else np.nan
            
            all_rows.append(row)
    
    predictions_df = pd.DataFrame(all_rows)
    
    if predictions_df.empty:
        print("⚠️ Warning: No predictions loaded from JSON file!")
        return predictions_df
    
    base_cols = ['model', 'track', 'data_type', 'segment_id', 'file_path', 
                 'model_prediction', 'predicted_class']
    component_cols = [col for col in predictions_df.columns if col.startswith('component_influence_')]
    predictions_df = predictions_df[base_cols + component_cols]
    
    print(f"\n{'='*80}")
    print(f"✅ Predictions loaded successfully!")
    print(f"   • Models: {predictions_df['model'].unique().tolist()}")
    print(f"   • Total records: {len(predictions_df)}")
    print(f"   • Prediction range: [{predictions_df['model_prediction'].min():.4f}, {predictions_df['model_prediction'].max():.4f}]")
    print(f"   • Predicted classes: {predictions_df['predicted_class'].unique().tolist()}")
    print(f"   • Components tracked: {component_cols}")
    print(f"{'='*80}\n")
    
    return predictions_df


In [9]:
predictions_df = load_model_predictions(
    json_file=preds_path
)


✅ Predictions loaded successfully!
   • Models: ['ElevenLabs', 'REAL', 'SUNO', 'SUNO_PRO', 'UDIO']
   • Total records: 50
   • Prediction range: [0.0239, 0.9899]
   • Predicted classes: ['Real', 'Fake']
   • Components tracked: ['component_influence_vocals0', 'component_influence_drums0', 'component_influence_bass0', 'component_influence_other0']



In [None]:
def merge_features_and_predictions(features_df, predictions_df):
    """
    Merge features DataFrame with predictions DataFrame.
    
    Join key: (model, track)
    
    Returns merged DataFrame with all features + all predictions/influences
    """
    
    features_merge = features_df.copy()
    features_merge['merge_key'] = features_merge['model'] + '||' + features_merge['track']
    
    predictions_merge = predictions_df.copy()
    predictions_merge['merge_key'] = predictions_merge['model'] + '||' + predictions_merge['track']
    
    merged_df = features_merge.merge(
        predictions_merge.drop(columns=['model', 'track', 'data_type', 'segment_id']),
        on='merge_key',
        how='left',
        suffixes=('_features', '_predictions')
    )
    
    merged_df = merged_df.drop(columns=['merge_key'])
    
    metadata_cols = ['model', 'track', 'source', 'data_type', 'segment_id']
    
    feature_col_prefixes = ['rms_wave', 'jitter', 'shimmer', 'rhythm_stats', 'duration', 
                            'zero_crossing', 'hnr', 'gne', 'breath', 'voice_breaks', 'tempo']
    feature_cols = [col for col in merged_df.columns 
                   if any(col.startswith(prefix) for prefix in feature_col_prefixes)]
    
    prediction_cols = [col for col in merged_df.columns 
                      if col.startswith(('file_path', 'model_prediction', 'predicted_class', 'component_influence'))]
    
    other_cols = [col for col in merged_df.columns 
                 if col not in metadata_cols + feature_cols + prediction_cols]
    
    final_col_order = metadata_cols + feature_cols + prediction_cols + other_cols
    merged_df = merged_df[[col for col in final_col_order if col in merged_df.columns]]
    
    print(f"\n{'='*80}")
    print(f"✅ DataFrames merged successfully!")
    print(f"   • Total records: {len(merged_df)}")
    print(f"   • Records with predictions: {merged_df['model_prediction'].notna().sum()}")
    print(f"   • Records without predictions: {merged_df['model_prediction'].isna().sum()}")
    print(f"   • Total columns: {len(merged_df.columns)}")
    print(f"   • Feature columns: {len(feature_cols)}")
    print(f"   • Prediction columns: {len(prediction_cols)}")
    print(f"\n   Column structure:")
    print(f"      - Metadata: {metadata_cols}")
    print(f"      - Features: {feature_cols[:5]} ... ({len(feature_cols)} total)")
    print(f"      - Predictions: {prediction_cols}")
    print(f"{'='*80}\n")
    
    return merged_df


In [11]:
complete_df = merge_features_and_predictions(features_df, predictions_df)


✅ DataFrames merged successfully!
   • Total records: 50
   • Records with predictions: 50
   • Records without predictions: 0
   • Total columns: 77
   • Feature columns: 28
   • Prediction columns: 7

   Column structure:
      - Metadata: ['model', 'track', 'source', 'data_type', 'segment_id']
      - Features: ['duration', 'rms_wave_min', 'rms_wave_mean', 'rms_wave_std', 'rms_wave_max'] ... (28 total)
      - Predictions: ['file_path', 'model_prediction', 'predicted_class', 'component_influence_vocals0', 'component_influence_drums0', 'component_influence_bass0', 'component_influence_other0']



In [12]:
complete_df.head()

Unnamed: 0,model,track,source,data_type,segment_id,duration,rms_wave_min,rms_wave_mean,rms_wave_std,rms_wave_max,...,spectral_contrast_max,spectral_flatness_min,spectral_flatness_mean,spectral_flatness_std,spectral_flatness_max,f0_min,f0_mean,f0_std,f0_max,intonation_pattern_pitch_variability
0,ElevenLabs,1__Ed_Sheeran_-_Perfect_Echoes_of_You_Wariant_...,mix,GENERATED,,120.0,2.089001e-05,0.139664,0.070806,0.333599,...,63.779575,2.588399e-08,0.001425,0.010149,0.122521,65.406391,140.237022,44.911029,314.742105,67.918416
1,ElevenLabs,10__Adele_Rolling_in_the_Deep_-_Edge_of_the_He...,mix,GENERATED,,99.892245,0.0,0.178018,0.0838,0.411453,...,64.418292,1.46127e-09,0.027694,0.144977,1.000001,65.406391,93.489759,105.430151,806.963558,97.10983
2,ElevenLabs,2__Travis_Scott_Sico_Mode_-_Run_This_Town_Wari...,mix,GENERATED,,103.88898,0.0,0.159084,0.090357,0.442858,...,65.887064,1.049918e-10,0.025026,0.127544,1.000001,65.406391,256.992874,347.838956,1790.764258,205.753161
3,ElevenLabs,3__Imagine_Dragons_Believer_-_Together_We_Rise...,mix,GENERATED,,89.887347,0.0,0.146642,0.071364,0.353091,...,69.78765,6.306086e-09,0.017176,0.119802,1.000001,65.406391,191.993434,326.049139,2093.004522,235.435288
4,ElevenLabs,4__Offset_Bodies_-_Ruckus_Resonance_Wariant_2_...,mix,GENERATED,,73.926531,2.330183e-07,0.166111,0.066529,0.385434,...,64.047156,2.245041e-09,0.004042,0.04156,1.000001,72.154946,1003.296535,865.411727,2093.004522,468.677755


In [13]:
complete_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 77 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   model                                 50 non-null     object 
 1   track                                 50 non-null     object 
 2   source                                50 non-null     object 
 3   data_type                             50 non-null     object 
 4   segment_id                            0 non-null      object 
 5   duration                              50 non-null     float64
 6   rms_wave_min                          50 non-null     float64
 7   rms_wave_mean                         50 non-null     float64
 8   rms_wave_std                          50 non-null     float64
 9   rms_wave_max                          50 non-null     float64
 10  zero_crossing_rate                    50 non-null     float64
 11  jitter_jitter_local  

In [None]:
def setup_professional_style():
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica']
    plt.rcParams['font.size'] = 10
    plt.rcParams['axes.labelsize'] = 12
    plt.rcParams['axes.titlesize'] = 13
    plt.rcParams['xtick.labelsize'] = 11
    plt.rcParams['ytick.labelsize'] = 11
    plt.rcParams['legend.fontsize'] = 10
    plt.rcParams['figure.titlesize'] = 16
    
    # Professional grid
    plt.rcParams['axes.grid'] = True
    plt.rcParams['grid.alpha'] = 0.3
    plt.rcParams['grid.linestyle'] = '--'
    plt.rcParams['grid.linewidth'] = 0.5
    
    # Line widths
    plt.rcParams['axes.linewidth'] = 1.5
    plt.rcParams['xtick.major.width'] = 1.5
    plt.rcParams['ytick.major.width'] = 1.5
    
    sns.set_palette("husl")

PROFESSIONAL_COLORS = {
    'REAL': '#1f77b4',
    'ElevenLabs': '#ff7f0e',
    'SUNO': '#2ca02c',
    'SUNO_PRO': '#d62728',
    'UDIO': '#9467bd'
}

In [15]:
base_output_folder = Path(f'{Dataset}Vizualizations_with_preds/{preds_test_name}{LUFS}{perturbation}{separated_source}/')
base_output_folder.mkdir(parents=True, exist_ok=True)

In [None]:
def viz_all_features_decision_boundary(merged_df, confidence_threshold=0.3):
    """        
    Folder structure:
    visualizations_decision_boundary/
    ├── rms_wave/
    │   ├── rms_wave_min/
    │   │   ├── boxplots.png
    │   │   └── scatter_analysis.png
    │   ├── rms_wave_mean/
    │   ├── rms_wave_std/
    │   └── rms_wave_max/
    ├── rms_spec/
    │   ├── rms_spec_min/
    │   ├── rms_spec_mean/
    │   ├── rms_spec_std/
    │   └── rms_spec_max/
    ├── jitter/
    │   ├── jitter_jitter_local/
    │   ├── jitter_jitter_rap/
    │   └── ...
    └── [other features]/
    
    Parameters:
    -----------
    merged_df : pd.DataFrame
        Merged features + predictions DataFrame
    confidence_threshold : float, default=0.3
        Threshold for "uncertain" predictions (0.5 ± threshold)
    """
    
    setup_professional_style()
    
    base_folder = Path(f'{base_output_folder}/visualizations_decision_boundary')
    base_folder.mkdir(exist_ok=True)
    
    print(f"\n{'='*80}")
    print("Creating comprehensive decision boundary analysis for ALL features...")
    print(f"{'='*80}\n")
    
    def categorize_confidence(pred_value):
        if pd.isna(pred_value):
            return 'Unknown'
        if abs(pred_value - 0.5) <= confidence_threshold:
            return 'Uncertain'
        elif pred_value < 0.5:
            return 'Confident REAL'
        else:
            return 'Confident FAKE'
    
    merged_df['prediction_confidence'] = merged_df['model_prediction'].apply(categorize_confidence)
    
    CONFIDENCE_COLORS = {
        'Confident REAL': '#1f77b4',
        'Uncertain': '#ff7f0e',
        'Confident FAKE': '#d62728',
        'Unknown': '#7f7f7f'
    }
    
    CONFIDENCE_ALPHA = {
        'Confident REAL': 0.8,
        'Uncertain': 0.6,
        'Confident FAKE': 0.8,
        'Unknown': 0.3
    }
    
    exclude_cols = {'model', 'track', 'source', 'data_type', 'segment_id', 
                   'file_path', 'model_prediction', 'predicted_class', 
                   'prediction_confidence', 'predicted_class'}
    all_features = [col for col in merged_df.columns 
                   if col not in exclude_cols and 'component_influence' not in col]
    
    feature_groups = defaultdict(list)
    
    for col in all_features:
        parts = col.split('_')
        
        if len(parts) > 1 and parts[-1] in ['min', 'mean', 'std', 'max']:
            base_name = '_'.join(parts[:-1])
            stat = parts[-1]
        else:
            base_name = col
            stat = 'single'
        
        feature_groups[base_name].append((col, stat))
    
    print(f"Found {len(feature_groups)} feature groups:")
    for group_name in sorted(feature_groups.keys()):
        print(f"  • {group_name} ({len(feature_groups[group_name])} features)")
    print()
    
    total_visualizations = 0
    
    for feature_base, columns_list in sorted(feature_groups.items()):
        print(f"\n{'─'*80}")
        print(f"Processing feature group: {feature_base}")
        print(f"{'─'*80}")
        
        feature_folder = base_folder / feature_base
        feature_folder.mkdir(exist_ok=True)
        
        if len(columns_list) == 1 and columns_list[0][1] == 'single':
            col = columns_list[0][0]
            
            print(f"  Processing single feature: {col}")
            
            subfolder = feature_folder / col
            subfolder.mkdir(exist_ok=True)
            
            _create_feature_visualizations(
                merged_df, col, feature_base, subfolder,
                CONFIDENCE_COLORS, CONFIDENCE_ALPHA, confidence_threshold
            )
            
            total_visualizations += 1
        
        else:
            stat_order = ['min', 'mean', 'std', 'max']
            columns_sorted = sorted(columns_list,
                                   key=lambda x: next((i for i, s in enumerate(stat_order) 
                                                      if s == x[1]), 999))
            
            for col, stat in columns_sorted:
                print(f"  Processing feature: {col}")
                
                subfolder = feature_folder / col
                subfolder.mkdir(exist_ok=True)
                
                _create_feature_visualizations(
                    merged_df, col, feature_base, subfolder,
                    CONFIDENCE_COLORS, CONFIDENCE_ALPHA, confidence_threshold
                )
                
                total_visualizations += 1
    
    print(f"\n{'='*80}")
    print(f"✅ Decision boundary visualizations created!")
    print(f"   • Total features analyzed: {total_visualizations}")
    print(f"   • Output folder: {base_folder}/")
    print(f"✅ Ready for academic thesis analysis!")
    print(f"{'='*80}\n")
    
    return merged_df


def _create_feature_visualizations(merged_df, feature_col, feature_base, output_folder,
                                   CONFIDENCE_COLORS, CONFIDENCE_ALPHA, confidence_threshold):
    """
    Helper function to create visualizations for a single feature.
    Generates: boxplots.png, scatter_analysis.png, heatmap.png, statistics.txt
    """
    
    fig, axes = plt.subplots(2, 2, figsize=(18, 14))
    fig.patch.set_facecolor('white')
    axes = axes.flatten()
    
    predictions_by_class = {
        'Confident REAL': merged_df[merged_df['prediction_confidence'] == 'Confident REAL'],
        'Uncertain': merged_df[merged_df['prediction_confidence'] == 'Uncertain'],
        'Confident FAKE': merged_df[merged_df['prediction_confidence'] == 'Confident FAKE'],
    }
    
    for plot_idx, (category, category_label) in enumerate([
        ('Confident REAL', 'Confident REAL Classification'),
        ('Uncertain', 'Uncertain Classification'),
        ('Confident FAKE', 'Confident FAKE Classification'),
        ('All Data', 'All Data Combined')
    ]):
        ax = axes[plot_idx]
        
        if category == 'All Data':
            plot_df = merged_df[merged_df['model_prediction'].notna()].copy()
        else:
            plot_df = predictions_by_class[category]
        
        if len(plot_df) == 0:
            ax.text(0.5, 0.5, f'No data available\nfor {category}',
                   ha='center', va='center', fontsize=12, color='red',
                   transform=ax.transAxes)
            ax.set_title(f'{category_label} - NO DATA', fontsize=13, 
                       fontweight='bold', color='red')
            continue
        
        data_types = sorted(plot_df['data_type'].unique())
        plot_data = []
        box_colors = []
        
        for data_type in data_types:
            type_data = plot_df[plot_df['data_type'] == data_type][feature_col].dropna()
            if len(type_data) > 0:
                plot_data.append(type_data.values)
                if data_type == 'REAL':
                    box_colors.append('#aec7e8')
                else:
                    box_colors.append('#ffbb78')
        
        if len(plot_data) == 0:
            ax.text(0.5, 0.5, f'No valid data',
                   ha='center', va='center', fontsize=12, color='red',
                   transform=ax.transAxes)
            continue
        
        bp = ax.boxplot(plot_data,
                       labels=data_types,
                       patch_artist=True,
                       widths=0.5,
                       showmeans=True,
                       meanline=False,
                       notch=False,
                       vert=True,
                       whis=1.5,
                       meanprops=dict(marker='D', markerfacecolor='red',
                                    markersize=8, markeredgecolor='darkred',
                                    markeredgewidth=1.5),
                       medianprops=dict(color='darkblue', linewidth=2.5),
                       whiskerprops=dict(linewidth=1.5, color='black'),
                       capprops=dict(linewidth=1.5, color='black'),
                       boxprops=dict(linewidth=2, color='black'))
        
        for patch, color in zip(bp['boxes'], box_colors):
            patch.set_facecolor(color)
            patch.set_alpha(0.85)
        
        for i, (data, dtype) in enumerate(zip(plot_data, data_types)):
            y = data
            x = np.random.normal(i+1, 0.04, size=len(y))
            ax.scatter(x, y, alpha=0.4, s=35, color='black',
                      edgecolors='gray', linewidth=0.5)
        
        ax.set_xticklabels(data_types, fontsize=12, fontweight='bold')
        ax.set_ylabel('Feature Value', fontsize=12, fontweight='bold')
        ax.set_title(f'{category_label}', fontsize=13, fontweight='bold', pad=12,
                    bbox=dict(boxstyle='round,pad=0.6', 
                             facecolor=CONFIDENCE_COLORS.get(category, '#f0f0f0'),
                             alpha=0.3, edgecolor='#333333', linewidth=1.5))
        
        ax.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.8)
        ax.set_axisbelow(True)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['left'].set_linewidth(1.8)
        ax.spines['bottom'].set_linewidth(1.8)
        
        real_vals = plot_df[plot_df['data_type'] == 'REAL'][feature_col].dropna()
        gen_vals = plot_df[plot_df['data_type'] == 'GENERATED'][feature_col].dropna()
            
        table_lines = [
            "┌─────────────┬──────────┬──────────┬────────┐",
            "│ Type        │   Mean   │   Std    │   n    │",
            "├─────────────┼──────────┼──────────┼────────┤"
        ]
        
        if len(real_vals) > 0:
            table_lines.append(
                f"│ REAL        │ {real_vals.mean():8.4f} │ {real_vals.std():8.4f} │ {len(real_vals):6d} │"
            )
        
        if len(gen_vals) > 0:
            table_lines.append(
                f"│ GENERATED   │ {gen_vals.mean():8.4f} │ {gen_vals.std():8.4f} │ {len(gen_vals):6d} │"
            )
        
        table_lines.append("└─────────────┴──────────┴──────────┴────────┘")
        
        stats_text = "\n".join(table_lines)
        
        ax.text(0.98, 0.97, stats_text, transform=ax.transAxes,
                fontsize=8.5, verticalalignment='top', horizontalalignment='right',
                bbox=dict(boxstyle='round,pad=0.8', facecolor='white',
                        alpha=0.95, edgecolor='black', linewidth=1.3),
                family='monospace', weight='bold')

    
    fig.suptitle(f'Decision Boundary Analysis: {feature_col}\n'
                f'(Confidence threshold: ±{confidence_threshold})',
                fontsize=16, fontweight='bold', y=0.995)
    
    plt.tight_layout()
    output_file = output_folder / 'boxplots.png'
    plt.savefig(output_file, dpi=300, bbox_inches='tight', facecolor='white')
    plt.close()
    
    print(f"    ✓ Saved: boxplots.png")
    
    fig, ax = plt.subplots(1, 1, figsize=(14, 10))
    fig.patch.set_facecolor('white')
    
    plot_data = merged_df[[feature_col, 'model_prediction', 'data_type', 'prediction_confidence']].dropna()
    
    if len(plot_data) > 0:
        for confidence in ['Confident REAL', 'Uncertain', 'Confident FAKE']:
            conf_data = plot_data[plot_data['prediction_confidence'] == confidence]
            if len(conf_data) > 0:
                ax.scatter(conf_data[feature_col], conf_data['model_prediction'],
                          alpha=CONFIDENCE_ALPHA[confidence],
                          s=80,
                          color=CONFIDENCE_COLORS[confidence],
                          label=confidence,
                          edgecolors='black',
                          linewidth=0.7)
        
        ax.axhline(y=0.5, color='black', linestyle='-', linewidth=2, alpha=0.5)
        ax.axhline(y=0.5-confidence_threshold, color='gray', linestyle='--', 
                  linewidth=1.5, alpha=0.5, label='Confidence threshold')
        ax.axhline(y=0.5+confidence_threshold, color='gray', linestyle='--', 
                  linewidth=1.5, alpha=0.5)
        
        ax.fill_between(ax.get_xlim(), 0.5-confidence_threshold, 0.5+confidence_threshold,
                       color='orange', alpha=0.08)
        
        ax.set_xlabel(f'{feature_col}', fontsize=13, fontweight='bold')
        ax.set_ylabel('Model Prediction P(Fake)', fontsize=13, fontweight='bold')
        ax.set_title(f'Feature vs Model Prediction: {feature_col}', 
                    fontsize=14, fontweight='bold', pad=15)
        ax.set_ylim(-0.05, 1.05)
        
        ax.grid(alpha=0.3, linestyle='--', linewidth=0.8)
        ax.set_axisbelow(True)
        ax.spines['top'].set_visible(False)
        ax.spines['right'].set_visible(False)
        ax.spines['left'].set_linewidth(1.8)
        ax.spines['bottom'].set_linewidth(1.8)
        ax.legend(fontsize=11, loc='best', framealpha=0.95)
        
        plt.tight_layout()
        output_file = output_folder / 'scatter_analysis.png'
        plt.savefig(output_file, dpi=300, bbox_inches='tight', facecolor='white')
        plt.close()
        
        print(f"    ✓ Saved: scatter_analysis.png")
    
    summary_file = output_folder / 'statistics.txt'
    with open(summary_file, 'w', encoding='utf-8') as f:
        f.write("="*80 + "\n")
        f.write(f"DECISION BOUNDARY ANALYSIS - {feature_col.upper()}\n")
        f.write("="*80 + "\n\n")
        
        f.write(f"Feature: {feature_col}\n")
        f.write(f"Feature Group: {feature_base}\n")
        f.write(f"Confidence Threshold: ±{confidence_threshold}\n\n")
        
        f.write(f"Configuration:\n")
        f.write(f"  Confident REAL: P(Fake) < {0.5-confidence_threshold:.2f}\n")
        f.write(f"  Uncertain: {0.5-confidence_threshold:.2f} ≤ P(Fake) ≤ {0.5+confidence_threshold:.2f}\n")
        f.write(f"  Confident FAKE: P(Fake) > {0.5+confidence_threshold:.2f}\n\n")
        
        f.write("="*80 + "\n")
        f.write("SUMMARY BY PREDICTION CONFIDENCE\n")
        f.write("="*80 + "\n\n")
        
        for confidence in ['Confident REAL', 'Uncertain', 'Confident FAKE']:
            conf_data = merged_df[merged_df['prediction_confidence'] == confidence]
            if len(conf_data) > 0:
                f.write(f"\n{confidence.upper()}:\n")
                f.write(f"  Total samples: {len(conf_data)}\n")
                f.write(f"  REAL samples: {(conf_data['data_type'] == 'REAL').sum()}\n")
                f.write(f"  GENERATED samples: {(conf_data['data_type'] == 'GENERATED').sum()}\n")
                f.write(f"  Avg prediction: {conf_data['model_prediction'].mean():.4f}\n")
                f.write(f"  Std prediction: {conf_data['model_prediction'].std():.4f}\n\n")
                
                real_vals = conf_data[conf_data['data_type'] == 'REAL'][feature_col].dropna()
                gen_vals = conf_data[conf_data['data_type'] == 'GENERATED'][feature_col].dropna()
                
                f.write(f"  {feature_col}:\n")
                if len(real_vals) > 0:
                    f.write(f"    REAL - Mean: {real_vals.mean():.6f}, Std: {real_vals.std():.6f}\n")
                    f.write(f"           Min: {real_vals.min():.6f}, Max: {real_vals.max():.6f}\n")
                if len(gen_vals) > 0:
                    f.write(f"    GENERATED - Mean: {gen_vals.mean():.6f}, Std: {gen_vals.std():.6f}\n")
                    f.write(f"               Min: {gen_vals.min():.6f}, Max: {gen_vals.max():.6f}\n")
                
                if len(real_vals) > 1 and len(gen_vals) > 1:
                    from scipy.stats import mannwhitneyu
                    stat_val, p_val = mannwhitneyu(real_vals, gen_vals)
                    significance = "***" if p_val < 0.001 else "**" if p_val < 0.01 else "*" if p_val < 0.05 else "ns"
                    f.write(f"    Mann-Whitney U test p-value: {p_val:.6f} {significance}\n")
                f.write("\n")
    
    print(f"    ✓ Saved: statistics.txt")

In [17]:
viz_all_features_decision_boundary(
    complete_df,
    confidence_threshold=0.3
)


Creating comprehensive decision boundary analysis for ALL features...

Found 32 feature groups:
  • breath_count (1 features)
  • duration (1 features)
  • f0 (4 features)
  • gne (1 features)
  • hnr (1 features)
  • intonation_pattern_pitch_variability (1 features)
  • jitter_jitter (1 features)
  • jitter_jitter_local (1 features)
  • jitter_jitter_mean_absolute_ms (1 features)
  • jitter_jitter_ppq5 (1 features)
  • jitter_jitter_range (1 features)
  • jitter_jitter_rap (1 features)
  • rhythm_stats_avg_onset_strength (1 features)
  • rhythm_stats_max_onset_strength (1 features)
  • rhythm_stats_tempo_bpm (4 features)
  • rms_spec (4 features)
  • rms_wave (4 features)
  • shimmer_shimmer (1 features)
  • shimmer_shimmer_apq3 (1 features)
  • shimmer_shimmer_apq5 (1 features)
  • shimmer_shimmer_dB (1 features)
  • shimmer_shimmer_local (1 features)
  • shimmer_shimmer_range (1 features)
  • spectral_bandwidth (4 features)
  • spectral_centroid (4 features)
  • spectral_contrast (

  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: duration
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: duration


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: f0
────────────────────────────────────────────────────────────────────────────────
  Processing feature: f0_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: f0_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: f0_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: f0_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: gne
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: gne


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: hnr
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: hnr


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: intonation_pattern_pitch_variability
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: intonation_pattern_pitch_variability


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: jitter_jitter
────────────────────────────────────────────────────────────────────────────────
  Processing feature: jitter_jitter_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: jitter_jitter_local
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: jitter_jitter_local


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: jitter_jitter_mean_absolute_ms
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: jitter_jitter_mean_absolute_ms


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: jitter_jitter_ppq5
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: jitter_jitter_ppq5


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: jitter_jitter_range
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: jitter_jitter_range


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: jitter_jitter_rap
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: jitter_jitter_rap


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: rhythm_stats_avg_onset_strength
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: rhythm_stats_avg_onset_strength


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: rhythm_stats_max_onset_strength
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: rhythm_stats_max_onset_strength


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: rhythm_stats_tempo_bpm
────────────────────────────────────────────────────────────────────────────────
  Processing feature: rhythm_stats_tempo_bpm_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rhythm_stats_tempo_bpm_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rhythm_stats_tempo_bpm_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rhythm_stats_tempo_bpm_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: rms_spec
────────────────────────────────────────────────────────────────────────────────
  Processing feature: rms_spec_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rms_spec_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rms_spec_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rms_spec_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: rms_wave
────────────────────────────────────────────────────────────────────────────────
  Processing feature: rms_wave_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rms_wave_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rms_wave_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: rms_wave_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: shimmer_shimmer
────────────────────────────────────────────────────────────────────────────────
  Processing feature: shimmer_shimmer_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: shimmer_shimmer_apq3
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: shimmer_shimmer_apq3


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: shimmer_shimmer_apq5
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: shimmer_shimmer_apq5


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: shimmer_shimmer_dB
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: shimmer_shimmer_dB


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: shimmer_shimmer_local
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: shimmer_shimmer_local


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: shimmer_shimmer_range
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: shimmer_shimmer_range


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: spectral_bandwidth
────────────────────────────────────────────────────────────────────────────────
  Processing feature: spectral_bandwidth_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_bandwidth_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_bandwidth_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_bandwidth_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: spectral_centroid
────────────────────────────────────────────────────────────────────────────────
  Processing feature: spectral_centroid_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_centroid_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_centroid_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_centroid_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: spectral_contrast
────────────────────────────────────────────────────────────────────────────────
  Processing feature: spectral_contrast_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_contrast_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_contrast_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_contrast_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: spectral_flatness
────────────────────────────────────────────────────────────────────────────────
  Processing feature: spectral_flatness_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_flatness_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_flatness_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_flatness_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: spectral_rolloff_1
────────────────────────────────────────────────────────────────────────────────
  Processing feature: spectral_rolloff_1_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_1_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_1_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_1_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: spectral_rolloff_85
────────────────────────────────────────────────────────────────────────────────
  Processing feature: spectral_rolloff_85_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_85_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_85_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_85_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: spectral_rolloff_99
────────────────────────────────────────────────────────────────────────────────
  Processing feature: spectral_rolloff_99_min


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_99_mean


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_99_std


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt
  Processing feature: spectral_rolloff_99_max


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: voice_breaks
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: voice_breaks


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

────────────────────────────────────────────────────────────────────────────────
Processing feature group: zero_crossing_rate
────────────────────────────────────────────────────────────────────────────────
  Processing single feature: zero_crossing_rate


  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,
  bp = ax.boxplot(plot_data,


    ✓ Saved: boxplots.png
    ✓ Saved: scatter_analysis.png
    ✓ Saved: statistics.txt

✅ Decision boundary visualizations created!
   • Total features analyzed: 65
   • Output folder: FakeRealMusicOriginal\Vizualizations_with_preds\FULL_TEST\mix\visualizations_decision_boundary/
✅ Ready for academic thesis analysis!



Unnamed: 0,model,track,source,data_type,segment_id,duration,rms_wave_min,rms_wave_mean,rms_wave_std,rms_wave_max,...,spectral_flatness_min,spectral_flatness_mean,spectral_flatness_std,spectral_flatness_max,f0_min,f0_mean,f0_std,f0_max,intonation_pattern_pitch_variability,prediction_confidence
0,ElevenLabs,1__Ed_Sheeran_-_Perfect_Echoes_of_You_Wariant_...,mix,GENERATED,,120.0,2.089001e-05,0.139664,0.070806,0.333599,...,2.588399e-08,0.001425,0.010149,0.122521,65.406391,140.237022,44.911029,314.742105,67.918416,Uncertain
1,ElevenLabs,10__Adele_Rolling_in_the_Deep_-_Edge_of_the_He...,mix,GENERATED,,99.892245,0.0,0.178018,0.0838,0.411453,...,1.46127e-09,0.027694,0.144977,1.000001,65.406391,93.489759,105.430151,806.963558,97.10983,Confident REAL
2,ElevenLabs,2__Travis_Scott_Sico_Mode_-_Run_This_Town_Wari...,mix,GENERATED,,103.88898,0.0,0.159084,0.090357,0.442858,...,1.049918e-10,0.025026,0.127544,1.000001,65.406391,256.992874,347.838956,1790.764258,205.753161,Uncertain
3,ElevenLabs,3__Imagine_Dragons_Believer_-_Together_We_Rise...,mix,GENERATED,,89.887347,0.0,0.146642,0.071364,0.353091,...,6.306086e-09,0.017176,0.119802,1.000001,65.406391,191.993434,326.049139,2093.004522,235.435288,Confident FAKE
4,ElevenLabs,4__Offset_Bodies_-_Ruckus_Resonance_Wariant_2_...,mix,GENERATED,,73.926531,2.330183e-07,0.166111,0.066529,0.385434,...,2.245041e-09,0.004042,0.04156,1.000001,72.154946,1003.296535,865.411727,2093.004522,468.677755,Confident FAKE
5,ElevenLabs,5__Hozier_Take_Me_To_Church_-_Echoes_of_Grace_...,mix,GENERATED,,111.856327,0.0,0.172327,0.081276,0.445339,...,1.182726e-08,0.01073,0.065279,1.000001,65.406391,77.175342,21.737045,173.608412,38.811593,Confident REAL
6,ElevenLabs,6__Bruno_Mars_Grenade_-_Higher_Than_the_Sky_Wa...,mix,GENERATED,,120.0,5.826393e-06,0.162072,0.05902,0.320668,...,7.0304e-08,0.002098,0.023333,0.308626,65.406391,102.828116,36.364664,242.699341,55.499371,Confident REAL
7,ElevenLabs,7__Saint_Levant_I_Guess_-_Moonlit_Mirage_Waria...,mix,GENERATED,,105.874286,1.244751e-16,0.119609,0.095772,0.448202,...,5.631994e-10,0.010897,0.079268,1.000001,65.406391,128.594068,59.413374,355.332633,75.437719,Uncertain
8,ElevenLabs,8__Nirvana_Hearh-Shaped_Box_-_Chaos_Inside_War...,mix,GENERATED,,103.836735,3.943387e-10,0.184602,0.063161,0.455253,...,1.474988e-09,0.00413,0.038965,1.000001,65.406391,110.192705,210.89265,1700.047939,160.065009,Confident REAL
9,ElevenLabs,9__Bass_Astral_x_Igo_Its_Dark_-_Electric_Night...,mix,GENERATED,,97.828571,3.003e-07,0.158837,0.106573,0.477063,...,1.046976e-09,0.012431,0.092933,1.000001,65.406391,83.909844,25.268654,178.695527,45.561308,Uncertain
