In [1]:
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from pathlib import Path
from scipy import stats
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.patches as mpatches
from math import pi
from collections import defaultdict

In [2]:
current_dir = Path.cwd()
project_root = current_dir.parent.parent

sns.set(style='whitegrid')

In [3]:
# Available Datasets = ['FakeRealMusicOriginal/', 'FakeRealMusicOriginalNormalized/']
Dataset = 'FakeRealMusicOriginalNormalized/'

# Available Sources = ['mix', 'vocals0', 'drums0', 'bass0', 'other0']
# separated_source = 'mix'

# Available LUFS = ['', 'minus14/', 'minus23/']
LUFS = 'minus14/'

# Available Perturbations = ['', 'base/', 'mp3_192/', 'noise_snr30/', 'resample22k/', 'reverb_room/']
perturbation = 'base/'

test_name = 'Full_Occlusion_512_512_all_models/'

In [10]:
features_path = f'../../results/SpectrogramExplainability/{Dataset}{LUFS}{perturbation}Occlusion/{test_name}occlusion_features/occlusion_patches/occlusion_patches_features.json'

In [11]:
def flatten_feature(feat_dict, prefix=''):
                    result = {}
                    
                    for key, val in feat_dict.items():
                        col_name = f"{prefix}_{key}" if prefix else key
                        
                        if isinstance(val, dict):
                            stats_keys = {'min', 'mean', 'std', 'max'}
                            
                            if stats_keys.intersection(val.keys()):
                                for stat_name, stat_val in val.items():
                                    result[f"{col_name}_{stat_name}"] = float(stat_val) if isinstance(stat_val, (int, float)) else np.nan
                            else:
                                nested = flatten_feature(val, prefix=col_name)
                                result.update(nested)
                        
                        elif isinstance(val, list):
                            if len(val) > 0 and all(isinstance(x, (int, float)) for x in val):
                                result[f"{col_name}_mean"] = float(np.mean(val))
                                result[f"{col_name}_min"] = float(np.min(val))
                                result[f"{col_name}_max"] = float(np.max(val))
                                result[f"{col_name}_std"] = float(np.std(val)) if len(val) > 1 else 0.0
                            else:
                                pass
                        
                        elif isinstance(val, (int, float)):
                            result[col_name] = float(val)
                        elif isinstance(val, bool):
                            result[col_name] = val
                        elif isinstance(val, str):
                            try:
                                result[col_name] = float(val)
                            except (ValueError, TypeError):
                                pass
                    
                    return result

def load_and_prepare_data_full(json_file):
    """
    Load JSON data and preserve ALL sub-features from nested structure.
    
    Data structure example:
    {
        model_name: {
            track_id: {
                "type": "patch",
                "patches": {
                    patch_name: {
                        "features": {
                            "duration": 120.0,
                            "rms_wave": {"min": ..., "mean": ..., "std": ..., "max": ...},
                            "jitter": {"jitter_local": ..., "jitter_rap": ..., ...},
                            ...
                        },
                        "occlusion_meta": {
                            "group": "words" | "best" | "most_influential",
                            "rank": 1,
                            "importance": -0.1234,
                            "abs_importance": 0.1234,
                            "tstart": 0,
                            "tend": 512,
                            "fstart": 0,
                            "fend": 512,
                            "start_time_sec": 0.0,
                            "end_time_sec": 5.944308390022676,
                            "patch_type": NEGATIVE | POSITIVE,
                            "model": "ElevenLabs" | "REAL" | "SUNO" | "SUNO_PRO" | "UDIO",
                            "track_stem": "some_track_name"
                        }
                    ...
                    }
                }
            },
            ...
        },
        ...
    }
    
    Output:
    - DataFrame with collumns: model, track, data_type, source, segment_id, [all_features}]
    """
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)
    
    all_rows = []
    
    type_mapping = {
        'ElevenLabs': 'GENERATED',       
        'REAL': 'REAL',
        'SUNO': 'GENERATED',
        'SUNO_PRO': 'GENERATED',
        'UDIO': 'GENERATED',
    }
    
    for model_name, tracks_dict in data.items():
        for track_key, track_data in tracks_dict.items():
            
            if not isinstance(track_data, dict) or 'patches' not in track_data:
                continue
            
            patches_root = track_data.get('patches', {})
            track_type = track_data.get('type', 'unknown')

            for patch_key, patch_data in patches_root.items():

                if not isinstance(patch_data, dict) or 'features' not in patch_data:
                    continue

                features = patch_data.get('features', {})
                occlusion_meta = patch_data.get('occlusion_meta', {})
                            
                row = {
                    'model': model_name,
                    'track': track_key,
                    'patch_key': patch_key,
                    'data_type': type_mapping.get(model_name, model_name),
                    **flatten_feature(occlusion_meta)
                }
                
                flattened = flatten_feature(features)
                row.update(flattened)
                
                all_rows.append(row)
    
    features_df = pd.DataFrame(all_rows)
    
    if features_df.empty:
        print("⚠️ Warning: No data loaded from JSON file!")
        return features_df, []
    
    exclude_cols = {'model', 'track', 'patch_key', 'data_type'}
    feature_cols = [col for col in features_df.columns if col not in exclude_cols]
    
    print(f"\n{'='*80}")
    print(f"✅ Data loaded successfully!")
    print(f"   • Models: {features_df['model'].unique().tolist()}")
    print(f"   • Total records: {len(features_df)}")
    print(f"   • Total features: {len(feature_cols)}")
    print(f"   • Sample features: {feature_cols[:10]}")
    print(f"{'='*80}\n")
    
    return features_df, feature_cols


In [12]:
features_df, features_to_analyze = load_and_prepare_data_full(features_path)
print(f"\n✓ Data loaded: {len(features_df)} samples, {len(features_to_analyze)} features")
print(f"✓ Models: {features_df['model'].value_counts().to_dict()}\n")

FileNotFoundError: [Errno 2] No such file or directory: '../../results/SpectrogramExplainability/FakeRealMusicOriginalNormalized/minus14/base/Occlusion/Full_Occlusion_512_512_all_models/occlusion_features/occlusion_patches/occlusion_patches_features.json'

In [7]:
features_df.head(5)

Unnamed: 0,model,track,patch_key,data_type,rank,importance,abs_importance,tstart,tend,fstart,...,gne,breath_count,intonation_pattern_pitch_variability,voice_breaks,rhythm_stats_tempo_bpm_mean,rhythm_stats_tempo_bpm_min,rhythm_stats_tempo_bpm_max,rhythm_stats_tempo_bpm_std,rhythm_stats_avg_onset_strength,rhythm_stats_max_onset_strength
0,ElevenLabs,1._Ed_Sheeran_-_Perfect_Echoes_of_You_Wariant_...,best_rank1,GENERATED,1.0,-0.365145,0.365145,3072.0,3584.0,0.0,...,8.049597,0.0,34.796156,6.0,143.554688,143.554688,143.554688,0.0,1.107978,10.817753
1,ElevenLabs,1._Ed_Sheeran_-_Perfect_Echoes_of_You_Wariant_...,best_rank2,GENERATED,2.0,-0.296195,0.296195,2560.0,3072.0,0.0,...,12.196389,2.0,47.67805,3.0,129.199219,129.199219,129.199219,0.0,0.960012,9.509796
2,ElevenLabs,1._Ed_Sheeran_-_Perfect_Echoes_of_You_Wariant_...,worst_rank1,GENERATED,1.0,0.010287,0.010287,0.0,512.0,0.0,...,20.054665,4.0,79.461762,9.0,156.605114,156.605114,156.605114,0.0,0.441874,10.169856
3,ElevenLabs,1._Ed_Sheeran_-_Perfect_Echoes_of_You_Wariant_...,worst_rank2,GENERATED,2.0,-0.010822,0.010822,7680.0,8192.0,0.0,...,7.980928,0.0,80.041176,1.0,123.046875,123.046875,123.046875,0.0,1.214267,7.344761
4,ElevenLabs,10._Adele_Rolling_in_the_Deep_-_Edge_of_the_He...,best_rank1,GENERATED,1.0,-0.092937,0.092937,5120.0,5632.0,0.0,...,8.776263,1.0,66.584306,4.0,78.302557,78.302557,78.302557,0.0,1.169037,13.721089


In [8]:
def setup_professional_style():
    plt.rcParams['font.family'] = 'sans-serif'
    plt.rcParams['font.sans-serif'] = ['Arial', 'Helvetica']
    plt.rcParams['font.size'] = 10
    plt.rcParams['axes.labelsize'] = 12
    plt.rcParams['axes.titlesize'] = 13
    plt.rcParams['xtick.labelsize'] = 11
    plt.rcParams['ytick.labelsize'] = 11
    plt.rcParams['legend.fontsize'] = 10
    plt.rcParams['figure.titlesize'] = 16
    
    plt.rcParams['axes.grid'] = True
    plt.rcParams['grid.alpha'] = 0.3
    plt.rcParams['grid.linestyle'] = '--'
    plt.rcParams['grid.linewidth'] = 0.5
    
    plt.rcParams['axes.linewidth'] = 1.5
    plt.rcParams['xtick.major.width'] = 1.5
    plt.rcParams['ytick.major.width'] = 1.5
    
    sns.set_palette("husl")

PROFESSIONAL_COLORS = {
    'REAL': '#1f77b4',
    'ElevenLabs': '#ff7f0e',
    'SUNO': '#2ca02c',
    'SUNO_PRO': '#d62728',
    'UDIO': '#9467bd'
}

In [9]:
base_output_folder = Path(f'{Dataset}{test_name}{LUFS}{perturbation}occlusion_features/')
base_output_folder.mkdir(parents=True, exist_ok=True)

In [10]:
from pathlib import Path
from collections import defaultdict

import numpy as np
import matplotlib.pyplot as plt


BOX_FILL_COLORS = {
    'REAL': '#aec7e8',
    'ElevenLabs': '#ffbb78',
    'SUNO': '#98df8a',
    'SUNO_PRO': '#ff9896',
    'UDIO': '#c5b0d5'
}

SIGN_COLORS = {
    'positive': '#2ca02c',
    'negative': '#d62728'
}


def format_influence_statistics_box(labels, plot_data):
    """
    english:
    labels: labels on X axis, e.g. ['REAL\\nnegative', 'REAL\\npositive', ...]
    plot_data: list of 1D arrays (as in boxplot)
    """
    rows = []

    header = ["Group", "Mean", "Std", "Count"]
    rows.append(header)

    for label, data in zip(labels, plot_data):
        if data is None or len(data) == 0:
            continue

        if "\n" in label:
            model, sign = label.split("\n", 1)
            group_name = f"{model} ({sign})"
        else:
            group_name = label

        mean_str = f"{np.mean(data):.4f}"
        std_str = f"{np.std(data):.4f}"
        count_str = f"{len(data)}"

        row = [group_name, mean_str, std_str, count_str]
        rows.append(row)

    if len(rows) == 1:
        return ""

    n_cols = len(rows[0])
    col_widths = [
        max(len(str(row[c])) for row in rows)
        for c in range(n_cols)
    ]

    def fmt_row(row):
        cells = []
        for i, (val, w) in enumerate(zip(row, col_widths)):
            text = str(val)
            if i == 0:
                cells.append(text.ljust(w))
            else:
                cells.append(text.rjust(w))
        return "  ".join(cells)

    lines = []
    lines.append(fmt_row(rows[0]))
    lines.append("─" * (sum(col_widths) + 2 * (n_cols - 1)))

    for row in rows[1:]:
        lines.append(fmt_row(row))

    return "\n".join(lines)


def add_bottom_stats_panel(fig, anchor_ax, text, width_frac=0.38, y_margin=0.04):
    if not text:
        return None

    bbox = anchor_ax.get_position()

    panel_width = bbox.width * width_frac
    left = bbox.x0 + (bbox.width - panel_width) / 2.0

    height = 0.10
    bottom = y_margin

    stats_ax = fig.add_axes([left, bottom, panel_width, height])
    stats_ax.axis("off")

    stats_ax.text(
        0.0, 1.0, text,
        ha="left", va="top",
        fontsize=9,
        transform=stats_ax.transAxes,
        bbox=dict(
            boxstyle="round,pad=0.4",
            facecolor="white",
            alpha=0.95,
            edgecolor="black",
            linewidth=1.0
        ),
        family="monospace"
    )

    return stats_ax


def viz2_real_vs_generated_boxplots_with_influence(
        features_df,
        base_output_folder=Path('./'),
        importance_col='importance'
):
    """
    Visualization of features split by influence sign (positive / negative)
    For each base feature (feature_base):

    - if there is a single column (stat == 'single'):
        * creates a 1x2 figure:
          - left: per-model, positive vs negative,
          - right: global, only sign.
    - if there are min/mean/std/max statistics:
        * for each column (e.g. *_min, *_mean, *_std, *_max)

        
    Under each panel, it places a small table (separate axis) with mean/std/count.
    """

    setup_professional_style()

    base_folder = Path(f'{base_output_folder}/visualizations_boxplot_influence2')
    base_folder.mkdir(exist_ok=True, parents=True)

    print(f"\n{'='*80}")
    print("Creating INFLUENCE-SPLIT boxplot visualizations for academic thesis...")
    print(f"{'='*80}\n")

    if importance_col not in features_df.columns:
        raise ValueError(f"Column '{importance_col}' not found in features_df")

    df = features_df.copy()
    df['influence_sign'] = np.where(df[importance_col] >= 0,
                                    'positive', 'negative')

    exclude_cols = {
        'model', 'track', 'patch_key', 'data_type',
        importance_col, 'influence_sign'
    }

    occlusion_meta_cols = {
        'group', 'rank', 'importance', 'abs_importance',
        'tstart', 'tend', 'fstart', 'fend',
        'start_time_sec', 'end_time_sec',
        'patch_type', 'model', 'track_stem'
    }

    all_cols = [
        col for col in df.columns
        if col not in exclude_cols and col not in occlusion_meta_cols
    ]

    feature_groups = defaultdict(list)

    for col in all_cols:
        parts = col.split('_')
        if len(parts) > 1 and parts[-1] in ['min', 'mean', 'std', 'max']:
            base_name = '_'.join(parts[:-1])
            stat = parts[-1]
        else:
            base_name = col
            stat = 'single'
        feature_groups[base_name].append((col, stat))

    print(f"Found {len(feature_groups)} feature groups\n")

    models = sorted(df['model'].dropna().unique())
    signs = ['negative', 'positive']

    for feature_base, columns_list in sorted(feature_groups.items()):
        print(f"Processing feature: {feature_base}")

        feature_folder = base_folder / feature_base
        feature_folder.mkdir(exist_ok=True, parents=True)

        if len(columns_list) == 1 and columns_list[0][1] == 'single':
            col = columns_list[0][0]

            fig, axes = plt.subplots(1, 2, figsize=(20, 8))
            ax_models, ax_global = axes

            plot_data = []
            x_labels = []

            for model in models:
                for sign in signs:
                    mask = (
                        (df['model'] == model) &
                        (df['influence_sign'] == sign)
                    )
                    data = df.loc[mask, col].dropna()
                    if len(data) > 0:
                        plot_data.append(data.values)
                        x_labels.append(f'{model}\n{sign}')
                    else:
                        plot_data.append(None)
                        x_labels.append(f'{model}\n{sign}')

            non_empty_indices = [
                i for i, d in enumerate(plot_data)
                if d is not None and len(d) > 0
            ]
            if not non_empty_indices:
                print(f"  ⚠️ SKIPPED: No valid data for feature '{col}'")
                plt.close(fig)
                continue

            plot_data = [plot_data[i] for i in non_empty_indices]
            x_labels = [x_labels[i] for i in non_empty_indices]

            bp = ax_models.boxplot(
                plot_data,
                labels=x_labels,
                patch_artist=True,
                widths=0.6,
                showmeans=True,
                meanline=False,
                notch=False,
                vert=True,
                whis=1.5,
                meanprops=dict(
                    marker='D', markerfacecolor='red',
                    markersize=7, markeredgecolor='darkred',
                    markeredgewidth=1.5
                ),
                medianprops=dict(color='darkblue', linewidth=2),
                whiskerprops=dict(linewidth=1.5, color='black'),
                capprops=dict(linewidth=1.5, color='black'),
                boxprops=dict(linewidth=1.5, color='black')
            )

            for i, patch in enumerate(bp['boxes']):
                label = x_labels[i]
                sign = 'positive' if 'positive' in label else 'negative'
                color = SIGN_COLORS.get(sign, '#cccccc')
                patch.set_facecolor(color)
                patch.set_alpha(0.7)
                patch.set_edgecolor('black')
                patch.set_linewidth(2)

            for i, data in enumerate(plot_data):
                y = data
                x = np.random.normal(i + 1, 0.05, size=len(y))
                ax_models.scatter(
                    x, y, alpha=0.35, s=25,
                    color='black', edgecolors='gray', linewidth=0.5
                )

            ax_models.set_ylabel('Value', fontsize=13, fontweight='bold')
            ax_models.set_title(
                f'{col} per model (positive vs negative influence)',
                fontsize=13, fontweight='bold', pad=15
            )
            ax_models.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.8)
            ax_models.set_axisbelow(True)
            ax_models.spines['top'].set_visible(False)
            ax_models.spines['right'].set_visible(False)
            ax_models.spines['left'].set_linewidth(1.8)
            ax_models.spines['bottom'].set_linewidth(1.8)
            for tick in ax_models.get_xticklabels():
                tick.set_rotation(45)
                tick.set_ha('right')

            global_plot_data = []
            global_labels = []
            for sign in signs:
                data = df.loc[df['influence_sign'] == sign, col].dropna()
                if len(data) > 0:
                    global_plot_data.append(data.values)
                    global_labels.append(sign)

            if global_plot_data:
                bp2 = ax_global.boxplot(
                    global_plot_data,
                    labels=global_labels,
                    patch_artist=True,
                    widths=0.6,
                    showmeans=True,
                    meanline=False,
                    notch=False,
                    vert=True,
                    whis=1.5,
                    meanprops=dict(
                        marker='D', markerfacecolor='red',
                        markersize=7, markeredgecolor='darkred',
                        markeredgewidth=1.5
                    ),
                    medianprops=dict(color='darkblue', linewidth=2),
                    whiskerprops=dict(linewidth=1.5, color='black'),
                    capprops=dict(linewidth=1.5, color='black'),
                    boxprops=dict(linewidth=1.5, color='black')
                )

                for i, patch in enumerate(bp2['boxes']):
                    sign = global_labels[i]
                    color = SIGN_COLORS.get(sign, '#cccccc')
                    patch.set_facecolor(color)
                    patch.set_alpha(0.7)
                    patch.set_edgecolor('black')
                    patch.set_linewidth(2)

                for i, data in enumerate(global_plot_data):
                    y = data
                    x = np.random.normal(i + 1, 0.05, size=len(y))
                    ax_global.scatter(
                        x, y, alpha=0.35, s=25,
                        color='black', edgecolors='gray', linewidth=0.5
                    )

                ax_global.set_ylabel('Value', fontsize=13, fontweight='bold')
                ax_global.set_title(
                    f'{col} (all models, positive vs negative influence)',
                    fontsize=13, fontweight='bold', pad=15
                )
                ax_global.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.8)
                ax_global.set_axisbelow(True)
                ax_global.spines['top'].set_visible(False)
                ax_global.spines['right'].set_visible(False)
                ax_global.spines['left'].set_linewidth(1.8)
                ax_global.spines['bottom'].set_linewidth(1.8)
            else:
                ax_global.text(
                    0.5, 0.5,
                    'No data for positive / negative influence',
                    transform=ax_global.transAxes,
                    ha='center', va='center', fontsize=12, color='red'
                )
                ax_global.axis('off')

            fig.suptitle(
                f'Feature Analysis (influence split): '
                f'{feature_base.replace("_", " ").title()}',
                fontsize=16, fontweight='bold', y=0.97
            )

            plt.tight_layout(rect=[0.03, 0.14, 0.97, 0.93])

            stats_text = format_influence_statistics_box(x_labels, plot_data)
            add_bottom_stats_panel(fig, ax_models, stats_text,
                                   width_frac=0.45, y_margin=0.04)

            if global_plot_data:
                global_stats_text = format_influence_statistics_box(
                    global_labels, global_plot_data
                )
                add_bottom_stats_panel(fig, ax_global, global_stats_text,
                                       width_frac=0.30, y_margin=0.04)

            output_file = feature_folder / f'{feature_base}_influence_boxplots.png'
            plt.savefig(
                output_file, dpi=300, bbox_inches='tight',
                facecolor='white', edgecolor='none'
            )
            plt.close(fig)

            print(f"  ✓ Saved: {output_file}")

        else:
            stat_order = ['min', 'mean', 'std', 'max']
            columns_sorted = sorted(
                columns_list,
                key=lambda x: next(
                    (i for i, stat in enumerate(stat_order) if stat == x[1]),
                    999
                )
            )

            for col, stat in columns_sorted:
                stat_label = stat.upper() if stat != 'single' else col
                print(f"    -> Stat: {stat_label} ({col})")

                fig, axes = plt.subplots(1, 2, figsize=(20, 8))
                ax_models, ax_global = axes

                plot_data = []
                x_labels = []

                for model in models:
                    for sign in signs:
                        mask = (
                            (df['model'] == model) &
                            (df['influence_sign'] == sign)
                        )
                        data = df.loc[mask, col].dropna()
                        if len(data) > 0:
                            plot_data.append(data.values)
                            x_labels.append(f'{model}\n{sign}')
                        else:
                            plot_data.append(None)
                            x_labels.append(f'{model}\n{sign}')

                non_empty_indices = [
                    i for i, d in enumerate(plot_data)
                    if d is not None and len(d) > 0
                ]
                if not non_empty_indices:
                    print(f"      ⚠️ SKIPPED: No valid data for {col}")
                    plt.close(fig)
                    continue

                plot_data = [plot_data[i] for i in non_empty_indices]
                x_labels = [x_labels[i] for i in non_empty_indices]

                bp = ax_models.boxplot(
                    plot_data,
                    labels=x_labels,
                    patch_artist=True,
                    widths=0.6,
                    showmeans=True,
                    meanline=False,
                    notch=False,
                    vert=True,
                    whis=1.5,
                    meanprops=dict(
                        marker='D', markerfacecolor='red',
                        markersize=7, markeredgecolor='darkred',
                        markeredgewidth=1.5
                    ),
                    medianprops=dict(color='darkblue', linewidth=2),
                    whiskerprops=dict(linewidth=1.5, color='black'),
                    capprops=dict(linewidth=1.5, color='black'),
                    boxprops=dict(linewidth=1.5, color='black')
                )

                for i, patch in enumerate(bp['boxes']):
                    label = x_labels[i]
                    sign = 'positive' if 'positive' in label else 'negative'
                    color = SIGN_COLORS.get(sign, '#cccccc')
                    patch.set_facecolor(color)
                    patch.set_alpha(0.7)
                    patch.set_edgecolor('black')
                    patch.set_linewidth(2)

                for i, data in enumerate(plot_data):
                    y = data
                    x = np.random.normal(i + 1, 0.05, size=len(y))
                    ax_models.scatter(
                        x, y, alpha=0.35, s=25,
                        color='black', edgecolors='gray', linewidth=0.5
                    )

                ax_models.set_ylabel('Value', fontsize=13, fontweight='bold')
                ax_models.set_title(
                    f'{feature_base} – {stat_label} per model\n'
                    f'(positive vs negative influence)',
                    fontsize=13, fontweight='bold', pad=15
                )
                ax_models.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.8)
                ax_models.set_axisbelow(True)
                ax_models.spines['top'].set_visible(False)
                ax_models.spines['right'].set_visible(False)
                ax_models.spines['left'].set_linewidth(1.8)
                ax_models.spines['bottom'].set_linewidth(1.8)
                for tick in ax_models.get_xticklabels():
                    tick.set_rotation(45)
                    tick.set_ha('right')

                global_plot_data = []
                global_labels = []
                for sign in signs:
                    data = df.loc[df['influence_sign'] == sign, col].dropna()
                    if len(data) > 0:
                        global_plot_data.append(data.values)
                        global_labels.append(sign)

                if global_plot_data:
                    bp2 = ax_global.boxplot(
                        global_plot_data,
                        labels=global_labels,
                        patch_artist=True,
                        widths=0.6,
                        showmeans=True,
                        meanline=False,
                        notch=False,
                        vert=True,
                        whis=1.5,
                        meanprops=dict(
                            marker='D', markerfacecolor='red',
                            markersize=7, markeredgecolor='darkred',
                            markeredgewidth=1.5
                        ),
                        medianprops=dict(color='darkblue', linewidth=2),
                        whiskerprops=dict(linewidth=1.5, color='black'),
                        capprops=dict(linewidth=1.5, color='black'),
                        boxprops=dict(linewidth=1.5, color='black')
                    )

                    for i, patch in enumerate(bp2['boxes']):
                        sign = global_labels[i]
                        color = SIGN_COLORS.get(sign, '#cccccc')
                        patch.set_facecolor(color)
                        patch.set_alpha(0.7)
                        patch.set_edgecolor('black')
                        patch.set_linewidth(2)

                    for i, data in enumerate(global_plot_data):
                        y = data
                        x = np.random.normal(i + 1, 0.05, size=len(y))
                        ax_global.scatter(
                            x, y, alpha=0.35, s=25,
                            color='black', edgecolors='gray', linewidth=0.5
                        )

                    ax_global.set_ylabel('Value', fontsize=13, fontweight='bold')
                    ax_global.set_title(
                        f'{feature_base} – {stat_label}\n'
                        f'(all models, positive vs negative influence)',
                        fontsize=13, fontweight='bold', pad=15
                    )
                    ax_global.grid(axis='y', alpha=0.3, linestyle='--', linewidth=0.8)
                    ax_global.set_axisbelow(True)
                    ax_global.spines['top'].set_visible(False)
                    ax_global.spines['right'].set_visible(False)
                    ax_global.spines['left'].set_linewidth(1.8)
                    ax_global.spines['bottom'].set_linewidth(1.8)
                else:
                    ax_global.text(
                        0.5, 0.5,
                        f'No data for {stat_label}',
                        transform=ax_global.transAxes,
                        ha='center', va='center', fontsize=12, color='red'
                    )
                    ax_global.axis('off')

                fig.suptitle(
                    f'Feature Analysis (influence split): '
                    f'{feature_base.replace("_", " ").title()} – {stat_label}',
                    fontsize=16, fontweight='bold', y=0.97
                )

                plt.tight_layout(rect=[0.03, 0.14, 0.97, 0.93])

                stats_text = format_influence_statistics_box(x_labels, plot_data)
                add_bottom_stats_panel(fig, ax_models, stats_text,
                                       width_frac=0.45, y_margin=0.04)

                if global_plot_data:
                    global_stats_text = format_influence_statistics_box(
                        global_labels, global_plot_data
                    )
                    add_bottom_stats_panel(fig, ax_global, global_stats_text,
                                           width_frac=0.30, y_margin=0.04)

                output_file = feature_folder / f'{feature_base}_{stat}_influence_boxplots.png'
                plt.savefig(
                    output_file, dpi=300, bbox_inches='tight',
                    facecolor='white', edgecolor='none'
                )
                plt.close(fig)

                print(f"      ✓ Saved: {output_file}")

    print(f"\n{'='*80}")
    print(f"✅ Influence-split boxplot visualizations saved to: {base_folder}")
    print(f"✅ Ready for academic thesis presentation!")
    print(f"{'='*80}\n")


In [11]:
viz2_real_vs_generated_boxplots_with_influence(
    features_df
)


Creating INFLUENCE-SPLIT boxplot visualizations for academic thesis...

Found 32 feature groups

Processing feature: breath_count


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\breath_count\breath_count_influence_boxplots.png
Processing feature: duration


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\duration\duration_influence_boxplots.png
Processing feature: f0
    -> Stat: MIN (f0_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\f0\f0_min_influence_boxplots.png
    -> Stat: MEAN (f0_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\f0\f0_mean_influence_boxplots.png
    -> Stat: STD (f0_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\f0\f0_std_influence_boxplots.png
    -> Stat: MAX (f0_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\f0\f0_max_influence_boxplots.png
Processing feature: gne


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\gne\gne_influence_boxplots.png
Processing feature: hnr


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\hnr\hnr_influence_boxplots.png
Processing feature: intonation_pattern_pitch_variability


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\intonation_pattern_pitch_variability\intonation_pattern_pitch_variability_influence_boxplots.png
Processing feature: jitter_jitter
    -> Stat: STD (jitter_jitter_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\jitter_jitter\jitter_jitter_std_influence_boxplots.png
Processing feature: jitter_jitter_local


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\jitter_jitter_local\jitter_jitter_local_influence_boxplots.png
Processing feature: jitter_jitter_mean_absolute_ms


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\jitter_jitter_mean_absolute_ms\jitter_jitter_mean_absolute_ms_influence_boxplots.png
Processing feature: jitter_jitter_ppq5


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\jitter_jitter_ppq5\jitter_jitter_ppq5_influence_boxplots.png
Processing feature: jitter_jitter_range


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\jitter_jitter_range\jitter_jitter_range_influence_boxplots.png
Processing feature: jitter_jitter_rap


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\jitter_jitter_rap\jitter_jitter_rap_influence_boxplots.png
Processing feature: rhythm_stats_avg_onset_strength


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\rhythm_stats_avg_onset_strength\rhythm_stats_avg_onset_strength_influence_boxplots.png
Processing feature: rhythm_stats_max_onset_strength


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\rhythm_stats_max_onset_strength\rhythm_stats_max_onset_strength_influence_boxplots.png
Processing feature: rhythm_stats_tempo_bpm
    -> Stat: MIN (rhythm_stats_tempo_bpm_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rhythm_stats_tempo_bpm\rhythm_stats_tempo_bpm_min_influence_boxplots.png
    -> Stat: MEAN (rhythm_stats_tempo_bpm_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rhythm_stats_tempo_bpm\rhythm_stats_tempo_bpm_mean_influence_boxplots.png
    -> Stat: STD (rhythm_stats_tempo_bpm_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rhythm_stats_tempo_bpm\rhythm_stats_tempo_bpm_std_influence_boxplots.png
    -> Stat: MAX (rhythm_stats_tempo_bpm_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rhythm_stats_tempo_bpm\rhythm_stats_tempo_bpm_max_influence_boxplots.png
Processing feature: rms_spec
    -> Stat: MIN (rms_spec_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rms_spec\rms_spec_min_influence_boxplots.png
    -> Stat: MEAN (rms_spec_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rms_spec\rms_spec_mean_influence_boxplots.png
    -> Stat: STD (rms_spec_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rms_spec\rms_spec_std_influence_boxplots.png
    -> Stat: MAX (rms_spec_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rms_spec\rms_spec_max_influence_boxplots.png
Processing feature: rms_wave
    -> Stat: MIN (rms_wave_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rms_wave\rms_wave_min_influence_boxplots.png
    -> Stat: MEAN (rms_wave_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rms_wave\rms_wave_mean_influence_boxplots.png
    -> Stat: STD (rms_wave_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rms_wave\rms_wave_std_influence_boxplots.png
    -> Stat: MAX (rms_wave_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\rms_wave\rms_wave_max_influence_boxplots.png
Processing feature: shimmer_shimmer
    -> Stat: STD (shimmer_shimmer_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\shimmer_shimmer\shimmer_shimmer_std_influence_boxplots.png
Processing feature: shimmer_shimmer_apq3


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\shimmer_shimmer_apq3\shimmer_shimmer_apq3_influence_boxplots.png
Processing feature: shimmer_shimmer_apq5


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\shimmer_shimmer_apq5\shimmer_shimmer_apq5_influence_boxplots.png
Processing feature: shimmer_shimmer_dB


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\shimmer_shimmer_dB\shimmer_shimmer_dB_influence_boxplots.png
Processing feature: shimmer_shimmer_local


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\shimmer_shimmer_local\shimmer_shimmer_local_influence_boxplots.png
Processing feature: shimmer_shimmer_range


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\shimmer_shimmer_range\shimmer_shimmer_range_influence_boxplots.png
Processing feature: spectral_bandwidth
    -> Stat: MIN (spectral_bandwidth_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_bandwidth\spectral_bandwidth_min_influence_boxplots.png
    -> Stat: MEAN (spectral_bandwidth_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_bandwidth\spectral_bandwidth_mean_influence_boxplots.png
    -> Stat: STD (spectral_bandwidth_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_bandwidth\spectral_bandwidth_std_influence_boxplots.png
    -> Stat: MAX (spectral_bandwidth_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_bandwidth\spectral_bandwidth_max_influence_boxplots.png
Processing feature: spectral_centroid
    -> Stat: MIN (spectral_centroid_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_centroid\spectral_centroid_min_influence_boxplots.png
    -> Stat: MEAN (spectral_centroid_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_centroid\spectral_centroid_mean_influence_boxplots.png
    -> Stat: STD (spectral_centroid_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_centroid\spectral_centroid_std_influence_boxplots.png
    -> Stat: MAX (spectral_centroid_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_centroid\spectral_centroid_max_influence_boxplots.png
Processing feature: spectral_contrast
    -> Stat: MIN (spectral_contrast_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_contrast\spectral_contrast_min_influence_boxplots.png
    -> Stat: MEAN (spectral_contrast_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_contrast\spectral_contrast_mean_influence_boxplots.png
    -> Stat: STD (spectral_contrast_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_contrast\spectral_contrast_std_influence_boxplots.png
    -> Stat: MAX (spectral_contrast_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_contrast\spectral_contrast_max_influence_boxplots.png
Processing feature: spectral_flatness
    -> Stat: MIN (spectral_flatness_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_flatness\spectral_flatness_min_influence_boxplots.png
    -> Stat: MEAN (spectral_flatness_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_flatness\spectral_flatness_mean_influence_boxplots.png
    -> Stat: STD (spectral_flatness_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_flatness\spectral_flatness_std_influence_boxplots.png
    -> Stat: MAX (spectral_flatness_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_flatness\spectral_flatness_max_influence_boxplots.png
Processing feature: spectral_rolloff_1
    -> Stat: MIN (spectral_rolloff_1_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_1\spectral_rolloff_1_min_influence_boxplots.png
    -> Stat: MEAN (spectral_rolloff_1_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_1\spectral_rolloff_1_mean_influence_boxplots.png
    -> Stat: STD (spectral_rolloff_1_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_1\spectral_rolloff_1_std_influence_boxplots.png
    -> Stat: MAX (spectral_rolloff_1_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_1\spectral_rolloff_1_max_influence_boxplots.png
Processing feature: spectral_rolloff_85
    -> Stat: MIN (spectral_rolloff_85_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_85\spectral_rolloff_85_min_influence_boxplots.png
    -> Stat: MEAN (spectral_rolloff_85_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_85\spectral_rolloff_85_mean_influence_boxplots.png
    -> Stat: STD (spectral_rolloff_85_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_85\spectral_rolloff_85_std_influence_boxplots.png
    -> Stat: MAX (spectral_rolloff_85_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_85\spectral_rolloff_85_max_influence_boxplots.png
Processing feature: spectral_rolloff_99
    -> Stat: MIN (spectral_rolloff_99_min)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_99\spectral_rolloff_99_min_influence_boxplots.png
    -> Stat: MEAN (spectral_rolloff_99_mean)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_99\spectral_rolloff_99_mean_influence_boxplots.png
    -> Stat: STD (spectral_rolloff_99_std)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_99\spectral_rolloff_99_std_influence_boxplots.png
    -> Stat: MAX (spectral_rolloff_99_max)


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


      ✓ Saved: visualizations_boxplot_influence2\spectral_rolloff_99\spectral_rolloff_99_max_influence_boxplots.png
Processing feature: voice_breaks


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\voice_breaks\voice_breaks_influence_boxplots.png
Processing feature: zero_crossing_rate


  bp = ax_models.boxplot(
  bp2 = ax_global.boxplot(


  ✓ Saved: visualizations_boxplot_influence2\zero_crossing_rate\zero_crossing_rate_influence_boxplots.png

✅ Influence-split boxplot visualizations saved to: visualizations_boxplot_influence2
✅ Ready for academic thesis presentation!

