In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pywt
from scipy.io import wavfile
import librosa
import os
import time
import warnings
warnings.filterwarnings('ignore')

from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
import pandas as pd
import seaborn as sns
from tqdm import tqdm
import joblib
from scipy import stats
import json
from datetime import datetime

In [None]:
# Global plotting defaults for every figure created below: matplotlib's
# 'seaborn-v0_8-darkgrid' style sheet and seaborn's "husl" colour palette.
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

In [None]:
class MemoryEfficientMeasurementMatrices:
    """Block-wise generators for compressed-sensing measurement matrices.

    Every generator returns one ``rows x cols`` block scaled by
    ``1/sqrt(rows)`` and is seeded with ``seed + start_row``, so a given block
    can be regenerated on demand without holding the full matrix in memory.

    Random numbers are drawn from a private ``np.random.RandomState`` instead
    of calling ``np.random.seed``: the generated values are the same, but the
    caller's global NumPy random state is no longer reset as a side effect.
    """

    @staticmethod
    def _block_rng(start_row, seed):
        """Return the private generator for the block starting at ``start_row``."""
        return np.random.RandomState(seed + start_row)

    @staticmethod
    def bernoulli_matrix_block(rows, cols, start_row=0, seed=42):
        """Generate a +/-1 Bernoulli block.

        Args:
            rows: Number of rows in the block.
            cols: Number of columns in the block.
            start_row: Offset of the block; added to ``seed`` to seed the block.
            seed: Base random seed.

        Returns:
            ``(rows, cols)`` float array with entries ``+/-1/sqrt(rows)``.
        """
        rng = MemoryEfficientMeasurementMatrices._block_rng(start_row, seed)
        matrix = rng.choice([-1, 1], size=(rows, cols))
        return matrix / np.sqrt(rows)

    @staticmethod
    def gaussian_matrix_block(rows, cols, start_row=0, seed=42):
        """Generate a standard-normal block scaled by ``1/sqrt(rows)``.

        Args:
            rows: Number of rows in the block.
            cols: Number of columns in the block.
            start_row: Offset of the block; added to ``seed`` to seed the block.
            seed: Base random seed.

        Returns:
            ``(rows, cols)`` float array.
        """
        rng = MemoryEfficientMeasurementMatrices._block_rng(start_row, seed)
        matrix = rng.randn(rows, cols)
        return matrix / np.sqrt(rows)

    @staticmethod
    def logistic_henon_matrix_block(rows, cols, start_row=0, seed=42):
        """Generate a block from a combined logistic / Henon chaotic sequence.

        Each entry is ``2 * (x + x_h) / 2 - 1`` where ``x`` follows the
        logistic map (r = 3.99) and ``x_h`` the Henon map (a = 1.4, b = 0.3),
        both started from seeded random values. The block is scaled by
        ``1/sqrt(rows)``.

        Note: the sequence is inherently serial, so this runs a pure-Python
        loop over all ``rows * cols`` entries.

        Args:
            rows: Number of rows in the block.
            cols: Number of columns in the block.
            start_row: Offset of the block; added to ``seed`` to seed the block
                and used to decide on which rows the maps are perturbed.
            seed: Base random seed.

        Returns:
            ``(rows, cols)`` float array.
        """
        rng = MemoryEfficientMeasurementMatrices._block_rng(start_row, seed)
        matrix = np.zeros((rows, cols))

        # Initial conditions of the two maps (draw order matters for
        # reproducibility: logistic x first, then Henon x and y).
        r = 3.99
        x = rng.rand()
        a, b = 1.4, 0.3
        x_h, y_h = rng.rand(), rng.rand()

        for i in range(rows):
            for j in range(cols):
                # Logistic map step
                x = r * x * (1 - x)

                # Henon map step
                x_h_new = 1 - a * x_h**2 + y_h
                y_h = b * x_h
                x_h = x_h_new

                # Average the two states and shift by -1
                chaotic_value = (x + x_h) / 2
                matrix[i, j] = 2 * chaotic_value - 1

            # Perturb both maps on every 100th global row to limit correlation
            if (start_row + i) % 100 == 0:
                x = (x + rng.rand()) / 2
                x_h = (x_h + rng.rand()) / 2

        return matrix / np.sqrt(rows)

In [None]:
class AudioCompressionFramework:
    """Complete framework for audio compression experiments"""
    
    def __init__(self, base_path="UrbanSound8K/", wavelet='db4', dwt_level=4, 
                 sample_rate=22050, duration=4.0, random_seed=42):
        """Store the experiment configuration and create the output folders.

        Args:
            base_path: Root folder of the UrbanSound8K dataset.
            wavelet: Wavelet name passed to ``pywt.wavedec``.
            dwt_level: Decomposition level passed to ``pywt.wavedec``.
            sample_rate: Sample rate used when loading audio.
            duration: Clip length in seconds; clips are padded/cut to it.
            random_seed: Seed used for sampling, splitting, matrices and models.
        """
        # Dataset location and reproducibility
        self.base_path, self.random_seed = base_path, random_seed

        # Signal and transform configuration
        self.sample_rate, self.duration = sample_rate, duration
        self.wavelet, self.dwt_level = wavelet, dwt_level

        # Shared preprocessing objects
        self.label_encoder = LabelEncoder()
        self.scaler = StandardScaler()

        # Accumulators filled in by the experiment pipeline
        self.experiment_data = {}
        self.results = {}

        # Output folders must exist before anything is saved
        self.create_directories()
    
    def create_directories(self):
        """Create necessary directories"""
        directories = [
            'results',
            'models', 
            'features',
            'visualizations',
            'checkpoints'
        ]
        for dir_name in directories:
            os.makedirs(dir_name, exist_ok=True)
    
    def load_urbansound_dataset(self, max_files_per_class=None):
        """Build the list of audio files and class labels from the metadata CSV.

        Args:
            max_files_per_class: Optional cap on the number of files sampled
                per class (seeded with ``self.random_seed``). ``None`` or 0
                keeps every row of the metadata.

        Returns:
            ``(file_paths, labels)``: two parallel lists containing only the
            files that exist on disk.
        """
        print("Loading UrbanSound8K dataset...")

        # Read metadata
        metadata_path = os.path.join(self.base_path, "metadata/UrbanSound8K.csv")
        metadata = pd.read_csv(metadata_path)

        # Sample each class explicitly instead of groupby().apply(): apply()
        # is deprecated for functions that need the grouping column, and once
        # that column is excluded the 'class' lookup below would fail.
        if max_files_per_class:
            sampled_groups = [
                group.sample(n=min(len(group), max_files_per_class),
                             random_state=self.random_seed)
                for _, group in metadata.groupby('class')
            ]
            sampled_metadata = (
                pd.concat(sampled_groups, ignore_index=True) if sampled_groups else metadata
            )
        else:
            sampled_metadata = metadata

        file_paths = []
        labels = []
        missing_count = 0

        rows = zip(sampled_metadata['fold'],
                   sampled_metadata['slice_file_name'],
                   sampled_metadata['class'])
        for fold, filename, label in tqdm(rows, total=len(sampled_metadata),
                                          desc="Building file list"):
            file_path = os.path.join(self.base_path, f"audio/fold{fold}", filename)

            if os.path.exists(file_path):
                file_paths.append(file_path)
                labels.append(label)
            else:
                missing_count += 1

        # Files listed in the metadata but absent on disk are skipped; say so
        # instead of shrinking the dataset silently.
        if missing_count:
            print(f"Warning: {missing_count} files listed in the metadata were not found on disk")

        print(f"Loaded {len(file_paths)} files, {len(set(labels))} classes")
        return file_paths, labels
    
    def extract_original_features(self, audio):
        """Extract the baseline (uncompressed) feature vector from one clip.

        The vector concatenates, in order: 20 time-averaged MFCCs, the
        time-averaged chroma bins, mean/std of the spectral centroid,
        mean/std of the spectral bandwidth, the mean zero-crossing rate and
        the mean RMS energy.

        Args:
            audio: 1-D audio signal sampled at ``self.sample_rate``.

        Returns:
            1-D NumPy array of features.
        """
        # MFCC features (standard for audio classification)
        mfccs = librosa.feature.mfcc(y=audio, sr=self.sample_rate, n_mfcc=20)
        mfccs_scaled = np.mean(mfccs.T, axis=0)

        # Chroma features
        chroma = librosa.feature.chroma_stft(y=audio, sr=self.sample_rate)
        chroma_scaled = np.mean(chroma.T, axis=0)

        # Spectral features
        spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=self.sample_rate)
        spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=self.sample_rate)

        # Scalar summaries
        zero_crossing_mean = librosa.feature.zero_crossing_rate(y=audio)[0].mean()
        rms_mean = np.mean(librosa.feature.rms(y=audio))

        # np.concatenate rejects zero-dimensional inputs, so every scalar is
        # wrapped in a list; passing the bare scalars raised ValueError.
        features = np.concatenate([
            mfccs_scaled,
            chroma_scaled,
            [np.mean(spectral_centroid), np.std(spectral_centroid)],
            [np.mean(spectral_bandwidth), np.std(spectral_bandwidth)],
            [zero_crossing_mean],
            [rms_mean]
        ])

        return features
    
    def compress_audio(self, audio, compression_ratio, matrix_type='bernoulli'):
        """Project the wavelet coefficients of a clip onto a measurement matrix.

        The clip is decomposed with ``pywt.wavedec``, the coefficients are
        flattened to a vector of length N, and ``M = int(N * compression_ratio)``
        measurements are computed block by block so that the full M x N matrix
        is never held in memory.

        Args:
            audio: 1-D audio signal.
            compression_ratio: Fraction M/N of measurements to keep.
            matrix_type: ``'bernoulli'``, ``'gaussian'`` or ``'logistic_henon'``.

        Returns:
            ``(features, N, M)`` where ``features`` holds the M measurements
            followed by six summary values (mean, std, max |x|, min,
            90th percentile of |x|, and M/N).

        Raises:
            ValueError: If ``matrix_type`` is unknown or the ratio leaves no
                measurements (M < 1).
        """
        # Resolve the generator first so a bad matrix type fails before the DWT
        generators = {
            'bernoulli': MemoryEfficientMeasurementMatrices.bernoulli_matrix_block,
            'gaussian': MemoryEfficientMeasurementMatrices.gaussian_matrix_block,
            'logistic_henon': MemoryEfficientMeasurementMatrices.logistic_henon_matrix_block,
        }
        if matrix_type not in generators:
            raise ValueError(f"Unknown matrix type: {matrix_type}")
        matrix_func = generators[matrix_type]

        # Apply DWT
        coeffs = pywt.wavedec(audio, self.wavelet, level=self.dwt_level)
        coeffs_flat = np.concatenate(coeffs)
        N = len(coeffs_flat)
        M = int(N * compression_ratio)
        if M < 1:
            raise ValueError(
                f"compression_ratio={compression_ratio} yields no measurements for N={N}"
            )

        # Block processing to avoid large matrices
        block_size = 1000  # Process 1000 measurements at a time
        compressed = np.zeros(M)

        for block_start in range(0, M, block_size):
            block_end = min(block_start + block_size, M)
            block_rows = block_end - block_start

            # Generate matrix block
            Phi_block = matrix_func(block_rows, N, start_row=block_start, seed=self.random_seed)

            # The generators scale each block by 1/sqrt(block_rows). A shorter
            # final block would therefore be scaled differently from the
            # others; rescale so every row carries the 1/sqrt(M) factor of the
            # full M x N matrix.
            row_scale = np.sqrt(block_rows / M)
            compressed[block_start:block_end] = (Phi_block @ coeffs_flat) * row_scale

        # Add statistics as features
        features = np.concatenate([
            compressed,
            [
                np.mean(compressed),
                np.std(compressed),
                np.max(np.abs(compressed)),
                np.min(compressed),
                np.percentile(np.abs(compressed), 90),
                M / N  # Actual compression ratio
            ]
        ])

        return features, N, M
    
    def prepare_features(self, file_paths, labels, feature_type='original', 
                        compression_ratio=None, matrix_type=None):
        """Load every file, extract its features and encode the labels.

        Each clip is loaded at ``self.sample_rate``, padded or cut to
        ``self.duration`` seconds and peak-normalised. Files that fail to
        load or process are reported and skipped.

        Args:
            file_paths: Audio file paths.
            labels: Class label for each path.
            feature_type: ``'original'`` for the baseline features; any other
                value selects compressed-sensing features.
            compression_ratio: Ratio passed to ``compress_audio``.
            matrix_type: Matrix type passed to ``compress_audio``.

        Returns:
            ``(X, y_encoded, feature_info)``: feature matrix, integer labels
            and a dict describing the feature set.

        Raises:
            RuntimeError: If no file could be processed at all.
        """
        print(f"\nPreparing {feature_type} features...")

        features_list = []
        labels_list = []
        compression_info = []
        failed_count = 0

        # Loop-invariant: number of samples every clip is padded/cut to
        target_length = int(self.duration * self.sample_rate)

        for file_path, label in tqdm(zip(file_paths, labels),
                                     total=len(file_paths),
                                     desc=f"Processing {feature_type}"):
            try:
                # Load and preprocess audio
                audio, _ = librosa.load(file_path, sr=self.sample_rate, duration=self.duration)

                # Ensure fixed length
                if len(audio) < target_length:
                    audio = np.pad(audio, (0, target_length - len(audio)))
                else:
                    audio = audio[:target_length]

                # Normalize (epsilon guards against all-zero clips)
                audio = audio / (np.max(np.abs(audio)) + 1e-8)

                # Extract features
                if feature_type == 'original':
                    features = self.extract_original_features(audio)
                    info = {'N': len(audio), 'M': len(features), 'ratio': 1.0}
                else:
                    features, N, M = self.compress_audio(audio, compression_ratio, matrix_type)
                    info = {'N': N, 'M': M, 'ratio': M/N}

                compression_info.append(info)
                features_list.append(features)
                labels_list.append(label)

            except Exception as e:
                # Best effort per file: report and keep going
                failed_count += 1
                print(f"Error processing {file_path}: {e}")
                continue

        if failed_count:
            print(f"Skipped {failed_count} of {len(file_paths)} files because of errors")

        # Without this check a total failure only shows up later as an
        # obscure shape error inside the classifiers.
        if not features_list:
            raise RuntimeError(
                f"No {feature_type} features could be extracted from {len(file_paths)} files"
            )

        X = np.array(features_list)
        y = np.array(labels_list)

        # Encode labels. Reuse the fitted encoder when it already knows every
        # label so the train and test splits share one mapping; re-fitting on
        # each call would give a split that lacks a class a shifted encoding.
        known_classes = getattr(self.label_encoder, 'classes_', None)
        if known_classes is not None and set(np.unique(y)) <= set(known_classes):
            y_encoded = self.label_encoder.transform(y)
        else:
            y_encoded = self.label_encoder.fit_transform(y)

        # Save feature info
        feature_info = {
            'feature_type': feature_type,
            'matrix_type': matrix_type,
            'compression_ratio': compression_ratio,
            'X_shape': X.shape,
            'y_shape': y.shape,
            'compression_stats': pd.DataFrame(compression_info).mean().to_dict()
        }

        return X, y_encoded, feature_info
    
    def train_and_evaluate(self, X_train, X_test, y_train, y_test, feature_info, 
                          classifier_names=['SVM', 'RandomForest', 'MLP']):
        """Train each requested classifier on standardised features and score it.

        Args:
            X_train, X_test: Feature matrices.
            y_train, y_test: Integer class labels.
            feature_info: Description of the feature set; merged into every
                result row and used to build the model key.
            classifier_names: Any of ``'SVM'``, ``'RandomForest'``, ``'MLP'``.
                Unknown names are reported and skipped. The default list is
                only read, never modified.

        Returns:
            ``(results, trained_models)``: a list with one result dict per
            classifier, and a dict mapping a model key to the fitted model,
            the fitted scaler and ``feature_info``.
        """
        results = []
        trained_models = {}

        # The scaling does not depend on the classifier, so fit it once
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # 'matrix_type' is present but None for the baseline, so a plain
        # .get(key, default) never falls back; use `or` to get 'original'.
        matrix_label = feature_info.get('matrix_type') or 'original'

        for clf_name in classifier_names:
            # Initialize classifier
            if clf_name == 'SVM':
                clf = SVC(kernel='rbf', C=1.0, gamma='scale', 
                         random_state=self.random_seed, probability=True)
            elif clf_name == 'RandomForest':
                clf = RandomForestClassifier(n_estimators=100, 
                                           random_state=self.random_seed,
                                           n_jobs=-1)  # Use all cores
            elif clf_name == 'MLP':
                clf = MLPClassifier(hidden_layer_sizes=(128, 64), 
                                   max_iter=500, random_state=self.random_seed,
                                   early_stopping=True)
            else:
                print(f"  Skipping unknown classifier: {clf_name}")
                continue

            print(f"  Training {clf_name}...")

            # Train (perf_counter is monotonic, unlike wall-clock time.time)
            start_time = time.perf_counter()
            clf.fit(X_train_scaled, y_train)
            train_time = time.perf_counter() - start_time

            # Predict: only the call whose output is scored is timed
            start_time = time.perf_counter()
            y_pred = clf.predict(X_test_scaled)
            test_time = time.perf_counter() - start_time

            # Evaluate
            accuracy = accuracy_score(y_test, y_pred)
            f1 = f1_score(y_test, y_pred, average='weighted')

            # Store results
            result = {
                **feature_info,
                'classifier': clf_name,
                'accuracy': accuracy,
                'f1_score': f1,
                'train_time': train_time,
                'test_time': test_time,
                'model_size': self._estimate_model_size(clf),
                'train_samples': len(X_train),
                'test_samples': len(X_test),
                'feature_dim': X_train.shape[1]
            }

            results.append(result)

            # Save trained model
            model_key = f"{feature_info['feature_type']}_{matrix_label}_{clf_name}"
            trained_models[model_key] = {
                'model': clf,
                'scaler': scaler,
                'feature_info': feature_info
            }

            print(f"    Accuracy: {accuracy:.4f}, F1: {f1:.4f}, "
                  f"Train: {train_time:.2f}s, Test: {test_time:.4f}s")

        return results, trained_models
    
    def _estimate_model_size(self, model):
        """Return the size in MB of ``model`` when serialised with joblib.

        The model is dumped into a private temporary directory that is removed
        on exit, even if serialisation fails, so nothing is left behind and no
        file in the working directory can be overwritten.

        Args:
            model: Any object ``joblib.dump`` can serialise.

        Returns:
            Size of the serialised file in megabytes (float).
        """
        import tempfile

        with tempfile.TemporaryDirectory() as tmp_dir:
            temp_file = os.path.join(tmp_dir, 'temp_model.pkl')
            joblib.dump(model, temp_file)
            return os.path.getsize(temp_file) / (1024 * 1024)
    
    def run_experiment_pipeline(self, file_paths, labels, test_size=0.2, 
                               max_files_per_class=100):
        """Run the baseline plus every compressed-sensing experiment.

        The files are split once (stratified by label) so that every
        experiment uses the same train and test files. The baseline uses the
        original features; after that each matrix type is combined with each
        compression ratio. A checkpoint is written after every compressed
        experiment and all results are saved at the end.

        Args:
            file_paths: Audio file paths.
            labels: Class label for each path.
            test_size: Fraction of files held out for testing.
            max_files_per_class: Optional per-class cap applied before the split.

        Returns:
            DataFrame with one row per (experiment, classifier).
        """
        print("="*80)
        print("STARTING COMPREHENSIVE AUDIO COMPRESSION EXPERIMENTS")
        print("="*80)

        # Limit files if specified
        if max_files_per_class:
            print(f"Limiting to {max_files_per_class} files per class")
            file_paths, labels = self._limit_files_per_class(file_paths, labels, max_files_per_class)

        # Split indices once, stratified on the encoded labels
        y_encoded = LabelEncoder().fit_transform(labels)
        train_idx, test_idx = train_test_split(
            np.arange(len(file_paths)), test_size=test_size, 
            random_state=self.random_seed, stratify=y_encoded
        )

        train_files = [file_paths[i] for i in train_idx]
        train_labels = [labels[i] for i in train_idx]
        test_files = [file_paths[i] for i in test_idx]
        test_labels = [labels[i] for i in test_idx]

        print(f"Train set: {len(train_files)} files")
        print(f"Test set: {len(test_files)} files")
        print("-"*80)

        # Experiment 1: Original features (baseline)
        print("\n[EXPERIMENT 1] Original Audio Features (Baseline)")
        self._run_single_experiment(
            'original', train_files, train_labels, test_files, test_labels,
            feature_type='original'
        )

        # Experiments 2-10: Compressed sensing with different matrices and ratios
        matrix_types = ['bernoulli', 'gaussian', 'logistic_henon']
        compression_ratios = [0.5, 0.6, 0.7]  # 50%, 60%, 70%

        for matrix_type in matrix_types:
            for comp_ratio in compression_ratios:
                exp_name = f"{matrix_type}_{int(comp_ratio*100)}"
                print(f"\n[EXPERIMENT {exp_name.upper()}] {matrix_type.capitalize()} Matrix, {comp_ratio*100:.0f}% Compression")

                self._run_single_experiment(
                    exp_name, train_files, train_labels, test_files, test_labels,
                    feature_type='compressed',
                    compression_ratio=comp_ratio,
                    matrix_type=matrix_type
                )

                # Save checkpoint
                self.save_checkpoint(exp_name)

        # Combine all results
        all_results = [
            row for exp_results in self.results.values() for row in exp_results
        ]
        self.complete_results = pd.DataFrame(all_results)

        # Save everything
        self.save_results()

        return self.complete_results

    def _run_single_experiment(self, exp_name, train_files, train_labels,
                               test_files, test_labels, **feature_kwargs):
        """Prepare features for both splits, train/evaluate, and record the outcome.

        ``feature_kwargs`` is forwarded unchanged to ``prepare_features`` for
        the train and the test split; results and data are stored under
        ``exp_name`` in ``self.results`` and ``self.experiment_data``.
        """
        X_train, y_train, info = self.prepare_features(
            train_files, train_labels, **feature_kwargs
        )
        X_test, y_test, _ = self.prepare_features(
            test_files, test_labels, **feature_kwargs
        )

        exp_results, _ = self.train_and_evaluate(X_train, X_test, y_train, y_test, info)

        self.results[exp_name] = exp_results
        self.experiment_data[exp_name] = {
            'X_train': X_train, 'X_test': X_test,
            'y_train': y_train, 'y_test': y_test,
            'info': info
        }
    
    def _limit_files_per_class(self, file_paths, labels, max_per_class):
        """Limit number of files per class"""
        df = pd.DataFrame({'path': file_paths, 'label': labels})
        sampled_df = df.groupby('label').apply(
            lambda x: x.sample(min(len(x), max_per_class), random_state=self.random_seed)
        ).reset_index(drop=True)
        
        return sampled_df['path'].tolist(), sampled_df['label'].tolist()
    
    def save_checkpoint(self, exp_name):
        """Write the results gathered so far to ``checkpoints/checkpoint_<exp_name>.pkl``.

        The feature matrices (``X_train`` / ``X_test``) of every experiment
        are left out. The filter has to run on the keys of each experiment's
        dict; filtering on the experiment names never matched anything and
        wrote all feature matrices into every checkpoint.

        Args:
            exp_name: Name of the experiment that just finished; used in the
                checkpoint file name.
        """
        feature_matrix_keys = ('X_train', 'X_test')
        checkpoint = {
            'results': self.results,
            'experiment_data': {
                name: {key: value for key, value in data.items()
                       if key not in feature_matrix_keys}
                for name, data in self.experiment_data.items()
            },
            'timestamp': datetime.now().isoformat()
        }

        checkpoint_file = f"checkpoints/checkpoint_{exp_name}.pkl"
        joblib.dump(checkpoint, checkpoint_file)
        print(f"  Checkpoint saved: {checkpoint_file}")
    
    def save_results(self):
        """Save all results to files"""
        print("\nSaving results...")
        
        # Save results DataFrame
        results_file = "results/compression_experiments_results.csv"
        self.complete_results.to_csv(results_file, index=False)
        print(f"‚úì Results saved to: {results_file}")
        
        # Save detailed results JSON
        json_file = "results/experiment_details.json"
        with open(json_file, 'w') as f:
            json.dump({
                'experiment_summary': self.complete_results.to_dict('records'),
                'statistics': self._calculate_statistics(),
                'config': {
                    'wavelet': self.wavelet,
                    'dwt_level': self.dwt_level,
                    'sample_rate': self.sample_rate,
                    'duration': self.duration,
                    'random_seed': self.random_seed
                }
            }, f, indent=2)
        print(f"‚úì Detailed results saved to: {json_file}")
        
        # Save feature dimensions comparison
        dims_df = self.complete_results[['feature_type', 'matrix_type', 'compression_ratio', 'feature_dim']].drop_duplicates()
        dims_df.to_csv("results/feature_dimensions.csv", index=False)
        print(f"‚úì Feature dimensions saved")
    
    def _calculate_statistics(self):
        """Calculate experiment statistics"""
        stats = {}
        
        # Group by experiment type
        for exp_name, exp_results in self.results.items():
            exp_df = pd.DataFrame(exp_results)
            stats[exp_name] = {
                'mean_accuracy': exp_df['accuracy'].mean(),
                'std_accuracy': exp_df['accuracy'].std(),
                'mean_f1': exp_df['f1_score'].mean(),
                'mean_train_time': exp_df['train_time'].mean(),
                'mean_test_time': exp_df['test_time'].mean(),
                'mean_model_size': exp_df['model_size'].mean()
            }
        
        return stats

In [None]:
class ComprehensiveVisualizer:
    """Create comprehensive visualizations"""
    
    def __init__(self, results_df):
        """Keep a reference to the results table that the plotting methods read.

        Args:
            results_df: DataFrame of experiment results. The plotting methods
                access columns such as 'feature_type', 'matrix_type',
                'compression_ratio', 'classifier', 'accuracy', 'feature_dim',
                'train_time', 'test_time' and 'model_size'.
        """
        self.results_df = results_df
    
    def create_all_visualizations(self):
        """Run every plotting method in turn.

        The final message states that the figures are in 'visualizations/'.
        The check mark in that message was stored as mis-decoded bytes and is
        restored here.
        """
        print("\nCreating visualizations...")

        self.plot_accuracy_comparison()
        self.plot_compression_tradeoff()
        self.plot_matrix_comparison()
        self.plot_feature_dimension_impact()
        self.plot_training_time_comparison()
        self.plot_confusion_matrix_heatmaps()
        self.plot_statistical_significance()

        print("✓ All visualizations saved to 'visualizations/' folder")
    
    def plot_accuracy_comparison(self):
        """Draw a 2x2 accuracy overview and save it as a PNG.

        Panels: (1) accuracy per matrix type and compression ratio, grouped by
        classifier; (2) baseline versus best compressed accuracy for each
        classifier; (3) mean accuracy against compression ratio per matrix
        type; (4) feature dimension against mean accuracy for every
        configuration, including the baseline.

        The figure is written to 'visualizations/accuracy_comparison.png'.
        """
        fig, axes = plt.subplots(2, 2, figsize=(16, 12))
        fig.suptitle('Accuracy Comparison Across Compression Methods and Ratios', 
                    fontsize=16, fontweight='bold')

        classifiers = self.results_df['classifier'].unique()
        original_acc = self.results_df[self.results_df['feature_type'] == 'original']
        matrix_data = self.results_df[self.results_df['feature_type'] == 'compressed']

        # Plot 1: Accuracy by matrix type (grouped bars)
        ax = axes[0, 0]
        matrix_pivot = matrix_data.pivot_table(
            index=['matrix_type', 'compression_ratio'],
            columns='classifier',
            values='accuracy',
            aggfunc='mean'
        )

        matrix_pivot.plot(kind='bar', ax=ax, width=0.8, colormap='tab20c')
        ax.set_title('Accuracy by Matrix Type and Compression Ratio')
        ax.set_xlabel('Matrix Type × Compression Ratio')
        ax.set_ylabel('Accuracy')
        ax.legend(title='Classifier')
        ax.grid(True, alpha=0.3)
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

        # Plot 2: Original vs Best Compression
        ax = axes[0, 1]
        x = np.arange(len(classifiers))
        width = 0.35

        for i, classifier in enumerate(classifiers):
            orig_acc = original_acc[original_acc['classifier'] == classifier]['accuracy'].values[0]
            # Best compressed configuration for this classifier
            best_comp_acc = matrix_data[matrix_data['classifier'] == classifier]['accuracy'].max()

            # Label only the first pair so each legend entry appears once
            ax.bar(i - width/2, orig_acc, width, label='Original' if i == 0 else '', color='blue', alpha=0.7)
            ax.bar(i + width/2, best_comp_acc, width, label='Best Compressed' if i == 0 else '', color='red', alpha=0.7)

            # Add text labels
            ax.text(i - width/2, orig_acc + 0.01, f'{orig_acc:.3f}', ha='center', va='bottom')
            ax.text(i + width/2, best_comp_acc + 0.01, f'{best_comp_acc:.3f}', ha='center', va='bottom')

        ax.set_xlabel('Classifier')
        ax.set_ylabel('Accuracy')
        ax.set_title('Original vs Best Compressed Accuracy')
        ax.set_xticks(x)
        ax.set_xticklabels(classifiers)
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 3: Compression Ratio Impact
        ax = axes[1, 0]
        for matrix_type in matrix_data['matrix_type'].unique():
            matrix_subset = matrix_data[matrix_data['matrix_type'] == matrix_type]
            avg_by_ratio = matrix_subset.groupby('compression_ratio')['accuracy'].mean()
            ax.plot(avg_by_ratio.index, avg_by_ratio.values, 
                   marker='o', linewidth=2, label=matrix_type.capitalize())

        ax.set_xlabel('Compression Ratio')
        ax.set_ylabel('Average Accuracy')
        ax.set_title('Impact of Compression Ratio on Accuracy')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 4: Feature Dimension vs Accuracy
        ax = axes[1, 1]
        # dropna=False: baseline rows have no matrix type or ratio, and the
        # default groupby drops rows with missing keys, which removed the
        # 'Original' point from this panel.
        scatter_data = self.results_df.groupby(
            ['feature_type', 'matrix_type', 'compression_ratio'], dropna=False
        ).agg({
            'feature_dim': 'mean',
            'accuracy': 'mean'
        }).reset_index()

        colors = {'original': 'black', 'bernoulli': 'blue', 'gaussian': 'green', 'logistic_henon': 'red'}
        sizes = {'original': 200, 'bernoulli': 100, 'gaussian': 100, 'logistic_henon': 100}

        for _, row in scatter_data.iterrows():
            is_compressed = row['feature_type'] == 'compressed'
            style_key = row['matrix_type'] if is_compressed else 'original'
            marker = 'o' if row['feature_type'] == 'original' else 's'

            ax.scatter(row['feature_dim'], row['accuracy'], 
                      color=colors.get(style_key, 'gray'),
                      s=sizes.get(style_key, 100), alpha=0.7, marker=marker)

            if is_compressed:
                label = f"{row['matrix_type'][:3]}_{int(row['compression_ratio']*100)}%"
                ax.text(row['feature_dim'], row['accuracy'] + 0.005, 
                       label, fontsize=8, ha='center')

        ax.set_xlabel('Feature Dimension')
        ax.set_ylabel('Accuracy')
        ax.set_title('Feature Dimension vs Accuracy')
        ax.grid(True, alpha=0.3)

        # Add legend (proxy handles, since the points are drawn one by one)
        from matplotlib.lines import Line2D
        legend_elements = [
            Line2D([0], [0], marker='o', color='w', label='Original',
                  markerfacecolor='black', markersize=10),
            Line2D([0], [0], marker='s', color='w', label='Bernoulli',
                  markerfacecolor='blue', markersize=10),
            Line2D([0], [0], marker='s', color='w', label='Gaussian',
                  markerfacecolor='green', markersize=10),
            Line2D([0], [0], marker='s', color='w', label='Logistic-Henon',
                  markerfacecolor='red', markersize=10)
        ]
        ax.legend(handles=legend_elements, title='Matrix Type')

        plt.tight_layout()
        plt.savefig('visualizations/accuracy_comparison.png', dpi=300, bbox_inches='tight')
        plt.show()
    
    def plot_compression_tradeoff(self):
        """Draw three trade-off panels for the compressed experiments and save them.

        Panels: (1) mean accuracy against compression ratio per classifier;
        (2) training time against feature dimension per matrix type;
        (3) model size against accuracy, coloured by classifier with one
        marker shape per matrix type.

        The figure is written to 'visualizations/compression_tradeoff.png'.
        """
        fig, axes = plt.subplots(1, 3, figsize=(15, 5))
        fig.suptitle('Compression Trade-off Analysis', fontsize=14, fontweight='bold')

        # Filter compressed data
        compressed_data = self.results_df[self.results_df['feature_type'] == 'compressed']
        classifiers = compressed_data['classifier'].unique()
        matrix_types = compressed_data['matrix_type'].unique()

        # Plot 1: Accuracy vs Compression Ratio
        ax = axes[0]
        for classifier in classifiers:
            classifier_data = compressed_data[compressed_data['classifier'] == classifier]
            avg_by_ratio = classifier_data.groupby('compression_ratio')['accuracy'].mean()
            ax.plot(avg_by_ratio.index, avg_by_ratio.values, marker='o', label=classifier)

        ax.set_xlabel('Compression Ratio')
        ax.set_ylabel('Accuracy')
        ax.set_title('Accuracy vs Compression Ratio')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 2: Training Time vs Feature Dimension
        ax = axes[1]
        for matrix_type in matrix_types:
            matrix_subset = compressed_data[compressed_data['matrix_type'] == matrix_type]
            ax.scatter(matrix_subset['feature_dim'], matrix_subset['train_time'], 
                      alpha=0.6, label=matrix_type, s=50)

        ax.set_xlabel('Feature Dimension')
        ax.set_ylabel('Training Time (s)')
        ax.set_title('Training Time vs Feature Dimension')
        ax.legend()
        ax.grid(True, alpha=0.3)

        # Plot 3: Model Size vs Accuracy
        # One colour per classifier and one marker per matrix type, with every
        # series labelled and a legend drawn. Previously labels were set for
        # one matrix type only, the colours followed the default cycle, and no
        # legend was shown, so the points could not be identified.
        ax = axes[2]
        palette = dict(zip(classifiers,
                           sns.color_palette(n_colors=max(len(classifiers), 1))))
        marker_cycle = ['o', 's', '^', 'D', 'v', 'P']

        for type_idx, matrix_type in enumerate(matrix_types):
            marker = marker_cycle[type_idx % len(marker_cycle)]
            for classifier in classifiers:
                subset = compressed_data[(compressed_data['matrix_type'] == matrix_type) & 
                                        (compressed_data['classifier'] == classifier)]
                if len(subset) > 0:
                    ax.scatter(subset['model_size'], subset['accuracy'], 
                              alpha=0.6, s=50, color=palette[classifier], marker=marker,
                              label=f"{matrix_type}_{classifier}")

        ax.set_xlabel('Model Size (MB)')
        ax.set_ylabel('Accuracy')
        ax.set_title('Model Size vs Accuracy')
        ax.legend(fontsize=7)
        ax.grid(True, alpha=0.3)

        plt.tight_layout()
        plt.savefig('visualizations/compression_tradeoff.png', dpi=300, bbox_inches='tight')
        plt.show()
    
    def plot_matrix_comparison(self):
        """Draw a 2x2 comparison of the measurement-matrix types and save it.

        Panels: (1) mean accuracy per matrix type and classifier; (2) mean
        train/test time per matrix type and classifier; (3) box plot of the
        accuracies per matrix type; (4) the best-scoring configuration for
        each (matrix type, classifier) pair.

        The figure is written to 'visualizations/matrix_comparison.png'.
        """
        compressed_data = self.results_df[self.results_df['feature_type'] == 'compressed']
        matrix_types = compressed_data['matrix_type'].unique()

        fig, axes = plt.subplots(2, 2, figsize=(14, 10))

        # Plot 1: Matrix type performance by classifier
        ax = axes[0, 0]
        matrix_perf = compressed_data.groupby(['matrix_type', 'classifier'])['accuracy'].mean().unstack()
        matrix_perf.plot(kind='bar', ax=ax, width=0.8, colormap='Set2')
        ax.set_title('Matrix Type Performance by Classifier')
        ax.set_xlabel('Matrix Type')
        ax.set_ylabel('Accuracy')
        ax.legend(title='Classifier')
        ax.grid(True, alpha=0.3)

        # Add value labels
        for patch in ax.patches:
            ax.text(patch.get_x() + patch.get_width()/2, patch.get_height() + 0.01,
                   f'{patch.get_height():.3f}', ha='center', va='bottom', fontsize=8)

        # Plot 2: Speed comparison
        ax = axes[0, 1]
        speed_data = compressed_data.groupby(['matrix_type', 'classifier'])[['train_time', 'test_time']].mean()
        speed_data.plot(kind='bar', ax=ax, width=0.8)
        ax.set_title('Training and Testing Time by Matrix Type')
        ax.set_xlabel('Matrix Type × Classifier')
        ax.set_ylabel('Time (seconds)')
        ax.grid(True, alpha=0.3)
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

        # Plot 3: Statistical distribution
        ax = axes[1, 0]
        accuracy_data = [
            compressed_data[compressed_data['matrix_type'] == matrix_type]['accuracy'].values
            for matrix_type in matrix_types
        ]

        # The boxplot `labels` keyword is deprecated in newer Matplotlib
        # releases; setting the tick labels afterwards works on every version.
        ax.boxplot(accuracy_data)
        ax.set_xticklabels(list(matrix_types))
        ax.set_title('Accuracy Distribution by Matrix Type')
        ax.set_ylabel('Accuracy')
        ax.grid(True, alpha=0.3)

        # Plot 4: Best performing configuration
        ax = axes[1, 1]
        best_configs = []
        for matrix_type in matrix_types:
            for classifier in compressed_data['classifier'].unique():
                subset = compressed_data[(compressed_data['matrix_type'] == matrix_type) & 
                                        (compressed_data['classifier'] == classifier)]
                if len(subset) > 0:
                    best_configs.append(subset.loc[subset['accuracy'].idxmax()])

        best_df = pd.DataFrame(best_configs)

        colors = plt.cm.viridis(np.linspace(0, 1, len(best_df)))
        bars = ax.bar(range(len(best_df)), best_df['accuracy'], color=colors)
        ax.set_title('Best Configuration for Each Matrix+Classifier')
        ax.set_xlabel('Configuration')
        ax.set_ylabel('Accuracy')
        ax.set_xticks(range(len(best_df)))
        ax.set_xticklabels([f"{row['matrix_type'][:3]}_{int(row['compression_ratio']*100)}%\n{row['classifier']}" 
                           for _, row in best_df.iterrows()], rotation=45, ha='right')

        # Add value labels
        for bar, acc in zip(bars, best_df['accuracy']):
            ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                   f'{acc:.3f}', ha='center', va='bottom', fontsize=8)

        plt.tight_layout()
        plt.savefig('visualizations/matrix_comparison.png', dpi=300, bbox_inches='tight')
        plt.show()
    
    def plot_feature_dimension_impact(self):
        """Plot how much compression shrinks the features and what it costs.

        Left panel: percentage reduction of ``feature_dim`` relative to the
        mean ``feature_dim`` of the 'original' rows, one bar per
        (matrix type, compression ratio).  The value is averaged over the
        rows of that configuration, so each label appears once instead of
        once per classifier.

        Right panel: accuracy retention (compressed accuracy as a percentage
        of the classifier's mean baseline accuracy) against dimension
        reduction, one scatter series per classifier.  Classifiers without a
        usable baseline are left out.

        The figure is saved to
        ``visualizations/feature_dimension_impact.png`` (the directory is
        created when missing) and then shown.  If either the original or the
        compressed rows are missing, a message is printed and nothing is
        drawn.
        """
        compressed_data = self.results_df[self.results_df['feature_type'] == 'compressed']
        original_data = self.results_df[self.results_df['feature_type'] == 'original']
        if compressed_data.empty or original_data.empty:
            print("Need both original and compressed results - skipping feature dimension plot")
            return

        original_dims = original_data['feature_dim'].mean()

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))

        # Plot 1: Dimension reduction percentage
        ax = axes[0]
        # sort=False keeps configurations in order of first appearance.
        config_dims = (
            compressed_data
            .groupby(['matrix_type', 'compression_ratio'], sort=False)['feature_dim']
            .mean()
        )
        config_reduction = (1 - config_dims / original_dims) * 100
        # Round (not truncate) the ratio so e.g. 0.29 is shown as 29%.
        labels = [f"{matrix_type[:3]}_{ratio * 100:.0f}%"
                  for matrix_type, ratio in config_reduction.index]
        positions = range(len(config_reduction))

        ax.bar(positions, config_reduction.to_numpy(), alpha=0.7)
        ax.set_xlabel('Configuration')
        ax.set_ylabel('Dimension Reduction (%)')
        ax.set_title('Feature Dimension Reduction')
        ax.set_xticks(positions)
        ax.set_xticklabels(labels, rotation=45, ha='right')
        ax.grid(True, alpha=0.3, axis='y')

        # Plot 2: Accuracy retention vs dimension reduction
        ax = axes[1]
        for classifier in compressed_data['classifier'].unique():
            baseline_acc = original_data.loc[
                original_data['classifier'] == classifier, 'accuracy'
            ].mean()
            # NaN (no baseline row) and 0 both fail this test; retention is
            # undefined for them.
            if not baseline_acc > 0:
                continue

            classifier_comp = compressed_data[compressed_data['classifier'] == classifier]
            retention = classifier_comp['accuracy'] / baseline_acc * 100
            reduction = (1 - classifier_comp['feature_dim'] / original_dims) * 100

            ax.scatter(reduction.to_numpy(), retention.to_numpy(),
                       alpha=0.6, label=classifier, s=80)

        ax.set_xlabel('Dimension Reduction (%)')
        ax.set_ylabel('Accuracy Retention (%)')
        ax.set_title('Accuracy Retention vs Dimension Reduction')
        ax.axhline(y=100, color='r', linestyle='--', alpha=0.5, label='Original Accuracy')
        ax.legend()
        ax.grid(True, alpha=0.3)

        fig.tight_layout()
        # savefig does not create missing directories.
        os.makedirs('visualizations', exist_ok=True)
        fig.savefig('visualizations/feature_dimension_impact.png', dpi=300, bbox_inches='tight')
        plt.show()
    
    def plot_training_time_comparison(self):
        """Plot training times and the speedup obtained through compression.

        Left panel: mean ``train_time`` per
        (feature type, matrix type, compression ratio), one bar per
        classifier.

        Right panel: speedup factor, i.e. the classifier's mean baseline
        ('original') training time divided by the mean training time of each
        compressed (matrix type, compression ratio) configuration.  Bars are
        green for a speedup >= 1 and red otherwise.  Classifiers without a
        baseline time and configurations without a positive time are skipped.

        The figure is saved to
        ``visualizations/training_time_comparison.png`` (the directory is
        created when missing) and then shown.  With an empty result table a
        message is printed and nothing is drawn.
        """
        if self.results_df.empty:
            print("No results available - skipping training time comparison plot")
            return

        fig, axes = plt.subplots(1, 2, figsize=(12, 5))

        # Plot 1: Training time by configuration
        ax = axes[0]
        time_data = self.results_df.pivot_table(
            index=['feature_type', 'matrix_type', 'compression_ratio'],
            columns='classifier',
            values='train_time',
            aggfunc='mean'
        )
        # NOTE(review): rows whose matrix_type / compression_ratio are NaN are
        # presumably dropped by the pivot; if the pipeline stores the
        # 'original' rows that way they will not appear here - confirm.
        if not time_data.empty:
            time_data.plot(kind='bar', ax=ax, width=0.8, colormap='tab20c')
            ax.legend(title='Classifier')
        ax.set_title('Training Time by Configuration')
        ax.set_xlabel('Configuration')
        ax.set_ylabel('Training Time (seconds)')
        ax.grid(True, alpha=0.3)
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

        # Plot 2: Speedup factor
        ax = axes[1]
        original_times = self.results_df[self.results_df['feature_type'] == 'original']
        compressed_times = self.results_df[self.results_df['feature_type'] == 'compressed']

        # Mean training time of every compressed configuration, computed once
        # instead of re-filtering the frame inside the nested loops.
        mean_compressed = (
            compressed_times
            .groupby(['classifier', 'matrix_type', 'compression_ratio'])['train_time']
            .mean()
            .to_dict()
        )

        speedup_data = []
        labels = []
        for classifier in self.results_df['classifier'].unique():
            orig_time = original_times.loc[
                original_times['classifier'] == classifier, 'train_time'
            ].mean()
            if not np.isfinite(orig_time):
                # No baseline for this classifier: a speedup cannot be computed.
                continue

            for matrix_type in compressed_times['matrix_type'].unique():
                for ratio in compressed_times['compression_ratio'].unique():
                    comp_time = mean_compressed.get((classifier, matrix_type, ratio))
                    if comp_time is None or np.isnan(comp_time) or comp_time <= 0:
                        continue
                    speedup_data.append(orig_time / comp_time)
                    # Round (not truncate) the ratio so 0.29 is shown as 29%.
                    labels.append(f"{classifier[:3]}_{matrix_type[:3]}_{ratio * 100:.0f}%")

        positions = range(len(speedup_data))
        colors = ['green' if x >= 1 else 'red' for x in speedup_data]
        bars = ax.bar(positions, speedup_data, color=colors, alpha=0.7)
        ax.axhline(y=1, color='black', linestyle='--', alpha=0.5, label='No Speedup')
        ax.set_xlabel('Configuration')
        ax.set_ylabel('Speedup Factor (Original/Compressed)')
        ax.set_title('Training Speedup from Compression')
        ax.set_xticks(positions)
        ax.set_xticklabels(labels, rotation=45, ha='right')
        ax.legend()
        ax.grid(True, alpha=0.3, axis='y')

        # Add value labels
        for bar, speedup in zip(bars, speedup_data):
            ax.text(bar.get_x() + bar.get_width() / 2, bar.get_height() + 0.05,
                    f'{speedup:.2f}x', ha='center', va='bottom', fontsize=8)

        fig.tight_layout()
        # savefig does not create missing directories.
        os.makedirs('visualizations', exist_ok=True)
        fig.savefig('visualizations/training_time_comparison.png', dpi=300, bbox_inches='tight')
        plt.show()
    
    def plot_confusion_matrix_heatmaps(self):
        """Placeholder for confusion-matrix heatmaps of the best configurations.

        Currently this only prints a notice and returns ``None``; no figure
        is created.  Drawing the heatmaps needs the per-experiment test
        labels and predictions, which are not part of ``self.results_df``.
        """
        # This is a placeholder - you'll need to implement based on your actual predictions
        print("Confusion matrix visualization would require prediction data storage")

        # TODO: untested sketch of a possible implementation.  It relies on a
        # `_get_best_configurations()` helper and on stored `y_test` /
        # `predictions`, none of which exist yet.
        #
        # fig, axes = plt.subplots(3, 3, figsize=(15, 15))
        #
        # best_configs = self._get_best_configurations()
        #
        # for idx, (exp_name, config) in enumerate(best_configs.items()):
        #     row, col = divmod(idx, 3)
        #     ax = axes[row, col]
        #
        #     # Get predictions (you need to store these during training)
        #     y_true = config['y_test']
        #     y_pred = config['predictions']
        #
        #     cm = confusion_matrix(y_true, y_pred, normalize='true')
        #     sns.heatmap(cm, annot=True, fmt='.2f', cmap='Blues', ax=ax)
        #     ax.set_title(f"{exp_name}\nAccuracy: {config['accuracy']:.3f}")
        #
        # plt.tight_layout()
        # plt.savefig('visualizations/confusion_matrices.png', dpi=300, bbox_inches='tight')
        # plt.show()
    
    def plot_statistical_significance(self):
        """Placeholder for a significance comparison of the matrix types.

        Currently this only prints a notice and returns ``None``; no test is
        run and no figure is created.
        """
        # Perform ANOVA or t-tests
        print("Statistical significance analysis would require multiple runs")

        # TODO: untested sketch of a possible implementation (pairwise
        # t-tests between matrix types, shown as a p-value heatmap).
        #
        # from scipy import stats
        # import itertools
        #
        # compressed_data = self.results_df[self.results_df['feature_type'] == 'compressed']
        #
        # # Compare matrix types
        # matrix_accuracies = {}
        # for matrix_type in compressed_data['matrix_type'].unique():
        #     matrix_accuracies[matrix_type] = compressed_data[
        #         compressed_data['matrix_type'] == matrix_type
        #     ]['accuracy'].values
        #
        # # Create significance matrix
        # fig, ax = plt.subplots(figsize=(8, 6))
        #
        # matrix_types = list(matrix_accuracies.keys())
        # significance_matrix = np.zeros((len(matrix_types), len(matrix_types)))
        #
        # for i, j in itertools.combinations(range(len(matrix_types)), 2):
        #     t_stat, p_value = stats.ttest_ind(
        #         matrix_accuracies[matrix_types[i]],
        #         matrix_accuracies[matrix_types[j]]
        #     )
        #     significance_matrix[i, j] = p_value
        #     significance_matrix[j, i] = p_value
        #
        # # Plot heatmap
        # sns.heatmap(significance_matrix, annot=True, fmt='.4f',
        #            cmap='RdYlGn_r', vmin=0, vmax=0.05,
        #            xticklabels=matrix_types, yticklabels=matrix_types,
        #            ax=ax)
        # ax.set_title('Statistical Significance (p-values)\nMatrix Type Comparisons')
        #
        # plt.tight_layout()
        # plt.savefig('visualizations/statistical_significance.png', dpi=300, bbox_inches='tight')
        # plt.show()

In [None]:
class SummaryReportGenerator:
    """Summarise experiment results on the console and in a text report.

    Parameters
    ----------
    results_df : pandas.DataFrame
        One row per evaluated configuration.  The methods read the columns
        ``feature_type`` (``'original'`` or ``'compressed'``),
        ``classifier``, ``matrix_type``, ``compression_ratio``,
        ``accuracy``, ``feature_dim`` and ``train_time``.

    Notes
    -----
    Rows with ``feature_type == 'original'`` are the baseline the compressed
    configurations are compared against.  A classifier without a baseline
    row yields ``nan`` for its accuracy drop, and an absence of compressed
    rows is reported in the output; neither raises.
    """

    # Largest absolute accuracy loss versus the baseline that still counts as
    # a recommended configuration (0.05 == five points when accuracy is a
    # fraction in [0, 1]).
    MAX_ACCURACY_DROP = 0.05
    # Number of entries shown in each recommendation list.
    TOP_N = 5
    # Destination of the text report written by save_detailed_report().
    REPORT_PATH = 'results/experiment_report.txt'

    def __init__(self, results_df):
        self.results_df = results_df

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _rows(self, feature_type):
        """Return the result rows whose ``feature_type`` equals the argument."""
        return self.results_df[self.results_df['feature_type'] == feature_type]

    @staticmethod
    def _best_row(frame):
        """Return the highest-accuracy row of ``frame``, or ``None``.

        ``None`` is returned when the frame is empty or has no non-NaN
        accuracy.  The row is picked by position, so exactly one row comes
        back even if the index contains duplicate labels (where
        ``frame.loc[frame['accuracy'].idxmax()]`` would return several).
        """
        accuracy = frame['accuracy']
        if not accuracy.notna().any():
            return None
        return frame.iloc[int(accuracy.argmax())]

    @staticmethod
    def _baseline_accuracy(original_data, classifier):
        """Mean baseline accuracy of ``classifier``; NaN if it has no baseline row."""
        mask = original_data['classifier'] == classifier
        return original_data.loc[mask, 'accuracy'].mean()

    @staticmethod
    def _reduction_pct(compressed_value, original_value):
        """Percentage by which ``compressed_value`` is below ``original_value``."""
        return (1 - compressed_value / original_value) * 100

    @staticmethod
    def _baseline_lines(original_data):
        """Return the per-classifier baseline lines shared by console and file output."""
        lines = ["Original Audio Features (Baseline):"]
        for classifier in original_data['classifier'].unique():
            classifier_data = original_data[original_data['classifier'] == classifier]
            lines.append(f"  {classifier}: Accuracy = {classifier_data['accuracy'].mean():.4f}, "
                         f"Features = {int(classifier_data['feature_dim'].mean())}")
        return lines

    # ------------------------------------------------------------------
    # Console report
    # ------------------------------------------------------------------
    def generate_report(self):
        """Print every report section, then write the text report to disk."""
        print("\n" + "="*80)
        print("EXPERIMENT SUMMARY REPORT")
        print("="*80)

        self.print_overall_summary()
        self.print_best_configurations()
        self.print_compression_analysis()
        self.print_recommendations()

        # Save report
        self.save_detailed_report()

    def print_overall_summary(self):
        """Print the baseline per classifier and the compressed averages."""
        print("\n📊 OVERALL SUMMARY")
        print("-"*40)

        original_data = self._rows('original')
        compressed_data = self._rows('compressed')

        # Original performance
        for line in self._baseline_lines(original_data):
            print(line)

        # Compressed performance summary
        feature_reduction = self._reduction_pct(compressed_data['feature_dim'].mean(),
                                                original_data['feature_dim'].mean())
        time_reduction = self._reduction_pct(compressed_data['train_time'].mean(),
                                             original_data['train_time'].mean())
        print("\nCompressed Sensing Average (All configurations):")
        print(f"  Accuracy: {compressed_data['accuracy'].mean():.4f}")
        print(f"  Feature Dimension Reduction: {feature_reduction:.1f}%")
        print(f"  Training Time Reduction: {time_reduction:.1f}%")

    def print_best_configurations(self):
        """Print the best compressed row overall, per matrix type and per classifier."""
        print("\n🏆 BEST CONFIGURATIONS")
        print("-"*40)

        compressed_data = self._rows('compressed')
        original_data = self._rows('original')

        # Best overall
        best_overall = self._best_row(compressed_data)
        if best_overall is None:
            print("No compressed configurations with a recorded accuracy.")
            return

        print("Best Overall Compressed Configuration:")
        print(f"  Matrix: {best_overall['matrix_type'].capitalize()}")
        print(f"  Compression: {best_overall['compression_ratio']*100:.0f}%")
        print(f"  Classifier: {best_overall['classifier']}")
        print(f"  Accuracy: {best_overall['accuracy']:.4f}")
        print(f"  Feature Dim: {int(best_overall['feature_dim'])}")

        # Best by matrix type
        print("\nBest by Matrix Type:")
        for matrix_type in compressed_data['matrix_type'].unique():
            best_matrix = self._best_row(
                compressed_data[compressed_data['matrix_type'] == matrix_type])
            if best_matrix is None:
                continue
            print(f"  {matrix_type.capitalize()}: "
                  f"{best_matrix['classifier']} at {best_matrix['compression_ratio']*100:.0f}% "
                  f"(Acc: {best_matrix['accuracy']:.4f})")

        # Best by classifier
        print("\nBest by Classifier:")
        for classifier in compressed_data['classifier'].unique():
            best_classifier = self._best_row(
                compressed_data[compressed_data['classifier'] == classifier])
            if best_classifier is None:
                continue

            # Compare with original (NaN when the classifier has no baseline)
            orig_acc = self._baseline_accuracy(original_data, classifier)
            accuracy_drop = orig_acc - best_classifier['accuracy']

            print(f"  {classifier}: {best_classifier['matrix_type']} "
                  f"{best_classifier['compression_ratio']*100:.0f}% "
                  f"(Acc: {best_classifier['accuracy']:.4f}, "
                  f"Drop: {accuracy_drop:.4f})")

    def print_compression_analysis(self):
        """Print mean accuracy and feature reduction for each compression ratio."""
        print("\n📉 COMPRESSION TRADE-OFF ANALYSIS")
        print("-"*40)

        compressed_data = self._rows('compressed')
        # The baseline dimension does not depend on the ratio; compute it once.
        orig_features = self._rows('original')['feature_dim'].mean()

        # Analyze by compression ratio
        print("Performance by Compression Ratio:")
        for ratio in sorted(compressed_data['compression_ratio'].unique()):
            ratio_data = compressed_data[compressed_data['compression_ratio'] == ratio]
            avg_acc = ratio_data['accuracy'].mean()
            feature_reduction = self._reduction_pct(ratio_data['feature_dim'].mean(),
                                                    orig_features)

            print(f"  {ratio*100:.0f}% compression: "
                  f"Accuracy = {avg_acc:.4f}, "
                  f"Features reduced by {feature_reduction:.1f}%")

    def print_recommendations(self):
        """Print the most accurate low-loss configurations and the fastest ones.

        A configuration counts as low-loss when its accuracy is at most
        ``MAX_ACCURACY_DROP`` below its classifier's mean baseline accuracy.
        Classifiers without a baseline never qualify.
        """
        print("\n💡 PRACTICAL RECOMMENDATIONS")
        print("-"*40)

        compressed_data = self._rows('compressed')
        original_data = self._rows('original')
        threshold_pct = self.MAX_ACCURACY_DROP * 100

        # Find configurations whose accuracy drop stays within the threshold
        good_configs = []
        for classifier in compressed_data['classifier'].unique():
            orig_acc = self._baseline_accuracy(original_data, classifier)
            classifier_data = compressed_data[compressed_data['classifier'] == classifier]

            for _, row in classifier_data.iterrows():
                accuracy_drop = orig_acc - row['accuracy']
                # A NaN drop (missing baseline) compares False and is skipped.
                if accuracy_drop <= self.MAX_ACCURACY_DROP:
                    good_configs.append({
                        'classifier': classifier,
                        'matrix': row['matrix_type'],
                        'ratio': row['compression_ratio'],
                        'accuracy': row['accuracy'],
                        'drop': accuracy_drop,
                        'features': row['feature_dim'],
                        'train_time': row['train_time']
                    })

        if good_configs:
            print(f"Recommended configurations (accuracy drop ≤ {threshold_pct:.0f}%):")
            ranked = sorted(good_configs, key=lambda x: x['accuracy'], reverse=True)
            for config in ranked[:self.TOP_N]:
                print(f"  • {config['classifier']} with {config['matrix']} "
                      f"{config['ratio']*100:.0f}%: "
                      f"Acc = {config['accuracy']:.4f}, "
                      f"Drop = {config['drop']:.4f}, "
                      f"Features = {int(config['features'])}")
        else:
            print(f"No configurations with ≤{threshold_pct:.0f}% accuracy drop found.")

        # Speed-focused recommendations
        print("\nSpeed-focused recommendations:")
        fast_configs = compressed_data.nsmallest(self.TOP_N, 'train_time')
        for _, row in fast_configs.iterrows():
            print(f"  • {row['classifier']} with {row['matrix_type']} "
                  f"{row['compression_ratio']*100:.0f}%: "
                  f"Train time = {row['train_time']:.2f}s, "
                  f"Acc = {row['accuracy']:.4f}")

    # ------------------------------------------------------------------
    # Text-file report
    # ------------------------------------------------------------------
    def save_detailed_report(self):
        """Write the text report to ``REPORT_PATH`` and print where it went.

        The parent directory is created when missing and the file is written
        as UTF-8.
        """
        report_content = [
            "="*80,
            "COMPREHENSIVE AUDIO COMPRESSION EXPERIMENT REPORT",
            "="*80,
            f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            f"Total Experiments: {len(self.results_df)}",
            "",
        ]

        # Add summary sections
        sections = [
            self._get_overall_summary_text(),
            self._get_best_configurations_text(),
            self._get_compression_analysis_text(),
            self._get_recommendations_text()
        ]

        for section in sections:
            report_content.extend(section)
            report_content.append("")

        # Save to file; open() does not create missing directories.
        report_dir = os.path.dirname(self.REPORT_PATH)
        if report_dir:
            os.makedirs(report_dir, exist_ok=True)
        with open(self.REPORT_PATH, 'w', encoding='utf-8') as f:
            f.write('\n'.join(report_content))

        print(f"\n✓ Detailed report saved to: {self.REPORT_PATH}")

    def _get_overall_summary_text(self):
        """Return the overall-summary section as a list of lines."""
        original_data = self._rows('original')
        compressed_data = self._rows('compressed')

        feature_reduction = self._reduction_pct(compressed_data['feature_dim'].mean(),
                                                original_data['feature_dim'].mean())
        time_reduction = self._reduction_pct(compressed_data['train_time'].mean(),
                                             original_data['train_time'].mean())

        lines = ["OVERALL SUMMARY", "-"*40]
        lines.extend(self._baseline_lines(original_data))
        lines.append("")
        lines.append("Compressed Sensing Summary:")
        lines.append(f"  Average Accuracy: {compressed_data['accuracy'].mean():.4f}")
        lines.append(f"  Average Feature Reduction: {feature_reduction:.1f}%")
        lines.append(f"  Average Training Time Reduction: {time_reduction:.1f}%")

        return lines

    def _get_best_configurations_text(self):
        """Return the best-configuration section as a list of lines."""
        lines = ["BEST CONFIGURATIONS", "-"*40]

        # Best overall
        best_overall = self._best_row(self._rows('compressed'))
        if best_overall is None:
            lines.append("No compressed configurations with a recorded accuracy.")
            return lines

        lines.append("Best Overall:")
        lines.append(f"  Matrix: {best_overall['matrix_type'].capitalize()}")
        lines.append(f"  Compression: {best_overall['compression_ratio']*100:.0f}%")
        lines.append(f"  Classifier: {best_overall['classifier']}")
        lines.append(f"  Accuracy: {best_overall['accuracy']:.4f}")
        lines.append(f"  Features: {int(best_overall['feature_dim'])}")
        lines.append(f"  Training Time: {best_overall['train_time']:.2f}s")

        return lines

    def _get_compression_analysis_text(self):
        """Return the per-matrix-type and per-ratio averages as a list of lines."""
        lines = ["COMPRESSION TRADE-OFF ANALYSIS", "-"*40]

        compressed_data = self._rows('compressed')

        # By matrix type
        lines.append("Performance by Matrix Type:")
        for matrix_type in compressed_data['matrix_type'].unique():
            matrix_data = compressed_data[compressed_data['matrix_type'] == matrix_type]
            lines.append(f"  {matrix_type.capitalize()}: "
                         f"Accuracy = {matrix_data['accuracy'].mean():.4f}, "
                         f"Train Time = {matrix_data['train_time'].mean():.2f}s")

        # By compression ratio
        lines.append("")
        lines.append("Performance by Compression Ratio:")
        for ratio in sorted(compressed_data['compression_ratio'].unique()):
            ratio_data = compressed_data[compressed_data['compression_ratio'] == ratio]
            lines.append(f"  {ratio*100:.0f}%: "
                         f"Accuracy = {ratio_data['accuracy'].mean():.4f}, "
                         f"Features = {ratio_data['feature_dim'].mean():.0f}")

        return lines

    def _get_recommendations_text(self):
        """Return the recommendations section as a list of lines.

        NOTE(review): this text is static and not derived from
        ``self.results_df``; the statements about Bernoulli speed and 70%
        compression should be checked against the measured results.
        """
        return [
            "RECOMMENDATIONS",
            "-"*40,
            "Based on the experiments, here are practical recommendations:",
            "",
            "1. For Maximum Accuracy:",
            "   - Use the best overall configuration identified above",
            "",
            "2. For Speed/Practicality:",
            "   - Bernoulli matrix is fastest to generate",
            "   - 70% compression gives good speedup with reasonable accuracy",
            "",
            "3. For Memory Efficiency:",
            "   - Higher compression ratios use less memory",
            "   - Logistic-Henon may have security benefits but is slower",
        ]

In [None]:
# Main execution function
def main():
    """Run the full experiment pipeline, then visualise and summarise it.

    Steps: build an ``AudioCompressionFramework`` for ``UrbanSound8K/``,
    load at most 50 files per class, call ``run_experiment_pipeline`` with a
    20% test split, create all visualizations with
    ``ComprehensiveVisualizer`` and print/save the summary report with
    ``SummaryReportGenerator``.

    Returns
    -------
    The object returned by ``run_experiment_pipeline`` (handed to the
    visualizer and the report generator as ``results_df``).

    Raises
    ------
    RuntimeError
        If the loader returns no files, so the failure is reported here
        rather than somewhere inside the pipeline.
    """
    print("🚀 Starting Comprehensive Audio Compression Experiments")
    print("="*80)

    # Initialize framework
    framework = AudioCompressionFramework(
        base_path="UrbanSound8K/",
        wavelet='db4',
        dwt_level=4,
        sample_rate=22050,
        duration=4.0,
        random_seed=42
    )

    # Load dataset (limit to 50 files per class for initial testing)
    print("\n📂 Loading dataset...")
    file_paths, labels = framework.load_urbansound_dataset(max_files_per_class=50)

    if len(file_paths) == 0:
        raise RuntimeError(
            "No audio files were loaded; check that the dataset is available under 'UrbanSound8K/'"
        )

    print("\n📊 Dataset Statistics:")
    print(f"  Total files: {len(file_paths)}")
    print(f"  Classes: {len(set(labels))}")
    print(f"  Sample rate: {framework.sample_rate} Hz")
    print(f"  Duration: {framework.duration} seconds")
    print(f"  Expected audio length: {int(framework.duration * framework.sample_rate)} samples")

    # Run experiments
    print("\n" + "="*80)
    print("🏃 Running Experiments...")
    print("="*80)

    start_time = time.time()

    # Run the complete pipeline
    results_df = framework.run_experiment_pipeline(
        file_paths,
        labels,
        test_size=0.2,
        max_files_per_class=None  # Use all loaded files
    )

    total_time = time.time() - start_time
    print(f"\n✅ All experiments completed in {total_time/3600:.2f} hours")

    # Generate visualizations
    print("\n🎨 Generating visualizations...")
    visualizer = ComprehensiveVisualizer(results_df)
    visualizer.create_all_visualizations()

    # Generate summary report
    print("\n📋 Generating summary report...")
    report_generator = SummaryReportGenerator(results_df)
    report_generator.generate_report()

    print("\n" + "="*80)
    print("🎉 EXPERIMENT COMPLETE!")
    print("="*80)
    print("\n📁 Output files:")
    print("  - results/compression_experiments_results.csv")
    print("  - results/experiment_details.json")
    print("  - results/experiment_report.txt")
    print("  - results/feature_dimensions.csv")
    print("  - visualizations/*.png")
    print("  - checkpoints/*.pkl")
    print("\n📊 Next steps:")
    print("  1. Review the visualizations in the 'visualizations/' folder")
    print("  2. Read the detailed report in 'results/experiment_report.txt'")
    print("  3. Analyze trade-offs for your specific application")
    print("  4. Run with full dataset if results are promising")

    return results_df

In [None]:
# Quick test function (for debugging)
def quick_test(n_train=20, n_test=10, seed=42):
    """Smoke-test one configuration (Bernoulli, 50% compression, SVM).

    Loads at most 10 files per class, draws ``n_train`` training and
    ``n_test`` test files from a seeded random permutation, extracts
    compressed features for both sets and trains/evaluates an SVM.

    The files are sampled rather than taken as the leading slices
    ``[:20]`` / ``[20:30]``: if the loader returns files grouped by class
    (not visible from here - TODO confirm), leading slices would put
    different classes in the training and the test set.

    Parameters
    ----------
    n_train : int, default 20
        Number of files used for training.
    n_test : int, default 10
        Number of files used for testing.
    seed : int, default 42
        Seed of the permutation, so repeated runs pick the same files.

    Returns
    -------
    The ``results`` object returned by ``framework.train_and_evaluate``.

    Raises
    ------
    ValueError
        If fewer than ``n_train + n_test`` files were loaded.
    RuntimeError
        If ``train_and_evaluate`` returns no results.
    """
    print("Running quick test with 10 files per class...")

    framework = AudioCompressionFramework()
    file_paths, labels = framework.load_urbansound_dataset(max_files_per_class=10)

    n_needed = n_train + n_test
    if len(file_paths) < n_needed:
        raise ValueError(
            f"quick_test needs at least {n_needed} files but only {len(file_paths)} were loaded"
        )

    # A local Generator leaves the global NumPy random state untouched.
    order = np.random.default_rng(seed).permutation(len(file_paths))
    train_idx = order[:n_train]
    test_idx = order[n_train:n_needed]

    # Index element-wise so this works for lists as well as arrays.
    train_paths = [file_paths[i] for i in train_idx]
    train_labels = [labels[i] for i in train_idx]
    test_paths = [file_paths[i] for i in test_idx]
    test_labels = [labels[i] for i in test_idx]

    # Just test one configuration
    print("\nTesting Bernoulli 50% compression...")
    X_train, y_train, info = framework.prepare_features(
        train_paths, train_labels,
        feature_type='compressed',
        compression_ratio=0.5,
        matrix_type='bernoulli'
    )

    X_test, y_test, _ = framework.prepare_features(
        test_paths, test_labels,
        feature_type='compressed',
        compression_ratio=0.5,
        matrix_type='bernoulli'
    )

    results, models = framework.train_and_evaluate(
        X_train, X_test, y_train, y_test, info,
        classifier_names=['SVM']
    )

    if not results:
        raise RuntimeError("train_and_evaluate returned no results for the quick test")

    print(f"\nQuick test results: Accuracy = {results[0]['accuracy']:.4f}")
    return results

if __name__ == "__main__":
    # Entry point: run the complete experiment suite and keep its results.
    # For a fast smoke test, swap the call below for:
    #     results = quick_test()
    results = main()