## 1. Import knihoven a základní nastavení

In [None]:
import numpy as np
import librosa
import librosa.display
import pandas as pd
from scipy import signal, stats
from sklearn.model_selection import train_test_split, cross_val_score, learning_curve
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve
import xgboost as xgb
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
import os
from IPython.display import Audio, display, HTML
from tqdm.notebook import tqdm
import joblib

✅ Knihovny načteny
Librosa version: 0.11.0
XGBoost version: 3.0.5


In [None]:
class AudioFeatureExtractor:
    """Extract features used to detect the speaker's distance from the microphone.

    The individual feature groups are computed by private helpers
    (``_extract_energy_features``, ``_extract_spectral_features``,
    ``_extract_clarity_features``, ``_extract_room_features`` and
    ``_extract_mfcc_features``); each is expected to return a dict that is
    merged into the final result.
    """

    def __init__(self, sr=16000, n_mfcc=13):
        """
        Args:
            sr: sample rate passed to ``librosa.load`` and assumed for raw
                audio arrays.
            n_mfcc: stored on the instance; not read by the code in this
                block (presumably consumed by the MFCC helper).
        """
        self.sr = sr
        self.n_mfcc = n_mfcc

    def extract_features(self, audio_input):
        """
        Extract all feature groups from an audio file or from raw audio data.

        Args:
            audio_input: path to an audio file (``str`` or any
                ``os.PathLike`` such as ``pathlib.Path``) OR an array of
                audio samples. Samples are used as-is and are assumed to be
                at ``self.sr``.

        Returns:
            dict: merged features from all groups. Groups are merged in the
            order energy, spectral, clarity, room, MFCC; if two groups emit
            the same key, the later group wins.
        """
        # Load the audio. Path-like objects are treated as paths too, so a
        # pathlib.Path is loaded from disk instead of being mistaken for samples.
        if isinstance(audio_input, (str, os.PathLike)):
            y, sr = librosa.load(audio_input, sr=self.sr)
        else:
            y = audio_input
            sr = self.sr

        features = {}

        # 1. Energy features (the only group that does not take the sample rate)
        features.update(self._extract_energy_features(y))

        # 2. Spectral features
        features.update(self._extract_spectral_features(y, sr))

        # 3. Clarity features
        features.update(self._extract_clarity_features(y, sr))

        # 4. Room acoustics features
        features.update(self._extract_room_features(y, sr))

        # 5. MFCC features (additional information)
        features.update(self._extract_mfcc_features(y, sr))

        return features

class AudioPreprocessor:
    """Audio preprocessing: split recordings into fixed-length chunks."""

    def __init__(self, chunk_duration=3.0, overlap=0.5, sr=16000,
                 min_silence_duration=0.1, silence_threshold=-40):
        """
        Args:
            chunk_duration: length of one chunk in seconds
            overlap: overlap between consecutive chunks as a fraction of the
                chunk length (0.0 = none, 0.5 = 50% overlap); must be < 1
            sr: sample rate
            min_silence_duration: minimum silence length in seconds for
                silence detection (stored here; not read in this block)
            silence_threshold: silence detection threshold in dB
                (stored here; not read in this block)
        """
        self.chunk_duration = chunk_duration
        self.overlap = overlap
        self.sr = sr
        self.min_silence_duration = min_silence_duration
        self.silence_threshold = silence_threshold

    def split_into_chunks(self, audio_path, remove_silence=True):
        """
        Split an audio recording into chunks of ``chunk_duration`` seconds.

        Args:
            audio_path: path to an audio file (``str`` or ``os.PathLike``).
                An already-loaded array of samples is accepted as well and is
                assumed to be at ``self.sr``.
            remove_silence: whether to pass the signal through
                ``self._remove_silence`` before chunking

        Returns:
            list: audio chunks (arrays)
            list: start time of each chunk in seconds. Times are positions in
                the signal that is actually chunked, i.e. after
                ``_remove_silence`` when ``remove_silence`` is true.

        Raises:
            ValueError: if ``overlap``/``chunk_duration`` produce a hop of
                less than one sample (e.g. ``overlap >= 1``).
        """
        # Load the audio (or take the samples as given)
        if isinstance(audio_path, (str, os.PathLike)):
            y, sr = librosa.load(audio_path, sr=self.sr)
        else:
            y, sr = audio_path, self.sr

        # Strip silence if requested
        if remove_silence:
            y = self._remove_silence(y, sr)

        # Audio shorter than one chunk is returned whole (no validity check)
        if len(y) < self.chunk_duration * sr:
            return [y], [0.0]

        # Chunking parameters
        chunk_samples = int(self.chunk_duration * sr)
        hop_samples = int(chunk_samples * (1 - self.overlap))
        if hop_samples < 1:
            # A zero hop would make range() fail with an opaque message and a
            # negative hop would silently yield no chunks at all.
            raise ValueError(
                f"overlap={self.overlap!r} with chunk_duration="
                f"{self.chunk_duration!r} gives a hop of {hop_samples} samples; "
                "the hop must be at least 1 sample (use overlap < 1)"
            )

        chunks = []
        timestamps = []

        # Sliding window over the signal
        for start in range(0, len(y) - chunk_samples + 1, hop_samples):
            chunk = y[start:start + chunk_samples]

            # Skip chunks the validity helper rejects (e.g. too quiet)
            if self._is_valid_chunk(chunk):
                chunks.append(chunk)
                timestamps.append(start / sr)

        # Cover the tail that the sliding window did not reach, but only when
        # at least one regular chunk was kept.
        if chunks:
            # Samples after the end of the last window. This is measured from
            # the last window's end, not as len(y) % hop_samples, which would
            # re-append the last regular chunk whenever chunk_samples is not a
            # multiple of hop_samples.
            tail_samples = (len(y) - chunk_samples) % hop_samples
            if tail_samples > sr * 0.5:  # only if the tail is longer than 0.5 s
                # Taken from the end of the signal, so it is always exactly
                # chunk_samples long and never needs padding.
                last_chunk = y[-chunk_samples:]
                if self._is_valid_chunk(last_chunk):
                    chunks.append(last_chunk)
                    timestamps.append((len(y) - chunk_samples) / sr)

        return chunks, timestamps


In [None]:
class AudioDistanceClassifier:
    """Feature-based audio classifier built on gradient-boosted trees.

    Combines an ``AudioFeatureExtractor`` and an ``AudioPreprocessor`` to turn
    audio files into a feature matrix, then trains either an XGBoost or a
    scikit-learn gradient boosting model on it.
    """

    # Display names used in the evaluation report; the first ``n_classes``
    # entries are used.
    # NOTE(review): 'mic_coondenser' looks like a typo for 'mic_condenser';
    # left unchanged in case it mirrors label names used elsewhere.
    _DEFAULT_TARGET_NAMES = (
        'distance_ok',
        'distance_too_close',
        'distance_too_far',
        'gain_ok',
        'gain_too_high',
        'gain_too_low',
        'mic_coondenser',
        'mic_dynamic',
        'noise_bad',
        'noise_ok',
        'space_studio',
        'space_with_reverb',
    )

    def __init__(self, model_type='xgboost', n_classes=2):
        """
        Args:
            model_type: ``'xgboost'`` selects ``xgb.XGBClassifier``; any other
                value selects ``GradientBoostingClassifier``.
            n_classes: number of classes; passed to XGBoost as ``num_class``
                and used to pick the display names for the report.
        """
        self.model_type = model_type
        self.n_classes = n_classes
        self.feature_extractor = AudioFeatureExtractor()
        self.preprocessor = AudioPreprocessor()
        self.scaler = StandardScaler()
        self.model = None
        self.feature_importance = None
        self.class_names = None  # not assigned anywhere in this block

    def prepare_dataset(self, audio_files, labels, use_chunks=True, augment=False):
        """
        Build a feature matrix from audio files.

        Items whose feature extraction raises are reported on stdout and
        skipped, so the result may contain fewer rows than inputs.

        Args:
            audio_files: list of paths to audio files
            labels: list of labels, parallel to ``audio_files``
            use_chunks: whether to split the files into chunks first
                (via ``self.preprocessor.preprocess_dataset``)
            augment: forwarded to ``preprocess_dataset``; ignored when
                ``use_chunks`` is false

        Returns:
            X: feature matrix (one row per successfully processed item)
            y: labels as a NumPy array, aligned with the rows of ``X``
            feature_names: column index of the feature DataFrame
        """
        features_list = []
        valid_labels = []

        if use_chunks:
            # Preprocessing - split the files into labelled chunks
            chunk_data, chunk_labels = self.preprocessor.preprocess_dataset(
                audio_files, labels, augment=augment
            )

            print("\nExtracting features from chunks...")
            for idx, (chunk_info, label) in enumerate(zip(chunk_data, chunk_labels)):
                try:
                    # Features come straight from the in-memory samples
                    features = self.feature_extractor.extract_features(chunk_info['chunk_audio'])
                    features_list.append(features)
                    valid_labels.append(label)

                    if (idx + 1) % 100 == 0:
                        print(f"Processed {idx + 1}/{len(chunk_data)} chunks")

                except Exception as e:
                    # Best effort: one bad chunk must not abort the whole run
                    print(f"Error extracting features from chunk {idx}: {e}")
        else:
            # No chunking - one feature row per whole file
            print("\nExtracting features from full audio files...")
            for audio_file, label in zip(audio_files, labels):
                try:
                    features = self.feature_extractor.extract_features(audio_file)
                    features_list.append(features)
                    valid_labels.append(label)
                except Exception as e:
                    # Best effort: one bad file must not abort the whole run
                    print(f"Error processing {audio_file}: {e}")

        # Convert the list of feature dicts into a matrix
        df = pd.DataFrame(features_list)
        X = df.values
        y = np.array(valid_labels)

        print(f"\nFinal dataset size: {X.shape[0]} samples with {X.shape[1]} features")

        return X, y, df.columns

    def _resolve_target_names(self, y_true, y_pred):
        """Return display names matching the classes present in the evaluation data.

        The predefined names are used when their count equals the number of
        distinct labels in ``y_true`` and ``y_pred`` together. Otherwise the
        raw label values are used as names, so the report and the confusion
        matrix stay consistent instead of failing on a length mismatch.
        """
        names = list(self._DEFAULT_TARGET_NAMES[:self.n_classes])
        present = np.unique(
            np.concatenate([np.asarray(y_true).ravel(), np.asarray(y_pred).ravel()])
        )
        if len(present) == len(names):
            return names

        print(
            f"Warning: {len(present)} classes present in the evaluation data but "
            f"{len(names)} predefined names available; using raw label values as names"
        )
        return [str(label) for label in present]

    def train(self, X, y, feature_names):
        """
        Train the model and print/plot an evaluation.

        Splits the data 80/20 (stratified), fits the scaler and the model on
        the training part, prints a classification report, shows a confusion
        matrix heatmap, prints 5-fold cross-validation accuracy on the
        training part and stores the feature importances.

        Args:
            X: feature matrix
            y: labels
            feature_names: feature names, one per column of ``X``

        Returns:
            tuple: ``(model, feature_importance)`` where ``feature_importance``
            is a DataFrame with columns ``feature`` and ``importance`` sorted
            by descending importance.
        """
        # Train/test split
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        # Feature scaling (fitted on the training part only)
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)

        # Model selection
        if self.model_type == 'xgboost':
            self.model = xgb.XGBClassifier(
                n_estimators=100,
                max_depth=5,
                learning_rate=0.1,
                objective='multi:softmax',
                num_class=self.n_classes,
                random_state=42
            )
        else:
            self.model = GradientBoostingClassifier(
                n_estimators=100,
                max_depth=20,
                learning_rate=0.1,
                random_state=42
            )

        self.model.fit(X_train_scaled, y_train)

        # Evaluation on the held-out part
        y_pred = self.model.predict(X_test_scaled)

        # Names must match the classes actually present, otherwise
        # classification_report raises after the model has been fitted.
        target_names = self._resolve_target_names(y_test, y_pred)
        print("\nClassification Report:")
        print(classification_report(y_test, y_pred, target_names=target_names))

        # Confusion matrix
        cm = confusion_matrix(y_test, y_pred)
        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=target_names, yticklabels=target_names)
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.show()

        # Cross-validation on the training part
        cv_scores = cross_val_score(self.model, X_train_scaled, y_train, cv=5)
        print(f"\nCross-validation accuracy: {cv_scores.mean():.3f} (+/- {cv_scores.std() * 2:.3f})")

        # Feature importance
        importance = self.model.feature_importances_

        self.feature_importance = pd.DataFrame({
            'feature': feature_names,
            'importance': importance
        }).sort_values('importance', ascending=False)

        return self.model, self.feature_importance

In [None]:
# Demo: build a 4-class XGBoost-backed classifier and request feedback for a
# single recording.
# NOTE(review): the classifier is freshly constructed here; whether
# get_interpretable_feedback loads or needs a trained model is not visible
# in this part of the file.
classifier = AudioDistanceClassifier(n_classes=4, model_type='xgboost')
feedback = classifier.get_interpretable_feedback('test_audio.wav')

# Headline: predicted class, then the confidence with one decimal place
# followed by a percent sign.
print(f"\nPredikce: {feedback['prediction']}")
print(f"Jistota: {feedback['confidence']:.1f}%")

# One bullet line per recommendation.
print("\nDoporučení:")
for recommendation in feedback['recommendations']:
    print(f"  • {recommendation}")