# üîä Audio Anomaly Detection v2 (Advanced Techniques)

**Advanced improvements for better generalization:**
- ‚úÖ Data augmentation (time stretch, pitch shift, noise, SpecAugment)
- ‚úÖ Variational Autoencoder (VAE) with KL regularization
- ‚úÖ Sub-cluster anomaly detection (k-means + distance)
- ‚úÖ Mahalanobis distance scoring
- ‚úÖ Segment-based processing (overlapping windows)
- ‚úÖ Enhanced ensemble with weighted voting

**Target:** AUC > 0.70 on both source and target domains

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Install dependencies
!pip install panns-inference librosa tqdm scikit-learn joblib torch -q
print('‚úÖ Installation complete!')

‚úÖ Installation complete!


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import librosa
import glob
import os
import re
from collections import defaultdict
from tqdm import tqdm
from panns_inference import AudioTagging
from sklearn.mixture import GaussianMixture
from sklearn.ensemble import IsolationForest
from sklearn.neighbors import LocalOutlierFactor
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import roc_auc_score, accuracy_score, roc_curve
from sklearn.covariance import MinCovDet
from scipy.spatial.distance import mahalanobis
import joblib
import warnings
warnings.filterwarnings('ignore')

device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'üñ•Ô∏è Using device: {device}')

üñ•Ô∏è Using device: cpu


## 1. Configuration

In [5]:
# ============= UPDATE THESE PATHS =============
BASE_DATA_PATH = '/content/drive/MyDrive/Data'
SAVE_PATH = '/content/drive/MyDrive/MaintanenceAI'
# ===============================================

MACHINE_TYPES = ['fan', 'pump', 'valve']
SAMPLE_RATE = 16000  # Use 16kHz for efficiency
SEGMENT_LENGTH = 1.0  # seconds per segment
SEGMENT_HOP = 0.5  # 50% overlap
N_MELS = 128
N_MFCC = 20
PCA_COMPONENTS = 64  # More aggressive reduction
N_SUBCLUSTERS = 16  # For sub-cluster anomaly detection
VAE_LATENT_DIM = 16
VAE_EPOCHS = 100
N_AUGMENTATIONS = 3  # Augmentation multiplier

print(f'üéØ Config: {MACHINE_TYPES}')
print(f'üìä Segment: {SEGMENT_LENGTH}s, Hop: {SEGMENT_HOP}s')
print(f'üìä PCA: {PCA_COMPONENTS} dims, Sub-clusters: {N_SUBCLUSTERS}')

üéØ Config: ['fan', 'pump', 'valve']
üìä Segment: 1.0s, Hop: 0.5s
üìä PCA: 64 dims, Sub-clusters: 16


## 2. Load PANNs Model

In [6]:
print('üîÑ Loading PANNs model...')
panns_model = AudioTagging(checkpoint_path=None, device=device)
print('‚úÖ PANNs model loaded!')

üîÑ Loading PANNs model...
Checkpoint path: /root/panns_data/Cnn14_mAP=0.431.pth
Using CPU.
‚úÖ PANNs model loaded!


## 3. Data Augmentation

In [7]:
class AudioAugmentor:
    """
    Audio augmentation for robust training.
    """
    def __init__(self, sr=SAMPLE_RATE):
        self.sr = sr

    def time_stretch(self, audio, rate=None):
        """Time stretch without changing pitch."""
        if rate is None:
            rate = np.random.uniform(0.8, 1.2)
        return librosa.effects.time_stretch(audio, rate=rate)

    def pitch_shift(self, audio, n_steps=None):
        """Pitch shift."""
        if n_steps is None:
            n_steps = np.random.uniform(-2, 2)
        return librosa.effects.pitch_shift(audio, sr=self.sr, n_steps=n_steps)

    def add_noise(self, audio, snr_db=None):
        """Add Gaussian noise."""
        if snr_db is None:
            snr_db = np.random.uniform(20, 40)
        signal_power = np.mean(audio**2)
        noise_power = signal_power / (10**(snr_db/10))
        noise = np.random.normal(0, np.sqrt(noise_power), len(audio))
        return audio + noise

    def time_mask(self, audio, max_mask_ratio=0.1):
        """Randomly mask time segments."""
        mask_len = int(len(audio) * np.random.uniform(0, max_mask_ratio))
        start = np.random.randint(0, len(audio) - mask_len)
        audio_masked = audio.copy()
        audio_masked[start:start+mask_len] = 0
        return audio_masked

    def gain(self, audio, gain_db=None):
        """Apply gain."""
        if gain_db is None:
            gain_db = np.random.uniform(-6, 6)
        return audio * (10**(gain_db/20))

    def augment(self, audio, n_augmentations=N_AUGMENTATIONS):
        """Apply random augmentations."""
        augmented = [audio]  # Original

        for i in range(n_augmentations):
            aug = audio.copy()
            # Apply random subset of augmentations
            if np.random.random() > 0.5:
                aug = self.add_noise(aug)
            if np.random.random() > 0.5:
                aug = self.time_stretch(aug)
            if np.random.random() > 0.7:
                aug = self.pitch_shift(aug)
            if np.random.random() > 0.7:
                aug = self.time_mask(aug)
            if np.random.random() > 0.5:
                aug = self.gain(aug)
            augmented.append(aug)

        return augmented

augmentor = AudioAugmentor()
print('‚úÖ Augmentor ready!')

‚úÖ Augmentor ready!


## 4. Segment-Based Feature Extraction

In [8]:
def extract_log_mel_spectrogram(audio, sr, n_mels=N_MELS, n_fft=1024, hop_length=512):
    """Extract log mel spectrogram."""
    mel = librosa.feature.melspectrogram(
        y=audio, sr=sr, n_mels=n_mels, n_fft=n_fft, hop_length=hop_length
    )
    log_mel = librosa.power_to_db(mel, ref=np.max)
    return log_mel


def extract_segment_features(audio, sr):
    """
    Extract comprehensive features from an audio segment.
    """
    features = []

    # Log-mel spectrogram statistics
    log_mel = extract_log_mel_spectrogram(audio, sr)
    features.extend([
        np.mean(log_mel, axis=1),  # Mean per mel band
        np.std(log_mel, axis=1),   # Std per mel band
        np.max(log_mel, axis=1) - np.min(log_mel, axis=1),  # Range
    ])

    # MFCCs with deltas
    mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=N_MFCC)
    mfcc_delta = librosa.feature.delta(mfcc)
    mfcc_delta2 = librosa.feature.delta(mfcc, order=2)
    features.extend([
        np.mean(mfcc, axis=1),
        np.std(mfcc, axis=1),
        np.mean(mfcc_delta, axis=1),
        np.mean(mfcc_delta2, axis=1),
    ])

    # Spectral features
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr)[0]
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr)[0]
    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr)[0]
    spectral_flatness = librosa.feature.spectral_flatness(y=audio)[0]
    zcr = librosa.feature.zero_crossing_rate(audio)[0]
    rms = librosa.feature.rms(y=audio)[0]

    features.extend([
        [np.mean(spectral_centroid), np.std(spectral_centroid)],
        [np.mean(spectral_bandwidth), np.std(spectral_bandwidth)],
        [np.mean(spectral_rolloff), np.std(spectral_rolloff)],
        [np.mean(spectral_flatness), np.std(spectral_flatness)],
        [np.mean(zcr), np.std(zcr)],
        [np.mean(rms), np.std(rms), np.max(rms)],
    ])

    # Flatten and concatenate
    return np.concatenate([np.array(f).flatten() for f in features])


def extract_file_features(file_path, augment=False):
    """
    Extract features from an audio file using segment-based processing.
    Returns multiple feature vectors (one per segment).
    """
    try:
        audio, sr = librosa.load(file_path, sr=SAMPLE_RATE, mono=True)

        # Apply augmentation if training
        if augment:
            audio_versions = augmentor.augment(audio)
        else:
            audio_versions = [audio]

        all_features = []
        segment_samples = int(SEGMENT_LENGTH * sr)
        hop_samples = int(SEGMENT_HOP * sr)

        for audio_ver in audio_versions:
            # Pad if too short
            if len(audio_ver) < segment_samples:
                audio_ver = np.pad(audio_ver, (0, segment_samples - len(audio_ver)))

            # Extract segments
            for start in range(0, len(audio_ver) - segment_samples + 1, hop_samples):
                segment = audio_ver[start:start + segment_samples]
                feat = extract_segment_features(segment, sr)
                all_features.append(feat)

        return np.array(all_features)

    except Exception as e:
        print(f'Error: {file_path}: {e}')
        return None


def extract_panns_embedding(file_path, model):
    """Extract PANNs embedding (full file)."""
    try:
        audio, sr = librosa.load(file_path, sr=32000, mono=True)
        if len(audio) < 32000:
            audio = np.pad(audio, (0, 32000 - len(audio)))
        audio = audio[np.newaxis, :]
        _, embedding = model.inference(audio)
        return embedding[0]
    except:
        return None


def extract_hybrid_features(file_paths, panns_model, augment=False, desc='Extracting'):
    """
    Extract segment-based features for all files.
    """
    all_segment_features = []
    file_indices = []  # Track which segments belong to which file
    valid_paths = []

    for idx, path in enumerate(tqdm(file_paths, desc=desc)):
        # Segment features
        seg_feat = extract_file_features(path, augment=augment)
        if seg_feat is None or len(seg_feat) == 0:
            continue

        all_segment_features.append(seg_feat)
        file_indices.append(np.full(len(seg_feat), idx))
        valid_paths.append(path)

    # Concatenate all segments
    segment_features = np.vstack(all_segment_features)
    file_indices = np.concatenate(file_indices)

    return segment_features, file_indices, valid_paths


print('‚úÖ Feature extraction ready!')
# Test feature dimension
test_audio = np.random.randn(int(SEGMENT_LENGTH * SAMPLE_RATE))
test_feat = extract_segment_features(test_audio, SAMPLE_RATE)
print(f'üìä Segment feature dimension: {len(test_feat)}')

‚úÖ Feature extraction ready!
üìä Segment feature dimension: 477


## 5. Variational Autoencoder (VAE)

In [9]:
class VariationalAutoencoder(nn.Module):
    """
    VAE for anomaly detection with KL regularization.
    """
    def __init__(self, input_dim, latent_dim=VAE_LATENT_DIM):
        super().__init__()

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 256),
            nn.LeakyReLU(0.2),
            nn.BatchNorm1d(256),
            nn.Dropout(0.3),
            nn.Linear(256, 128),
            nn.LeakyReLU(0.2),
            nn.BatchNorm1d(128),
            nn.Dropout(0.2),
            nn.Linear(128, 64),
            nn.LeakyReLU(0.2),
        )

        # Latent space
        self.fc_mu = nn.Linear(64, latent_dim)
        self.fc_logvar = nn.Linear(64, latent_dim)

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 64),
            nn.LeakyReLU(0.2),
            nn.BatchNorm1d(64),
            nn.Linear(64, 128),
            nn.LeakyReLU(0.2),
            nn.BatchNorm1d(128),
            nn.Dropout(0.2),
            nn.Linear(128, 256),
            nn.LeakyReLU(0.2),
            nn.BatchNorm1d(256),
            nn.Linear(256, input_dim),
        )

        self.latent_dim = latent_dim

    def encode(self, x):
        h = self.encoder(x)
        return self.fc_mu(h), self.fc_logvar(h)

    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std

    def decode(self, z):
        return self.decoder(z)

    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        recon = self.decode(z)
        return recon, mu, logvar

    def get_anomaly_score(self, x):
        """Get anomaly score (reconstruction error + KL divergence)."""
        self.eval()
        with torch.no_grad():
            recon, mu, logvar = self.forward(x)
            # Reconstruction error
            recon_loss = torch.mean((x - recon)**2, dim=1)
            # KL divergence
            kl_loss = -0.5 * torch.sum(1 + logvar - mu**2 - logvar.exp(), dim=1)
            # Combined score
            score = recon_loss + 0.1 * kl_loss
        return score.cpu().numpy()


def train_vae(X_train, epochs=VAE_EPOCHS, batch_size=128, lr=1e-3):
    """Train VAE on normal data."""
    input_dim = X_train.shape[1]
    model = VariationalAutoencoder(input_dim).to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, epochs)

    X_tensor = torch.FloatTensor(X_train).to(device)
    dataset = torch.utils.data.TensorDataset(X_tensor)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in loader:
            x = batch[0]
            optimizer.zero_grad()

            recon, mu, logvar = model(x)

            # VAE loss
            recon_loss = F.mse_loss(recon, x, reduction='mean')
            kl_loss = -0.5 * torch.mean(1 + logvar - mu**2 - logvar.exp())
            loss = recon_loss + 0.1 * kl_loss

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            total_loss += loss.item()

        scheduler.step()
        if (epoch + 1) % 20 == 0:
            print(f'    Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(loader):.4f}')

    return model

print('‚úÖ VAE ready!')

‚úÖ VAE ready!


## 6. Sub-Cluster Anomaly Detection

In [10]:
class SubClusterDetector:
    """
    Anomaly detection using sub-clusters of normal data.
    Normal data may have multiple modes; we model each with a cluster.
    """
    def __init__(self, n_clusters=N_SUBCLUSTERS):
        self.n_clusters = n_clusters
        self.kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
        self.cluster_covs = []
        self.cluster_means = []
        self.fitted = False

    def fit(self, X):
        """Fit sub-clusters on normal data."""
        self.kmeans.fit(X)
        labels = self.kmeans.labels_

        self.cluster_means = []
        self.cluster_covs = []

        for i in range(self.n_clusters):
            cluster_data = X[labels == i]
            if len(cluster_data) > 1:
                self.cluster_means.append(np.mean(cluster_data, axis=0))
                try:
                    cov = MinCovDet().fit(cluster_data).covariance_
                except:
                    cov = np.cov(cluster_data.T) + 1e-6 * np.eye(cluster_data.shape[1])
                self.cluster_covs.append(cov)

        self.fitted = True

    def score_samples(self, X):
        """
        Score samples: minimum distance to any cluster.
        Higher score = more anomalous.
        """
        scores = []
        for x in X:
            min_dist = float('inf')
            for mean, cov in zip(self.cluster_means, self.cluster_covs):
                try:
                    cov_inv = np.linalg.pinv(cov)
                    dist = mahalanobis(x, mean, cov_inv)
                except:
                    dist = np.linalg.norm(x - mean)
                min_dist = min(min_dist, dist)
            scores.append(min_dist)
        return np.array(scores)

print('‚úÖ Sub-cluster detector ready!')

‚úÖ Sub-cluster detector ready!


## 7. Mahalanobis Distance Detector

In [11]:
class MahalanobisDetector:
    """
    Anomaly detection using Mahalanobis distance.
    """
    def __init__(self, robust=True):
        self.robust = robust
        self.mean = None
        self.cov_inv = None

    def fit(self, X):
        """Fit on normal data."""
        self.mean = np.mean(X, axis=0)

        if self.robust:
            try:
                mcd = MinCovDet().fit(X)
                cov = mcd.covariance_
            except:
                cov = np.cov(X.T)
        else:
            cov = np.cov(X.T)

        # Add regularization for stability
        cov = cov + 1e-6 * np.eye(cov.shape[0])
        self.cov_inv = np.linalg.pinv(cov)

    def score_samples(self, X):
        """Mahalanobis distance from normal distribution."""
        scores = []
        for x in X:
            scores.append(mahalanobis(x, self.mean, self.cov_inv))
        return np.array(scores)

print('‚úÖ Mahalanobis detector ready!')

‚úÖ Mahalanobis detector ready!


## 8. Advanced Ensemble Detector

In [12]:
class AdvancedEnsembleDetector:
    """
    Advanced ensemble with multiple complementary methods.
    """
    def __init__(self, pca_components=PCA_COMPONENTS):
        self.scaler = StandardScaler()
        self.pca = PCA(n_components=pca_components)

        # Detectors
        self.gmm = GaussianMixture(
            n_components=8, covariance_type='full',
            random_state=42, max_iter=300, reg_covar=1e-5
        )
        self.iforest = IsolationForest(
            n_estimators=200, contamination=0.05,
            random_state=42, n_jobs=-1
        )
        self.lof = LocalOutlierFactor(
            n_neighbors=30, contamination=0.05,
            novelty=True, n_jobs=-1
        )
        self.subcluster = SubClusterDetector(n_clusters=N_SUBCLUSTERS)
        self.mahal = MahalanobisDetector(robust=True)
        self.vae = None

        self.fitted = False

    def fit(self, X_train):
        """Fit all detectors."""
        print('  üìä Preprocessing...')
        X_scaled = self.scaler.fit_transform(X_train)
        X_pca = self.pca.fit_transform(X_scaled)
        print(f'    PCA variance: {sum(self.pca.explained_variance_ratio_)*100:.1f}%')

        print('  üîß Training detectors...')
        self.gmm.fit(X_pca)
        self.iforest.fit(X_pca)
        self.lof.fit(X_pca)

        print('  üéØ Training sub-cluster detector...')
        self.subcluster.fit(X_pca)

        print('  üìè Training Mahalanobis detector...')
        self.mahal.fit(X_pca)

        print('  üß† Training VAE...')
        self.vae = train_vae(X_pca)

        self.fitted = True
        print('  ‚úÖ Ensemble training complete!')

    def _get_individual_scores(self, X_pca):
        """Get scores from all methods."""
        scores = {}

        # GMM
        scores['gmm'] = -self.gmm.score_samples(X_pca)

        # Isolation Forest
        scores['iforest'] = -self.iforest.score_samples(X_pca)

        # LOF
        scores['lof'] = -self.lof.score_samples(X_pca)

        # Sub-cluster
        scores['subcluster'] = self.subcluster.score_samples(X_pca)

        # Mahalanobis
        scores['mahal'] = self.mahal.score_samples(X_pca)

        # VAE
        X_tensor = torch.FloatTensor(X_pca).to(device)
        scores['vae'] = self.vae.get_anomaly_score(X_tensor)

        return scores

    def score_files(self, all_segment_features, file_indices):
        """
        Score multiple files.
        """
        X_scaled = self.scaler.transform(all_segment_features)
        X_pca = self.pca.transform(X_scaled)

        # Get segment-level scores
        segment_scores = self._get_individual_scores(X_pca)

        # Aggregate by file
        unique_files = np.unique(file_indices)
        file_scores = {name: [] for name in segment_scores}

        for file_idx in unique_files:
            mask = file_indices == file_idx
            for name, scores in segment_scores.items():
                # Use multiple aggregations
                max_score = np.max(scores[mask])
                mean_score = np.mean(scores[mask])
                p90_score = np.percentile(scores[mask], 90)
                # Weighted combination
                agg_score = 0.5 * max_score + 0.3 * p90_score + 0.2 * mean_score
                file_scores[name].append(agg_score)

        return {name: np.array(scores) for name, scores in file_scores.items()}

    def ensemble_score(self, all_segment_features, file_indices):
        """
        Get ensemble score per file.
        """
        individual_scores = self.score_files(all_segment_features, file_indices)

        # Normalize each
        normalized = {}
        for name, scores in individual_scores.items():
            s_min, s_max = scores.min(), scores.max()
            if s_max > s_min:
                normalized[name] = (scores - s_min) / (s_max - s_min)
            else:
                normalized[name] = np.zeros_like(scores)

        # Weighted ensemble
        weights = {
            'gmm': 1.0,
            'iforest': 1.0,
            'lof': 0.8,
            'subcluster': 1.2,
            'mahal': 1.0,
            'vae': 1.2
        }

        ensemble = sum(weights[k] * normalized[k] for k in weights)
        ensemble /= sum(weights.values())

        return ensemble, individual_scores

print('‚úÖ Advanced ensemble ready!')

‚úÖ Advanced ensemble ready!


## 9. Training Functions

In [13]:
def parse_filename(filename):
    """Parse DCASE filename."""
    basename = os.path.basename(filename)
    section_match = re.search(r'section_(\d+)', basename)
    section = section_match.group(1) if section_match else 'unknown'
    label = 'anomaly' if 'anomaly' in basename else 'normal'
    return {'section': section, 'label': label}


def group_by_section(file_paths):
    """Group files by section."""
    sections = defaultdict(list)
    for path in file_paths:
        info = parse_filename(path)
        sections[info['section']].append(path)
    return dict(sections)


def get_label(filename):
    return 1 if 'anomaly' in filename else 0


def evaluate_detector(detector, test_segment_features, file_indices, y_test, test_name):
    """Evaluate detector on test data."""
    ensemble_scores, individual_scores = detector.ensemble_score(test_segment_features, file_indices)

    results = {}
    for name, scores in individual_scores.items():
        try:
            auc = roc_auc_score(y_test, scores)
        except:
            auc = 0.5
        results[f'{name}_auc'] = auc

    try:
        ensemble_auc = roc_auc_score(y_test, ensemble_scores)
    except:
        ensemble_auc = 0.5
    results['ensemble_auc'] = ensemble_auc

    # Best method
    all_aucs = {k: v for k, v in results.items() if k.endswith('_auc')}
    best_key = max(all_aucs, key=all_aucs.get)
    best_auc = all_aucs[best_key]
    best_method = best_key.replace('_auc', '')

    # Get best scores for accuracy
    if best_method == 'ensemble':
        best_scores = ensemble_scores
    else:
        best_scores = individual_scores[best_method]

    fpr, tpr, thresholds = roc_curve(y_test, best_scores)
    best_idx = np.argmax(tpr - fpr)
    predictions = (best_scores > thresholds[best_idx]).astype(int)
    accuracy = accuracy_score(y_test, predictions)

    results['best_auc'] = best_auc
    results['best_method'] = best_method
    results['accuracy'] = accuracy

    status = '‚úÖ' if best_auc > 0.7 else '‚ö†Ô∏è' if best_auc > 0.6 else '‚ùå'

    print(f'  {test_name}:')
    print(f'    GMM={results["gmm_auc"]:.3f}, IF={results["iforest_auc"]:.3f}, '
          f'LOF={results["lof_auc"]:.3f}, SC={results["subcluster_auc"]:.3f}, '
          f'MH={results["mahal_auc"]:.3f}, VAE={results["vae_auc"]:.3f}')
    print(f'    Ensemble={ensemble_auc:.3f} ‚Üí Best: {best_auc:.3f} ({best_method}) Acc: {accuracy*100:.1f}% {status}')

    return results


def train_section(machine_type, section_id, train_files, source_test, target_test):
    """Train model for a specific section."""
    print(f'\n  üìÇ Section {section_id}: {len(train_files)} files')

    # Extract features with augmentation
    print('  üéµ Extracting training features (with augmentation)...')
    train_seg_feat, train_idx, _ = extract_hybrid_features(
        train_files, panns_model, augment=True, desc=f'Train sec{section_id}'
    )
    print(f'    Segment features: {train_seg_feat.shape}')

    # Train detector
    detector = AdvancedEnsembleDetector()
    detector.fit(train_seg_feat)

    # Filter test files by section
    source_section = [f for f in source_test if f'section_{section_id}' in f]
    target_section = [f for f in target_test if f'section_{section_id}' in f]

    results = {'section': section_id}

    # Evaluate source
    if source_section:
        print(f'\n  üìä Evaluating source_test...')
        src_seg, src_idx, src_paths = extract_hybrid_features(
            source_section, panns_model, augment=False, desc=f'SrcTest sec{section_id}'
        )
        y_source = np.array([get_label(os.path.basename(p)) for p in src_paths])
        # Remap file indices to 0..n-1
        unique_idx = np.unique(src_idx)
        idx_map = {old: new for new, old in enumerate(unique_idx)}
        src_idx_mapped = np.array([idx_map[i] for i in src_idx])
        results['source'] = evaluate_detector(detector, src_seg, src_idx_mapped, y_source, 'source_test')

    # Evaluate target
    if target_section:
        print(f'\n  üìä Evaluating target_test...')
        tgt_seg, tgt_idx, tgt_paths = extract_hybrid_features(
            target_section, panns_model, augment=False, desc=f'TgtTest sec{section_id}'
        )
        y_target = np.array([get_label(os.path.basename(p)) for p in tgt_paths])
        unique_idx = np.unique(tgt_idx)
        idx_map = {old: new for new, old in enumerate(unique_idx)}
        tgt_idx_mapped = np.array([idx_map[i] for i in tgt_idx])
        results['target'] = evaluate_detector(detector, tgt_seg, tgt_idx_mapped, y_target, 'target_test')

    return detector, results


def train_machine(machine_type):
    """Train all sections for a machine type."""
    print(f'\n{"="*70}')
    print(f'üîä {machine_type.upper()} - Advanced Training v2')
    print(f'{"="*70}')

    data_path = os.path.join(BASE_DATA_PATH, machine_type)

    train_files = sorted(glob.glob(os.path.join(data_path, 'train', '*.wav')))
    source_test = sorted(glob.glob(os.path.join(data_path, 'source_test', '*.wav')))
    target_test = sorted(glob.glob(os.path.join(data_path, 'target_test', '*.wav')))

    # Keep only normal training files
    train_files = [f for f in train_files if 'normal' in os.path.basename(f)]

    print(f'üìÅ Train: {len(train_files)}, Source: {len(source_test)}, Target: {len(target_test)}')

    if not train_files:
        return None

    sections = group_by_section(train_files)
    print(f'üìÇ Sections: {list(sections.keys())}')

    all_results = {}
    for section_id in sorted(sections.keys()):
        detector, results = train_section(
            machine_type, section_id, sections[section_id],
            source_test, target_test
        )
        all_results[section_id] = results

    # Save
    os.makedirs(SAVE_PATH, exist_ok=True)
    save_file = os.path.join(SAVE_PATH, f'audio_advanced_v2_{machine_type}.pkl')
    joblib.dump({'results': all_results, 'sections': list(sections.keys())}, save_file)
    print(f'\n‚úÖ Saved: {save_file}')

    return all_results

print('‚úÖ Training functions ready!')

‚úÖ Training functions ready!


## 10. Train All Machines

In [14]:
all_machine_results = {}

for machine in MACHINE_TYPES:
    results = train_machine(machine)
    if results:
        all_machine_results[machine] = results

print(f'\n{"="*70}')
print('üéâ Training Complete!')
print(f'{"="*70}')


üîä FAN - Advanced Training v2
üìÅ Train: 3009, Source: 600, Target: 600
üìÇ Sections: ['00', '01', '02']

  üìÇ Section 00: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [19:21<00:00,  1.16s/it]


    Segment features: (75970, 477)
  üìä Preprocessing...
    PCA variance: 94.1%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9531
    Epoch 40/100, Loss: 0.9092
    Epoch 60/100, Loss: 0.8867
    Epoch 80/100, Loss: 0.8725
    Epoch 100/100, Loss: 0.8636
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [02:01<00:00,  1.65it/s]


  source_test:
    GMM=0.591, IF=0.511, LOF=0.642, SC=0.608, MH=0.517, VAE=0.517
    Ensemble=0.564 ‚Üí Best: 0.642 (lof) Acc: 64.5% ‚ö†Ô∏è

  üìä Evaluating target_test...


TgtTest sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [02:00<00:00,  1.66it/s]


  target_test:
    GMM=0.645, IF=0.563, LOF=0.584, SC=0.610, MH=0.605, VAE=0.554
    Ensemble=0.603 ‚Üí Best: 0.645 (gmm) Acc: 64.0% ‚ö†Ô∏è

  üìÇ Section 01: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [18:07<00:00,  1.08s/it]


    Segment features: (75953, 477)
  üìä Preprocessing...
    PCA variance: 94.2%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9521
    Epoch 40/100, Loss: 0.9055
    Epoch 60/100, Loss: 0.8809
    Epoch 80/100, Loss: 0.8642
    Epoch 100/100, Loss: 0.8614
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:59<00:00,  1.67it/s]


  source_test:
    GMM=0.571, IF=0.542, LOF=0.588, SC=0.595, MH=0.531, VAE=0.537
    Ensemble=0.557 ‚Üí Best: 0.595 (subcluster) Acc: 60.0% ‚ùå

  üìä Evaluating target_test...


TgtTest sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:55<00:00,  1.73it/s]


  target_test:
    GMM=0.501, IF=0.523, LOF=0.471, SC=0.527, MH=0.551, VAE=0.502
    Ensemble=0.519 ‚Üí Best: 0.551 (mahal) Acc: 56.0% ‚ùå

  üìÇ Section 02: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [18:01<00:00,  1.08s/it]


    Segment features: (75762, 477)
  üìä Preprocessing...
    PCA variance: 94.3%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9593
    Epoch 40/100, Loss: 0.9025
    Epoch 60/100, Loss: 0.8763
    Epoch 80/100, Loss: 0.8683
    Epoch 100/100, Loss: 0.8586
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:53<00:00,  1.76it/s]


  source_test:
    GMM=0.602, IF=0.561, LOF=0.665, SC=0.628, MH=0.553, VAE=0.562
    Ensemble=0.608 ‚Üí Best: 0.665 (lof) Acc: 63.5% ‚ö†Ô∏è

  üìä Evaluating target_test...


TgtTest sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:52<00:00,  1.78it/s]


  target_test:
    GMM=0.492, IF=0.493, LOF=0.550, SC=0.490, MH=0.462, VAE=0.489
    Ensemble=0.493 ‚Üí Best: 0.550 (lof) Acc: 57.5% ‚ùå

‚úÖ Saved: /content/drive/MyDrive/MaintanenceAI/audio_advanced_v2_fan.pkl

üîä PUMP - Advanced Training v2
üìÅ Train: 3009, Source: 600, Target: 600
üìÇ Sections: ['00', '01', '02']

  üìÇ Section 00: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [18:54<00:00,  1.13s/it]


    Segment features: (75718, 477)
  üìä Preprocessing...
    PCA variance: 93.8%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9717
    Epoch 40/100, Loss: 0.9199
    Epoch 60/100, Loss: 0.8946
    Epoch 80/100, Loss: 0.8786
    Epoch 100/100, Loss: 0.8756
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:51<00:00,  1.79it/s]


  source_test:
    GMM=0.621, IF=0.569, LOF=0.636, SC=0.635, MH=0.503, VAE=0.604
    Ensemble=0.600 ‚Üí Best: 0.636 (lof) Acc: 62.5% ‚ö†Ô∏è

  üìä Evaluating target_test...


TgtTest sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:53<00:00,  1.77it/s]


  target_test:
    GMM=0.523, IF=0.517, LOF=0.486, SC=0.516, MH=0.594, VAE=0.514
    Ensemble=0.531 ‚Üí Best: 0.594 (mahal) Acc: 57.0% ‚ùå

  üìÇ Section 01: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [18:08<00:00,  1.08s/it]


    Segment features: (75926, 477)
  üìä Preprocessing...
    PCA variance: 93.7%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9728
    Epoch 40/100, Loss: 0.9316
    Epoch 60/100, Loss: 0.9045
    Epoch 80/100, Loss: 0.8813
    Epoch 100/100, Loss: 0.8817
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:55<00:00,  1.74it/s]


  source_test:
    GMM=0.613, IF=0.524, LOF=0.624, SC=0.580, MH=0.536, VAE=0.533
    Ensemble=0.576 ‚Üí Best: 0.624 (lof) Acc: 62.5% ‚ö†Ô∏è

  üìä Evaluating target_test...


TgtTest sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:55<00:00,  1.73it/s]


  target_test:
    GMM=0.456, IF=0.443, LOF=0.480, SC=0.459, MH=0.499, VAE=0.442
    Ensemble=0.454 ‚Üí Best: 0.499 (mahal) Acc: 56.0% ‚ùå

  üìÇ Section 02: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [17:52<00:00,  1.07s/it]


    Segment features: (75728, 477)
  üìä Preprocessing...
    PCA variance: 93.8%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9666
    Epoch 40/100, Loss: 0.9108
    Epoch 60/100, Loss: 0.8893
    Epoch 80/100, Loss: 0.8751
    Epoch 100/100, Loss: 0.8750
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:56<00:00,  1.71it/s]


  source_test:
    GMM=0.625, IF=0.530, LOF=0.636, SC=0.610, MH=0.495, VAE=0.568
    Ensemble=0.578 ‚Üí Best: 0.636 (lof) Acc: 62.0% ‚ö†Ô∏è

  üìä Evaluating target_test...


TgtTest sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:53<00:00,  1.76it/s]


  target_test:
    GMM=0.559, IF=0.488, LOF=0.589, SC=0.564, MH=0.491, VAE=0.518
    Ensemble=0.530 ‚Üí Best: 0.589 (lof) Acc: 59.5% ‚ùå

‚úÖ Saved: /content/drive/MyDrive/MaintanenceAI/audio_advanced_v2_pump.pkl

üîä VALVE - Advanced Training v2
üìÅ Train: 3009, Source: 600, Target: 600
üìÇ Sections: ['00', '01', '02']

  üìÇ Section 00: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [18:35<00:00,  1.11s/it]


    Segment features: (75979, 477)
  üìä Preprocessing...
    PCA variance: 93.4%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9627
    Epoch 40/100, Loss: 0.9140
    Epoch 60/100, Loss: 0.8849
    Epoch 80/100, Loss: 0.8703
    Epoch 100/100, Loss: 0.8633
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:53<00:00,  1.77it/s]


  source_test:
    GMM=0.468, IF=0.365, LOF=0.487, SC=0.477, MH=0.245, VAE=0.380
    Ensemble=0.358 ‚Üí Best: 0.487 (lof) Acc: 52.5% ‚ùå

  üìä Evaluating target_test...


TgtTest sec00: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:51<00:00,  1.79it/s]


  target_test:
    GMM=0.444, IF=0.389, LOF=0.526, SC=0.502, MH=0.296, VAE=0.396
    Ensemble=0.397 ‚Üí Best: 0.526 (lof) Acc: 56.0% ‚ùå

  üìÇ Section 01: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [17:50<00:00,  1.07s/it]


    Segment features: (75846, 477)
  üìä Preprocessing...
    PCA variance: 93.3%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9868
    Epoch 40/100, Loss: 0.9398
    Epoch 60/100, Loss: 0.9121
    Epoch 80/100, Loss: 0.8981
    Epoch 100/100, Loss: 0.8961
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:54<00:00,  1.74it/s]


  source_test:
    GMM=0.521, IF=0.451, LOF=0.598, SC=0.475, MH=0.441, VAE=0.494
    Ensemble=0.480 ‚Üí Best: 0.598 (lof) Acc: 61.5% ‚ùå

  üìä Evaluating target_test...


TgtTest sec01: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:52<00:00,  1.77it/s]


  target_test:
    GMM=0.483, IF=0.414, LOF=0.483, SC=0.502, MH=0.508, VAE=0.377
    Ensemble=0.464 ‚Üí Best: 0.508 (mahal) Acc: 55.0% ‚ùå

  üìÇ Section 02: 1003 files
  üéµ Extracting training features (with augmentation)...


Train sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1003/1003 [18:04<00:00,  1.08s/it]


    Segment features: (75864, 477)
  üìä Preprocessing...
    PCA variance: 94.1%
  üîß Training detectors...
  üéØ Training sub-cluster detector...
  üìè Training Mahalanobis detector...
  üß† Training VAE...
    Epoch 20/100, Loss: 0.9343
    Epoch 40/100, Loss: 0.8895
    Epoch 60/100, Loss: 0.8590
    Epoch 80/100, Loss: 0.8457
    Epoch 100/100, Loss: 0.8429
  ‚úÖ Ensemble training complete!

  üìä Evaluating source_test...


SrcTest sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:53<00:00,  1.76it/s]


  source_test:
    GMM=0.647, IF=0.547, LOF=0.658, SC=0.597, MH=0.543, VAE=0.535
    Ensemble=0.600 ‚Üí Best: 0.658 (lof) Acc: 61.5% ‚ö†Ô∏è

  üìä Evaluating target_test...


TgtTest sec02: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [01:53<00:00,  1.76it/s]


  target_test:
    GMM=0.467, IF=0.435, LOF=0.470, SC=0.489, MH=0.472, VAE=0.424
    Ensemble=0.470 ‚Üí Best: 0.489 (subcluster) Acc: 52.5% ‚ùå

‚úÖ Saved: /content/drive/MyDrive/MaintanenceAI/audio_advanced_v2_valve.pkl

üéâ Training Complete!


## 11. Results Summary

In [15]:
print('\nüìã Final Results Summary:\n')
print(f'{"Machine":<8} {"Sec":<5} {"Test":<8} {"GMM":<6} {"IF":<6} {"LOF":<6} {"SC":<6} {"MH":<6} {"VAE":<6} {"Ens":<6} {"Best":<6} {"Acc":<6}')
print('-' * 95)

all_aucs = []
for machine, sec_results in all_machine_results.items():
    for sec_id, results in sec_results.items():
        for test_type in ['source', 'target']:
            if test_type in results:
                m = results[test_type]
                all_aucs.append(m['best_auc'])
                print(
                    f'{machine:<8} {sec_id:<5} {test_type:<8} '
                    f'{m["gmm_auc"]:<6.3f} {m["iforest_auc"]:<6.3f} {m["lof_auc"]:<6.3f} '
                    f'{m["subcluster_auc"]:<6.3f} {m["mahal_auc"]:<6.3f} {m["vae_auc"]:<6.3f} '
                    f'{m["ensemble_auc"]:<6.3f} {m["best_auc"]:<6.3f} {m["accuracy"]*100:<5.1f}%'
                )

print(f'\nüéØ Average AUC: {np.mean(all_aucs):.4f}')
print(f'üìä Range: {np.min(all_aucs):.4f} - {np.max(all_aucs):.4f}')

# Count successes
good = sum(1 for a in all_aucs if a > 0.7)
moderate = sum(1 for a in all_aucs if 0.6 < a <= 0.7)
poor = sum(1 for a in all_aucs if a <= 0.6)
print(f'\n‚úÖ Good (>0.7): {good}, ‚ö†Ô∏è Moderate (0.6-0.7): {moderate}, ‚ùå Poor (<0.6): {poor}')


üìã Final Results Summary:

Machine  Sec   Test     GMM    IF     LOF    SC     MH     VAE    Ens    Best   Acc   
-----------------------------------------------------------------------------------------------
fan      00    source   0.591  0.511  0.642  0.608  0.517  0.517  0.564  0.642  64.5 %
fan      00    target   0.645  0.563  0.584  0.610  0.605  0.554  0.603  0.645  64.0 %
fan      01    source   0.571  0.542  0.588  0.595  0.531  0.537  0.557  0.595  60.0 %
fan      01    target   0.501  0.523  0.471  0.527  0.551  0.502  0.519  0.551  56.0 %
fan      02    source   0.602  0.561  0.665  0.628  0.553  0.562  0.608  0.665  63.5 %
fan      02    target   0.492  0.493  0.550  0.490  0.462  0.489  0.493  0.550  57.5 %
pump     00    source   0.621  0.569  0.636  0.635  0.503  0.604  0.600  0.636  62.5 %
pump     00    target   0.523  0.517  0.486  0.516  0.594  0.514  0.531  0.594  57.0 %
pump     01    source   0.613  0.524  0.624  0.580  0.536  0.533  0.576  0.624  62.5 %
pump