In [6]:
# ======================
# IMPORTS AND SETUP
# ======================
import numpy as np
import pandas as pd
import os
import itertools
from sklearn.preprocessing import StandardScaler
from sklearn.multioutput import MultiOutputClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import faiss

In [7]:
# ======================
# DATA CONFIGURATION
# ======================
# Sound-event classes, in alphabetical order. The position of a name in this
# list is the column index of that class in the label matrices built later.
categories = [
    'Airplane', 'Alarm',
    'Beep/Bleep', 'Bell', 'Bicycle', 'Bird Chirp', 'Bus',
    'Car', 'Cat Meow', 'Chainsaw', 'Clapping', 'Cough', 'Cow Moo', 'Cowbell', 'Crying',
    'Dog Bark', 'Doorbell', 'Drip', 'Drums',
    'Fire', 'Footsteps',
    'Guitar',
    'Hammer', 'Helicopter', 'Hiccup', 'Horn Honk', 'Horse Neigh',
    'Insect Buzz',
    'Jackhammer',
    'Laughter', 'Lawn Mower',
    'Motorcycle',
    'Piano', 'Pig Oink', 'Power Drill', 'Power Saw',
    'Rain', 'Rooster Crow',
    'Saxophone', 'Sewing Machine', 'Sheep/Goat Bleat', 'Ship/Boat', 'Shout', 'Singing',
    'Siren', 'Sneeze', 'Snoring', 'Speech', 'Stream/River',
    'Thunder', 'Train', 'Truck', 'Trumpet',
    'Vacuum Cleaner', 'Violin',
    'Washing Machine', 'Waves', 'Wind',
]

In [8]:
# ======================
# DATA LOADING
# ======================
try:
    print("Loading metadata and annotations...")
    ann_df = pd.read_csv('annotations.csv')
    meta_df = pd.read_csv('metadata.csv')

    # Shuffle the metadata rows once with a fixed seed, then keep each filename
    # a single time (in shuffled order). Splitting on filenames keeps all frames
    # of one recording inside a single split.
    shuffled_filenames = meta_df.sample(len(meta_df), random_state=42)['filename'].unique()

    # The 70/20/10 boundaries are computed on the number of *unique* files.
    # Using len(meta_df) here would shift the boundaries (and could leave the
    # test split empty) whenever a filename appears on more than one row.
    n_files = len(shuffled_filenames)
    train_end = int(n_files * 0.7)
    val_end = int(n_files * 0.9)

    train_filename = shuffled_filenames[:train_end]
    validation_filename = shuffled_filenames[train_end:val_end]
    test_filename = shuffled_filenames[val_end:]

    print("Data loading completed successfully!")
except Exception as e:
    # Report the problem in the cell output, then let the error propagate so
    # later cells do not run against missing data.
    print(f"Error loading data: {e}")
    raise

Loading metadata and annotations...
Data loading completed successfully!


In [9]:
# ======================
# HELPER FUNCTIONS
# ======================
def aggregate_labels(file_labels, rng=None):
    """Collapse each frame's label vector into a single label.

    For every entry of ``file_labels`` (one label vector per frame; presumably
    one value per annotator -- TODO confirm against the label files):

    * no non-zero value      -> ``0``
    * every value non-zero   -> ``1``
    * otherwise              -> one of the frame's values, drawn at random

    Args:
        file_labels: Iterable of per-frame label sequences.
        rng: Optional random source exposing ``choice`` (for example
            ``np.random.default_rng(seed)`` or ``np.random.RandomState(seed)``).
            Pass one to make the tie-breaking draw reproducible. When ``None``
            the global ``np.random`` state is used.

    Returns:
        A list with one single-element list per frame, e.g. ``[[0], [1], ...]``.
    """
    chooser = np.random if rng is None else rng
    aggregated = []
    for frame_labels in file_labels:
        # Count non-zero entries instead of summing: a sum can be zero for a
        # frame whose values merely cancel out (e.g. [1, -1]).
        n_nonzero = np.count_nonzero(frame_labels)
        if n_nonzero == 0:
            aggregated.append([0])
        elif n_nonzero == len(frame_labels):
            aggregated.append([1])
        else:
            # Mixed frame: pick one of its values at random.
            aggregated.append([chooser.choice(frame_labels)])
    return aggregated

def read_files(file_names, features_dir='audio_features', labels_dir='labels'):
    """Load frame-level features and labels for a list of files.

    For each name ``f`` the stem ``f.split('.')[0]`` is used to read
    ``<features_dir>/<stem>.npz`` (arrays ``melspectrogram`` and ``mfcc``) and
    ``<labels_dir>/<stem>_labels.npz`` (one array per entry of ``categories``).
    The two feature arrays are concatenated along axis 1 and the labels are
    reduced to one value per frame with ``aggregate_labels``.

    A file that fails at any step is reported and skipped as a whole, so the
    returned features and labels always describe the same frames.

    Args:
        file_names: Iterable of file names.
        features_dir: Directory holding the feature archives.
        labels_dir: Directory holding the label archives.

    Returns:
        ``(X, Y)`` where ``X`` stacks the frames of every loaded file along
        axis 0 and ``Y`` has one row per frame and one column per category
        (in ``categories`` order).

    Raises:
        ValueError: If no file could be loaded.
    """
    X = []
    Y = {c: [] for c in categories}

    for f in file_names:
        try:
            stem = f.split('.')[0]

            # Open the feature archive once and close it when done.
            with np.load(os.path.join(features_dir, stem + '.npz')) as feature_archive:
                features_mel = feature_archive["melspectrogram"]
                features_mfcc = feature_archive["mfcc"]

            # Reduce every category to a flat list with one label per frame.
            with np.load(os.path.join(labels_dir, stem + '_labels.npz')) as y:
                file_labels = {
                    c: list(itertools.chain.from_iterable(aggregate_labels(y[c])))
                    for c in categories
                }

            # Use the frame count shared by both feature arrays and every label
            # track, so features and labels are truncated to the same length.
            n_frames = min(
                features_mel.shape[0],
                features_mfcc.shape[0],
                *(len(labels) for labels in file_labels.values()),
            )
            features = np.concatenate(
                [features_mel[:n_frames], features_mfcc[:n_frames]], axis=1
            )
        except Exception as e:
            print(f"Error processing file {f}: {e}")
            continue

        # Commit only after everything for this file succeeded; appending the
        # features earlier would leave X and Y misaligned on a label failure.
        X.append(features)
        for c in categories:
            Y[c].extend(file_labels[c][:n_frames])

    if not X:
        raise ValueError(
            "No files could be loaded; check file_names, features_dir and labels_dir."
        )

    return np.concatenate(X), np.array([Y[cls] for cls in categories]).T

In [10]:
# ======================
# DATA PREPROCESSING
# ======================
try:
    print("\nLoading and preprocessing data...")

    # Frame-level features and labels for each split.
    train_x, train_y = read_files(train_filename)
    val_x, val_y = read_files(validation_filename)
    test_x, test_y = read_files(test_filename)

    # Standardize features: statistics are estimated on the training split
    # only and then applied unchanged to all three splits.
    scaler = StandardScaler()
    scaler.fit(train_x)
    scaled_train_x = scaler.transform(train_x)
    scaled_val_x = scaler.transform(val_x)
    scaled_test_x = scaler.transform(test_x)

    print("Data preprocessing completed!")
except Exception as exc:
    # Surface the message in the cell output, then propagate the failure.
    print(f"Error during preprocessing: {exc}")
    raise


Loading and preprocessing data...
Data preprocessing completed!


In [14]:
# ======================
# FAISS IMPLEMENTATION
# ======================
try:
    print("\nRunning FAISS similarity search...")

    # Prepare data: FAISS works on C-contiguous float32 matrices.
    train_vectors = np.ascontiguousarray(scaled_train_x, dtype='float32')
    test_vectors = np.ascontiguousarray(scaled_test_x, dtype='float32')

    # Build an exact (brute-force) L2 index over the training frames.
    index = faiss.IndexFlatL2(train_vectors.shape[1])
    index.add(train_vectors)

    # Search neighbors. k is capped at the index size: when fewer than k
    # vectors exist FAISS pads the result with -1, which would silently select
    # the last row of train_y below.
    k = min(5, index.ntotal)
    D, I = index.search(test_vectors, k)

    # Aggregate predictions: gather the neighbours' label rows in one indexing
    # step (n_test, k, n_classes) and take a per-class majority vote.
    neighbor_labels = train_y[I]
    avg_labels = neighbor_labels.mean(axis=1)
    y_pred_faiss = (avg_labels >= 0.5).astype(int)

    # Binary indicator matrices for the report. A class counts as present
    # where the label equals 1. Converting to index lists and back through a
    # MultiLabelBinarizer yields exactly these matrices, so that round-trip
    # (and its per-row Python loops) is not needed.
    y_true_bin = (test_y == 1).astype(int)
    y_pred_bin = y_pred_faiss

    # Evaluate
    print("\nFAISS Classification Report:")
    print(classification_report(y_true_bin, y_pred_bin,
                                target_names=categories,
                                zero_division=0))

    # Diagnostic information: shapes plus the active class indices of the
    # first three frames.
    sample_true = [np.flatnonzero(row == 1).tolist() for row in y_true_bin[:3]]
    sample_pred = [np.flatnonzero(row == 1).tolist() for row in y_pred_bin[:3]]
    print("\nLabel statistics:")
    print(f"True labels shape: {y_true_bin.shape}")
    print(f"Predicted labels shape: {y_pred_bin.shape}")
    print(f"Sample true labels: {sample_true}")
    print(f"Sample predicted labels: {sample_pred}")

except Exception as e:
    print(f"Error during FAISS processing: {e}")
    print(f"Test y shape: {test_y.shape if 'test_y' in locals() else 'N/A'}")
    print(f"Predicted shape: {y_pred_faiss.shape if 'y_pred_faiss' in locals() else 'N/A'}")
    # Re-raise like the loading/preprocessing cells do, so a failure is not
    # mistaken for a finished run.
    raise


Running FAISS similarity search...

FAISS Classification Report:
                  precision    recall  f1-score   support

        Airplane       0.38      0.27      0.31      4260
           Alarm       0.05      0.01      0.01      2180
      Beep/Bleep       0.05      0.02      0.02      1846
            Bell       0.38      0.06      0.11      4445
         Bicycle       0.12      0.03      0.04       966
      Bird Chirp       0.44      0.19      0.26     10341
             Bus       0.08      0.02      0.03      3593
             Car       0.28      0.16      0.20     10091
        Cat Meow       0.20      0.02      0.04      1284
        Chainsaw       0.72      0.12      0.21      1982
        Clapping       0.52      0.46      0.49      2417
           Cough       0.00      0.00      0.00        85
         Cow Moo       0.02      0.00      0.00       649
         Cowbell       0.63      0.14      0.22       871
          Crying       0.42      0.11      0.17      1786
     

In [None]:
# ======================
# RANDOM FOREST IMPLEMENTATION 
# ======================
try:
    print("\nTraining Random Forest model...")

    # One forest per category (MultiOutputClassifier clones RF for each label
    # column). random_state makes the run reproducible; n_jobs=-1 builds the
    # trees of each forest on all available cores.
    RF = RandomForestClassifier(n_estimators=50, max_depth=10,
                                random_state=42, n_jobs=-1)
    RF_multi = MultiOutputClassifier(RF)
    RF_multi.fit(scaled_train_x, train_y)

    # Evaluate
    y_pred_rf = RF_multi.predict(scaled_test_x)
    print("\nRandom Forest Classification Report:")
    # zero_division=0 matches the FAISS report and avoids undefined-metric
    # warnings for classes that are never predicted.
    print(classification_report(test_y.astype(int), y_pred_rf.astype(int),
                                target_names=categories, zero_division=0))

except Exception as e:
    print(f"Error during Random Forest processing: {e}")
    # Propagate the failure so the completion message below is only printed
    # after a successful run.
    raise

print("\nAll operations completed!")


Training Random Forest model...
