In [None]:
import librosa
import numpy as np
import soundfile as sf
import os

def normalize_audio(y):
    y_normalized = librosa.util.normalize(y)
    return y_normalized

def segment_and_filter(input_path, output_dir, segment_length=8, energy_threshold=-40):
    # Load audio
    y, sr = librosa.load(input_path, sr=None)

    # Split into x-second segments
    segment_samples = sr * segment_length
    segments = [y[i:i+segment_samples] for i in range(0, len(y), segment_samples)]

    # Save valid segments
    valid_segments = []
    for i, seg in enumerate(segments):
        # Compute energy (dB)
        rms = librosa.feature.rms(y=seg)
        db = librosa.amplitude_to_db(rms)
        avg_db = np.mean(db)

        print(avg_db)

        # Check energy threshold
        if avg_db > energy_threshold:
            valid_segments.append(seg)
            sf.write(f"{output_dir}/segment_{i}.flac", seg, sr)

    return valid_segments

def augment_cv(audio, sr, noise_path=None):
    augmented = []

    # Time-domain: Reverse audio
    reversed_audio = audio[::-1]
    augmented.append(reversed_audio)

    # Speed change (0.9x and 1.1x)
    for rate in [0.9, 1.1]:
        y_stretch = librosa.effects.time_stretch(audio, rate=rate)
        augmented.append(y_stretch)

    # Add background noise
    if noise_path:
        noise, _ = librosa.load(noise_path, sr=sr)
        min_len = min(len(audio), len(noise))
        mixed = audio[:min_len] + 0.2 * noise[:min_len]  # Adjust noise level
        augmented.append(mixed)

    return augmented

In [None]:
from rockpool.devices.xylo.syns65302 import AFESimPDM
from rockpool.devices.xylo.syns65302 import AFESimExternal

def extract_features(audio_path, output_dir):
    # Load audio and preprocess
    y, sr = librosa.load(audio_path, sr=16000)  # Resample to 16kHz if needed

    # Initialize AFESim
    afe = AFESim(sampling_rate=sr)

    # Extract features
    features = afe(y)

    # Save as .npy
    np.save(f"{output_dir}/{os.path.basename(audio_path)}.npy", features)

  from .autonotebook import tqdm as notebook_tqdm
  from .autonotebook import tqdm as notebook_tqdm



ModuleNotFoundError: No module named 'imp'

In [2]:
def encode_spikes(feature_matrix):
    # Use AFESim's built-in encoding (pseudo-code)
    spike_matrix = AFESim.encode_spikes(feature_matrix)
    return spike_matrix

In [13]:
#in_path = "/home/timon-l/devel/CITS5551/10700792/loc4/loc4/train/00006.flac"
#out_dir = "/home/timon-l/devel/CITS5551/10700792/loc4/loc4/train/output"
in_path = "/home/timon-l/devel/CITS5551/10700792/loc4/loc4/val/00009.flac"
out_dir = "/home/timon-l/devel/CITS5551/10700792/loc4/loc4/val/output"

segment_and_filter(in_path, out_dir, energy_threshold=-60)

-65.05963
-55.305115
-55.338783
-59.538254
-58.140827
-56.931057
-53.024204
-52.632557


[array([0.00048828, 0.00086212, 0.0005188 , ..., 0.00193024, 0.00241852,
        0.00086212], dtype=float32),
 array([ 0.00336456,  0.00157166,  0.00111389, ...,  0.00019836,
        -0.00034332,  0.00023651], dtype=float32),
 array([-2.3651123e-04, -1.5258789e-04,  9.9182129e-05, ...,
         1.6784668e-04, -3.8146973e-04, -3.2043457e-04], dtype=float32),
 array([-0.00041962, -0.00060272, -0.00082397, ..., -0.00131989,
        -0.00119781, -0.00119019], dtype=float32),
 array([-0.00048065, -0.001297  , -0.00126648, ...,  0.00026703,
         0.00047302, -0.00041199], dtype=float32),
 array([-0.00022888,  0.00048828,  0.00021362, ...,  0.00891876,
         0.01920319,  0.00262451], dtype=float32),
 array([-7.9345703e-04,  9.3002319e-03, -7.6980591e-03, ...,
        -5.5694580e-04, -6.8664551e-05, -6.8664551e-05], dtype=float32)]