In [None]:
import torchaudio
# Fetch the LibriSpeech "test-clean" split, rooted at the current working directory,
# with downloading enabled.
librispeech_test = torchaudio.datasets.LIBRISPEECH(".", url="test-clean", download=True)

In [None]:
!pip install pyrubberband
!pip install torch-time-stretch

Collecting torch-time-stretch
  Using cached torch_time_stretch-1.0.3-py3-none-any.whl (4.6 kB)
Collecting primePy>=1.3 (from torch-time-stretch)
  Using cached primePy-1.3-py3-none-any.whl (4.0 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.7.0->torch-time-stretch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.7.0->torch-time-stretch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.7.0->torch-time-stretch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.7.0->torch-time-stretch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.7.0->torch-time-stretch)
  Using cached nvidia_cublas_cu12-12.1.

In [None]:
import os
from scipy import signal
import pyrubberband as pyrb
import torch
import torchaudio
import torchaudio.transforms as T
from torch_time_stretch import time_stretch
import numpy as np
# import shutil
import tempfile
import subprocess
import soundfile as sf

def read_audio(filepath, fs=16000,  mono=True, normalize=False, preemphasis=False):
    """
    Reads audio file stored at <filepath>
    Parameters:
        filepath (str): audio file path
        fs (int, optional): target sampling rate; the signal is resampled when the
            file's own rate differs
        mono (boolean, optional): return a single channel (channel 0) when the file
            has more than one channel. When False, all channels are flattened
            end-to-end into one 1-D tensor.
        normalize (boolean, optional): RMS normalization of signal (via rms_normalize)
        preemphasis (boolean, optional): apply pre-emphasis filter (done before resampling)
    Returns:
        waveform (tensor): audio signal, dim(N,), or None when loading/processing
            fails (a RuntimeWarning describing the failure is emitted in that case)
    """
    import warnings

    assert isinstance(filepath, str), "filepath must be specified as string"
    assert os.path.exists(filepath), f"{filepath} does not exist."

    try:
        waveform, sr = torchaudio.load(filepath)
        # dim 0 of the loaded tensor is treated as the channel axis.
        # Keep only channel 0 for ANY multi-channel file (not just stereo);
        # otherwise 3+ channel files would be concatenated channel after channel.
        if mono and waveform.shape[0] > 1:
            waveform = waveform[0]
        else:
            waveform = waveform.reshape(-1)
        # preemphasis
        if preemphasis:
            waveform = pre_emphasis(waveform)
        # resample
        if sr != fs:
            resampler = T.Resample(sr, fs, dtype=waveform.dtype)
            waveform = resampler(waveform)
        # normalize
        if normalize:
            waveform = rms_normalize(waveform)
        return waveform
    except Exception as e:
        # Best-effort contract is kept (None on failure), but the cause is no
        # longer swallowed silently.
        warnings.warn(f"read_audio failed for {filepath}: {e!r}", RuntimeWarning)
        return None


def peak_normalize(waveform):
    """
    Peak normalizes the <waveform>
    Parameter:
        waveform (tensor): waveform, dims: (N,)
    Returns:
        tensor: waveform divided by its maximum absolute value. Empty or all-zero
        input has no peak to scale by and is returned as an unchanged copy
        (instead of raising / producing NaNs).
    """
    if waveform.numel() == 0:
        return waveform.clone()
    peak = torch.max(torch.abs(waveform))
    if peak == 0:
        # silent signal: dividing would give 0/0 = NaN everywhere
        return waveform.clone()
    return waveform/peak


def rms_normalize(waveform, r=-10):
    """
    RMS-normalization of <waveform>
    Parameter:
        waveform (tensor): waveform, dims: (N,)
        r (float): target RMS level in dB, i.e. the output satisfies
            20*log10(rms(output)) == r
    Returns:
        tensor: waveform rescaled to the target RMS. Empty or all-zero input
        cannot be rescaled and is returned as an unchanged copy.
    """
    if waveform.numel() == 0:
        return waveform.clone()
    current_rms = torch.sqrt(torch.mean(torch.pow(waveform, 2)))
    if current_rms == 0:
        # silent signal: the scaling factor would be infinite
        return waveform.clone()
    # RMS is an amplitude quantity, so dB -> linear uses 20 (not 10) in the exponent.
    target_rms = 10**(r/20)
    return waveform*(target_rms/current_rms)


def pre_emphasis(waveform, coeff=0.97):
    """
    Applies a first-order pre-emphasis filter along dim 0:
        y[0] = x[0],  y[n] = x[n] - coeff * x[n-1]  for n >= 1
    Parameters:
        waveform (tensor): waveform, dims: (N,)
        coeff (float, optional): weight of the previous sample that is subtracted
    Returns:
        filtered_sig (tensor): filtered waveform, same shape and dtype as <waveform>;
        an empty input yields an empty output
    """
    filtered_sig = torch.empty_like(waveform)
    if waveform.numel() == 0:
        # nothing to filter; indexing element 0 below would raise
        return filtered_sig
    filtered_sig[0] = waveform[0]
    filtered_sig[1:] = waveform[1:] - coeff*waveform[:-1]
    return filtered_sig


def add_time_stretch(audio, fs, stretch_rate):
    """
    Time-stretches <audio> using torch_time_stretch's time_stretch.
    Parameters:
        audio (tensor): waveform, dims: (N,)
        fs (float): audio sample rate
        stretch_rate (float): playback rate; its reciprocal is what is handed to
            time_stretch as the stretch argument
    Returns:
        audio_stretch (tensor): time stretched waveform with all size-1 dims
            squeezed away (the resulting length is not verified here)

    """
    # time_stretch is fed a tensor with two leading singleton dims added
    batched = audio[None, None]
    stretched = time_stretch(batched, 1/stretch_rate, fs)
    stretched.squeeze_()
    return stretched


def add_pitch_shift_rb(y, sr, shift, tmpdir=None):
    """
    Adds pitch shift to <y> audio sampled at <sr> by <shift> semitones. It calls rubberband package directly; does not use pyrubberband package.
    Parameters:
        y (np.ndarray or tensor): waveform; anything that is not an ndarray is
            converted with its .numpy() method
        sr (int): sample rate used when writing the temporary input file
        shift (float): value passed to rubberband's --pitch option
        tmpdir (str, optional): directory for the temporary .wav files; the
            default temp directory is used when None
    Returns:
        tensor (float32): rubberband's output, squeezed when <y> is 1-D
    Raises:
        subprocess.CalledProcessError: rubberband exited with a non-zero status
    """
    if not isinstance(y, np.ndarray):
        y = y.numpy()

    tmp_paths = []
    try:
        # Get the input and output tempfile. `dir=` scopes the directory to this
        # call instead of overwriting the process-wide tempfile.tempdir.
        for _ in range(2):
            fd, path = tempfile.mkstemp(suffix='.wav', dir=tmpdir)
            os.close(fd)
            tmp_paths.append(path)
        infile, outfile = tmp_paths

        sf.write(infile, y, sr)
        command = ["rubberband", "-q", "--pitch", str(shift), infile, outfile]
        # check=True: fail here with the real cause rather than later while
        # reading an output file rubberband never produced.
        subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)

        y_out, _ = sf.read(outfile, always_2d=True, dtype=y.dtype)
        if y.ndim == 1:
            y_out = np.squeeze(y_out)
    finally:
        # always remove the temp files, including when any step above raises
        for path in tmp_paths:
            if os.path.exists(path):
                os.unlink(path)
    return torch.from_numpy(y_out).type(torch.float32)


def add_pitch_shift(audio, fs, semitone_shift, use_rb=True):
    """
    Pitch-shifts <audio> by <semitone_shift> semitones.
    Parameters:
        audio (tensor): clean waveform, dims: (N,)
        fs (float): audio sample rate
        semitone_shift (float): semitones (can be between -12 and 12, but keep it low for good sound quality)
        use_rb (boolean, optional): when true, shift with pyrubberband on the NumPy
            view of <audio>; otherwise use torchaudio.functional.pitch_shift
    Returns:
        audio_shift: pitch shifted signal (tensor); cast to float32 on the
            pyrubberband path, returned as produced on the torchaudio path

    """
    if not use_rb:
        return torchaudio.functional.pitch_shift(audio, fs, semitone_shift)

    shifted_np = pyrb.pitch_shift(audio.numpy(), fs, semitone_shift)
    shifted = torch.from_numpy(shifted_np)
    return shifted.type(torch.float32)

def add_noise(audio, noise, snr):
    """
    Adds background <noise> to <clean> signal at desired <SNR> level
    Parameters:
        audio (tensor): clean waveform, dims: (N,)
        noise (tensor): noise waveform, dims: (M,). Tiled when shorter than
            <audio>; a random length-N segment is taken when longer.
        snr (int): SNR level in dB
    Returns:
        noisy_audio: noisy signal (tensor), dims: (N,)
    Raises:
        AssertionError: either signal has zero power (SNR undefined), or the
            achieved SNR deviates from <snr> by 1e-4 dB or more
    """
    n_audio, n_noise = len(audio), len(noise)

    # make equal lengths for clean and noise signals
    if n_audio >= n_noise:
        # Exact integer ceil-division. Going through a float32 tensor can round
        # the ratio down for very long signals and leave the tiled noise too short.
        reps = -(-n_audio // n_noise)
        noise = torch.tile(noise, (reps,))[:n_audio]
    else:
        start_idx = int(torch.randint(n_noise - n_audio, (1,)))
        noise = noise[start_idx:start_idx+n_audio]

    assert len(noise) == len(audio), f"noise signal {len(noise)} and clean signal {len(audio)} length mismatch"

    # add noise at desired snr (these are mean powers, not RMS values)
    audio_power = torch.mean(torch.pow(audio, 2))
    noise_power = torch.mean(torch.pow(noise, 2))
    assert audio_power > 0 and noise_power > 0, (
        f"snr mismatch: SNR is undefined for a silent signal "
        f"(audio power {audio_power.item()}, noise power {noise_power.item()})"
    )
    factor = torch.pow((audio_power/noise_power)/torch.pow(torch.tensor(10), (snr/10)), 0.5)
    noise = factor*noise
    noisy_audio = audio + noise

    # two-sided sanity check: too low an SNR must fail as well as too high
    achieved_snr = 10*torch.log10(audio_power/torch.mean(torch.pow(noise, 2)))
    assert torch.abs(achieved_snr - snr) < 1e-4, (
        f"snr mismatch: achieved {achieved_snr.item()} dB, requested {snr} dB "
        f"(len {len(audio)}, audio power {audio_power.item()}, "
        f"noise power {noise_power.item()}, factor {factor.item()})"
    )
    return noisy_audio


def add_reverb(clean, rir):
    """
    Filters <clean> signal with <rir> to get reverberation effect
    Parameters:
        clean (tensor): clean waveform, dims: (N,)
        rir (tensor): room impulse response, dims: (M,)
    Returns:
        reverb added signal (float32 tensor), dims: (N,). The output is shifted so
        that the largest-magnitude tap of <rir> lines up with the input samples.
    """
    clean = clean.numpy()
    rir = rir.numpy()
    # unit-energy impulse response
    rir = rir/np.linalg.norm(rir)
    # filtering
    p_max = np.argmax(np.abs(rir))
    filtered_clean = signal.convolve(clean, rir, mode="full")

    # time offset: drop the first p_max samples and keep len(clean) of the rest.
    # The full convolution has N+M-1 samples and p_max <= M-1, so this window is
    # always in range. Plain slicing also works for p_max == 0, where negative
    # indexing (`[:-0]`) would select nothing and break.
    aligned = filtered_clean[p_max:p_max + len(clean)].astype(np.float32)
    assert(len(aligned)==len(clean))
    return torch.from_numpy(aligned)


def add_noise_reverb(audio, noise, snr, rir):
    """
    Reverberates both <audio> and <noise> with the same <rir>, then mixes the
    reverberated noise into the reverberated audio at the requested <snr>.
    Parameters:
        audio (tensor): clean waveform, dims: (N,)
        noise (tensor): noise waveform, dims: (M,)
        snr (int): SNR level in dB
        rir (tensor): room impulse response, dims: (M,)
    Returns:
        noise and reverb added signal (tensor), dims: (N,)
    """
    reverberant_audio = add_reverb(audio, rir)
    reverberant_noise = add_reverb(noise, rir)
    return add_noise(reverberant_audio, reverberant_noise, snr)

In [None]:
import glob

# Collect every .flac file under the LibriSpeech test-clean directory.
# glob returns paths in arbitrary filesystem order, so sort them: otherwise the
# slices taken from this list would contain different files from run to run.
flac_files = sorted(glob.glob("/content/LibriSpeech/test-clean/**/*.flac", recursive=True))
# First 40 files of the sorted list
files_subset = flac_files[:40]

In [None]:
from transformers import AutoProcessor, WavLMModel
import torch

# Load the pre-trained processor and WavLM model; both are created from the same
# checkpoint name so the processor's output matches what the model was trained with.
processor = AutoProcessor.from_pretrained("patrickvonplaten/wavlm-libri-clean-100h-base-plus")
model = WavLMModel.from_pretrained("patrickvonplaten/wavlm-libri-clean-100h-base-plus")

preprocessor_config.json:   0%|          | 0.00/215 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/294 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/2.27k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/23.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


pytorch_model.bin:   0%|          | 0.00/378M [00:00<?, ?B/s]

Some weights of WavLMModel were not initialized from the model checkpoint at patrickvonplaten/wavlm-libri-clean-100h-base-plus and are newly initialized: ['wavlm.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wavlm.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Function to get hidden states
def get_hidden_states(audio_file_path):
    """
    Returns the model's last hidden states for one audio file.
    Parameters:
        audio_file_path (str): path handed to read_audio
    Returns:
        last_hidden_state of the model output, computed with gradients disabled
    """
    waveform = read_audio(audio_file_path)
    # the processor output is unpacked as keyword arguments to the model
    model_inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    with torch.no_grad():
        return model(**model_inputs).last_hidden_state

In [None]:
import numpy as np

# Per-file frame features, each a 2D array of shape (frames, feature_dim)
all_flattened_arrays = []

# Extract hidden states for every file in the subset
for file_path in files_subset:
    hidden_states = get_hidden_states(file_path)
    hidden_states_np = hidden_states.detach().cpu().numpy()  # Convert to NumPy array

    # Collapse all leading dims so each row is one feature vector
    flattened_array = hidden_states_np.reshape(-1, hidden_states_np.shape[-1])

    # Append the flattened array to the list
    all_flattened_arrays.append(flattened_array)

# Stack the per-file arrays row-wise into a single 2D NumPy array
all_flattened_arrays_stacked = np.vstack(all_flattened_arrays)

# Print the stacked 2D array (NumPy abbreviates large arrays when printing)
print("2D Output:")
print(all_flattened_arrays_stacked)

2D Output:
[[ 0.22573815 -0.32241032  0.26583418 ...  0.39793864 -0.11666549
  -0.6712432 ]
 [ 0.22179335 -0.33211088  0.26335183 ...  0.39563006 -0.10724224
  -0.64132327]
 [ 0.20467713 -0.32480022  0.2577814  ...  0.36006016 -0.11212713
  -0.6480782 ]
 ...
 [ 0.26904282 -0.40374023  0.24354921 ...  0.3931742  -0.10734152
  -0.66708213]
 [ 0.26093623 -0.40793258  0.22252095 ...  0.38005912 -0.10376661
  -0.7430424 ]
 [ 0.20624542 -0.34879166  0.19421141 ...  0.3674754  -0.09962951
  -0.47819084]]


In [None]:
# Shape is (total number of frames over all stacked files, feature dimension)
print(all_flattened_arrays_stacked.shape)

(14031, 768)


In [None]:
from sklearn.cluster import KMeans

In [None]:
# Apply k-means clustering to the stacked frame features.
# random_state is fixed so that the centroids (and the saved .npy file) are
# reproducible from one run to the next.
kmeans = KMeans(n_clusters=1024, random_state=0)
kmeans.fit(all_flattened_arrays_stacked)

# Get the cluster centroids
cluster_centroids = kmeans.cluster_centers_

np.save('cluster_centroids.npy', cluster_centroids)
# Print the cluster centroids
print("Cluster Centroids:")
print(cluster_centroids)

Cluster Centroids:
[[ 0.00186975 -0.28623903 -0.04948331 ... -0.07392094 -0.12909643
   1.1734984 ]
 [ 0.18293059 -0.40393206  0.2550805  ...  0.20604333 -0.08421221
  -0.5137214 ]
 [ 0.3072219  -0.05999289  0.0120896  ... -0.17635082 -0.10022494
  -0.16201328]
 ...
 [ 0.27839148 -0.06992792  0.141306   ...  0.30905822 -0.04742779
   0.24472195]
 [ 0.20038237  0.12115759  0.23996928 ... -0.02088702 -0.02676779
  -0.02943507]
 [ 0.34944877  0.22600508  0.03970724 ...  0.10637247 -0.0070828
  -0.98968333]]


In [None]:
# Shape is (number of clusters, feature dimension)
print(cluster_centroids.shape)

(1024, 768)


In [None]:
def get_hidden_states_clean(audio_file_path):
    """
    Returns the model's last hidden states for the (unmodified) audio file.
    Parameters:
        audio_file_path (str): path handed to read_audio
    Returns:
        last_hidden_state of the model output; computed with gradients disabled,
        so the result carries no autograd graph
    """
    speech = read_audio(audio_file_path)
    input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values  # Batch size 1
    # Inference only: without no_grad every call would build and keep an
    # autograd graph that is never used.
    with torch.no_grad():
        hidden_states = model(input_values).last_hidden_state
    return hidden_states

In [None]:
def get_hidden_states_noisy(noisy_audio):
    """
    Returns the model's last hidden states for an already-loaded waveform.
    Parameters:
        noisy_audio (tensor): waveform passed to the processor as 16 kHz audio
    Returns:
        last_hidden_state of the model output; computed with gradients disabled,
        so the result carries no autograd graph
    """
    input_values = processor(noisy_audio, sampling_rate=16000, return_tensors="pt").input_values  # Batch size 1
    # Inference only: without no_grad every call would build and keep an
    # autograd graph that is never used.
    with torch.no_grad():
        hidden_states = model(input_values).last_hidden_state
    return hidden_states

In [None]:
import numpy as np

def compare_clean_noisy_files(file_paths, kmeans_model, noise_data, snr, offset_ms, rir_data):
    """
    Compares cluster assignments of clean vs. degraded audio over several files.
    Parameters:
        file_paths (iterable of str): audio files to evaluate
        kmeans_model: fitted clustering model passed on to the assign_* helpers
        noise_data (tensor): noise waveform
        snr (int): SNR level in dB
        offset_ms (float): time offset in milliseconds applied to the degraded audio
        rir_data (tensor): room impulse response
    Returns:
        (total_original_states, total_noise_states, total_diff_states,
         total_percentage_changed) summed over all files; the percentage is
        100 * total_diff_states / total_original_states, or 0.0 when there were
        no states at all (e.g. empty <file_paths>)
    """
    # Running totals over all files
    total_original_states = 0
    total_noise_states = 0
    total_diff_states = 0

    for file_path in file_paths:
        # Cluster indices for the clean file and for its degraded version
        clean_cluster_indices = assign_clean_files_to_clusters(file_path, kmeans_model)
        noisy_cluster_indices = assign_noisy_files_to_clusters(file_path, kmeans_model, noise_data, snr, offset_ms, rir_data)

        # Compare cluster indices between clean and noisy files
        num_original_states, num_noise_states, num_diff_states = compare_cluster_indices(clean_cluster_indices, noisy_cluster_indices)

        total_original_states += num_original_states
        total_noise_states += num_noise_states
        total_diff_states += num_diff_states

    # Guard the division: with nothing to compare, report 0% changed instead of
    # raising ZeroDivisionError.
    if total_original_states:
        total_percentage_changed = (total_diff_states / total_original_states) * 100
    else:
        total_percentage_changed = 0.0

    return total_original_states, total_noise_states, total_diff_states, total_percentage_changed

In [None]:
import os
import torch
import torchaudio
import torch.nn.functional as F
import numpy as np
from sklearn.cluster import KMeans
import numpy as np
def assign_noisy_files_to_clusters(file_path, kmeans_model, noise_data, snr, offset_ms, rir_data):
    """
    Degrades one audio file (noise + reverb + time offset) and assigns each of
    its hidden-state frames to a cluster.
    Parameters:
        file_path (str): audio file to read
        kmeans_model: fitted model exposing predict()
        noise_data (tensor): noise waveform
        snr (int): SNR level in dB
        offset_ms (float): time shift in milliseconds. Positive delays the signal
            (zeros prepended), negative advances it (zeros appended); the length
            is preserved in both cases. Shifts smaller than one sample are ignored.
        rir_data (tensor): room impulse response
    Returns:
        cluster indices predicted by <kmeans_model>, one per frame
    """
    # Read audio data
    file_path_data = read_audio(file_path)
    # Add noise and reverb to audio data
    noisy_audio = add_noise_reverb(file_path_data, noise_data, snr, rir_data)
    # Add time offset to the noisy audio
    sample_rate = 16000  # Assuming 16kHz sample rate, adjust according to your case
    offset_samples = int(sample_rate * offset_ms / 1000)
    # Test the sample count, not offset_ms: a tiny non-zero offset_ms rounds to
    # 0 samples, and slicing with [:-0] would return an EMPTY signal.
    if offset_samples > 0:
        noisy_audio = F.pad(noisy_audio, (offset_samples, 0))[:-offset_samples]
    elif offset_samples < 0:
        # cannot drop more samples than the signal has
        advance = min(-offset_samples, noisy_audio.shape[-1])
        noisy_audio = F.pad(noisy_audio[advance:], (0, advance))
    # Get hidden states from noisy audio
    hidden_states_new = get_hidden_states_noisy(noisy_audio)
    # Convert hidden states to NumPy array
    hidden_states_np_new = hidden_states_new.detach().cpu().numpy()
    # Collapse all leading dims so each row is one feature vector
    flattened_array_new = hidden_states_np_new.reshape(-1, hidden_states_np_new.shape[-1])
    # Assign the units to clusters using the trained KMeans model
    cluster_indices_new_noisy = kmeans_model.predict(flattened_array_new)

    return cluster_indices_new_noisy

def assign_clean_files_to_clusters(file_path, kmeans_model):
    """
    Assigns each hidden-state frame of a clean audio file to a cluster.
    Parameters:
        file_path (str): audio file handed to get_hidden_states_clean
        kmeans_model: fitted model exposing predict()
    Returns:
        cluster indices predicted by <kmeans_model>, one per frame
    """
    features = get_hidden_states_clean(file_path).detach().cpu().numpy()
    # collapse all leading dims so each row is one feature vector
    frames = features.reshape(-1, features.shape[-1])
    return kmeans_model.predict(frames)

def compare_cluster_indices(cluster_indices_run1, cluster_indices_run2):
    """
    Counts the positions at which two cluster-index sequences disagree.
    Parameters:
        cluster_indices_run1: cluster indices of the first (original) run
        cluster_indices_run2: cluster indices of the second (noisy) run
    Returns:
        (len(run1), len(run2), number of element-wise mismatches)
    """
    first = np.array(cluster_indices_run1)
    second = np.array(cluster_indices_run2)
    mismatches = np.sum(first != second)
    return len(cluster_indices_run1), len(cluster_indices_run2), mismatches

In [None]:
# Experiment: Cafeteria noise at 0 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 122715
Percentage of Changed States: 99.18207020294682


In [None]:
# Experiment: Cafeteria noise at 5 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 5
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 121087
Percentage of Changed States: 97.86627009464385


In [None]:
# Experiment: Cafeteria noise at 10 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 10
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 117957
Percentage of Changed States: 95.33650698715721


In [None]:
# Experiment: Cafeteria noise at 15 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 15
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 114342
Percentage of Changed States: 92.41475183266384


In [None]:
# Experiment: Cafeteria noise at 20 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 20
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 111723
Percentage of Changed States: 90.29799477882759


In [None]:
# Experiment: Cafeteria noise at 25 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 25
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 110306
Percentage of Changed States: 89.15273141674817


In [None]:
# Experiment: Car noise at 0 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 114120
Percentage of Changed States: 92.235324545168


In [None]:
# Experiment: Car noise at 5 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
snr = 5
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 111689
Percentage of Changed States: 90.27051492398587


In [None]:
# Experiment: Car noise at 10 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
snr = 10
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 110134
Percentage of Changed States: 89.01371568049011


In [None]:
# Experiment: Car noise at 15 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
snr = 15
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Experiment: Car noise at 20 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
snr = 20
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Experiment: Car noise at 25 dB SNR, 10 ms time offset, reverb from 0.5.wav,
# evaluated on (up to) the first 500 files of flac_files.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
snr = 25
offset_ms = 10
# Room impulse response file used for the reverb
reverb_file_path = '/content/0.5.wav'
rir_data = read_audio(reverb_file_path)
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms, rir_data)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)