In [None]:
import torchaudio
# Fetch the LibriSpeech "test-clean" split into the current directory
# (download=True asks torchaudio to download it when it is not present).
librispeech_test = torchaudio.datasets.LIBRISPEECH(".", url="test-clean", download=True)

In [None]:
!pip install pyrubberband
!pip install torch-time-stretch



In [None]:
import os
from scipy import signal
import pyrubberband as pyrb
import torch
import torchaudio
import torchaudio.transforms as T
from torch_time_stretch import time_stretch
import numpy as np
# import shutil
import tempfile
import subprocess
import soundfile as sf

def read_audio(filepath, fs=16000,  mono=True, normalize=False, preemphasis=False):
    """
    Reads the audio file stored at <filepath> and returns it as a 1-D tensor.

    Parameters:
        filepath (str): audio file path
        fs (int, optional): target sampling rate; the signal is resampled when
            the file's own rate differs
        mono (boolean, optional): keep only the first channel of a
            multi-channel file
        normalize (boolean, optional): RMS-normalize the signal (rms_normalize)
        preemphasis (boolean, optional): apply pre-emphasis filter (applied
            before resampling)
    Returns:
        waveform (tensor): audio signal, dim(N,), or None when loading or
            processing the file failed (a warning describing the failure is
            emitted in that case)
    Raises:
        AssertionError: if <filepath> is not a string or does not exist
    """
    import warnings

    assert isinstance(filepath, str), "filepath must be specified as string"
    assert os.path.exists(filepath), f"{filepath} does not exist."

    try:
        waveform, sr = torchaudio.load(filepath)
        # mono channel: the first axis is treated as the channel axis, so keep
        # channel 0 for ANY multi-channel file (not only stereo)
        if mono and waveform.shape[0] > 1:
            waveform = waveform[0]
        else:
            # single channel -> (N,); with mono=False a multi-channel file is
            # flattened channel after channel (original behaviour, kept)
            waveform = waveform.reshape(-1)
        # preemphasis
        if preemphasis:
            waveform = pre_emphasis(waveform)
        # resample
        if sr != fs:
            resampler = T.Resample(sr, fs, dtype=waveform.dtype)
            waveform = resampler(waveform)
        # normalize
        if normalize:
            waveform = rms_normalize(waveform)
        return waveform
    except Exception as e:
        # Best-effort contract is kept (callers get None), but the reason is
        # no longer discarded silently.
        warnings.warn(f"read_audio failed for {filepath}: {e!r}")
        return None


def peak_normalize(waveform):
    """
    Peak normalizes the <waveform> so that its largest absolute sample is 1.
    Parameter:
        waveform (tensor): waveform, dims: (N,)
    Returns:
        normalized waveform (tensor), dims: (N,). An all-zero waveform is
        returned as an unchanged copy instead of being divided by zero.
    """
    peak = torch.max(torch.abs(waveform))
    if peak == 0:
        # silent signal: 0/0 would fill the output with NaN
        return waveform.clone()
    return waveform/peak


def rms_normalize(waveform, r=-10):
    """
    RMS-normalization of <waveform>: rescales it so that its root-mean-square
    value becomes 10**(r/10).
    Parameter:
        waveform (tensor): waveform, dims: (N,)
        r (float): target level in dB
    Returns:
        rescaled waveform (tensor), dims: (N,). An all-zero waveform is
        returned as an unchanged copy instead of being divided by zero.
    """
    current_rms = torch.pow(torch.mean(torch.pow(waveform,2)) ,0.5)
    if current_rms == 0:
        # silent signal: scaling by target/0 would fill the output with NaN
        return waveform.clone()
    # NOTE(review): 10**(r/10) is the power-style dB conversion; an amplitude
    # (RMS) level in dB would normally use 10**(r/20). Kept as-is so existing
    # results do not change - confirm the intended convention.
    scaling_factor = (10**(r/10))/current_rms
    return waveform*scaling_factor


def pre_emphasis(waveform, coeff=0.97):
    """
    First-order pre-emphasis filter: y[0] = x[0], y[n] = x[n] - coeff*x[n-1].
    Parameters:
        waveform (tensor): input signal, filtered along its first axis
        coeff (float, optional): weight of the previous sample
    Returns:
        filtered signal (tensor) with the same shape and dtype as <waveform>
    """
    emphasized = torch.empty_like(waveform)
    # the first sample has no predecessor and is copied through unchanged
    emphasized[0] = waveform[0]
    previous, current = waveform[:-1], waveform[1:]
    emphasized[1:] = current - coeff*previous
    return emphasized


def add_time_stretch(audio, fs, stretch_rate):
    """
    Time-stretches <audio> with torch_time_stretch.time_stretch.
    Parameters:
        audio (tensor): waveform, dims: (N,)
        fs (float): audio sample rate
        stretch_rate (float): playback rate; its reciprocal is what gets
            passed to time_stretch
    Returns:
        audio_stretch (tensor): time stretched waveform with all singleton
            dimensions removed (presumably about N/<stretch_rate> samples -
            TODO confirm against torch_time_stretch)
    """
    # time_stretch is fed a tensor with two leading singleton axes
    batched = audio[None, None]
    stretched = time_stretch(batched, 1/stretch_rate, fs)
    return stretched.squeeze_()


def add_pitch_shift_rb(y, sr, shift, tmpdir=None):
    """
    Adds pitch shift to <y> audio sampled at <sr> by <shift> semitones. It calls rubberband package directly; does not use pyrubberband package.

    Parameters:
        y (ndarray or tensor): input audio; tensors are converted to NumPy
        sr (int): sample rate used to write the temporary input file
        shift (float): pitch shift in semitones, passed to `rubberband --pitch`
        tmpdir (str, optional): directory for the temporary WAV files; the
            default temporary directory is used when None
    Returns:
        pitch shifted audio (tensor, float32); squeezed when <y> is 1-D
    Raises:
        subprocess.CalledProcessError: if the rubberband command exits with a
            non-zero status
    """
    if isinstance(y, np.ndarray) is False:
        # detach/cpu so tensors that require grad or live on a GPU also work
        y = y.detach().cpu().numpy()

    temp_paths = []
    try:
        # Input and output temp files. `dir=` is used instead of assigning
        # tempfile.tempdir so the process-wide default is not changed.
        for _ in range(2):
            fd, path = tempfile.mkstemp(suffix='.wav', dir=tmpdir)
            temp_paths.append(path)
            os.close(fd)
        infile, outfile = temp_paths

        sf.write(infile, y, sr)
        command = ["rubberband", "-q", "--pitch", str(shift), infile, outfile]
        # check=True: fail here with a clear error rather than later while
        # reading an output file that rubberband never wrote
        subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=True)

        y_out, _ = sf.read(outfile, always_2d=True, dtype=y.dtype)
    finally:
        # always remove the temp files, also when writing/processing failed
        for path in temp_paths:
            if os.path.exists(path):
                os.unlink(path)

    if y.ndim == 1:
        y_out = np.squeeze(y_out)
    return torch.from_numpy(y_out).type(torch.float32)


def add_pitch_shift(audio, fs, semitone_shift, use_rb=True):
    """
    Pitch-shifts <audio> by <semitone_shift> semitones.
    Parameters:
        audio (tensor): clean waveform, dims: (N,)
        fs (float): audio sample rate
        semitone_shift (float): semitones (can be between -12 and 12, but keep it low for good sound quality)
        use_rb (boolean, optional): use pyrubberband when True, otherwise
            torchaudio.functional.pitch_shift
    Returns:
        audio_shift: pitch shifted signal (tensor), dims: (N,); cast to
            float32 on the pyrubberband path
    """
    if not use_rb:
        return torchaudio.functional.pitch_shift(audio, fs, semitone_shift)

    # pyrubberband works on NumPy arrays, so convert there and back
    shifted = pyrb.pitch_shift(audio.numpy(), fs, semitone_shift)
    return torch.from_numpy(shifted).type(torch.float32)

def add_noise(audio, noise, snr):
    """
    Adds background <noise> to <audio> at the desired <snr> level
    Parameters:
        audio (tensor): clean waveform, dims: (N,)
        noise (tensor): noise waveform, dims: (M,)
        snr (int): SNR level in dB
    Returns:
        noisy_audio: noisy signal (tensor), dims: (N,)
    Raises:
        AssertionError: if the achieved SNR differs from <snr> by 1e-4 dB or
            more in either direction. This also triggers when <audio> or
            <noise> is all zeros, because the SNR is then NaN.
    """
    # make equal lengths for clean and noise signals
    n_audio, n_noise = len(audio), len(noise)
    if n_audio >= n_noise:
        # noise too short: repeat it (exact integer ceil) and trim
        reps = -(-n_audio // n_noise)
        noise = torch.tile(noise, (reps,))[:n_audio]
    else:
        # noise longer than audio: cut a random window out of it
        start_idx = int(torch.randint(n_noise - n_audio, (1,)))
        noise = noise[start_idx:start_idx+n_audio]

    assert len(noise) == len(audio), f"noise signal {len(noise)} and clean signal {len(audio)} length mismatch"

    # add noise at desired snr (these are mean powers, i.e. mean of squares)
    audio_power = torch.mean(torch.pow(audio, 2))
    noise_power = torch.mean(torch.pow(noise, 2))
    factor = torch.pow((audio_power/noise_power)/(10**(snr/10)), 0.5)
    noise = factor*noise
    noisy_audio = audio + noise

    # verify the result; abs() makes the check two-sided (previously an SNR
    # that came out too LOW always passed)
    scaled_noise_power = torch.mean(torch.pow(noise, 2))
    achieved_snr = 10*torch.log10(audio_power/scaled_noise_power)
    assert torch.abs(achieved_snr - snr) < 1e-4, f"snr mismatch {(achieved_snr, snr, len(audio), len(noise), audio_power, scaled_noise_power, noise_power, factor, audio, torch.count_nonzero(audio))}"
    return noisy_audio


def add_reverb(clean, rir):
    """
    Filters <clean> signal with <rir> to get reverberation effect
    Parameters:
        clean (tensor): clean waveform, dims: (N,)
        rir (tensor): room impulse response, dims: (M,)
    Returns:
        reverb added signal (tensor, float32), dims: (N,)
    """
    clean = clean.numpy()
    rir = rir.numpy()
    # unit-norm impulse response
    rir = rir/np.linalg.norm(rir)
    # filtering
    p_max = np.argmax(np.abs(rir))
    filtered_clean = signal.convolve(clean, rir, mode="full")

    # time offset: drop the first p_max samples so the strongest RIR tap lines
    # up with the input, then keep N samples. The full convolution has
    # N+M-1 samples and p_max <= M-1, so this slice always has length N.
    # Unlike negative-index slicing this also works when p_max == 0.
    filtered_clean = filtered_clean[p_max:p_max + len(clean)].astype(np.float32)
    assert(len(filtered_clean)==len(clean))
    filtered_clean = torch.from_numpy(filtered_clean)
    return filtered_clean


def add_noise_reverb(audio, noise, snr, rir):
    """
    Applies reverberation (via <rir>) to both <audio> and <noise>, then mixes
    the two reverberant signals at the desired <snr> level.
    Parameters:
        audio (tensor): clean waveform, dims: (N,)
        noise (tensor): noise waveform, dims: (M,)
        snr (int): SNR level in dB
        rir (tensor): room impulse response
    Returns:
        noise and reverb added signal (tensor), dims: (N,)
    """
    # speech is reverberated first, then the noise, then they are mixed
    return add_noise(add_reverb(audio, rir), add_reverb(noise, rir), snr)

In [None]:
import glob

# Collect the LibriSpeech test-clean FLAC files. The list is sorted because
# glob returns paths in arbitrary (filesystem-dependent) order, and the index
# slices taken from flac_files must select the same files on every run.
flac_files = sorted(glob.glob("/content/LibriSpeech/test-clean/**/*.flac", recursive=True))
files_subset = flac_files[:40]

In [None]:
from transformers import AutoProcessor, WavLMModel
import torch

# Load the pre-trained processor and WavLM model from one shared checkpoint name
WAVLM_CHECKPOINT = "patrickvonplaten/wavlm-libri-clean-100h-base-plus"
processor = AutoProcessor.from_pretrained(WAVLM_CHECKPOINT)
model = WavLMModel.from_pretrained(WAVLM_CHECKPOINT)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of WavLMModel were not initialized from the model checkpoint at patrickvonplaten/wavlm-libri-clean-100h-base-plus and are newly initialized: ['wavlm.encoder.pos_conv_embed.conv.parametrizations.weight.original0', 'wavlm.encoder.pos_conv_embed.conv.parametrizations.weight.original1']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Function to get hidden states
def get_hidden_states(audio_file_path):
    """
    Returns the model's last hidden states for the audio file at
    <audio_file_path>.

    The file is read with read_audio, prepared by the global `processor`
    (declared sampling rate 16000) and passed through the global `model`
    without gradient tracking.
    """
    waveform = read_audio(audio_file_path)
    model_inputs = processor(waveform, sampling_rate=16000, return_tensors="pt")
    # inference only: no autograd graph needed
    with torch.no_grad():
        return model(**model_inputs).last_hidden_state

In [None]:
import numpy as np

# Gather the hidden states of every file in files_subset, one 2D array per file
all_flattened_arrays = []

for file_path in files_subset:
    # hidden states as a NumPy array
    hidden_states_np = get_hidden_states(file_path).detach().cpu().numpy()
    # collapse all leading axes so each row is one feature vector
    feature_dim = hidden_states_np.shape[-1]
    all_flattened_arrays.append(hidden_states_np.reshape(-1, feature_dim))

# Stack the per-file arrays vertically into a single 2D NumPy array
all_flattened_arrays_stacked = np.vstack(all_flattened_arrays)

# Show the stacked 2D array
print("2D Output:")
print(all_flattened_arrays_stacked)

2D Output:
[[ 0.21160552 -0.36747366  0.2591602  ...  0.3991834  -0.11817186
  -0.64794457]
 [ 0.19422345 -0.33622998  0.20106186 ...  0.3921625  -0.10909397
  -0.4404112 ]
 [ 0.19320723 -0.37407845  0.2569126  ...  0.3850337  -0.11940678
  -0.649481  ]
 ...
 [ 0.29545727 -0.29760972  0.20333748 ...  0.42432004 -0.0532552
  -0.6705804 ]
 [ 0.2995834  -0.2862816   0.20226032 ...  0.43538797 -0.03722861
  -0.71321124]
 [ 0.29365996 -0.29113418  0.20840597 ...  0.4357684  -0.02600184
  -0.7974921 ]]


In [None]:
# Shape of the stacked feature matrix: (rows stacked over all files, feature dimension)
print(all_flattened_arrays_stacked.shape)

(16340, 768)


In [None]:
from sklearn.cluster import KMeans

In [None]:
# Fit a 1024-cluster k-means codebook on the stacked feature matrix.
# random_state is fixed so the centroids (and every cluster index derived from
# them) are reproducible across kernel restarts.
kmeans = KMeans(n_clusters=1024, random_state=0)
kmeans.fit(all_flattened_arrays_stacked)

# Get the cluster centroids
cluster_centroids = kmeans.cluster_centers_

# Persist the codebook next to the notebook
np.save('cluster_centroids.npy', cluster_centroids)
# Print the cluster centroids
print("Cluster Centroids:")
print(cluster_centroids)

Cluster Centroids:
[[ 0.21127245 -0.0455268  -0.06991363 ... -0.08187881 -0.09550318
  -0.892311  ]
 [ 0.23658565 -0.37759066  0.23056588 ...  0.39844608 -0.12076151
  -0.3566254 ]
 [ 0.28506923 -0.02195954  0.14926395 ... -0.03776896 -0.12490071
  -0.05496407]
 ...
 [ 0.21568468 -0.11676767  0.19417864 ...  0.11633828 -0.10565186
   0.24380258]
 [ 0.05629434 -0.5145386   0.1717377  ...  0.3568815  -0.00949694
   0.02305865]
 [ 0.21603067 -0.39209893  0.26516452 ...  0.32841694 -0.13919258
  -0.32482284]]


In [None]:
# Shape of the fitted codebook: (number of clusters, feature dimension)
print(cluster_centroids.shape)

(1024, 768)


## Functions to assign cluster indices

In [None]:
def get_hidden_states_clean(audio_file_path):
    """
    Returns the model's last hidden states for the clean audio file at
    <audio_file_path>.

    The file is read with read_audio, prepared by the global `processor`
    (declared sampling rate 16000) and passed through the global `model`.
    """
    speech = read_audio(audio_file_path)
    input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values  # Batch size 1
    # inference only: skip building an autograd graph for every file
    with torch.no_grad():
        hidden_states = model(input_values).last_hidden_state
    return hidden_states

In [None]:
def get_hidden_states_noisy(noisy_audio):
    """
    Returns the model's last hidden states for an in-memory waveform.

    <noisy_audio> is prepared by the global `processor` (declared sampling
    rate 16000) and passed through the global `model`.
    """
    input_values = processor(noisy_audio, sampling_rate=16000, return_tensors="pt").input_values  # Batch size 1
    # inference only: skip building an autograd graph for every file
    with torch.no_grad():
        hidden_states = model(input_values).last_hidden_state
    return hidden_states

In [None]:
import numpy as np

def compare_clean_noisy_files(file_paths, kmeans_model, noise_data, snr, offset_ms):
    """
    Compares, over all <file_paths>, the cluster indices of each clean file
    with those of its noisy (and time-offset) version.

    Parameters:
        file_paths (iterable of str): audio files to evaluate
        kmeans_model: fitted clustering model used to assign cluster indices
        noise_data (tensor): noise waveform mixed into each file
        snr (int): SNR level in dB for the noisy version
        offset_ms (float): time offset in milliseconds for the noisy version
    Returns:
        tuple: (total clean states, total noisy states, total differing
        states, percentage of clean states that differ). The percentage is
        0.0 when there are no states at all (e.g. empty <file_paths>).
    """
    # Running totals over all files
    total_original_states = 0
    total_noise_states = 0
    total_diff_states = 0

    for file_path in file_paths:
        # Cluster indices of the clean file
        clean_cluster_indices = assign_clean_files_to_clusters(file_path, kmeans_model)

        # Cluster indices of the same file after adding noise and offset
        noisy_cluster_indices = assign_noisy_files_to_clusters(file_path, kmeans_model, noise_data, snr, offset_ms)

        # Per-file comparison
        num_original_states, num_noise_states, num_diff_states = compare_cluster_indices(clean_cluster_indices, noisy_cluster_indices)

        total_original_states += num_original_states
        total_noise_states += num_noise_states
        total_diff_states += num_diff_states

    # Guard against dividing by zero when nothing was processed
    if total_original_states == 0:
        total_percentage_changed = 0.0
    else:
        total_percentage_changed = (total_diff_states / total_original_states) * 100

    return total_original_states, total_noise_states, total_diff_states, total_percentage_changed

In [None]:
import os
import torch
import torchaudio
import torch.nn.functional as F
import numpy as np
from sklearn.cluster import KMeans
import numpy as np
def assign_noisy_files_to_clusters(file_path, kmeans_model, noise_data, snr, offset_ms):
    """
    Assigns cluster indices to a noisy, time-shifted version of <file_path>.

    Parameters:
        file_path (str): audio file to read
        kmeans_model: fitted clustering model providing predict()
        noise_data (tensor): noise waveform to mix in
        snr (int): SNR level in dB
        offset_ms (float): time shift in milliseconds at an assumed 16 kHz
            rate. Positive values delay the signal (zeros are prepended),
            negative values advance it (zeros are appended); the length is
            unchanged. Offsets that round to 0 samples leave it untouched.
    Returns:
        array of cluster indices, one per hidden-state vector
    """
    # Read audio data
    file_path_data = read_audio(file_path)
    # Add noise to audio data
    noisy_audio = add_noise(file_path_data, noise_data, snr)
    # Add time offset to the noisy audio
    sample_rate = 16000  # Assuming 16kHz sample rate, adjust according to your case
    offset_samples = int(sample_rate * offset_ms / 1000)
    n_samples = noisy_audio.shape[-1]
    if offset_samples > 0:
        # delay: pad in front, keep the first n_samples. An explicit length is
        # used because `[:-0]` would return an empty tensor.
        noisy_audio = F.pad(noisy_audio, (offset_samples, 0))[..., :n_samples]
    elif offset_samples < 0:
        # advance: pad at the end, keep the last n_samples
        noisy_audio = F.pad(noisy_audio, (0, -offset_samples))[..., -n_samples:]
    # Get hidden states from noisy audio
    hidden_states_new = get_hidden_states_noisy(noisy_audio)
    # Convert hidden states to NumPy array
    hidden_states_np_new = hidden_states_new.detach().cpu().numpy()
    # Flatten the 3D array to make it 2D
    flattened_array_new = hidden_states_np_new.reshape(-1, hidden_states_np_new.shape[-1])
    # Assign the units to clusters using the trained KMeans model
    cluster_indices_new_noisy = kmeans_model.predict(flattened_array_new)

    return cluster_indices_new_noisy

def assign_clean_files_to_clusters(file_path, kmeans_model):
    """
    Assigns cluster indices to the clean audio file at <file_path>.

    Parameters:
        file_path (str): audio file to process
        kmeans_model: fitted clustering model providing predict()
    Returns:
        array of cluster indices, one per hidden-state vector
    """
    # hidden states of the clean file as a NumPy array
    frame_features = get_hidden_states_clean(file_path).detach().cpu().numpy()
    # collapse all leading axes so each row is one feature vector
    frames_2d = frame_features.reshape(-1, frame_features.shape[-1])
    return kmeans_model.predict(frames_2d)

def compare_cluster_indices(cluster_indices_run1, cluster_indices_run2):
    """
    Counts how many positions differ between two cluster index sequences.

    Parameters:
        cluster_indices_run1 (array-like): reference cluster indices
        cluster_indices_run2 (array-like): cluster indices to compare
    Returns:
        tuple: (len(run1), len(run2), number of positions that differ)
    Raises:
        ValueError: if the two sequences do not have the same shape, since a
            position-by-position comparison is then undefined
    """
    run1 = np.asarray(cluster_indices_run1)
    run2 = np.asarray(cluster_indices_run2)
    # Calculate number of original and noise states
    num_original_states = len(run1)
    num_noise_states = len(run2)
    # Refuse mismatched inputs instead of letting NumPy broadcast them (or
    # collapse the comparison to one scalar) and report a meaningless count
    if run1.shape != run2.shape:
        raise ValueError(
            f"cluster index sequences differ in shape: {run1.shape} vs {run2.shape}"
        )
    # Calculate number of different states
    num_diff_states = np.sum(run1 != run2)

    return num_original_states, num_noise_states, num_diff_states

In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[:100].
# Prints the state totals and the percentage of states whose cluster changed.
new_files_subset = flac_files[:100]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 41606
Number of Noise States (2nd run): 41606
Number of Different States: 40524
Percentage of Changed States: 97.39941354612316


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[100:200].
new_files_subset = flac_files[100:200]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 34266
Number of Noise States (2nd run): 34266
Number of Different States: 32456
Percentage of Changed States: 94.71779606607133


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[200:300].
new_files_subset = flac_files[200:300]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 43385
Number of Noise States (2nd run): 43385
Number of Different States: 42475
Percentage of Changed States: 97.90250086435404


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[300:400].
new_files_subset = flac_files[300:400]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 40902
Number of Noise States (2nd run): 40902
Number of Different States: 39829
Percentage of Changed States: 97.37665639822013


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[400:500].
new_files_subset = flac_files[400:500]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 34886
Number of Noise States (2nd run): 34886
Number of Different States: 33879
Percentage of Changed States: 97.11345525425672


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[500:600].
new_files_subset = flac_files[500:600]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 24495
Number of Noise States (2nd run): 24495
Number of Different States: 23186
Percentage of Changed States: 94.65605225556236


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[600:700].
new_files_subset = flac_files[600:700]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 37885
Number of Noise States (2nd run): 37885
Number of Different States: 36421
Percentage of Changed States: 96.13567374950507


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[700:800].
new_files_subset = flac_files[700:800]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 40294
Number of Noise States (2nd run): 40294
Number of Different States: 38183
Percentage of Changed States: 94.76100660147912


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[800:900].
new_files_subset = flac_files[800:900]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 40510
Number of Noise States (2nd run): 40510
Number of Different States: 38857
Percentage of Changed States: 95.91952604295236


In [None]:
# Experiment: Babble noise, SNR 0 dB, 10 ms offset, flac_files[900:1000].
new_files_subset = flac_files[900:1000]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 39631
Number of Noise States (2nd run): 39631
Number of Different States: 38641
Percentage of Changed States: 97.50195553985517


In [None]:
# Experiment: Babble noise, SNR 5 dB, 10 ms offset, flac_files[:250].
new_files_subset = flac_files[:250]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 5
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 111015
Number of Noise States (2nd run): 111015
Number of Different States: 96565
Percentage of Changed States: 86.98374093590957


In [None]:
# Experiment: Babble noise, SNR 5 dB, 10 ms offset, flac_files[250:500].
new_files_subset = flac_files[250:500]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 5
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 100286
Number of Noise States (2nd run): 100286
Number of Different States: 89017
Percentage of Changed States: 88.76313742695889


In [None]:
# Experiment: Babble noise, SNR 5 dB, 10 ms offset, flac_files[500:750].
new_files_subset = flac_files[500:750]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 5
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 94282
Number of Noise States (2nd run): 94282
Number of Different States: 80175
Percentage of Changed States: 85.03744086888273


In [None]:
# Experiment: Babble noise, SNR 5 dB, 10 ms offset, flac_files[750:1000].
new_files_subset = flac_files[750:1000]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 5
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 88324
Number of Noise States (2nd run): 88324
Number of Different States: 75820
Percentage of Changed States: 85.84303247135546


In [None]:
# Experiment: Babble noise, SNR 10 dB, 10 ms offset, flac_files[:250].
new_files_subset = flac_files[:250]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 10
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 111015
Number of Noise States (2nd run): 111015
Number of Different States: 86005
Percentage of Changed States: 77.4715128586227


In [None]:
# Experiment: Babble noise, SNR 10 dB, 10 ms offset, flac_files[250:500].
new_files_subset = flac_files[250:500]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 10
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 100286
Number of Noise States (2nd run): 100286
Number of Different States: 77681
Percentage of Changed States: 77.4594659274475


In [None]:
# Experiment: Babble noise, SNR 10 dB, 10 ms offset, flac_files[500:750].
new_files_subset = flac_files[500:750]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 10
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 94282
Number of Noise States (2nd run): 94282
Number of Different States: 71667
Percentage of Changed States: 76.01344901465815


In [None]:
# Experiment: Babble noise, SNR 10 dB, 10 ms offset, flac_files[750:1000].
new_files_subset = flac_files[750:1000]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 10
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 88324
Number of Noise States (2nd run): 88324
Number of Different States: 68919
Percentage of Changed States: 78.02975408722432


In [None]:
# Experiment: Babble noise, SNR 15 dB, 10 ms offset, flac_files[500:750].
# NOTE(review): the output recorded for this cell reports 182606 original
# states, which equals 94282 + 88324 - the counts recorded for
# flac_files[500:750] and flac_files[750:1000] in the SNR 5/10 runs. The output
# therefore looks like it came from flac_files[500:1000]; re-run the cell or
# correct the slice. There is also no SNR 15 run for flac_files[:500].
new_files_subset = flac_files[500:750]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 15
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 182606
Number of Noise States (2nd run): 182606
Number of Different States: 133853
Percentage of Changed States: 73.30153445122285


In [None]:
# Experiment: Babble noise, SNR 20 dB, 10 ms offset, flac_files[:500].
new_files_subset = flac_files[:500]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 20
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 211301
Number of Noise States (2nd run): 211301
Number of Different States: 150098
Percentage of Changed States: 71.03515837596603


In [None]:
# Experiment: Babble noise, SNR 25 dB, 10 ms offset, flac_files[:500].
new_files_subset = flac_files[:500]
noise = '/content/Babble.wav'
noise_data = read_audio(noise)
snr = 25
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 211301
Number of Noise States (2nd run): 211301
Number of Different States: 147658
Percentage of Changed States: 69.88040757024339


In [None]:
# Experiment: Cafeteria noise, SNR 0 dB, 10 ms offset, flac_files[:500].
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 0
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 211301
Number of Noise States (2nd run): 211301
Number of Different States: 196042
Percentage of Changed States: 92.77854813749107


In [None]:
# Experiment: Cafeteria noise, SNR 5 dB, 10 ms offset, flac_files[:500].
# NOTE(review): the output recorded for this cell reports 123727 original
# states, while other recorded runs on flac_files[:500] report 211301. The
# output may be stale or produced from a different file list - re-run to confirm.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 5
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 104754
Percentage of Changed States: 84.6654327673022


In [None]:
# Experiment: Cafeteria noise, SNR 10 dB, 10 ms offset, flac_files[:500].
# NOTE(review): the output recorded for this cell reports 123727 original
# states, while other recorded runs on flac_files[:500] report 211301. The
# output may be stale or produced from a different file list - re-run to confirm.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
snr = 10
offset_ms = 10
num_original_states, num_noise_states, num_diff_states, percentage_changed_states = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 94593
Percentage of Changed States: 76.45299732475532


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Cafeteria.wav as the noise recording with snr=15.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 15  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 89559
Percentage of Changed States: 72.38436234613302


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Cafeteria.wav as the noise recording with snr=20.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 20  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 87512
Percentage of Changed States: 70.72991343845725


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Cafeteria.wav as the noise recording with snr=25.
new_files_subset = flac_files[:500]
noise = '/content/Cafeteria.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 25  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 86466
Percentage of Changed States: 69.88450378656235


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Car.wav as the noise recording with snr=0.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 0  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 87424
Percentage of Changed States: 70.65878910827871


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Car.wav as the noise recording with snr=5.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 5  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 86249
Percentage of Changed States: 69.70911765419028


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Car.wav as the noise recording with snr=10.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 10  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 85818
Percentage of Changed States: 69.36077008252039


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Car.wav as the noise recording with snr=15.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 15  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 85356
Percentage of Changed States: 68.98736734908306


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Car.wav as the noise recording with snr=20.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 20  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 85087
Percentage of Changed States: 68.76995320342367


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Car.wav as the noise recording with snr=25.
new_files_subset = flac_files[:500]
noise = '/content/Car.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 25  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 84818
Percentage of Changed States: 68.55253905776428


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Livingroom.wav as the noise recording with snr=0.
new_files_subset = flac_files[:500]
noise = '/content/Livingroom.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 0  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 104853
Percentage of Changed States: 84.74544763875306


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Livingroom.wav as the noise recording with snr=5.
new_files_subset = flac_files[:500]
noise = '/content/Livingroom.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 5  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 96048
Percentage of Changed States: 77.62897346577546


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Livingroom.wav as the noise recording with snr=10.
new_files_subset = flac_files[:500]
noise = '/content/Livingroom.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 10  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 90369
Percentage of Changed States: 73.03902947618546


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Livingroom.wav as the noise recording with snr=15.
new_files_subset = flac_files[:500]
noise = '/content/Livingroom.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 15  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 87794
Percentage of Changed States: 70.95783458743847


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Livingroom.wav as the noise recording with snr=20.
new_files_subset = flac_files[:500]
noise = '/content/Livingroom.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 20  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 86540
Percentage of Changed States: 69.9443128823943


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Livingroom.wav as the noise recording with snr=25.
new_files_subset = flac_files[:500]
noise = '/content/Livingroom.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 25  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 86111
Percentage of Changed States: 69.59758177277394


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Shopping.wav as the noise recording with snr=0.
new_files_subset = flac_files[:500]
noise = '/content/Shopping.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 0  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 107980
Percentage of Changed States: 87.27278605316545


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Shopping.wav as the noise recording with snr=5.
new_files_subset = flac_files[:500]
noise = '/content/Shopping.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 5  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 96642
Percentage of Changed States: 78.10906269448058


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Shopping.wav as the noise recording with snr=10.
new_files_subset = flac_files[:500]
noise = '/content/Shopping.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 10  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 123727
Number of Noise States (2nd run): 123727
Number of Different States: 90793
Percentage of Changed States: 73.38171943068207


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Shopping.wav as the noise recording with snr=15.
new_files_subset = flac_files[:500]
noise = '/content/Shopping.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 15  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 120799
Number of Noise States (2nd run): 120799
Number of Different States: 87993
Percentage of Changed States: 72.84249041796703


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Shopping.wav as the noise recording with snr=20.
new_files_subset = flac_files[:500]
noise = '/content/Shopping.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 20  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 120799
Number of Noise States (2nd run): 120799
Number of Different States: 86740
Percentage of Changed States: 71.80523017574649


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Shopping.wav as the noise recording with snr=25.
new_files_subset = flac_files[:500]
noise = '/content/Shopping.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 25  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 120799
Number of Noise States (2nd run): 120799
Number of Different States: 86103
Percentage of Changed States: 71.2779079297014


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Traffic.wav as the noise recording with snr=0.
new_files_subset = flac_files[:500]
noise = '/content/Traffic.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 0  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 120799
Number of Noise States (2nd run): 120799
Number of Different States: 96905
Percentage of Changed States: 80.22003493406402


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Traffic.wav as the noise recording with snr=5.
new_files_subset = flac_files[:500]
noise = '/content/Traffic.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 5  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 120799
Number of Noise States (2nd run): 120799
Number of Different States: 90105
Percentage of Changed States: 74.5908492619972


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Traffic.wav as the noise recording with snr=10.
new_files_subset = flac_files[:500]
noise = '/content/Traffic.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 10  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 120799
Number of Noise States (2nd run): 120799
Number of Different States: 87715
Percentage of Changed States: 72.61235606255019


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Traffic.wav as the noise recording with snr=15.
new_files_subset = flac_files[:500]
noise = '/content/Traffic.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 15  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

Number of Original States: 120799
Number of Noise States (2nd run): 120799
Number of Different States: 86519
Percentage of Changed States: 71.62228164140431


In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Traffic.wav as the noise recording with snr=20.
new_files_subset = flac_files[:500]
noise = '/content/Traffic.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 20  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Traffic.wav as the noise recording with snr=25.
new_files_subset = flac_files[:500]
noise = '/content/Traffic.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 25  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Train_Station.wav as the noise recording with snr=0.
new_files_subset = flac_files[:500]
noise = '/content/Train_Station.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 0  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Train_Station.wav as the noise recording with snr=5.
new_files_subset = flac_files[:500]
noise = '/content/Train_Station.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 5  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Train_Station.wav as the noise recording with snr=10.
new_files_subset = flac_files[:500]
noise = '/content/Train_Station.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 10  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Train_Station.wav as the noise recording with snr=15.
new_files_subset = flac_files[:500]
noise = '/content/Train_Station.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 15  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Train_Station.wav as the noise recording with snr=20.
new_files_subset = flac_files[:500]
noise = '/content/Train_Station.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 20  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)

In [None]:
# Run compare_clean_noisy_files on the first 500 entries of flac_files,
# using Train_Station.wav as the noise recording with snr=25.
new_files_subset = flac_files[:500]
noise = '/content/Train_Station.wav'
noise_data = read_audio(noise)
# read_audio catches every loader exception and returns None; fail here with
# the offending path instead of handing None to the comparison.
if noise_data is None:
    raise RuntimeError(f"read_audio could not load noise file: {noise}")
snr = 25  # presumably in dB -- confirm against compare_clean_noisy_files
offset_ms = 10
(
    num_original_states,
    num_noise_states,
    num_diff_states,
    percentage_changed_states,
) = compare_clean_noisy_files(new_files_subset, kmeans, noise_data, snr, offset_ms)

print("Number of Original States:", num_original_states)
print("Number of Noise States (2nd run):", num_noise_states)
print("Number of Different States:", num_diff_states)
print("Percentage of Changed States:", percentage_changed_states)