In [1]:
# Imports
import sys

sys.path.insert(0, "../src")
from main import load_database

import audeer
import audiofile as af
import os
import yaml

In [2]:
# Read the experiment configuration from the YAML file
with open("../src/experiment_parameters.yaml") as stream:
    config = yaml.safe_load(stream)

# Load the labels DataFrame described by that configuration
df_files = load_database(config)

Number of participants before filtering out too few sessions: 27; Shape of the DataFrame before filtering: (3346, 152)
Number of participants after filtering out too few sessions: 25; Shape of the DataFrame after filtering: (3304, 152)
Shape of the DataFrame after filtering prompts: (3204, 152)


In [3]:
def detect_clipping(samples, clipping_threshold, min_duration_millis, sample_rate):
    """Report whether a signal contains a sustained run of clipped samples.

    A sample counts as clipped when its absolute value is greater than or
    equal to ``clipping_threshold``. The signal is flagged when at least one
    run of consecutive clipped samples lasts ``min_duration_millis`` or longer.

    Args:
        samples: Audio samples. Either a one-dimensional sequence/array, or a
            multi-dimensional array whose last axis is time (for example a
            ``(channels, samples)`` layout); every row is checked on its own
            and runs are never joined across rows.
        clipping_threshold: Absolute amplitude at or above which a sample is
            treated as clipped.
        min_duration_millis: Minimum length of a clipped run in milliseconds.
            It is converted to a whole number of samples by truncation; a
            result below one still requires a single clipped sample.
        sample_rate: Sampling rate in samples per second, used for the
            millisecond-to-sample conversion.

    Returns:
        bool: ``True`` if any row contains a long enough clipped run,
        ``False`` otherwise (including for empty input).
    """
    # Imported locally because the notebook's import cell does not load numpy.
    import numpy as np

    def convert_time_ms_to_samples(time_ms, rate):
        """Convert a duration in milliseconds to a truncated sample count."""
        return int((time_ms / 1000.0) * rate)

    signal = samples if isinstance(samples, np.ndarray) else np.asarray(list(samples))
    if signal.size == 0:
        return False

    # A run always consists of at least one clipped sample, even when the
    # requested duration truncates to zero samples.
    required_run = max(
        1, convert_time_ms_to_samples(min_duration_millis, sample_rate)
    )

    # One boolean row per channel; a mono signal becomes a single row.
    clipped = np.atleast_1d(np.abs(signal) >= clipping_threshold)
    clipped = clipped.reshape(-1, clipped.shape[-1])

    # Pad every row with False on both sides so that each run has a rising and
    # a falling edge and runs cannot leak into the neighbouring row once the
    # rows are flattened.
    padded = np.pad(clipped, ((0, 0), (1, 1)), constant_values=False).ravel()
    edges = np.flatnonzero(padded[1:] != padded[:-1])

    # Edges alternate start, end, start, end, ... so pairwise differences are
    # the run lengths.
    run_lengths = edges[1::2] - edges[::2]
    return bool(np.any(run_lengths >= required_run))

In [4]:
# Sampling rate every recording is expected to have
sample_rate = 16000

# Settings handed to detect_clipping: amplitude threshold and minimum run length (ms)
clipping_threshold = 0.99
min_duration_millis = 0.5

# "file" level of the index, fetched once and reused for the final mapping
file_index = df_files.index.get_level_values("file")
unique_files = file_index.unique()

# Clipping flag per file; files with an unexpected sample rate get no entry
clipping_results = {}

for file in audeer.progress_bar(unique_files, desc="Processing prompts"):
    samples, file_sample_rate = af.read(file)

    if file_sample_rate == sample_rate:
        # Rate is as expected: run the detector and remember its verdict
        is_clipping = detect_clipping(
            samples=samples,
            clipping_threshold=clipping_threshold,
            min_duration_millis=min_duration_millis,
            sample_rate=sample_rate,
        )
        clipping_results[file] = is_clipping
    else:
        # Unexpected rate: report it and leave the file out of the results
        print(
            f"Warning: Sample rate of file {file} is {file_sample_rate}, but expected {sample_rate}."
        )

# Attach the per-file flags to df_files as the "clipping" column
df_files["clipping"] = file_index.map(clipping_results)

                                                                                                    

In [5]:
# Tally the clipping flags; dropna=False keeps rows without a flag visible
df_files.loc[:, "clipping"].value_counts(dropna=False)

clipping
False    3187
True       17
Name: count, dtype: int64

In [6]:
# Print the path of every row flagged as clipped; the explicit comparison
# with True keeps rows without a flag out of the selection
clipped_mask = df_files["clipping"].eq(True)
print(df_files.loc[clipped_mask].index.get_level_values("file").tolist())

['/data/share/aisoundlab-mental_wellbeing_at_sea/data_mwas_processed-final_data/raw/pilot-mental_wellbeing_at_sea/data/13547a-32ceb67/384/080318c8-6933-4b48-a018-aba97374ab01.wav', '/data/share/aisoundlab-mental_wellbeing_at_sea/data_mwas_processed-final_data/raw/pilot-mental_wellbeing_at_sea/data/13547a-32ceb67/384/44829b1b-36ff-4e6e-9705-caa179ed4fe5.wav', '/data/share/aisoundlab-mental_wellbeing_at_sea/data_mwas_processed-final_data/raw/pilot-mental_wellbeing_at_sea/data/13547a-32ceb67/384/44b68a4b-e9cb-4811-93e1-3a200bbb5a41.wav', '/data/share/aisoundlab-mental_wellbeing_at_sea/data_mwas_processed-final_data/raw/pilot-mental_wellbeing_at_sea/data/13547a-32ceb67/384/5341738b-9901-4b5b-babe-8be602808992.wav', '/data/share/aisoundlab-mental_wellbeing_at_sea/data_mwas_processed-final_data/raw/pilot-mental_wellbeing_at_sea/data/13547a-32ceb67/384/ad5e1c7a-aea3-4230-8dbf-eddca615aaf7.wav', '/data/share/aisoundlab-mental_wellbeing_at_sea/data_mwas_processed-final_data/raw/pilot-mental_wel

In [7]:
# Number of rows whose clipping flag is True
len(df_files.loc[df_files["clipping"].eq(True)].index.get_level_values("file"))

17

## Clipping in denoised files

In [8]:
# Point the configuration at the denoised data with dithering.
# NOTE: this overwrites config["database"]["path_data"] in place, so any later
# load_database(config) call reads from this location as well.
config["database"]["path_data"] = (
    "/data/share/aisoundlab-mental_wellbeing_at_sea/data_mwas_processed-final_data/facebook_denoiser-master64-converted_int16_dithering/pilot-mental_wellbeing_at_sea"
)

# Load the denoised data
df_files_facebook_converted_dither = load_database(config)

Number of participants before filtering out too few sessions: 27; Shape of the DataFrame before filtering: (3346, 152)
Number of participants after filtering out too few sessions: 25; Shape of the DataFrame after filtering: (3304, 152)
Shape of the DataFrame after filtering prompts: (3204, 152)


In [9]:
# Sampling rate every recording is expected to have
sample_rate = 16000

# Settings handed to detect_clipping: amplitude threshold and minimum run length (ms)
clipping_threshold = 0.99
min_duration_millis = 0.5

# "file" level of the index, fetched once and reused for the final mapping
file_index_denoised = df_files_facebook_converted_dither.index.get_level_values(
    "file"
)
unique_files_denoised = file_index_denoised.unique()

# Clipping flag per file; files with an unexpected sample rate get no entry
clipping_results_denoised = {}

for file in audeer.progress_bar(unique_files_denoised, desc="Processing prompts"):
    samples, file_sample_rate = af.read(file)

    if file_sample_rate == sample_rate:
        # Rate is as expected: run the detector and remember its verdict
        is_clipping = detect_clipping(
            samples=samples,
            clipping_threshold=clipping_threshold,
            min_duration_millis=min_duration_millis,
            sample_rate=sample_rate,
        )
        clipping_results_denoised[file] = is_clipping
    else:
        # Unexpected rate: report it and leave the file out of the results
        print(
            f"Warning: Sample rate of file {file} is {file_sample_rate}, but expected {sample_rate}."
        )

# Attach the per-file flags to df_files_facebook_converted_dither as the "clipping" column
df_files_facebook_converted_dither["clipping"] = file_index_denoised.map(
    clipping_results_denoised
)

                                                                                                    

In [10]:
# Tally the clipping flags; dropna=False keeps rows without a flag visible
df_files_facebook_converted_dither.loc[:, "clipping"].value_counts(dropna=False)

clipping
False    3204
Name: count, dtype: int64

In [11]:
# Print the path of every denoised row flagged as clipped; the explicit
# comparison with True keeps rows without a flag out of the selection
clipped_mask_denoised = df_files_facebook_converted_dither["clipping"].eq(True)
print(
    df_files_facebook_converted_dither.loc[clipped_mask_denoised]
    .index.get_level_values("file")
    .tolist()
)

[]


In [12]:
# Number of denoised rows whose clipping flag is True
len(
    df_files_facebook_converted_dither.loc[
        df_files_facebook_converted_dither["clipping"].eq(True)
    ].index.get_level_values("file")
)

0