#### Set directories (edit both paths before running)

In [2]:
# Folder that holds the datasets; later cells look for a "USM" sub-folder inside it.
data_directory = "path/to/data"
# Folder that later cells switch into with os.chdir before importing project code.
directory = "path/to/sens-sensor"

# Echo both settings so a wrong path is spotted before any file is touched.
print(
    f"Data directory is {data_directory}",
    f"Working directory is {directory}",
    sep="\n",
)

Data directory is data
Working directory is /Users/amaiasagastimartinez/Desktop/SENS-Soundlights/code/sens-sensor


# USM dataset generation - preparing original USM

USM includes 24k polyphonic stereo soundscapes, each 5 seconds long, composed of sounds taken from the FSD50k dataset. Once it has been downloaded (https://github.com/jakobabesser/USM), this notebook prepares it as follows:

- **Dataset organisation and content:**
  
  The dataset is split into 3 folders or sets: /train (20k), /val (2k), /eval (2k)

  Each of these folders includes not only the mixed polyphonic audios, named ```number_mix.wav```, but also the monophonic stems, named ```number_mix_stem_id.wav```. The stems are not needed, so we remove them in this notebook.

- **Dataset sound classes:**

  The USM dataset includes 26 sound classes:

  airplane, alarm, birds, bus, car, cheering, church bell, dogs, drilling, glass break, gunshot, hammer, helicopter, jackhammer, lawn mower, motorcycle, music, rain, sawing, scream, siren, speech, thunderstorm, train, truck, wind

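  These are given in .npy files associated with each polyphonic audio, in the form of a binary vector of length 26. However, we do not need that many classes, so we apply the following mapping and cleaning process (classes marked "none" are discarded).

  Note: the mapping actually applied is `USM_sound_assigns`, imported from `development.lib.auxiliars`. The outputs recorded at the end of this notebook show "lawn mower" mapped to construction and "train" left unmapped, which differs from the list below; check the list against `USM_sound_assigns` before relying on it.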
  
  "airplane", --> vehicles

  "alarm",  --> siren

  "birds", --> birds

  "bus", --> vehicles
  
  "car", --> vehicles

  "cheering", --> human

  "church bell", ---------none--------

  "dogs", --> dogs

  "drilling", --> construction

  "glass break",---------none--------

  "gunshot",---------none--------

  "hammer",  --> construction

  "helicopter", --> vehicles

  "jackhammer",  --> construction

  "lawn mower",---------none--------

  "motorcycle", --> vehicles

  "music", --> music

  "rain", --> nature

  "sawing", --> construction

  "scream", --> human

  "siren",  --> siren

  "speech", --> human

  "thunderstorm", --> nature

  "train", --> vehicles

  "truck", --> vehicles

  "wind", --> nature
  
  
  

In [3]:
# Standard-library and third-party imports used throughout the notebook
import os
import numpy as np
import pandas as pd

# Set the working directory
# (`directory` comes from the "set directories" cell at the top; this second
# `import os` is redundant because `os` is already imported above.)
import os
os.chdir(directory)

# Imports from this project
# NOTE(review): presumably this import only resolves because of the chdir above
# (the `development` package is looked up relative to the working directory) — confirm.
# USM_sources is indexed by position in the 26-element label vector to get a class name;
# USM_sound_assigns is indexed by that class name to get the simplified class
# (an empty string is treated below as "no equivalent").
from development.lib.auxiliars import USM_sources, USM_sound_assigns

### First, listen to a few soundscapes to get a feel for the data

In [6]:
from IPython.display import Audio, display

# Folders to preview, relative to data_directory (add 'USM/eval' or 'USM/train' to preview more)
folder_paths = ['USM/val']
# How many soundscapes to preview per folder
n_previews = 5

for folder_path in folder_paths:
    # Complete path
    folder_path = os.path.join(data_directory, folder_path)

    # Keep only the label files whose name starts with a numeric index.
    # Filtering BEFORE sorting/slicing matters for two reasons:
    #   * stray files such as ".DS_Store" have no numeric prefix and would make
    #     int(...) in the sort key raise ValueError;
    #   * the number of previews no longer depends on how many stem files are
    #     still present in the folder.
    target_files = [
        name for name in os.listdir(folder_path)
        if name.endswith("_mix_target.npy") and name.split('_')[0].isdecimal()
    ]
    # Sort numerically by index (0, 1, 2, ... rather than 0, 1, 10, ...)
    target_files.sort(key=lambda name: int(name.split('_')[0]))

    for file_name in target_files[:n_previews]:
        # Load the binary multi-label vector of this soundscape
        data = np.load(os.path.join(folder_path, file_name))
        # Get the class names where the vector value is 1
        selected_names = [name for value, name in zip(data, USM_sources) if value == 1]
        print(selected_names)
        # The mixed audio shares the numeric index of its label file
        audio_file_path = os.path.join(folder_path, file_name.split("_")[0] + "_mix.wav")
        # Display the audio player (no autoplay, so several players can be listed)
        display(Audio(audio_file_path, autoplay=False))

['dogs', 'gunshot', 'scream', 'train']


['cheering', 'sawing']


['alarm', 'bus', 'church bell']


['bus', 'hammer', 'speech']


['bus', 'scream', 'speech', 'thunderstorm']


## Clean folders

First remove redundant files. We are only interested in the mixed audios and their multi-label targets

In [12]:
# Folders to clean (relative to data_directory) and the number of soundscapes in each
folder_paths = ['USM/val', 'USM/eval', 'USM/train']
numbers=[2000,2000,20000]

# For every soundscape index we keep: the mixed audio, its original multi-label
# target, and the simplified target that the last section of this notebook
# generates (so that re-running this cleanup does not delete that output).
kept_suffixes = ("_mix.wav", "_mix_target.npy", "_mix_target_simp2.npy")

for folder_path, number in zip(folder_paths, numbers):

    # Complete path
    folder_path = os.path.join(data_directory, folder_path)

    # Set of file names to keep; a set gives constant-time membership checks,
    # which matters with tens of thousands of files per folder.
    desired_files = {
        f"{index}{suffix}" for index in range(number) for suffix in kept_suffixes
    }

    # Iterate over all entries in the directory
    for file_name in os.listdir(folder_path):
        if file_name in desired_files:
            continue
        file_path = os.path.join(folder_path, file_name)
        # os.remove only works on files; leave any sub-directory untouched
        if not os.path.isfile(file_path):
            continue
        # WARNING: this permanently deletes the file
        os.remove(file_path)
        print(f"Removed: {file_path}")

    print("Cleanup complete.")

Cleanup complete.
Cleanup complete.
Cleanup complete.


How to read and interpret the .npy label files:

In [18]:
# Locations of one example label vector and of the class-name list
example_target_path = os.path.join(data_directory, 'USM/val/0_mix_target.npy')
class_labels_path = os.path.join(data_directory, 'USM/class_labels.csv')

# Read the label vector and show it as stored on disk
data = np.load(example_target_path)
print(data)

# The CSV has no header row; squeeze() collapses the single-column frame
sources = pd.read_csv(class_labels_path, header=None).squeeze()

# Collect the class names whose position in the vector holds a 1
selected_names = []
for name, flag in zip(sources, data):
    if flag == 1:
        selected_names.append(name)
print(selected_names)



[0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0]
['dogs', 'gunshot', 'scream', 'train']


## Simplify USM dataset --> less sound sources

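We do not need 26 different sound sources. For every soundscape we map its 26-class label vector onto 8 broader classes (birds, construction, dogs, human, music, nature, siren, vehicles) and save the result as a new .npy label vector next to the original one.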

In [6]:
# Folders to process, relative to data_directory
folder_paths = ['USM/val', 'USM/eval', 'USM/train']

# Simplified classes, in the order they occupy in the output vector
simplified_sources = [
    "birds",
    "construction",
    "dogs",
    "human",
    "music",
    "nature",
    "siren",
    "vehicles",
]


def simplify_usm_labels(multiclass_vector):
    """Map a USM multi-label vector onto the simplified classes.

    Args:
        multiclass_vector: binary sequence with one entry per USM class,
            in the same order as ``USM_sources``.

    Returns:
        NumPy array with one 0/1 entry per name in ``simplified_sources``.
        An entry is 1 when at least one active USM class is assigned to it.

    Raises:
        ValueError: if ``USM_sound_assigns`` maps an active class to a name
            that is not in ``simplified_sources``. Such a name would otherwise
            silently lengthen the output vector.
    """
    simplified_vector = dict.fromkeys(simplified_sources, 0)
    for source_index, active in enumerate(multiclass_vector):
        if active != 1:
            continue
        active_source = USM_sources[source_index]  # USM active source
        assigned_source = USM_sound_assigns[active_source]  # Equivalent source
        if assigned_source == "":
            # This USM class has no simplified equivalent and is dropped
            continue
        if assigned_source not in simplified_vector:
            raise ValueError(
                f"'{active_source}' is assigned to unknown simplified class '{assigned_source}'"
            )
        simplified_vector[assigned_source] = 1
    # Dicts preserve insertion order, so values follow simplified_sources
    return np.array(list(simplified_vector.values()))


for folder_path in folder_paths:
    folder = os.path.join(data_directory, folder_path)

    # Mixed audios only, restricted to names with a numeric index so the
    # numeric sort below cannot fail on stray files (e.g. "._0_mix.wav")
    all_files = [
        f for f in os.listdir(folder)
        if f.endswith("_mix.wav") and f.split("_")[0].isdecimal()
    ]
    all_files.sort(key=lambda f: int(f.split("_")[0]))

    for file_name in all_files:
        # Extract the index from the file name (e.g., "0" from "0_mix.wav")
        index = file_name.split("_")[0]

        # Label file that belongs to this audio
        target_file_path = os.path.join(folder, f"{index}_mix_target.npy")

        # Audios without a label file are skipped
        if not os.path.exists(target_file_path):
            continue

        # Import multi-label array
        multiclass_vector = np.load(target_file_path)
        print(multiclass_vector)

        output_vector = simplify_usm_labels(multiclass_vector)
        print(output_vector)

        # Save next to the original label file. splitext only strips the final
        # extension, so a ".npy" elsewhere in the path cannot truncate it.
        target_file_path_simplified = os.path.splitext(target_file_path)[0] + "_simp2.npy"
        np.save(target_file_path_simplified, output_vector)




                

[0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0]
[0 0 1 1 0 0 0 0]
[0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]
[0 1 0 1 0 0 0 0]
[0 1 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
[0 0 0 0 0 0 1 1]
[0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
[0 1 0 1 0 0 0 1]
[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0]
[0 0 0 1 0 1 0 1]
[0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 1 1 0 0 0]
[0 0 1 1 0 1 1 1]
[0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
[0 0 0 1 0 0 1 1]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 1 0 0 0]
[0 1 0 0 0 1 0 1]
[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0]
[0 1 0 0 0 1 1 1]
[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0]
[0 0 1 0 0 1 1 0]
[0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0]
[0 0 0 1 0 1 0 1]
[1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0]
[0 0 0 1 1 1 0 1]
[0 1 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0]
[0 1 0 0 0 1 1 0]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 0 0 0 0]
[0 1 0 1 1