In [1]:
from modules.PostgresDBHandler import PostgresDBHandler
import os
from joblib import Parallel, delayed
import librosa
from collections import defaultdict

In [2]:
def process_file(file_path, db_params):
    try:
        db = PostgresDBHandler(**db_params)
        db.connect()

        folder_name = os.path.basename(os.path.dirname(os.path.dirname(file_path)))
        instrument_type = folder_name.split("_")[0]
        microphone_type = os.path.basename(os.path.dirname(file_path))
        audio, sr = librosa.load(file_path, sr=None)

        # Check if the instrument exists in the database
        instrument_id = db.get_instrument_id(instrument_type)

        # Insert the audio file data into the AudioFiles table
        db.insert_audio_file(file_path, sr, len(audio), instrument_id)

        db.close()

        return {
            "instrument_type": instrument_type,
            "microphone_type": microphone_type,
            "original_file": file_path,
            "sample_rate": sr,
        }
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

In [3]:
base_dir = "./good-sounds"
dbParams = {
    "dbname": "mydatabase",
    "user": "myuser",
    "password": "mypassword",
    "host": "postgres_server",
    "port": "5432",
}
MAX_FILES_PER_INSTRUMENT = 250
n_jobs = -1

In [4]:
classes = os.listdir(os.path.join(base_dir, "sound_files"))
instrument_types = {className.split("_")[0] for className in classes}

In [5]:
instrument_types

{'bass',
 'cello',
 'clarinet',
 'flute',
 'oboe',
 'piccolo',
 'sax',
 'trumpet',
 'violin'}

In [6]:
db = PostgresDBHandler(**dbParams)
db.connect()
for instrument in instrument_types:
    if db.get_instrument_id(instrument) is None:
        db.insert_instrument(instrument, 0)
db.close()

Load files

In [None]:
db = PostgresDBHandler(**dbParams) 
db.connect()
audio_files = []
for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file.endswith(".wav") and db.check_audio_file_exists(os.path.join(root, file)) is False:
            file_path = os.path.join(root, file)
            audio_files.append(file_path)
db.close()

In [7]:
db = PostgresDBHandler(**dbParams) 
db.connect()
audio_files_by_class = defaultdict(list)

for root, dirs, files in os.walk(base_dir):
    for file in files:
        if file.endswith(".wav"):
            file_path = os.path.join(root, file)
            # Extract instrument type from the path
            # Assuming structure: ./good-sounds/sound_files/<class_name>/...
            parts = file_path.split(os.sep)
            try:
                class_name = parts[parts.index("sound_files") + 1]
                instrument_type = class_name.split("_")[0]
            except (ValueError, IndexError):
                continue  # skip if path structure is unexpected

            if len(audio_files_by_class[instrument_type]) < MAX_FILES_PER_INSTRUMENT:
                if not db.check_audio_file_exists(file_path):
                    audio_files_by_class[instrument_type].append(file_path)
db.close()

# Flatten the list
audio_files = [f for files in audio_files_by_class.values() for f in files]

In [8]:
print(f"Processing {len(audio_files)} audio files")

Processing 2159 audio files


In [9]:
Parallel(n_jobs=n_jobs)(
    delayed(process_file)(file_path, dbParams)
    for file_path in audio_files
)

[{'instrument_type': 'violin',
  'microphone_type': 'akg',
  'original_file': './good-sounds/sound_files/violin_raquel_richness/akg/0006.wav',
  'sample_rate': 48000},
 {'instrument_type': 'violin',
  'microphone_type': 'akg',
  'original_file': './good-sounds/sound_files/violin_raquel_richness/akg/0029.wav',
  'sample_rate': 48000},
 {'instrument_type': 'violin',
  'microphone_type': 'akg',
  'original_file': './good-sounds/sound_files/violin_raquel_richness/akg/0010.wav',
  'sample_rate': 48000},
 {'instrument_type': 'violin',
  'microphone_type': 'akg',
  'original_file': './good-sounds/sound_files/violin_raquel_richness/akg/0038.wav',
  'sample_rate': 48000},
 {'instrument_type': 'violin',
  'microphone_type': 'akg',
  'original_file': './good-sounds/sound_files/violin_raquel_richness/akg/0019.wav',
  'sample_rate': 48000},
 {'instrument_type': 'violin',
  'microphone_type': 'akg',
  'original_file': './good-sounds/sound_files/violin_raquel_richness/akg/0015.wav',
  'sample_rate': 