# Importazione e configurazione del logging

In [43]:
import os
import librosa
import soundfile as sf
import numpy as np
import sys
import logging
from concurrent.futures import ThreadPoolExecutor, as_completed
import multiprocessing
from threading import Lock

# Set up root logging so status and error messages are emitted at INFO level
# and above, each record rendered as "timestamp - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Definizione dei file da escludere

In [44]:
# Bare file names (not paths) that must be skipped when scanning the dataset:
# an OS metadata file and the two CSV metadata files.
exclude_files = {
    'metadata-Target.csv',
    'metadata-NonTarget.csv',
    '.DS_Store',
}

# Funzione per caricare e convertire file MP3

In [45]:
def load_and_convert_mp3(file_path, target_sample_rate):
    """Convert an audio file to a 16-bit PCM WAV file and delete the source.

    The audio is read with ``librosa.load`` using ``sr=target_sample_rate``
    and ``mono=True``, then written next to the source file with the same
    base name and a ``.wav`` extension.

    Args:
        file_path: Path of the file to convert (expected to end in ``.mp3``).
        target_sample_rate: Sample rate, in Hz, passed to ``librosa.load``.

    Returns:
        True when the WAV file has been written. Any error raised by the
        underlying libraries or by the filesystem propagates to the caller.
    """
    y, sr = librosa.load(file_path, sr=target_sample_rate, mono=True)

    # Swap only the extension. A plain str.replace(".mp3", ".wav") would also
    # rewrite ".mp3" appearing in a directory name or in the middle of the
    # file name, sending the output to the wrong location.
    base_path, _ = os.path.splitext(file_path)
    wav_file_path = base_path + ".wav"

    sf.write(wav_file_path, y, sr, subtype='PCM_16')

    # Never delete the file that was just written: if the input already had a
    # ".wav" extension, source and destination are the same path.
    if wav_file_path != file_path:
        os.remove(file_path)
    return True

# Funzione per caricare file .wav e standardizzare frequenza di campionamento, canali (mono) e profondità di bit

In [46]:
def load_and_standardize(file_path, target_sample_rate):
    """Rewrite an audio file in place as 16-bit PCM.

    The file is read with ``librosa.load`` (``sr=target_sample_rate``,
    ``mono=True``) and the loaded samples are written back to the same path
    with subtype ``PCM_16``.

    Args:
        file_path: Path of the audio file to overwrite.
        target_sample_rate: Sample rate, in Hz, passed to ``librosa.load``.

    Returns:
        Always True; failures surface as exceptions from the libraries.
    """
    samples, rate = librosa.load(
        file_path,
        mono=True,
        sr=target_sample_rate,
    )
    # The source file is overwritten with the standardized audio.
    sf.write(file_path, samples, rate, subtype='PCM_16')
    return True

# Campionamento dei file

In [47]:
def process_file(file_path, target_sample_rate=86400):
    """Standardize one audio file, converting it to WAV first if it is an MP3.

    Paths ending in ``.mp3`` are handed to ``load_and_convert_mp3``; every
    other path is handed to ``load_and_standardize``.

    Args:
        file_path: Path of the file to process.
        target_sample_rate: Sample rate, in Hz, forwarded to the loader. The
            default keeps the value that used to be hard-coded here.

    Returns:
        The loader's return value (True) on success, or False if any
        exception was raised; the exception is logged, never propagated, so
        one bad file cannot abort a batch run.
    """
    try:
        if file_path.endswith(".mp3"):
            return load_and_convert_mp3(file_path, target_sample_rate)
        return load_and_standardize(file_path, target_sample_rate)
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of the pipeline.
        logging.error(f"Error processing file {file_path}: {e}")
    return False

# Definizione della funzione che conta i file nel dataset

In [48]:
def count_files(directory, exclude_files):
    """Count the files under ``directory`` whose names are not excluded.

    The whole tree is traversed with ``os.walk``; a file is skipped when its
    bare name (not its path) is contained in ``exclude_files``.

    Args:
        directory: Root directory to scan recursively.
        exclude_files: Container of file names to ignore.

    Returns:
        The number of non-excluded files found (0 if nothing is found).
    """
    kept = 0
    for _root, _dirs, names in os.walk(directory):
        # Each True contributes 1 to the sum.
        kept += sum(name not in exclude_files for name in names)
    return kept

# Caricamento ed elaborazione parallela dei file audio

In [49]:
def _run_batch(executor, batch):
    """Submit one batch of paths and yield each result as its task finishes."""
    futures = [executor.submit(process_file, path) for path in batch]
    for future in as_completed(futures):
        yield future.result()


def lettura_audio(directories):
    """Process every non-excluded file under ``directories`` in parallel.

    Each directory tree is walked once to collect the files whose names are
    not in the module-level ``exclude_files``. The files are then passed to
    ``process_file`` on a thread pool sized to half of the available cores
    (at least one thread). A progress percentage is written to stdout.

    Args:
        directories: Iterable of root directories to scan recursively.

    Returns:
        The number of files for which ``process_file`` returned a truthy
        value (0 when no file was found).
    """
    directories = list(directories)

    # Collect the paths in a single walk so that the progress total always
    # matches the work that is actually submitted.
    file_paths = []
    for directory in directories:
        for root, _, files in os.walk(directory):
            for file_name in files:
                if file_name not in exclude_files:
                    file_paths.append(os.path.join(root, file_name))

    total_files = len(file_paths)
    if total_files == 0:
        # A missing or empty dataset folder would otherwise be a silent no-op.
        logging.warning(
            "Nessun file da elaborare in: %s",
            ", ".join(str(directory) for directory in directories),
        )
        return 0

    # Use half of the available cores, but never fewer than one thread.
    num_cores = multiprocessing.cpu_count()
    num_threads = max(1, num_cores // 2)
    logging.info(f"Numero di core disponibili: {num_cores}, utilizzando {num_threads} thread")

    # Keep at most two tasks per thread queued; each batch is fully drained
    # before the next one is submitted.
    batch_size = num_threads * 2
    processed = 0
    succeeded = 0

    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        for start in range(0, total_files, batch_size):
            batch = file_paths[start:start + batch_size]
            for ok in _run_batch(executor, batch):
                processed += 1
                if ok:
                    succeeded += 1
                # Progress tracks finished files (successful or not) so that
                # it reaches 100% even when some files fail.
                progress = (processed / total_files) * 100
                sys.stdout.write(f"\rProgresso: {progress:.2f}%")
                sys.stdout.flush()

    sys.stdout.write('\n')

    failed = total_files - succeeded
    if failed:
        logging.warning(
            "%d file su %d non sono stati elaborati correttamente",
            failed,
            total_files,
        )
    return succeeded


# Main

In [50]:
def main():
    """Run the audio pipeline on the "Target" and "Non-Target" dataset folders.

    The folders are resolved inside a "Dataset" directory taken relative to
    the current working directory, then handed to ``lettura_audio``.
    """
    dataset_root = os.path.abspath("Dataset")
    class_dirs = []
    for class_name in ("Target", "Non-Target"):
        class_dirs.append(os.path.join(dataset_root, class_name))
    lettura_audio(class_dirs)

In [None]:
# Entry point of the notebook: process the dataset folders resolved by main().
main()