# Preprocessing
### Imports

In [1]:
#Set Dir 
import sys, os
sys.path.append(os.path.abspath('..'))

# Torch
import torch
from torch.utils.data import DataLoader, Dataset
# Utils
import numpy as np
from numpy import ndarray
import logging
# Base Scripts
from Libraries.Utils import *
from Conf import *

### Config

In [None]:
# --- Tunable settings for this preprocessing run ---------------------------
# Base name (without extension) of the .npy file written by the saving cell.
training_data_name: str = "training_1280"
# Number of spectrogram samples kept in the final training array.
n_samples: int = 1280
# When True, a subset of the samples is passed through add_noise().
noise: bool = False
# Fraction of the samples that receive noise (only used when `noise` is True).
noise_percentage: float = 0.2
# Forwarded to split_audiofile(); presumably controls how strongly consecutive
# splits overlap — TODO confirm the unit against Libraries.Utils.
overlap: int = 3

# --- Logging ----------------------------------------------------------------
logging_level: int = LIGHT_DEBUG
logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s')
# basicConfig() does nothing when the root logger already has handlers (e.g.
# when this cell is re-run with a different level, or another library set up
# logging first), so apply the level explicitly as well.
logging.getLogger().setLevel(logging_level)
logger: logging.Logger = logging.getLogger(__name__)

### Processing

In [5]:
# Build the training set: convert audio files into normalised spectrograms
# until `n_samples` examples are collected, optionally add noise to a fraction
# of them, then bring the array into the shape expected by the VAE.

# NOTE(review): the `[:1]` slice restricts processing to the first file found,
# so the sample-budget loop below can never reach a second file. Drop the
# slice if further files should be used when the first one is too short.
filenames = get_filenames_from_folder(DATA_PATH, "wav")[:1]
if len(filenames) == 0:
    # Fail early with a clear message instead of the opaque np.vstack error.
    raise ValueError(f"No .wav files found in {DATA_PATH}")

spectrogram_batches: list = []
remaining_samples: int = n_samples
for filename in filenames:
    audio = load_audio_file(os.path.join(DATA_PATH, filename), SAMPLE_RATE, True)
    splits = split_audiofile(audio, TIME_FRAME_S, SAMPLE_RATE, overlap)
    spectrograms = audio_splits_to_spectograms(splits, LEN_FFT, LEN_HOP)
    spectrograms = normalize(spectrograms)
    # Keep only as many samples as are still needed, so surplus splits of the
    # last file are neither stacked nor considered for noise below.
    spectrograms = spectrograms[:remaining_samples]
    spectrogram_batches.append(spectrograms)
    remaining_samples -= spectrograms.shape[0]
    if remaining_samples <= 0:
        break

data: ndarray = np.vstack(spectrogram_batches)
if data.shape[0] < n_samples:
    logger.warning(f"Only {data.shape[0]} of {n_samples} requested samples could be collected")

if noise:
    # The noisy subset is drawn from the samples that are actually kept, so
    # `noise_percentage` describes the share of noisy samples in the result.
    # NOTE(review): the selection is unseeded and therefore differs per run.
    n_noisy: int = int(data.shape[0] * noise_percentage)
    noisy_indices: ndarray = np.random.choice(data.shape[0], size=n_noisy, replace=False)
    data[noisy_indices, ...] = add_noise(data[noisy_indices, ...])

# The trailing slice is kept as a safeguard; it is a no-op as long as
# dimension_for_VAE() preserves the sample axis.
data = dimension_for_VAE(data)[:n_samples]
logger.info(f"Processed data of shape: {data.shape}")

2025-02-23 13:45:36,408 - LIGHT_DEBUG - Got filenames ['Am Bach 2024.wav', 'Cutting Mixes mix.wav', 'DA2407_ADO.wav'] from ../Data
2025-02-23 13:45:50,196 - LIGHT_DEBUG - Loaded audio form ../Data\Am Bach 2024.wav of dimensions: (333370680,), sr: 44100
2025-02-23 13:45:51,228 - LIGHT_DEBUG - Split audio to: (1511, 352800)
2025-02-23 13:45:51,435 - LIGHT_DEBUG - Started STFT on splits


2025-02-23 13:47:05,000 - LIGHT_DEBUG - Processed Splits: 1510


2025-02-23 13:48:01,804 - LIGHT_DEBUG - Created spectograms of splits: (1511, 1025, 690)
2025-02-23 13:50:15,919 - LIGHT_DEBUG - Normalized to range: [0,1]
2025-02-23 13:50:26,550 - INFO - Processed data of shape: (1280, 1024, 672)


### Saving

In [6]:
# Write the processed array to `<DATA_PATH>/<training_data_name>.npy`.
output_path: str = f"{DATA_PATH}/{training_data_name}.npy"
save_training_data(data, output_path)

2025-02-23 14:15:54,098 - LIGHT_DEBUG - Saved ndarray to:../Data/training_1280.npy
