# Preprocessing
### Imports

In [1]:
# Add the parent directory to sys.path so the project-local modules below can be imported
import sys, os
sys.path.append(os.path.abspath('..'))

# Torch
import torch
from torch.utils.data import DataLoader, Dataset
# Utils
import numpy as np
from numpy import ndarray
import logging
# Base Scripts
from Libraries.Utils import *
from Conf import *

### Config

In [None]:
# User-tunable settings for this preprocessing run.
training_data_name: str = "training_640"  # file stem of the .npy file written by the Saving cell
noise: bool = True  # when True, a random subset of the samples gets noise added
noise_percentage: float = 0.2  # fraction (0-1) of samples selected for noise
overlap: int = 3  # overlap argument forwarded to split_audiofile
random_seed: int = 42  # seed for NumPy's global random generator

# Seed once, up front: the noisy subset is drawn with np.random.choice, so
# without a fixed seed every "Restart & Run All" produces a different dataset.
# NOTE(review): add_noise presumably draws from the same global generator - confirm.
np.random.seed(random_seed)

# LIGHT_DEBUG is not a standard logging level; it comes from the star imports above.
logging_level: int = LIGHT_DEBUG
logging.basicConfig(level=logging_level, format='%(asctime)s - %(levelname)s - %(message)s')
logger: logging.Logger = logging.getLogger(__name__)

### Processing

In [None]:
def _spectrograms_from_file(path: str) -> ndarray:
    """Load one audio file and return its normalised split spectrograms.

    Stages: load_audio_file -> split_audiofile -> audio_splits_to_spectograms
    -> normalize. Each intermediate is dropped as soon as the next stage has
    consumed it, so only one large array per stage stays referenced.

    Args:
        path: Path of the audio file to process.

    Returns:
        The value produced by normalize() for this file's spectrograms.
    """
    # The third positional argument is passed as True; its meaning is defined by load_audio_file.
    audio = load_audio_file(path, SAMPLE_RATE, True)
    splits = split_audiofile(audio, TIME_FRAME_S, SAMPLE_RATE, overlap)
    del audio
    spectrograms = audio_splits_to_spectograms(splits, LEN_FFT, LEN_HOP)
    del splits
    return normalize(spectrograms)


# NOTE: the [:1] slice restricts processing to the first file found.
filenames = get_filenames_from_folder(DATA_PATH, "wav")[:1]
if not filenames:
    # Fail here with a clear message instead of letting np.vstack reject an empty list.
    raise ValueError(f"No wav files found in {DATA_PATH}")

per_file: list = [
    _spectrograms_from_file(os.path.join(DATA_PATH, filename))
    for filename in filenames
]
# Stack the per-file results along the first axis into one array.
data: ndarray = np.vstack(per_file)
del per_file  # the stacked copy is all that is needed from here on

if noise:
    n_noisy: int = int(data.shape[0] * noise_percentage)
    # Skip the call when the subset is empty so add_noise never receives a zero-length batch.
    if n_noisy > 0:
        noisy_indices: ndarray = np.random.choice(data.shape[0], size=n_noisy, replace=False)
        data[noisy_indices, ...] = add_noise(data[noisy_indices, ...])

# Keep at most the first 640 entries - presumably the "640" in the default
# training_data_name; verify before changing either value.
data = dimension_for_VAE(data)[:640]
logger.info(f"Processed data of shape: {data.shape}")

2025-02-23 12:51:00,051 - LIGHT_DEBUG - Got filenames ['Am Bach 2024.wav', 'Cutting Mixes mix.wav', 'DA2407_ADO.wav'] from ../Data
2025-02-23 12:51:31,731 - LIGHT_DEBUG - Loaded audio form ../Data\Am Bach 2024.wav of dimensions: (333370680,), sr: 44100
2025-02-23 12:51:32,627 - LIGHT_DEBUG - Split audio to: (1511, 352800)
2025-02-23 12:51:32,824 - LIGHT_DEBUG - Started STFT on splits


2025-02-23 12:52:21,000 - LIGHT_DEBUG - Processed Splits: 1510


2025-02-23 12:52:52,091 - LIGHT_DEBUG - Created spectograms of splits: (1511, 129, 5513)
2025-02-23 12:53:57,668 - LIGHT_DEBUG - Normalized to range: [0,1]
2025-02-23 12:53:58,925 - LIGHT_DEBUG - Started STFT on splits


2025-02-23 12:55:12,000 - LIGHT_DEBUG - Processed Splits: 1510


2025-02-23 12:55:28,067 - LIGHT_DEBUG - Created spectograms of splits: (1511, 513, 1379)
2025-02-23 12:56:29,338 - LIGHT_DEBUG - Normalized to range: [0,1]
2025-02-23 12:56:30,494 - LIGHT_DEBUG - Started STFT on splits


2025-02-23 12:57:51,000 - LIGHT_DEBUG - Processed Splits: 1510


2025-02-23 12:58:06,375 - LIGHT_DEBUG - Created spectograms of splits: (1511, 2049, 345)
2025-02-23 12:58:45,050 - LIGHT_DEBUG - Normalized to range: [0,1]


ValueError: all input arrays must have the same shape

### Saving

In [15]:
# Write the processed array to "<DATA_PATH>/<training_data_name>.npy".
output_path: str = f"{DATA_PATH}/{training_data_name}.npy"
save_training_data(data, output_path)

2025-02-22 12:07:58,523 - LIGHT_DEBUG - Saved ndarray to:../Data/training_640.npy
