# Preprocessing
### Imports

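Python 3.13 removed the `sunau` and `aifc` modules from the standard library. On newer Python versions, the cell below installs their backport packages (`standard-sunau`, `standard-aifc`) so that audio loading keeps working.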

In [1]:
!pip install standard-sunau standard-aifc




[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
# Add the parent directory to the module search path
# (presumably so the project-local `Libraries` and `Conf` packages below can be imported — confirm)
import sys, os
sys.path.append(os.path.abspath('..'))

# Torch
import torch
from torch.utils.data import DataLoader, Dataset
# Utils
import numpy as np
from numpy import ndarray
# NOTE(review): `os` is already imported at the top of this cell; the repeat is harmless
import logging, os
# Base Scripts
# NOTE(review): OS, AudioData, NPData and LIGHT_DEBUG used in later cells have no explicit import,
# so they presumably come from this wildcard import — confirm
from Libraries.Utils import *
from Conf import conf


### Config

In [2]:
# --- Run parameters ---------------------------------------------------------
# Flag forwarded to OS().path_to_remote_path whenever a data path is resolved
remote_kernel: bool = False
# Base name of the file written under ../Data by the saving cell
training_data_name: str = "unseen_test_data"
# Segment budget consumed by the processing loop
n_samples: int = 20000

# --- Logging ----------------------------------------------------------------
# LIGHT_DEBUG is a level that is not part of the stdlib `logging` module
logging_level: int = LIGHT_DEBUG
logging.basicConfig(
    level=logging_level,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger: logging.Logger = logging.getLogger(__name__)

# --- Input discovery --------------------------------------------------------
# Resolve the source folder, then list the "wav" files it contains
folder_reader = OS()
wav_source_dir = OS().path_to_remote_path("../Data/Unknown Test Data", remote_kernel)
filenames_wav = folder_reader.get_filenames_from_folder(wav_source_dir, "wav")

2025-07-17 12:57:24,744 - LIGHT_DEBUG - Got filenames ['1989.wav', '27 Laser Mix.wav', 'Etapp Kyle - Alpha.wav', 'Luca Lozano - Prognosis WRECD2.wav', 'Transition.wav', 'Waiting For Your Love.wav'] from ../Data/Unknown Test Data


### Processing .wav

In [5]:
# Split every source .wav into fixed-length segments and stack them into one
# 2-D array that holds at most `n_samples` segments in total.

# The source folder does not change between iterations, so resolve it once.
source_dir = OS().path_to_remote_path("../Data/Unknown Test Data", remote_kernel)

segments_per_file: list = []
remaining_samples: int = n_samples
for filename in filenames_wav:
    # Stop before loading another file once the budget is used up
    # (this also covers n_samples <= 0).
    if remaining_samples <= 0:
        break
    ad = AudioData(sr=32000)
    ad.load_audio_file(os.path.join(source_dir, filename))
    # 8.192 s windows without overlap; the recorded logs show 262144 values per segment.
    segments = ad.split_audiofile(8.192, overlap_s=0, norm=True)
    # Trim the file that crosses the budget so the total never exceeds n_samples.
    # Assumes split_audiofile returns a 2-D ndarray (segments x values), as the logged shapes suggest.
    segments = segments[:remaining_samples]
    segments_per_file.append(segments)
    remaining_samples -= segments.shape[0]

# np.vstack([]) would also raise ValueError, but with an opaque message.
if not segments_per_file:
    raise ValueError(
        "No audio segments collected: check that the source folder contains .wav files and that n_samples > 0"
    )

data: ndarray = np.vstack(segments_per_file)
# NOTE(review): the recorded output contains a runtime warning raised from the min-max
# scaling expression `(file - min_file) / (max_file - min_file)` — possibly a zero value
# range. Verify that `data` contains no NaN/inf values before using it downstream.
data = AudioData().normalize_filewise(data, -1, 1)
logger.info(f"Processed data of shape: {data.shape}")

2025-07-17 12:59:14,280 - LIGHT_DEBUG - Loaded audio from ../Data/Unknown Test Data\1989.wav of dimensions: (14663123,), sr: 32000
2025-07-17 12:59:14,640 - LIGHT_DEBUG - Normalized to range: [-1,1]
2025-07-17 12:59:14,656 - LIGHT_DEBUG - Split audio to: (56, 262144)
2025-07-17 12:59:16,031 - LIGHT_DEBUG - Loaded audio from ../Data/Unknown Test Data\27 Laser Mix.wav of dimensions: (12156854,), sr: 32000
2025-07-17 12:59:16,311 - LIGHT_DEBUG - Normalized to range: [-1,1]
2025-07-17 12:59:16,320 - LIGHT_DEBUG - Split audio to: (47, 262144)
2025-07-17 12:59:17,888 - LIGHT_DEBUG - Loaded audio from ../Data/Unknown Test Data\Etapp Kyle - Alpha.wav of dimensions: (13760331,), sr: 32000
  scaled_file: ndarray = (file - min_file) / (max_file - min_file)
2025-07-17 12:59:18,130 - LIGHT_DEBUG - Normalized to range: [-1,1]
2025-07-17 12:59:18,138 - LIGHT_DEBUG - Split audio to: (53, 262144)
2025-07-17 12:59:19,843 - LIGHT_DEBUG - Loaded audio from ../Data/Unknown Test Data\Luca Lozano - Prognosis

### Saving

In [6]:
# Wrap the processed array, resolve the output location under ../Data, then write it.
# The recorded log of this cell shows the result being saved as a .npy file.
np_data = NPData(data)
save_target = OS().path_to_remote_path("../Data/{}".format(training_data_name), remote_kernel)
np_data.save_training_data(save_target)

2025-07-17 12:59:40,211 - LIGHT_DEBUG - Saved ndarray to:../Data/unseen_test_data.npy
