In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import librosa
import librosa.display
import pickle
import random

In [None]:
# Resolve the FSC22 dataset folders relative to the current working directory.
dataset_dir = os.path.join(os.getcwd(), 'datasets')
fsc22_dataset_dir = os.path.join(dataset_dir, 'fsc22')

# Metadata CSV and the folder that holds the audio files it lists.
FSC22_metadata = os.path.join(fsc22_dataset_dir, 'FSC22.csv')
base_data_path = os.path.join(fsc22_dataset_dir, 'wav44')

meta_data = pd.read_csv(FSC22_metadata)

# Swap each bare file name in the 'filename' column for its path under
# base_data_path, so later cells can open the files directly.
full_path = meta_data['filename'].map(
    lambda name: os.path.join(base_data_path, name)
)
meta_data = meta_data.assign(filename=full_path)

# Preview the first rows as the cell output.
meta_data.head()

In [None]:
# Count how many rows carry each 'target' value and show the counts as bars.
category_group = meta_data['target'].value_counts()
plot = category_group.plot(
    kind='bar',
    figsize=(15, 5),
    title="Number of Audio Samples per Category",
)
plot.set_ylabel("Number of Samples")
# The trailing semicolon keeps the notebook from echoing the Text object.
plot.set_xlabel("Category");

In [None]:
# Fixed clip length, in samples, that every waveform is padded or cropped to.
# The factor 20000 equals the `sr=20000` passed to librosa.load in the loading
# cell, so this presumably stands for 5 seconds of audio -- keep both in sync.
input_length = 20000 * 5

def random_crop(sound, size):
    """Return a randomly positioned contiguous window of ``size`` samples.

    The start offset is drawn from the module-level ``random`` generator, so
    the result is only reproducible if the caller seeds that generator first.

    Args:
        sound: Sliceable sequence of samples (for example a 1-D NumPy array).
        size: Number of samples to keep; must not exceed ``len(sound)``.

    Returns:
        ``sound[start:start + size]`` for a randomly chosen valid ``start``.

    Raises:
        ValueError: If ``sound`` holds fewer than ``size`` samples.
    """
    org_size = len(sound)
    # Without this guard random.randint() is handed an empty range and fails
    # with an "empty range for randrange" error that hides the real cause.
    if size > org_size:
        raise ValueError(
            f"cannot crop {size} samples from a sound of only {org_size} samples"
        )
    start = random.randint(0, org_size - size)
    return sound[start: start + size]

def padding(sound, size):
    """Zero-pad ``sound`` at both ends so that its length becomes ``size``.

    The missing samples are split between the front and the back; when their
    count is odd, the extra sample is added at the back.

    Args:
        sound: 1-D array-like of samples.
        size: Desired length after padding.

    Returns:
        The padded NumPy array.
    """
    missing = size - len(sound)
    front, extra = divmod(missing, 2)
    return np.pad(sound, (front, front + extra), mode='constant')

In [None]:
# Load every file listed in the metadata, force it to exactly `input_length`
# samples and collect [waveform, label] pairs in `comp`.
# NOTE(review): random_crop draws from the global `random` generator and no
# seed is set in the visible cells, so the crops differ from run to run.
comp = []

# iterrows() is a generator with no length, so `total` is passed explicitly;
# without it tqdm can only show a running counter, not a progress bar or ETA.
for index_num, row in tqdm(meta_data.iterrows(), total=len(meta_data)):

    file_name = row["filename"]
    # Shift the label down by one -- presumably the CSV targets are 1-based and
    # zero-based class ids are wanted downstream (TODO confirm).
    final_class_labels = row["target"] - 1

    # Decode as mono, resampled to 20 kHz.
    normalised_audio, sample_rate = librosa.load(file_name, sr=20000, mono=True)

    # A clip is either too short or too long, never both, so one chain suffices.
    if len(normalised_audio) < input_length:
        normalised_audio = padding(normalised_audio, input_length)
    elif len(normalised_audio) > input_length:
        normalised_audio = random_crop(normalised_audio, input_length)

    comp.append([normalised_audio, final_class_labels])

print(f'data length : {len(comp)}')

# Write the prepared [waveform, label] pairs to a pickle file inside the
# dataset folder.
pickle_dir = os.path.join(os.getcwd(), 'datasets/fsc22/Pickle Files')

# exist_ok=True creates the folder only when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(pickle_dir, exist_ok=True)

save_path = os.path.join(pickle_dir, 'audios_5_20')

with open(save_path, 'wb') as file:
    pickle.dump(comp, file)