In [None]:
import os
import numpy as np
import pandas as pd
import librosa
import librosa.display
import pickle
import random
import audiomentations as aa
import gc
from sklearn.model_selection import StratifiedKFold

In [None]:
# Ask which feature representation to extract and keep prompting until the
# answer is one of the menu numbers; `feature` then holds 'MEL', 'MFCC' or 'MIX'.
_feature_by_choice = {'1': 'MEL', '2': 'MFCC', '3': 'MIX'}

feature = None
valid_feature = False

while not valid_feature:
    feature = input(
        "Which feature extraction do you want:\n"
        " 1. MEL\n"
        " 2. MFCC\n"
        " 3. MIX\n :")
    # Anything outside the menu leaves the flag unset, so the prompt repeats.
    if feature in _feature_by_choice:
        feature = _feature_by_choice[feature]
        valid_feature = True

In [None]:
# Ask which augmentation scheme to apply and keep prompting until the answer
# is one of the menu numbers; `augment` ends up as the chosen integer (0-4).
augment = None
valid_augment = False

while not valid_augment:
    augment = input(
        "Do you need to augment the data:\n"
        " 0. No\n"
        " 1. Time Stretch Only\n"
        " 2. Pitch Shift Only\n"
        " 3. Both Pitch Shift and Time Stretch\n"
        " 4. PS, TS and GN\n :")

    # The loop ends as soon as the raw answer is one of the accepted digits.
    valid_augment = augment in ('0', '1', '2', '3', '4')

# Downstream cells compare `augment` against integers, so convert once here.
augment = int(augment)

In [None]:
# Number of samples that time-stretched clips are padded or cropped back to
# (see augmentor). The 20000 factor matches the sr value passed to librosa
# elsewhere in this notebook, so this is presumably 5 seconds of audio.
input_length = 5 * 20_000

def random_crop(sound, size):
    """Return a contiguous window of ``size`` items taken from ``sound``.

    The window's start offset is drawn uniformly with ``random.randint`` from
    ``0`` to ``len(sound) - size`` (both inclusive).

    Args:
        sound: Sliceable sequence (for example a 1-D array of samples).
        size: Number of items to keep.

    Returns:
        ``sound[offset:offset + size]`` for the randomly drawn offset.

    Raises:
        ValueError: Propagated from ``random.randint`` when ``size`` exceeds
            ``len(sound)``.
    """
    last_valid_offset = len(sound) - size
    offset = random.randint(0, last_valid_offset)
    return sound[slice(offset, offset + size)]

def padding(sound, size):
    """Zero-pad ``sound`` on both sides so that it has ``size`` items.

    The missing length is split between the two ends; when it is odd, the
    extra item goes to the trailing side.

    Args:
        sound: 1-D array-like of samples.
        size: Desired total length.

    Returns:
        The padded array produced by ``np.pad`` in ``'constant'`` mode.
    """
    missing = size - len(sound)
    leading = missing // 2
    trailing = missing - leading
    return np.pad(sound, (leading, trailing), 'constant')

In [None]:
# Shared Gaussian-noise transform; augmentor applies it for augmentation code 6.
# NOTE(review): p looks like the probability that the transform is applied, in
# which case about half of the code-6 samples would carry no added noise -
# confirm against the audiomentations documentation.
gaussian_noise = aa.AddGaussianNoise(
    min_amplitude=0.001,
    max_amplitude=0.015,
    p=0.5,
)

def augmentor(audio, augmentation, sr=20000):
    """Return one augmented variant of ``audio`` selected by ``augmentation``.

    Supported codes:
        1 - ``audio`` is returned unchanged.
        2 - time stretch at rate 1.5, then zero-padded to ``input_length``.
        3 - time stretch at rate 0.667, then randomly cropped to ``input_length``.
        4 - pitch shift by +2 steps.
        5 - pitch shift by -2 steps.
        6 - reversed audio passed through the module-level ``gaussian_noise``
            transform.

    Args:
        audio: 1-D array of audio samples.
        augmentation: Integer code from the list above.
        sr: Sample rate handed to the pitch-shift and noise transforms.
            Defaults to 20000, the value previously hard-coded here.

    Returns:
        The augmented audio.

    Raises:
        ValueError: If ``augmentation`` is not one of the supported codes.
            Previously this case fell through and returned ``None``, which only
            failed later inside the feature extractors.
    """
    if augmentation == 1:
        return audio
    if augmentation == 2:
        # Speeding up shortens the clip, so pad it back to the fixed length.
        spedup_sound = librosa.effects.time_stretch(y=audio, rate=1.5)
        return padding(spedup_sound, input_length)
    if augmentation == 3:
        # Slowing down lengthens the clip, so crop a window of the fixed length.
        slowed_sound = librosa.effects.time_stretch(y=audio, rate=0.667)
        return random_crop(slowed_sound, input_length)
    if augmentation == 4:
        return librosa.effects.pitch_shift(audio, sr=sr, n_steps=2)
    if augmentation == 5:
        return librosa.effects.pitch_shift(audio, sr=sr, n_steps=-2)
    if augmentation == 6:
        reversed_audio = audio[::-1]
        return gaussian_noise(reversed_audio, sample_rate=sr)

    raise ValueError(
        f"Unsupported augmentation code {augmentation!r}; expected an integer from 1 to 6"
    )


def mel_features_extractor(raw_audio):
    """Build a three-channel log-mel representation of ``raw_audio``.

    Each channel is a mel spectrogram (sr=20000, 128 mel bands, hop length 512)
    converted with ``librosa.power_to_db``; the channels differ only in FFT
    size: 2048, 1024 and 512, in that order.

    Args:
        raw_audio: 1-D array of audio samples.

    Returns:
        The three spectrograms stacked along a new last axis (``axis=2``).
    """
    channels = [
        librosa.power_to_db(
            librosa.feature.melspectrogram(
                y=raw_audio, sr=20000, n_mels=128, n_fft=fft_size, hop_length=512
            )
        )
        for fft_size in (2048, 1024, 512)
    ]
    return np.stack(channels, axis=2)

def mfcc_features_extractor(raw_audio, sr=20000):
    """Build a three-channel MFCC representation of ``raw_audio``.

    Each channel holds 128 MFCCs computed with hop length 512; the channels
    differ only in FFT size: 2048, 1024 and 512, in that order.

    The sample rate is now passed to librosa explicitly. It was previously
    omitted, so librosa fell back to its own default instead of the 20000 Hz
    used by every other extractor in this notebook.
    NOTE: features pickled before this fix were computed with that default and
    will differ numerically from freshly extracted ones.

    Args:
        raw_audio: 1-D array of audio samples.
        sr: Sample rate of ``raw_audio``. Defaults to 20000.

    Returns:
        The three MFCC matrices stacked along a new last axis (``axis=2``).
    """
    feature_1 = librosa.feature.mfcc(y=raw_audio, sr=sr, n_mfcc=128, n_fft=2048, hop_length=512)
    feature_2 = librosa.feature.mfcc(y=raw_audio, sr=sr, n_mfcc=128, n_fft=1024, hop_length=512)
    feature_3 = librosa.feature.mfcc(y=raw_audio, sr=sr, n_mfcc=128, n_fft=512, hop_length=512)

    three_chanel = np.stack((feature_1, feature_2, feature_3), axis=2)

    return three_chanel

def mixed_features_extractor(raw_audio, sr=20000):
    """Build a three-channel MFCC / log-mel / chroma representation.

    All three channels use FFT size 1024 and hop length 512, and each is
    requested with 128 rows (``n_mfcc``, ``n_mels``, ``n_chroma``) so that the
    matrices can be stacked.

    The MFCC channel now receives the sample rate explicitly. It was previously
    omitted, so librosa used its own default for that channel while the mel and
    chroma channels used 20000 Hz.
    NOTE: features pickled before this fix will differ numerically in the MFCC
    channel.

    Args:
        raw_audio: 1-D array of audio samples.
        sr: Sample rate of ``raw_audio``. Defaults to 20000.

    Returns:
        MFCC, log-mel and chroma matrices stacked along a new last axis
        (``axis=2``), in that order.
    """
    feature_1 = librosa.feature.mfcc(y=raw_audio, sr=sr, n_mfcc=128, n_fft=1024, hop_length=512)
    feature_2 = librosa.power_to_db(
        librosa.feature.melspectrogram(y=raw_audio, sr=sr, n_mels=128, n_fft=1024, hop_length=512)
    )
    feature_3 = librosa.feature.chroma_stft(y=raw_audio, sr=sr, n_chroma=128, n_fft=1024, hop_length=512)

    three_chanel = np.stack((feature_1, feature_2, feature_3), axis=2)

    return three_chanel


def get_pickle(file_name):
    """Load and return the object pickled under the dataset's pickle folder.

    The file is looked up at ``'datasets/fsc22/Pickle Files/' + file_name``,
    relative to the current working directory.

    Args:
        file_name: Name of the pickle file inside that folder.

    Returns:
        The unpickled object.

    Raises:
        OSError: If the file cannot be opened.
        Exception: Whatever ``pickle.load`` raises for unreadable content.
    """
    fold_dir = 'datasets/fsc22/Pickle Files/' + file_name

    # SECURITY: unpickling can execute arbitrary code; only load files that
    # were produced by this project.
    # The context manager closes the handle even when unpickling fails; the
    # previous open()/close() pair leaked it in that case.
    with open(fold_dir, 'rb') as infile:
        return pickle.load(infile)

In [None]:
# Extract features for every clip, plus the augmented variants selected by
# `augment`. Each entry of `spects` is [features, class label, is_original].

# Move up to the project root only when the dataset folder is not already
# reachable from the working directory. An unconditional chdir climbed two
# more levels every time this cell was re-run.
if not os.path.isdir('datasets/fsc22/Pickle Files'):
    os.chdir('../..')

audios = get_pickle('audios_5_20')

# Feature name chosen at the prompt -> extractor function.
_extractors = {
    'MEL': mel_features_extractor,
    'MFCC': mfcc_features_extractor,
    'MIX': mixed_features_extractor,
}

# Menu choice -> augmentor codes applied to every clip, in output order.
# Codes 2/3 are the time stretches, 4/5 the pitch shifts, 6 the noise variant.
_augmentation_plan = {
    0: (),
    1: (2, 3),
    2: (4, 5),
    3: (2, 3, 4, 5),
    4: (2, 3, 4, 5, 6),
}

if feature not in _extractors:
    # Raise instead of calling exit(): the failure stays visible in the
    # notebook output.
    raise ValueError(f"Unknown feature type {feature!r}; expected 'MEL', 'MFCC' or 'MIX'")

_extract = _extractors[feature]
# An unrecognised `augment` value adds no augmented samples, as before.
_augmentation_codes = _augmentation_plan.get(augment, ())

spects = []

for element in audios:
    # element[0] is the raw audio, element[1] its class label.
    spects.append([_extract(element[0]), element[1], True])

    for _code in _augmentation_codes:
        spects.append([_extract(augmentor(element[0], _code)), element[1], False])

print(len(spects))

In [None]:
# Build the output name as <augmentation prefix><feature prefix>features_5_20.
_feature_prefixes = {'MEL': 'mel_', 'MFCC': 'mfcc_', 'MIX': 'mixed_'}
_augment_prefixes = {
    1: 'aug_ts_',
    2: 'aug_ps_',
    3: 'aug_ts_ps_',
    4: 'aug_ts_ps_gn_',
}

pickle_filename = 'features_5_20'

if feature in _feature_prefixes:
    pickle_filename = _feature_prefixes[feature] + pickle_filename
else:
    exit(1)

# No augmentation (or an unlisted value) contributes no prefix.
pickle_filename = _augment_prefixes.get(augment, '') + pickle_filename

In [None]:
# Tabulate the extracted samples, then free the list they were collected in.
features_df = pd.DataFrame(spects, columns=['feature', 'class', 'status'])

del spects
gc.collect()

# Pull the feature matrices and the class labels out as NumPy arrays.
feature_column = features_df['feature']
class_column = features_df['class']

X = np.array(feature_column.tolist())
y = np.array(class_column.tolist())

print(f'X shape: {X.shape}')
print(f'y shape: {y.shape}')

In [None]:
# Split the samples into 5 stratified folds (shuffled, fixed seed) and keep
# each fold's held-out portion as a list of [spectrogram, label] pairs.
# NOTE(review): the 'status' flag that marks original vs. augmented samples is
# not consulted here, so augmented variants of a clip can presumably end up in
# a different fold than the clip itself - confirm that this is intended.
stratified_kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

spect_folds = []

for fold_number, (train_index, test_index) in enumerate(stratified_kfold.split(X, y), start=1):
    # Materialise the training and testing partitions of the current fold.
    X_train = [X[i] for i in train_index]
    y_train = [y[i] for i in train_index]
    X_test = [X[i] for i in test_index]
    y_test = [y[i] for i in test_index]

    print(f"Fold {fold_number}:")
    print(f"  Training samples: {len(X_train)}")
    print(f"  Testing samples: {len(X_test)}")

    print('Testing bin count -  {}'.format(np.bincount(y_test)))

    # Only the held-out samples are stored, one [feature, label] pair each.
    spect_folds.append([[sample, label] for sample, label in zip(X_test, y_test)])

In [None]:
# Sanity-check the nesting of spect_folds: folds -> samples -> [spectrogram, label].
first_fold = spect_folds[0]
first_sample = first_fold[0]

print(f'len spect_folds: {len(spect_folds)}')  # number of folds
print(f'len spect_folds[0]: {len(first_fold)}')  # samples in the first fold
print(f'len spect_folds[0][0]: {len(first_sample)}')  # elements per sample - should be 2
print(f'shape spect_folds[0][0][0]: {np.shape(first_sample[0])}')  # spectrogram shape

In [None]:
# Write every fold to
#   <cwd>/datasets/fsc22/Pickle Files/<pickle_filename>/<pickle_filename>_fold<N>
# with N counting from 1.
pickle_dir = os.path.join(os.getcwd(), 'datasets/fsc22/Pickle Files')

# All folds of one configuration share a sub-directory named after it. It is
# created once, outside the loop; exist_ok avoids the separate exists() check
# and also creates pickle_dir when it is missing.
save_path = os.path.join(pickle_dir, pickle_filename)
os.makedirs(save_path, exist_ok=True)

# Iterate over the folds that were actually built instead of a hard-coded
# range(5), so the count cannot drift from the n_splits used for the split.
for fold_number, fold_samples in enumerate(spect_folds, start=1):
    save_file = os.path.join(save_path, f'{pickle_filename}_fold{fold_number}')

    with open(save_file, 'wb') as file:
        pickle.dump(fold_samples, file)