Import

In [None]:
from librosa import load
from librosa.effects import split
from librosa.feature import melspectrogram
from librosa.display import specshow
from IPython import display as Ipd
from pyloudnorm import Meter as loudMeter
from pyloudnorm.normalize import loudness as loudnorm
from math import floor,sqrt
from os import listdir
from os.path import isfile
from random import randrange

import numpy as np
import matplotlib.pyplot as plt
import csv
import soundfile as sf

Processing setup

In [None]:
# setup
# DATASET names the dataset sub-folder; it is appended to constants.DATA_FOLDER_PATH
# in the Constant cell to build every other path.
DATASET     = "6"
# Number of sample wav files (1.wav .. SAMPLE_SIZE.wav) that prepareX reads.
SAMPLE_SIZE = 6000

Constant

In [None]:
# CONSTANT
import os
import sys
# Make the "00_Constant" folder (one level above the current working directory)
# importable so that the shared fyp_constants module can be loaded.
# NOTE(review): the "..\\" separator is Windows-style and the path depends on the
# notebook's working directory.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), "..\\00_Constant"))
# Only add the folder once, so re-running this cell does not duplicate the entry.
if sys.path.count(parent_dir) == 0:
    sys.path.append(parent_dir)
import fyp_constants as constants


# Folder and file locations for the selected dataset.
DATASET_FOLDER_PATH         = f"{constants.DATA_FOLDER_PATH}\\{DATASET}"
PROCESS_DATA_FOLDER_PATH    = f"{DATASET_FOLDER_PATH}\\{constants.DATA_PROCESSED_FOLDER_NM}"
RAWWAVFOLDER_PATH           = f"{DATASET_FOLDER_PATH}\\{constants.DATA_RAW_FOLDER_NM}"
SAMPLEWAVFODLER_PATH        = f"{DATASET_FOLDER_PATH}\\{constants.DATA_SAMPLEWAV_FOLDER_NM}"
ALLSAMPLE_CSV_PATH          = f"{DATASET_FOLDER_PATH}\\{constants.DATA_CSV_FOLDER_NM}\\{constants.ALLSAMPLE_CSVFILE_NM}"

# Output file names for the processed Data X variants (passed to prepareX as data_nm).
DATAX_MEL64_NM      = constants.DATAX_MEL64_NM
DATAX_MEL128_NM     = constants.DATAX_MEL128_NM
DATAX_MEL256_NM     = constants.DATAX_MEL256_NM
DATAX_WAVE_NM       = constants.DATAX_WAVE_NM
DATAX_WAVE_HALF_NM  = constants.DATAX_WAVE_HALF_NM

# Output file names for the processed Data Y variants (passed to perpareY as data_nm).
DATAY_NO_PROC_NM            = constants.DATAY_NO_PROC_NM
DATAY_NO_PROC_HALF_NM       = constants.DATAY_NO_PROC_HALF_NM
DATAY_PROC_VIB_NM           = constants.DATAY_PROC_VIB_NM
DATAY_PROC_VIB_GEN_NM       = constants.DATAY_PROC_VIB_GEN_NM
DATAY_PROC_VIB_GEN_BRE_NM   = constants.DATAY_PROC_VIB_GEN_BRE_NM
DATAY_PROC_VIB_GEN_BRI_NM   = constants.DATAY_PROC_VIB_GEN_BRI_NM
DATAY_PROC_VIB_GEN_GWL_NM   = constants.DATAY_PROC_VIB_GEN_GWL_NM
DATAY_PROC_GEN_MINMAXNORM   = constants.DATAY_PROC_GEN_MINMAXNORM

# Audio processing parameters shared with the rest of the project:
# sampling rate used when loading audio, target loudness for normalization,
# and the fmax passed to the mel spectrogram.
SAMPLING_RATE   = constants.SAMPLING_RATE
LOUDNORM_LUFS   = constants.LOUDNORM_LUFS
MEL_MAX_FREQ    = constants.MEL_MAX_FREQ

In [None]:
# Print the resolved raw-audio, sample-wav and processed-data folder paths.
print(RAWWAVFOLDER_PATH)
print(SAMPLEWAVFODLER_PATH)
print(PROCESS_DATA_FOLDER_PATH)

Process raw generated audio

In [None]:
# clip raw generated audio into individual samples
# NOTE(review): this display handle is created but not used anywhere in this cell.
dh = Ipd.display(display_id=True)

# get file names of the generated raw audios in this dataset (sub-folders are ignored)
ls = [nm for nm in listdir(RAWWAVFOLDER_PATH) if isfile(f"{RAWWAVFOLDER_PATH}\\{nm}")]
# The file name (minus its 4-character extension) is the index of the last sample
# stored in that file, and the number of samples per file is computed as the
# difference to the previous file's index. The files therefore have to be visited
# in ascending NUMERIC order; sorting by name length alone would leave equal-length
# names (e.g. "1000.wav" and "2000.wav") in whatever order listdir returned them.
ls.sort(key=lambda nm: int(nm[0:-4]))

sample_idx = 0      # running index of the sample being written (also its output file name)
pervious_idx = 0    # the last sample index of the previous raw file
for wavFile_nm in ls:
    path = f"{RAWWAVFOLDER_PATH}\\{wavFile_nm}"

    # the file name of the raw audio is the idx of the last sample
    last_sample_idx = int(wavFile_nm[0:-4])

    audio, sr = load(path, sr=SAMPLING_RATE)
    # one loudness meter per raw file is enough; it only depends on the sampling rate
    meter = loudMeter(sr)

    # the amount of samples that should be clipped out of this raw file
    sample_cnt = last_sample_idx-pervious_idx
    for sample_clipped in range(sample_cnt):
        # every sample occupies a 6-second slot of the raw audio;
        # only seconds 2..6 of each slot are kept
        sample = audio[(6*sample_clipped+2)*sr:(6*sample_clipped+6)*sr]

        # normalize the sample loudness to LOUDNORM_LUFS
        loudness = meter.integrated_loudness(sample)
        sample_norm = loudnorm(sample, loudness, LOUDNORM_LUFS)

        sample_idx += 1
        sf.write(f"{SAMPLEWAVFODLER_PATH}\\{sample_idx}.wav", sample_norm, sr)

    pervious_idx = last_sample_idx

Prepare data x

In [None]:
# function of prepare data x
# x_type:       the type of audio to be processed
#   0: waveform
#   1: 64 mel
#   2: 128 mel
#   3: 256 mel
# data_nm:      the npz file name of the processed data
# half_sample:  skip even-numbered samples (only odd-numbered samples are processed and saved)
def prepareX(x_type:int, data_nm:str, half_sample:bool=False):
    """Load the sample wav files, convert them to the requested representation and save them.

    Samples 1.wav .. SAMPLE_SIZE.wav are read from SAMPLEWAVFODLER_PATH at
    SAMPLING_RATE. The resulting list of records is written with np.savez under
    the key "data" to PROCESS_DATA_FOLDER_PATH/data_nm.

    Args:
        x_type: 0 keeps the raw waveform; 1, 2 and 3 compute a mel spectrogram
            with 64, 128 and 256 mel bands respectively (fmax=MEL_MAX_FREQ).
        data_nm: name of the npz file to write.
        half_sample: when True, samples with an even index are skipped.

    Raises:
        ValueError: if x_type is not one of 0, 1, 2, 3.
    """
    # number of mel filter banks for each spectrogram x_type
    mel_bands = {1: 64, 2: 128, 3: 256}
    # Reject an unknown type before doing any work; otherwise no record would be
    # assigned inside the loop.
    if x_type != 0 and x_type not in mel_bands:
        raise ValueError(f"x_type must be 0, 1, 2 or 3, got {x_type!r}")

    data_x = []
    for sample_idx in range(1,SAMPLE_SIZE+1):
        # skip even sample if half_sample is True (checked first so that
        # skipped files are never loaded)
        if half_sample and sample_idx%2 == 0:
            continue

        audio, sr = load(f"{SAMPLEWAVFODLER_PATH}/{sample_idx}.wav", sr=SAMPLING_RATE)

        # processing data x
        if x_type == 0:
            record = audio
        else:
            record = melspectrogram(y=audio, sr=sr, n_mels=mel_bands[x_type], fmax=MEL_MAX_FREQ)

        data_x.append(record)

    np.savez(f"{PROCESS_DATA_FOLDER_PATH}/{data_nm}", data=data_x)

In [None]:
# -- pure waveform, no processing, half-sample variant
# NOTE(review): this call does not pass half_sample=True, so as written it would
# save every sample under the "half" file name -- confirm before re-enabling.
# prepareX(x_type=0, data_nm=DATAX_WAVE_HALF_NM)
# -- process to mel scale spectrogram, with 64 filter banks
# prepareX(x_type=1, data_nm=DATAX_MEL64_NM)
# -- process to mel scale spectrogram, with 128 filter banks
prepareX(x_type=2, data_nm=DATAX_MEL128_NM)
# -- process to mel scale spectrogram, with 256 filter banks
# prepareX(x_type=3, data_nm=DATAX_MEL256_NM)

Prepare data y

In [None]:
# Setup for preparing Data Y
# Each *_PROC table maps a singer id (as a string) to a one-argument function
# that adjusts one parameter value for that singer.
def _shift(delta):
    """Return a function adding `delta` to its argument (identity when delta is 0)."""
    if delta == 0:
        return lambda x: x
    return lambda x: x + delta


def _offsetTable(offsets):
    """Turn {singer_id: offset} into {singer_id: function applying that offset}."""
    return {singer_id: _shift(delta) for singer_id, delta in offsets.items()}


BRE_PROC = _offsetTable({"1": 0, "2": -15, "3": 0, "4": 10, "5": 0, "6": -5})

BRI_PROC = _offsetTable({"1": 0, "2": 23, "3": 20, "4": -5, "5": -12, "6": 10})

GEN_PROC = _offsetTable({"1": 0, "2": 25, "3": 25, "4": 36, "5": 15, "6": 25})

GWL_PROC = _offsetTable({"1": 0, "2": 0, "3": 8, "4": 25, "5": 0, "6": 0})


def VIBRATE_SCALE(x):
    """Convert a vibrate level to an integer and scale it: 1 -> 0, 2 -> 32, 3 -> 64, ..."""
    return (int(x)-1)*32


# min max normalization, just for experiment 3
# One (lower bound, upper bound) pair per position of a Data Y record.
MINMAX = [
    (0,95),     # BRE
    (15,127),   # BRI
    (0,42),     # CLE
    (44,142),   # GEN
    (0,127),    # GWL
    (0,127),    # OPE
    (1,5)       # Vibrate
]
def minmaxNorm(record):
    """Rescale every value of `record` from its MINMAX range onto 0..127.

    The value at position i is mapped with the i-th (min, max) pair of MINMAX,
    so a value equal to the minimum becomes 0 and a value equal to the maximum
    becomes 127. A record longer than MINMAX raises IndexError.

    Returns:
        A new list with the rescaled values, in the same order.
    """
    def rescale(position, value):
        low, high = MINMAX[position]
        return (value-low)/(high-low)*127

    return [rescale(position, value) for position, value in enumerate(record)]

In [None]:
# prepare Data Y
def perpareY(
        data_nm:str,
        do_bre_proc:bool=False,
        do_bri_proc:bool=False,
        do_gen_proc:bool=False,
        do_gwl_proc:bool=False,
        do_scale_vib:bool=False,
        do_minmax_norm:bool=False,
        do_half_sample:bool=False
    ):
    """Build Data Y from the all-sample CSV and save it as an npz file.

    The CSV at ALLSAMPLE_CSV_PATH is read with its first row treated as a
    header. For every remaining row, column 0 is the record index, column 1 the
    singer id and columns 8..15 the integer parameter values. The optional
    processing steps are applied in the order bre, bri, gen, gwl, vibrate
    scaling, min-max normalization, and the resulting lists are written with
    np.savez under the key "data" to PROCESS_DATA_FOLDER_PATH\\data_nm.

    Args:
        data_nm: name of the npz file to write.
        do_bre_proc: apply the per-singer BRE_PROC adjustment to parameter 0.
        do_bri_proc: apply the per-singer BRI_PROC adjustment to parameter 1.
        do_gen_proc: apply the per-singer GEN_PROC adjustment to parameter 3.
        do_gwl_proc: apply the per-singer GWL_PROC adjustment to parameter 4.
        do_scale_vib: apply VIBRATE_SCALE to parameter 6; ignored when
            do_minmax_norm is True.
        do_minmax_norm: pass the whole parameter list through minmaxNorm.
        do_half_sample: skip records whose index is even.

    Raises:
        KeyError: if a processing step is enabled and the singer id of a row
            has no entry in the corresponding table.
        ValueError: if the record index or a parameter is not an integer.
    """
    # positions of the parameters inside para_list
    BRE_POS, BRI_POS, GEN_POS, GWL_POS, VIB_POS = 0, 1, 3, 4, 6

    data_y = []
    # newline="" lets the csv module do its own line-ending handling,
    # as required by the csv documentation
    with open(ALLSAMPLE_CSV_PATH, "r", newline="") as paralist_csvfile:
        csv_reader = csv.reader(paralist_csvfile, delimiter=",")
        # skip header (the default keeps an empty file from raising StopIteration)
        next(csv_reader, None)

        # for each record
        for r in csv_reader:
            # csv.reader yields an empty list for a blank line (e.g. a trailing one)
            if not r:
                continue

            record_idx = int(r[0])
            # skip even sample if do_half_sample is True
            if do_half_sample and record_idx%2 == 0:
                continue

            singer_id = str(r[1])
            para_list = [int(value) for value in r[8:16]]

            # processing bre
            if do_bre_proc:
                para_list[BRE_POS] = BRE_PROC[singer_id](para_list[BRE_POS])

            # processing bri
            if do_bri_proc:
                para_list[BRI_POS] = BRI_PROC[singer_id](para_list[BRI_POS])

            # processing gen
            if do_gen_proc:
                para_list[GEN_POS] = GEN_PROC[singer_id](para_list[GEN_POS])

            # processing gwl
            if do_gwl_proc:
                para_list[GWL_POS] = GWL_PROC[singer_id](para_list[GWL_POS])

            # scale up vibrate (skipped under min-max normalization, which
            # rescales the raw vibrate value itself)
            if do_scale_vib and not do_minmax_norm:
                para_list[VIB_POS] = VIBRATE_SCALE(para_list[VIB_POS])

            # min max normalization, just for experiment 3
            if do_minmax_norm:
                para_list = minmaxNorm(para_list)

            data_y.append(para_list)

    np.savez(f"{PROCESS_DATA_FOLDER_PATH}\\{data_nm}", data=data_y)

In [None]:
# -- pure Data Y, no processing
# perpareY(DATAY_NO_PROC_NM)
# -- pure Data Y, but skip even sample
# perpareY(DATAY_NO_PROC_HALF_NM, do_half_sample=True)
# -- scale up vibrate
# perpareY(DATAY_PROC_VIB_NM, do_scale_vib=True)
# -- process vibrate + gen
# perpareY(DATAY_PROC_VIB_GEN_NM, do_scale_vib=True, do_gen_proc=True)
# -- process vibrate + gen + bre
# perpareY(DATAY_PROC_VIB_GEN_BRE_NM, do_scale_vib=True, do_gen_proc=True, do_bre_proc=True)
# -- process vibrate + gen + bri
# perpareY(DATAY_PROC_VIB_GEN_BRI_NM, do_scale_vib=True, do_gen_proc=True, do_bri_proc=True)
# -- process vibrate + gen + gwl
# perpareY(DATAY_PROC_VIB_GEN_GWL_NM, do_scale_vib=True, do_gen_proc=True, do_gwl_proc=True)
# -- process gen + minmax normalization
perpareY(DATAY_PROC_GEN_MINMAXNORM, do_gen_proc=True, do_minmax_norm=True)

In [None]:
# for checking the processed Data Y
# Prints the record count and the record at index 12000 for the normalized
# file first and the unprocessed file second, separated by an empty line.
for position, data_nm in enumerate((DATAY_PROC_GEN_MINMAXNORM, DATAY_NO_PROC_NM)):
    if position > 0:
        print("")

    y = np.load(f"{PROCESS_DATA_FOLDER_PATH}\\{data_nm}.npz")["data"]
    print(len(y))
    print(y[12000])