# 1. Import Libraries

In [None]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
import librosa
import csv
import soundfile as sf
from scipy import signal
import statistics

# 2. Read Training Data

In [None]:
def _extract_features(person, y, sr):
    """Build one feature row for a single audio clip.

    Parameters
    ----------
    person : str
        Name of the top-level directory the clip was found under; stored as
        the row's ``id``.
    y : np.ndarray
        Audio samples as returned by ``librosa.load``.
    sr : number
        Sampling rate of ``y`` as returned by ``librosa.load``.

    Returns
    -------
    list
        ``[id, mel_spec, mfccs, tempo, beat_frames, zcr, centroid, chroma, rms]``
        in exactly the column order used by ``read_training_data``.
    """
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=512)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # NOTE: despite the column name, these are onset-detector times, not the
    # beat tracker's frames. The name is kept so saved pickles stay compatible.
    onset_frames = librosa.util.fix_frames(librosa.onset.onset_detect(y=y, sr=sr))
    # Pass sr explicitly so the conversion uses the clip's own rate instead of
    # silently relying on frames_to_time's default matching librosa.load's.
    beat_frames = librosa.frames_to_time(onset_frames, sr=sr)
    zcr = librosa.feature.zero_crossing_rate(y)
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    rms = librosa.feature.rms(y=y)

    return [person, mel_spec, mfccs, tempo, beat_frames, zcr, centroid, chroma, rms]


def read_training_data(root="TrainingData"):
    """Load every audio clip under ``root`` and compute per-clip features.

    The directory layout walked is ``<root>/<person>/<sub_dir>/<audio file>``.
    Directory listings are sorted so the row order is reproducible between
    runs, and the extension check is case-insensitive.

    Parameters
    ----------
    root : str or path-like, optional
        Directory containing one sub-directory per person. Defaults to
        ``"TrainingData"`` relative to the current working directory.

    Returns
    -------
    df_raw : pd.DataFrame
        Columns ``id``, ``y``, ``sr``: the loaded waveforms.
    df_raw_data : pd.DataFrame
        Features computed on the full-length waveforms.
    df_data : pd.DataFrame
        Features computed after cropping every waveform to the length of the
        shortest clip; ``beat_frames`` is additionally zero-padded or
        truncated to the median ``beat_frames`` length across clips.

    Raises
    ------
    ValueError
        If no audio file is found under ``root``.
    """
    audio_exts = ('.wav', '.mp3', '.ogg', '.flac')
    raw = []

    # Collect [person, samples, sampling_rate] for every audio file found.
    for person in sorted(os.listdir(root)):
        person_path = os.path.join(root, person)
        if not os.path.isdir(person_path):
            continue

        for sub_dir in sorted(os.listdir(person_path)):
            clip_dir = os.path.join(person_path, sub_dir)
            if not os.path.isdir(clip_dir):
                continue

            for audio_file in sorted(os.listdir(clip_dir)):
                if audio_file.lower().endswith(audio_exts):
                    y, sr = librosa.load(os.path.join(clip_dir, audio_file))
                    raw.append([person, y, sr])

    if not raw:
        # Fail with an actionable message instead of min()'s opaque
        # "arg is an empty sequence".
        raise ValueError(
            f"No audio files found under {root!r}; "
            "expected <root>/<person>/<sub_dir>/<audio file>."
        )

    min_len = min(len(samples) for _, samples, _ in raw)

    # raw_data: features on the uncropped audio.
    raw_data = [_extract_features(person, y, sr) for person, y, sr in raw]

    # data: features on audio cropped to the shortest clip, so the frame-based
    # features share the same number of frames across rows.
    data = [_extract_features(person, y[:min_len], sr) for person, y, sr in raw]

    # The number of detected onsets still varies per clip, so force
    # beat_frames to a common length (the median): zero-pad short, truncate long.
    bf_idx = 4
    mid_bf = int(statistics.median([len(row[bf_idx]) for row in data]))
    for row in data:
        bf = row[bf_idx]
        if len(bf) < mid_bf:
            bf = np.append(bf, np.zeros(mid_bf - len(bf)))
        row[bf_idx] = bf[:mid_bf]

    col = ["id", "mel_spec", "mfccs", "tempo", "beat_frames", "zcr", "centroid", "chroma", "rms"]
    df_raw = pd.DataFrame(raw, columns=["id", "y", "sr"])
    df_raw_data = pd.DataFrame(raw_data, columns=col)
    df_data = pd.DataFrame(data, columns=col)

    return df_raw, df_raw_data, df_data

# Load the audio and build the three frames: raw waveforms, features on the
# full-length audio, and features on the length-cropped audio.
df_raw, df_raw_data, df_data = read_training_data()

In [None]:
def check_size(df):
    """Check that every column holds same-shaped values in all rows.

    Each cell's ``.shape`` is compared with the ``.shape`` of the first row in
    the same column. Cells without a ``.shape`` attribute (e.g. plain strings)
    are treated as having shape ``None``, so a column that mixes array and
    non-array values is reported as a mismatch rather than silently skipped.

    Rows are accessed by position, so the check works for any index, not only
    the default ``RangeIndex``.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose cells may contain array-like objects.

    Returns
    -------
    None
        The result is printed: a line starting with ``err`` that names the
        first offending column and row, or a confirmation message when all
        shapes agree (including for an empty frame).
    """
    for col, column in df.items():
        if len(column) == 0:
            continue

        expected = getattr(column.iloc[0], "shape", None)
        for pos in range(1, len(column)):
            actual = getattr(column.iloc[pos], "shape", None)
            if actual != expected:
                print(f"err: column {col!r}, row {pos} has shape {actual}, expected {expected}")
                return

    print("All dimension input is the same")

# Sanity check: every column of the cropped feature frame should have the same
# value shape in every row.
check_size(df_data)

All dimension input is the same


# 3. Save as Pickle

In [None]:
# Persist each frame to its own pickle file in the working directory,
# in the same order as before: raw waveforms, full-length features, cropped features.
for frame, target in (
    (df_raw, "raw.pkl"),
    (df_raw_data, "raw_data.pkl"),
    (df_data, "data.pkl"),
):
    frame.to_pickle(target)

In [None]:
# Round-trip check: reload the cropped feature frame from disk and show its
# first rows as plain text.
# NOTE: only unpickle files you produced yourself; pickle can execute code on load.
df = pd.read_pickle("data.pkl")
print(df.head().to_string() if False else df.head())

   id                                           mel_spec  \
0  19  [[0.029659186, 0.084653825, 0.09452135, 0.0959...   
1  19  [[0.027643714, 0.07534823, 0.08755229, 0.07689...   
2  19  [[0.023094, 0.07977633, 0.08986355, 0.08174720...   
3  19  [[0.025472675, 0.07010062, 0.0794156, 0.079057...   
4  19  [[0.032792516, 0.07860809, 0.09123399, 0.08673...   

                                               mfccs       tempo  \
0  [[-496.9096, -471.9636, -473.43643, -470.2355,...   95.703125   
1  [[-488.66556, -461.78857, -462.1105, -458.1133...  103.359375   
2  [[-462.32526, -435.57428, -443.42703, -451.951...  123.046875   
3  [[-501.2917, -462.0459, -453.44577, -463.79306...  129.199219   
4  [[-500.5669, -478.8916, -476.9676, -474.32098,...  135.999178   

                                         beat_frames  \
0  [0.0, 0.06965986394557823, 0.626938775510204, ...   
1  [0.0, 0.06965986394557823, 0.603718820861678, ...   
2  [0.0, 0.06965986394557823, 0.23219954648526078...   
3  [0.