# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import os
from pathlib import Path
import librosa
import csv
import soundfile as sf
from scipy import signal
import statistics

# Read Training Data & Extract Audio Features

In [2]:
def _iter_audio_files(root):
    """Yield ``(person, audio_path)`` for each audio file in ``root/<person>/<sub_dir>/``."""
    audio_suffixes = ('.wav', '.mp3', '.ogg', '.flac')

    # os.listdir returns entries in arbitrary order; sorting makes the
    # traversal (and therefore the row order of the result) reproducible.
    for person in sorted(os.listdir(root)):
        person_path = os.path.join(root, person)
        if not os.path.isdir(person_path):
            continue  # stray files next to the person folders are ignored
        print("person :", person)

        for sub_dir in sorted(os.listdir(person_path)):
            sub_path = os.path.join(person_path, sub_dir)
            if not os.path.isdir(sub_path):
                continue

            for audio_file in sorted(os.listdir(sub_path)):
                # Case-insensitive so that e.g. "clip.WAV" is picked up as well.
                if audio_file.lower().endswith(audio_suffixes):
                    yield person, os.path.join(sub_path, audio_file)


def _extract_features(person, y, sr):
    """Return one feature row (same order as the DataFrame columns) for a segment."""
    mel_spec = librosa.feature.melspectrogram(y=y, sr=sr)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=512)
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)
    # Stored under the historical column name "beat_frames", but these are
    # onset positions converted to seconds.
    onset_times = librosa.frames_to_time(
        librosa.util.fix_frames(librosa.onset.onset_detect(y=y, sr=sr))
    )
    zcr = librosa.feature.zero_crossing_rate(y)
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    rms = librosa.feature.rms(y=y)
    return [person, mel_spec, mfccs, tempo, onset_times, zcr, centroid, chroma, rms]


def read_training_data(root="TrainingData"):
    """Load all training audio, cut it into one-second windows and extract features.

    The directory layout expected is ``root/<person>/<sub_dir>/<audio file>``.
    Every audio file is decoded with ``librosa.load`` and split into
    consecutive, non-overlapping windows of ``sr`` samples (one second each).
    A trailing remainder shorter than one window is dropped.

    Parameters
    ----------
    root : str or path-like, default "TrainingData"
        Top-level directory holding one folder per person. The default is
        written without a trailing separator so it works on every OS.

    Returns
    -------
    pandas.DataFrame
        One row per window with the columns ``id`` (the person folder name),
        ``mel_spec``, ``mfccs``, ``tempo``, ``beat_frames``, ``zcr``,
        ``centroid``, ``chroma`` and ``rms``.

    Raises
    ------
    FileNotFoundError
        If ``root`` does not exist (raised by ``os.listdir``).
    """
    # Step 1: decode every file and collect the fixed-length windows.
    segments = []
    for person, audio_path in _iter_audio_files(root):
        y, sr = librosa.load(audio_path)
        # The stop value keeps a final window that ends exactly at len(y);
        # the previous `buf + sr < len(y)` test silently discarded it.
        for start in range(0, len(y) - sr + 1, sr):
            segments.append((person, y[start:start + sr], sr))

    # Step 2: turn each window into a feature row.
    print(len(segments))
    rows = []
    for done, (person, y, sr) in enumerate(segments):
        print("{}".format(done), "\r", end="")
        rows.append(_extract_features(person, y, sr))

    col = ["id", "mel_spec", "mfccs", "tempo", "beat_frames", "zcr", "centroid", "chroma", "rms"]
    return pd.DataFrame(rows, columns=col)

# Build the feature table from every audio file under the default training directory.
df = read_training_data()

person : 19
person : 26
person : 27
person : 32
person : 39
person : text.txt
6859
0.9998542061525003/100     

In [6]:
# Save the feature table to disk as a pickle file.
df.to_pickle("new_data.pkl")

def check_size(df, variable_length_cols=("beat_frames",)):
    """Check that every array-valued column has one shape across all rows.

    The first row (by position) provides the reference shape of each column.
    Values without a ``.shape`` attribute (e.g. plain strings) are not
    compared. The first mismatch found is printed as
    ``err : <col> <shape> <reference shape>`` and the check stops; otherwise a
    success line is printed followed by the reference shape of every column
    (``double`` for columns whose first value has no shape).

    Parameters
    ----------
    df : pandas.DataFrame
        Table to inspect.
    variable_length_cols : iterable of str, default ("beat_frames",)
        Columns that may legitimately differ in shape between rows and are
        therefore excluded from the comparison.

    Returns
    -------
    None
        Results are reported on stdout only.
    """
    if len(df) == 0:
        print("DataFrame is empty, nothing to check")
        return

    columns = list(df.columns)
    # Positional access: label lookups such as df[col][0] raise KeyError on a
    # filtered or re-indexed frame, which the old bare `except` swallowed and
    # so reported success without comparing anything.
    reference = {col: getattr(df[col].iloc[0], "shape", None) for col in columns}

    # Row-major scan so the first mismatch reported is the earliest row.
    for row in df.itertuples(index=False, name=None):
        for col, value in zip(columns, row):
            expected = reference[col]
            if expected is None or col in variable_length_cols:
                continue
            actual = getattr(value, "shape", None)
            if actual is not None and actual != expected:
                print("err :", col, actual, expected)
                return

    print("All dimension input is the same")
    for col in columns:
        expected = reference[col]
        if expected is None:
            print(col, "double")
        else:
            print(col, expected)
        

# Report whether every array-valued column (beat_frames excepted) has one shape across all rows.
check_size(df)

All dimension input is the same
id double
mel_spec (128, 44)
mfccs (13, 44)
tempo ()
beat_frames (5,)
zcr (1, 44)
centroid (1, 44)
chroma (12, 44)
rms (1, 44)
