In [None]:
import os
import pandas as pd
import librosa
import copy

In [None]:
SEGMENT_DURATION = 3  # seconds
SAVE_LOCATION = "Audio_Features_full"

# Empty feature table: one column (an empty list) per feature.
# Layout, in order: mean/var pairs for eight descriptors, a single
# "tempo" column, then mean/var pairs for MFCC coefficients 1 through 20.
csv_template = {
    **{
        f"{feature}_{stat}": []
        for feature in (
            "chroma_stft",
            "rms",
            "spectral_centroid",
            "spectral_bandwidth",
            "rolloff",
            "zero_crossing_rate",
            "harmony",
            "perceptr",
        )
        for stat in ("mean", "var")
    },
    "tempo": [],
    **{
        f"mfcc{index}_{stat}": []
        for index in range(1, 21)
        for stat in ("mean", "var")
    },
}

# Genre label -> directory that holds that genre's audio files.
audio_files = {
    genre: f"gtzan_dataset/{genre}"
    for genre in (
        "blues",
        "classical",
        "country",
        "disco",
        "hiphop",
        "jazz",
        "metal",
        "pop",
        "reggae",
        "rock",
    )
}

In [None]:
def _append_mean_var(table, name, values):
    """Append the mean and variance of ``values`` to two columns of ``table``.

    Parameters
    ----------
    table : dict
        Maps column name to a list; must already contain the keys
        ``f"{name}_mean"`` and ``f"{name}_var"``.
    name : str
        Feature prefix, e.g. ``"rms"`` or ``"mfcc3"``.
    values : object exposing ``.mean()`` and ``.var()``
        The feature values to summarise.

    Raises
    ------
    KeyError
        If ``table`` has no column for ``name``.
    """
    table[f"{name}_mean"].append(values.mean())
    table[f"{name}_var"].append(values.var())


# Extract one feature table per audio file and write it to
# "<SAVE_LOCATION>/<genre>/<genre><index>.csv".

# Each track is analysed as a single segment, so every CSV holds one row.
num_segment = 1
# Number of MFCC coefficients; must match the mfcc1..mfcc20 template columns.
n_mfcc = 20

for genre in audio_files:
    genre_dir = audio_files[genre]

    # DataFrame.to_csv does not create missing directories.
    out_dir = f"{SAVE_LOCATION}/{genre}"
    os.makedirs(out_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # file -> output index mapping stable across runs. Hidden entries and
    # sub-directories are skipped because they cannot be decoded as audio.
    file_names = sorted(
        name
        for name in os.listdir(genre_dir)
        if not name.startswith(".") and os.path.isfile(f"{genre_dir}/{name}")
    )

    for i, file_name in enumerate(file_names):
        features = copy.deepcopy(csv_template)
        audio_path = f"{genre_dir}/{file_name}"
        print("Processing:", audio_path)

        audio, sample_rate = librosa.load(audio_path)
        # Integer arithmetic on the sample count avoids the float round-trip
        # through the duration, which can truncate one sample short.
        samples_per_segment = audio.shape[-1] // num_segment

        for n in range(num_segment):
            audio_seg = audio[samples_per_segment * n: samples_per_segment * (n + 1)]

            # Chromagram
            chromagram = librosa.feature.chroma_stft(y=audio_seg, sr=sample_rate)
            _append_mean_var(features, "chroma_stft", chromagram)

            # Root Mean Square Energy
            rms = librosa.feature.rms(y=audio_seg)
            _append_mean_var(features, "rms", rms)

            # Spectral Centroid (sr passed explicitly, like the other
            # sample-rate-dependent features, instead of relying on the default)
            spec_cent = librosa.feature.spectral_centroid(y=audio_seg, sr=sample_rate)
            _append_mean_var(features, "spectral_centroid", spec_cent)

            # Spectral Bandwidth
            spec_band = librosa.feature.spectral_bandwidth(y=audio_seg, sr=sample_rate)
            _append_mean_var(features, "spectral_bandwidth", spec_band)

            # Spectral Rolloff
            spec_roll = librosa.feature.spectral_rolloff(y=audio_seg, sr=sample_rate)
            _append_mean_var(features, "rolloff", spec_roll)

            # Zero Crossing Rate
            zero_crossing = librosa.feature.zero_crossing_rate(y=audio_seg)
            _append_mean_var(features, "zero_crossing_rate", zero_crossing)

            # Harmonic and percussive components
            harmony, perceptr = librosa.effects.hpss(y=audio_seg)
            _append_mean_var(features, "harmony", harmony)
            _append_mean_var(features, "perceptr", perceptr)

            # Tempo
            tempo = librosa.feature.rhythm.tempo(y=audio_seg, sr=sample_rate)
            features["tempo"].append(tempo.item())

            # MFCC: row k of the result is coefficient k + 1
            mfcc = librosa.feature.mfcc(y=audio_seg, sr=sample_rate, n_mfcc=n_mfcc)
            for k in range(n_mfcc):
                _append_mean_var(features, f"mfcc{k + 1}", mfcc[k])

        df = pd.DataFrame(features)
        df.to_csv(f"{out_dir}/{genre}{i}.csv", index=False)