In [None]:
# Run this notebook only after the preprocessing step has finished.
# It applies the feature-extraction functions to the tracks described in "tracks_with_genre_small.csv".

In [None]:
import librosa as lb
import csv
import numpy as np
import pandas as pd
import os

In [None]:
# Directory holding the audio files; the extraction loop lists it with os.listdir.
AUDIO_FILE_HOME='../datasets/fma_small_flatten/'
# Directory that receives the per-feature CSV files written by the extract_* functions.
FEATURE_OUTPUT_HOME='./features'
# Track/genre metadata CSV.
# NOTE(review): not referenced by any cell visible here — confirm it is still needed.
META_FILE = '../preprocessing/datasets/tracks_with_genre_small.csv'

In [None]:
def extract_zero_crossings(y, sr, filename, output_filename):
    """Append one row with the track's total zero-crossing count to a CSV.

    Args:
        y: Audio signal handed to ``lb.zero_crossings``.
        sr: Sampling rate. Unused here; kept so all extractors share a signature.
        filename: Identifier written in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.
    """
    crossing_mask = lb.zero_crossings(y)
    crossing_count = np.sum(crossing_mask)

    column_names = ['track_id', 'zero_crossings']
    row = pd.DataFrame([[filename, crossing_count]], columns=column_names)

    # Append without a header; the header row is written once up front.
    target = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row.to_csv(target, mode='a', header=False, index=False)

def extract_tempo(y, sr, filename, output_filename):
    """Estimate the track's tempo and append it as one CSV row.

    Args:
        y: Audio signal used to compute the onset-strength envelope.
        sr: Sampling rate of ``y``.
        filename: Identifier written in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.
    """
    onset_env = lb.onset.onset_strength(y=y, sr=sr)
    tempo, _ = lb.beat.beat_track(onset_envelope=onset_env, sr=sr)

    # Depending on the librosa release, ``tempo`` is either a scalar or a
    # one-element ndarray. An ndarray would be serialised as "[129.2]" in the
    # CSV, so collapse it to a plain float first.
    tempo = float(np.atleast_1d(tempo).ravel()[0])

    df = pd.DataFrame([[filename, tempo]], columns=['track_id', 'tempo'])
    # Append without a header; the header row is written once up front.
    df.to_csv(f"{FEATURE_OUTPUT_HOME}/{output_filename}", mode='a', header=False, index=False)

def extract_spectral_centroid(y, sr, filename, output_filename):
    """Append the track's average spectral centroid as one CSV row.

    The value stored is the mean of every element returned by
    ``lb.feature.spectral_centroid``.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Identifier written in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.
    """
    centroid_values = lb.feature.spectral_centroid(y=y, sr=sr)
    centroid_mean = np.mean(centroid_values)

    column_names = ['track_id', 'spectral_centroid']
    row = pd.DataFrame([[filename, centroid_mean]], columns=column_names)

    # Append without a header; the header row is written once up front.
    target = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row.to_csv(target, mode='a', header=False, index=False)

def extract_spectral_rolloff(y, sr, filename, output_filename):
    """Append the track's average spectral roll-off as one CSV row.

    The value stored is the mean of every element returned by
    ``lb.feature.spectral_rolloff``.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Identifier written in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.
    """
    rolloff_values = lb.feature.spectral_rolloff(y=y, sr=sr)
    rolloff_mean = np.mean(rolloff_values)

    column_names = ['track_id', 'spectral_rolloff']
    row = pd.DataFrame([[filename, rolloff_mean]], columns=column_names)

    # Append without a header; the header row is written once up front.
    target = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row.to_csv(target, mode='a', header=False, index=False)

def extract_chroma_stft(y, sr, filename, output_filename):
    """Append the track's averaged chroma features as one CSV row.

    The chromagram from ``lb.feature.chroma_stft`` is averaged along axis 1
    and the resulting values are stored under ``chroma_stft_1`` ..
    ``chroma_stft_12``.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Identifier written in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.
    """
    chromagram = lb.feature.chroma_stft(y=y, sr=sr)
    chroma_means = np.mean(chromagram, axis=1)

    # Twelve labels, numbered from 1, following the identifier column.
    column_names = ['track_id']
    column_names.extend(f'chroma_stft_{i}' for i in range(1, 13))

    row_values = [filename, *chroma_means]
    row = pd.DataFrame([row_values], columns=column_names)

    # Append without a header; the header row is written once up front.
    target = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row.to_csv(target, mode='a', header=False, index=False)

def extract_mfccs(y, sr, filename, output_filename):
    """Append the track's averaged MFCCs as one CSV row.

    Thirteen coefficients are requested from ``lb.feature.mfcc``; each is
    averaged along axis 1 and stored under ``MFCC_1`` .. ``MFCC_13``.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Identifier written in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.
    """
    coefficient_matrix = lb.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    coefficient_means = np.mean(coefficient_matrix, axis=1)

    # Thirteen labels, numbered from 1, following the identifier column.
    column_names = ['track_id']
    column_names.extend(f'MFCC_{i}' for i in range(1, 14))

    row_values = [filename, *coefficient_means]
    row = pd.DataFrame([row_values], columns=column_names)

    # Append without a header; the header row is written once up front.
    target = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row.to_csv(target, mode='a', header=False, index=False)

def extract_harmony_percussive(y, filename, output_filename):
    """Append the mean RMS of the harmonic and percussive components as one CSV row.

    Note that, unlike the other extractors in this notebook, this function
    takes no ``sr`` argument.

    Args:
        y: Audio signal to decompose with ``lb.effects.hpss``.
        filename: Identifier written in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.
    """
    # Decompose the audio time series into harmonic and percussive components
    y_harmonic, y_percussive = lb.effects.hpss(y)

    # Summarise each component by the mean of its RMS values
    rms_harmonic = np.mean(lb.feature.rms(y=y_harmonic))
    rms_percussive = np.mean(lb.feature.rms(y=y_percussive))

    # The identifier column is labelled 'track_id' to agree with every other
    # extractor. Rows are appended without a header, so the label does not
    # change the bytes written.
    data = [filename, rms_harmonic, rms_percussive]
    df = pd.DataFrame([data], columns=['track_id', 'rms_harmonic', 'rms_percussive'])
    df.to_csv(f"{FEATURE_OUTPUT_HOME}/{output_filename}", mode='a', header=False, index=False)



In [37]:
# Create the output directory and (re)create each feature CSV with only its
# header row. NOTE: an existing file of the same name is overwritten, so
# re-running this cell discards rows extracted earlier.
headers = {
    'zero_crossings.csv': ['track_id', 'zero_crossings'],
    'tempo.csv': ['track_id', 'tempo'],
    'spectral_centroid.csv': ['track_id', 'spectral_centroid'],
    'spectral_rolloff.csv': ['track_id', 'spectral_rolloff'],
    'chroma_stft.csv': ['track_id'] + [f'chroma_stft_{i}' for i in range(1, 13)],
    'mfccs.csv': ['track_id'] + [f'MFCC_{i}' for i in range(1, 14)]
}

# to_csv does not create missing directories, so make sure the target exists
# before writing the first header.
os.makedirs(FEATURE_OUTPUT_HOME, exist_ok=True)

for key, value in headers.items():
    pd.DataFrame(columns=value).to_csv(f"{FEATURE_OUTPUT_HOME}/{key}", index=False)

In [38]:
# Extract every feature for every audio file and append the results to the
# per-feature CSVs.
#
# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and therefore the row order of every CSV) reproducible across runs.
file_list = sorted(os.listdir(AUDIO_FILE_HOME))

for index, filename in enumerate(file_list):
    # Progress marker every 100 directory entries.
    if index % 100 == 0:
        print('complete: ', index)

    audio_path = os.path.join(AUDIO_FILE_HOME, filename)

    # Skip sub-directories and hidden files (e.g. ".DS_Store") instead of
    # handing them to the decoder.
    if filename.startswith('.') or not os.path.isfile(audio_path):
        continue

    # splitext removes the extension whatever its length, where slicing off
    # the last four characters only works for three-letter extensions.
    track_id, _ = os.path.splitext(filename)

    try:
        y, sr = lb.load(audio_path)
        extract_zero_crossings(y, sr, track_id, 'zero_crossings.csv')
        extract_tempo(y, sr, track_id, 'tempo.csv')
        extract_spectral_centroid(y, sr, track_id, 'spectral_centroid.csv')
        extract_spectral_rolloff(y, sr, track_id, 'spectral_rolloff.csv')
        extract_chroma_stft(y, sr, track_id, 'chroma_stft.csv')
        extract_mfccs(y, sr, track_id, 'mfccs.csv')
    except Exception as e:
        # Best effort: report the failing file and carry on with the next one.
        # NOTE: if an extractor fails part-way, the CSVs written before it
        # already contain this track's row, so the files can end up with
        # different sets of track_ids; join them on track_id downstream.
        print(f"failed at index {index} ({filename}): {e}")
        

complete:  0
complete:  100
complete:  200
complete:  300
complete:  400
complete:  500
complete:  600
complete:  700


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


complete:  800
complete:  900
complete:  1000
complete:  1100
complete:  1200
complete:  1300
complete:  1400
complete:  1500
complete:  1600
complete:  1700
complete:  1800


  return f(*args, **kwargs)



complete:  1900
complete:  2000





complete:  2100
complete:  2200





complete:  2300
complete:  2400


Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


complete:  2500
complete:  2600


Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
  return f(*args, **kwargs)


complete:  2700
complete:  2800


[src/libmpg123/layer3.c:INT123_do_layer3():1773] error: part2_3_length (3360) too large for available bit count (3240)


complete:  2900
complete:  3000
complete:  3100
complete:  3200
complete:  3300


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


complete:  3400
complete:  3500
complete:  3600
complete:  3700
complete:  3800
complete:  3900
complete:  4000
complete:  4100
complete:  4200
complete:  4300


[src/libmpg123/layer3.c:INT123_do_layer3():1773] error: part2_3_length (3328) too large for available bit count (3240)


complete:  4400
complete:  4500
complete:  4600
complete:  4700
complete:  4800
complete:  4900
complete:  5000
complete:  5100
complete:  5200
complete:  5300
complete:  5400
complete:  5500
complete:  5600
complete:  5700
complete:  5800
complete:  5900


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


complete:  6000
complete:  6100
complete:  6200
complete:  6300
complete:  6400
complete:  6500
complete:  6600
complete:  6700
complete:  6800
complete:  6900
complete:  7000
complete:  7100
complete:  7200
complete:  7300
complete:  7400
complete:  7500
complete:  7600


[src/libmpg123/layer3.c:INT123_do_layer3():1841] error: dequantization failed!


complete:  7700
complete:  7800
complete:  7900
