In [None]:
# You should run this after preprocessing
# Iterate the feature_extract function according to the "tracks_with_genre_small.csv"

In [7]:
import librosa as lb
import csv
import numpy as np
import pandas as pd
import os

In [8]:
AUDIO_FILE_HOME='../datasets/fma_small_flatten/'
FEATURE_OUTPUT_HOME='./features'
META_FILE = '../preprocessing/datasets/tracks_with_genre_small.csv'

In [9]:
def save_to_csv(data, columns, output_filename):
    df = pd.DataFrame([data], columns=columns)
    df.to_csv(f"{FEATURE_OUTPUT_HOME}/{output_filename}", mode='a', header=False, index=False)

def extract_feature(y, sr, feature_func, *args, **kwargs):
    return feature_func(y=y, sr=sr, *args, **kwargs)

def extract_zero_crossings(y, sr, filename, output_filename):
    zero_crossings = np.sum(lb.zero_crossings(y))
    save_to_csv([filename, zero_crossings], ['track_id', 'zero_crossings'], output_filename)

def extract_tempo(y, sr, filename, output_filename):
    onset_env = lb.onset.onset_strength(y=y, sr=sr)
    tempo, _ = lb.beat.beat_track(onset_envelope=onset_env, sr=sr)
    save_to_csv([filename, tempo], ['track_id', 'tempo'], output_filename)

def extract_spectral_centroid(y, sr, filename, output_filename):
    spectral_centroid = extract_feature(y, sr, lb.feature.spectral_centroid)
    save_to_csv([filename, np.mean(spectral_centroid)], ['track_id', 'spectral_centroid'], output_filename)

def extract_spectral_rolloff(y, sr, filename, output_filename):
    spectral_rolloff = extract_feature(y, sr, lb.feature.spectral_rolloff)
    save_to_csv([filename, np.mean(spectral_rolloff)], ['track_id', 'spectral_rolloff'], output_filename)

def extract_chroma_stft(y, sr, filename, output_filename):
    chroma_stft = extract_feature(y, sr, lb.feature.chroma_stft)
    data = [filename] + list(np.mean(chroma_stft, axis=1))
    save_to_csv(data, ['track_id'] + [f'chroma_stft_{i}' for i in range(1, 13)], output_filename)

def extract_mfccs(y, sr, filename, output_filename):
    mfccs = extract_feature(y, sr, lb.feature.mfcc, n_mfcc=20)
    data = [filename] + list(np.mean(mfccs, axis=1))
    save_to_csv(data, ['track_id'] + [f'MFCC_{i}' for i in range(1, 21)], output_filename)

def extract_harmony_percussive(y, sr, filename, output_filename):
    y_harmonic, y_percussive = lb.effects.hpss(y)
    rms_harmonic = np.mean(lb.feature.rms(y=y_harmonic))
    rms_percussive = np.mean(lb.feature.rms(y=y_percussive))
    save_to_csv([filename, rms_harmonic, rms_percussive], ['track_id', 'rms_harmonic', 'rms_percussive'], output_filename)



In [51]:
# Create CSV headers
headers = {
    'zero_crossings.csv': ['track_id', 'zero_crossings'],
    'tempo.csv': ['track_id', 'tempo'],
    'spectral_centroid.csv': ['track_id', 'spectral_centroid'],
    'spectral_rolloff.csv': ['track_id', 'spectral_rolloff'],
    'chroma_stft.csv': ['track_id'] + [f'chroma_stft_{i}' for i in range(1, 13)],
    'mfccs.csv': ['track_id'] + [f'MFCC_{i}' for i in range(1, 21)],
    'hpss.csv': ['track_id', 'rms_harmonic', 'rms_percussive']
}

for key, value in headers.items():
    pd.DataFrame(columns=value).to_csv(f"{FEATURE_OUTPUT_HOME}/{key}", index=False)

In [52]:
file_list = os.listdir(AUDIO_FILE_HOME)

for index, filename in enumerate(file_list):
    track_id = filename[:-4]
    # print(f"track_id: ", track_id)
    # print(f"filename: ", filename)
    if index % 100 == 0:
        print('complete: ', index)
    try:
        y, sr = lb.load(f"{AUDIO_FILE_HOME}{filename}") 
        extract_zero_crossings(y, sr, track_id, 'zero_crossings.csv')
        extract_tempo(y, sr, track_id, 'tempo.csv')
        extract_spectral_centroid(y, sr, track_id, 'spectral_centroid.csv')
        extract_spectral_rolloff(y, sr, track_id, 'spectral_rolloff.csv')
        extract_chroma_stft(y, sr, track_id, 'chroma_stft.csv')
        extract_mfccs(y, sr, track_id, 'mfccs.csv')  
        extract_harmony_percussive(y, sr, track_id, 'hpss.csv') 
    except Exception as e:
        print(index)
        print(e)
        

complete:  0
complete:  100
complete:  200
complete:  300
complete:  400
complete:  500
complete:  600
complete:  700


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


complete:  800
complete:  900
complete:  1000
complete:  1100
complete:  1200
complete:  1300
complete:  1400
complete:  1500
complete:  1600
complete:  1700
complete:  1800


  return f(*args, **kwargs)


1829

complete:  1900
complete:  2000




2090

complete:  2100
complete:  2200




2241

complete:  2300
complete:  2400


Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


complete:  2500
complete:  2600


Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


complete:  2700
complete:  2800


[src/libmpg123/layer3.c:INT123_do_layer3():1773] error: part2_3_length (3360) too large for available bit count (3240)


complete:  2900
complete:  3000
complete:  3100
complete:  3200
complete:  3300


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


complete:  3400
complete:  3500
complete:  3600
complete:  3700
complete:  3800
complete:  3900
complete:  4000
complete:  4100
complete:  4200
complete:  4300


[src/libmpg123/layer3.c:INT123_do_layer3():1773] error: part2_3_length (3328) too large for available bit count (3240)


complete:  4400
complete:  4500
complete:  4600
complete:  4700
complete:  4800
complete:  4900
complete:  5000
complete:  5100
complete:  5200
complete:  5300
complete:  5400
complete:  5500
complete:  5600
complete:  5700
complete:  5800
complete:  5900


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


complete:  6000
complete:  6100
complete:  6200
complete:  6300
complete:  6400
complete:  6500
complete:  6600
complete:  6700
complete:  6800
complete:  6900
complete:  7000
complete:  7100
complete:  7200
complete:  7300
complete:  7400
complete:  7500
complete:  7600


[src/libmpg123/layer3.c:INT123_do_layer3():1841] error: dequantization failed!


complete:  7700
complete:  7800
complete:  7900


In [54]:
## merge features into a single file

file_names = ["tempo", "hpss", "spectral_centroid", "spectral_rolloff", "zero_crossings", "chroma_stft", "mfccs"]

# Using a list comprehension to read all dataframes into a list
dfs = [pd.read_csv(f"{FEATURE_OUTPUT_HOME}/{file_name}.csv") for file_name in file_names]

# Refactoring the merging process to be more concise
merged_df = dfs[0]
for df in dfs[1:]:
    merged_df = merged_df.merge(df, on='track_id', how='outer')

In [55]:
merged_df.to_csv(f"{FEATURE_OUTPUT_HOME}/all_features.csv")

In [11]:
NUM_FRAMES=200

In [12]:
# Extract raw datasets
# Create CSV headers
headers = {
    'spectral_centroid_raw.csv': ['track_id'] + [f'spectral_centroid_{i}' for i in range(NUM_FRAMES)],
    'spectral_rolloff_raw.csv': ['track_id'] + [f'spectral_rolloff_{i}' for i in range(NUM_FRAMES)],
    'chroma_stft_raw.csv': ['track_id'] + [f'chroma_stft_{i}_{j}' for i in range(1, 13) for j in range(NUM_FRAMES)],
    'mfccs_raw.csv': ['track_id'] + [f'MFCC_{i}_{j}' for i in range(1, 21) for j in range(NUM_FRAMES)],
    'hpss_raw.csv': ['track_id'] +
        [f'rms_harmonic_{i}' for i in range(NUM_FRAMES)] +
        [f'rms_percussive_{i}' for i in range(NUM_FRAMES)]
}

for key, value in headers.items():
    pd.DataFrame(columns=value).to_csv(f"{FEATURE_OUTPUT_HOME}/{key}", index=False)

In [13]:
# to extract original features. (without summation)
def extract_chroma_stft_raw(y, sr, filename, output_filename, num_frames=NUM_FRAMES):
    chroma_stft = extract_feature(y, sr, lb.feature.chroma_stft)
    # Limit the number of frames
    limited_chroma_stft = chroma_stft[:, :num_frames]
    data = [filename] + list(limited_chroma_stft.flatten())
    column_names = ['track_id'] + [f'chroma_stft_{i}_{j}' for i in range(1, 13) for j in range(num_frames)]
    save_to_csv(data, column_names, output_filename)
    
def extract_mfccs_raw(y, sr, filename, output_filename, n_mfcc=20, num_frames=NUM_FRAMES):
    mfccs = extract_feature(y, sr, lb.feature.mfcc, n_mfcc=n_mfcc)
    # Limit the number of frames
    limited_mfccs = mfccs[:, :num_frames]
    data = [filename] + list(limited_mfccs.flatten())
    column_names = ['track_id'] + [f'MFCC_{i}_{j}' for i in range(1, n_mfcc+1) for j in range(num_frames)]
    save_to_csv(data, column_names, output_filename)    
    
def extract_spectral_centroid_raw(y, sr, filename, output_filename, num_frames=NUM_FRAMES):
    spectral_centroid = extract_feature(y, sr, lb.feature.spectral_centroid)
    # Limit the number of frames
    limited_spectral_centroid = spectral_centroid[:, :num_frames]
    data = [filename] + list(limited_spectral_centroid.flatten())
    column_names = ['track_id'] + [f'spectral_centroid_{i}' for i in range(num_frames)]
    save_to_csv(data, column_names, output_filename)
    
def extract_spectral_rolloff_raw(y, sr, filename, output_filename, num_frames=NUM_FRAMES):
    spectral_rolloff = extract_feature(y, sr, lb.feature.spectral_rolloff)
    # Limit the number of frames
    limited_spectral_rolloff = spectral_rolloff[:, :num_frames]
    data = [filename] + list(limited_spectral_rolloff.flatten())
    column_names = ['track_id'] + [f'spectral_rolloff_{i}' for i in range(num_frames)]
    save_to_csv(data, column_names, output_filename)
    
def extract_harmony_percussive_raw(y, sr, filename, output_filename, num_frames=NUM_FRAMES):
    y_harmonic, y_percussive = lb.effects.hpss(y)
    
    # Extract RMS features for harmonic and percussive components
    rms_harmonic = lb.feature.rms(y=y_harmonic)
    rms_percussive = lb.feature.rms(y=y_percussive)
    
    # Limit the number of frames for each feature
    limited_rms_harmonic = rms_harmonic[:, :num_frames].flatten()
    limited_rms_percussive = rms_percussive[:, :num_frames].flatten()
    
    # Combine data and create column names
    data = [filename] + list(limited_rms_harmonic) + list(limited_rms_percussive)
    column_names = (
        ['track_id'] +
        [f'rms_harmonic_{i}' for i in range(num_frames)] +
        [f'rms_percussive_{i}' for i in range(num_frames)]
    )
    
    # Save to CSV
    save_to_csv(data, column_names, output_filename)

In [14]:
file_list = os.listdir(AUDIO_FILE_HOME)

for index, filename in enumerate(file_list):
    track_id = filename[:-4]
    # print(f"track_id: ", track_id)
    # print(f"filename: ", filename)
    if index % 100 == 0:
        print('complete: ', index)
    try:
        y, sr = lb.load(f"{AUDIO_FILE_HOME}{filename}") 

        extract_spectral_centroid_raw(y, sr, track_id, 'spectral_centroid_raw.csv')
        extract_spectral_rolloff_raw(y, sr, track_id, 'spectral_rolloff_raw.csv')
        extract_chroma_stft_raw(y, sr, track_id, 'chroma_stft_raw.csv')
        extract_mfccs_raw(y, sr, track_id, 'mfccs_raw.csv')  
        extract_harmony_percussive_raw(y, sr, track_id, 'hpss_raw.csv') 
    except Exception as e:
        print(index)
        print(e)

complete:  0
complete:  100
complete:  200
complete:  300
complete:  400
complete:  500
complete:  600
complete:  700


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


complete:  800
complete:  900
complete:  1000
complete:  1100
complete:  1200
complete:  1300
complete:  1400
complete:  1500
complete:  1600
complete:  1700
complete:  1800


  return f(*args, **kwargs)


1829

complete:  1900
complete:  2000


  return f(*args, **kwargs)


2090

complete:  2100
complete:  2200


  return f(*args, **kwargs)


2241

complete:  2300
complete:  2400


Note: Illegal Audio-MPEG-Header 0x00000000 at offset 33361.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).
  return f(*args, **kwargs)


2404
201 columns passed, passed data had 71 columns
complete:  2500
complete:  2600


Note: Illegal Audio-MPEG-Header 0x00000000 at offset 22401.
Note: Trying to resync...
  return f(*args, **kwargs)
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


2651
201 columns passed, passed data had 24 columns


  return f(*args, **kwargs)


complete:  2700
complete:  2800


[src/libmpg123/layer3.c:INT123_do_layer3():1773] error: part2_3_length (3360) too large for available bit count (3240)


complete:  2900
complete:  3000
complete:  3100
complete:  3200
complete:  3300


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!


complete:  3400
complete:  3500
complete:  3600
complete:  3700
complete:  3800
complete:  3900
complete:  4000


  return f(*args, **kwargs)


complete:  4100
complete:  4200


  return f(*args, **kwargs)


complete:  4300


[src/libmpg123/layer3.c:INT123_do_layer3():1773] error: part2_3_length (3328) too large for available bit count (3240)


complete:  4400
complete:  4500
complete:  4600
complete:  4700
complete:  4800
complete:  4900
complete:  5000
complete:  5100
complete:  5200


  return f(*args, **kwargs)


complete:  5300
complete:  5400
complete:  5500
complete:  5600
complete:  5700
complete:  5800
complete:  5900


[src/libmpg123/layer3.c:INT123_do_layer3():1801] error: dequantization failed!
  return f(*args, **kwargs)
Note: Illegal Audio-MPEG-Header 0x00000000 at offset 63168.
Note: Trying to resync...
Note: Skipped 1024 bytes in input.
[src/libmpg123/parse.c:wetwork():1365] error: Giving up resync after 1024 bytes - your stream is not nice... (maybe increasing resync limit could help).


5965
201 columns passed, passed data had 68 columns
complete:  6000
complete:  6100
complete:  6200
complete:  6300
complete:  6400
complete:  6500
complete:  6600
complete:  6700
complete:  6800
complete:  6900
complete:  7000
complete:  7100
complete:  7200
complete:  7300
complete:  7400
complete:  7500
complete:  7600


[src/libmpg123/layer3.c:INT123_do_layer3():1841] error: dequantization failed!


complete:  7700
complete:  7800
complete:  7900


In [15]:

file_names = ["hpss_raw", "spectral_centroid_raw", "spectral_rolloff_raw", "chroma_stft_raw", "mfccs_raw"]

# Using a list comprehension to read all dataframes into a list
dfs_raw = [pd.read_csv(f"{FEATURE_OUTPUT_HOME}/{file_name}.csv") for file_name in file_names]

# Refactoring the merging process to be more concise
raw_merged_df = dfs_raw[0]
for df in dfs_raw[1:]:
    raw_merged_df = raw_merged_df.merge(df, on='track_id', how='outer')

In [16]:
raw_merged_df.to_csv(f"{FEATURE_OUTPUT_HOME}/all_features_raw.csv")

In [17]:
raw_merged_df.shape

(7994, 7201)