In [None]:
# Run this notebook after the preprocessing step.
# It applies the feature-extraction functions defined below to every audio file in AUDIO_FILE_HOME
# (intended to be the tracks listed in "tracks_with_genre_small.csv").

In [None]:
import librosa as lb
import csv
import numpy as np
import pandas as pd
import os

In [None]:
# Directory whose entries are listed and loaded as audio by the extraction loop.
AUDIO_FILE_HOME='../datasets/fma_small_flatten/'
# Directory that receives one CSV file per extracted feature.
FEATURE_OUTPUT_HOME='./features'
# Presumably the track/genre metadata produced by preprocessing.
# NOTE(review): not referenced by any cell visible in this notebook — confirm whether it is still needed.
META_FILE = '../preprocessing/datasets/tracks_with_genre_small.csv'

In [None]:
def extract_zero_crossings(y, sr, filename, output_filename):
    """Append the zero-crossing count of a signal to a feature CSV.

    Args:
        y: Audio signal handed to ``lb.zero_crossings``.
        sr: Sampling rate. Accepted so the signature matches the other
            extractors; it is not used by this function.
        filename: Value stored in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.

    Returns:
        None. One row is appended to the CSV, without header or index.
    """
    crossing_count = lb.zero_crossings(y).sum()
    column_names = ['track_id', 'zero_crossings']
    row_frame = pd.DataFrame([[filename, crossing_count]], columns=column_names)
    target_path = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row_frame.to_csv(target_path, mode='a', header=False, index=False)

def extract_tempo(y, sr, filename, output_filename):
    """Estimate the tempo of a signal and append it to a feature CSV.

    The tempo is derived from the onset-strength envelope via
    ``lb.beat.beat_track``; the beat positions it also returns are discarded.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Value stored in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.

    Returns:
        None. One row is appended to the CSV, without header or index.
    """
    onset_env = lb.onset.onset_strength(y=y, sr=sr)
    tempo, _ = lb.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # Depending on the librosa version, `tempo` is either a scalar or a
    # one-element ndarray. Putting an ndarray into a DataFrame cell makes
    # to_csv write its string form (e.g. "[129.2]") instead of a number, so
    # collapse it to a plain float first. Scalars pass through unchanged.
    tempo_value = float(np.atleast_1d(tempo)[0])
    df = pd.DataFrame([[filename, tempo_value]], columns=['track_id', 'tempo'])
    df.to_csv(f"{FEATURE_OUTPUT_HOME}/{output_filename}", mode='a', header=False, index=False)

def extract_spectral_centroid(y, sr, filename, output_filename):
    """Append the mean spectral centroid of a signal to a feature CSV.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Value stored in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.

    Returns:
        None. One row is appended to the CSV, without header or index.
    """
    centroid_values = lb.feature.spectral_centroid(y=y, sr=sr)
    # Collapse everything librosa returned into a single average value.
    mean_centroid = np.mean(centroid_values)
    column_names = ['track_id', 'spectral_centroid']
    row_frame = pd.DataFrame([[filename, mean_centroid]], columns=column_names)
    target_path = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row_frame.to_csv(target_path, mode='a', header=False, index=False)

def extract_spectral_rolloff(y, sr, filename, output_filename):
    """Append the mean spectral roll-off of a signal to a feature CSV.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Value stored in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.

    Returns:
        None. One row is appended to the CSV, without header or index.
    """
    rolloff_values = lb.feature.spectral_rolloff(y=y, sr=sr)
    # Collapse everything librosa returned into a single average value.
    mean_rolloff = np.mean(rolloff_values)
    column_names = ['track_id', 'spectral_rolloff']
    row_frame = pd.DataFrame([[filename, mean_rolloff]], columns=column_names)
    target_path = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row_frame.to_csv(target_path, mode='a', header=False, index=False)

def extract_chroma_stft(y, sr, filename, output_filename):
    """Append the per-row mean of the chroma STFT to a feature CSV.

    The chromagram is averaged along axis 1, and the resulting values are
    written after the track id. Twelve value columns are declared, so the
    DataFrame constructor raises if a different number of values is produced.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Value stored in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.

    Returns:
        None. One row is appended to the CSV, without header or index.
    """
    chromagram = lb.feature.chroma_stft(y=y, sr=sr)
    chroma_means = np.mean(chromagram, axis=1)
    column_names = ['track_id'] + [f'chroma_stft_{i}' for i in range(1, 13)]
    row_values = [filename, *chroma_means]
    row_frame = pd.DataFrame([row_values], columns=column_names)
    target_path = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row_frame.to_csv(target_path, mode='a', header=False, index=False)

def extract_mfccs(y, sr, filename, output_filename):
    """Append the per-coefficient mean of 13 MFCCs to a feature CSV.

    ``lb.feature.mfcc`` is asked for 13 coefficients; the result is averaged
    along axis 1 and the 13 means are written after the track id.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        filename: Value stored in the ``track_id`` column.
        output_filename: Name of the CSV file inside ``FEATURE_OUTPUT_HOME``.

    Returns:
        None. One row is appended to the CSV, without header or index.
    """
    coefficient_means = np.mean(lb.feature.mfcc(y=y, sr=sr, n_mfcc=13), axis=1)
    column_names = ['track_id'] + [f'MFCC_{i}' for i in range(1, 14)]
    row_values = [filename, *coefficient_means]
    row_frame = pd.DataFrame([row_values], columns=column_names)
    target_path = f"{FEATURE_OUTPUT_HOME}/{output_filename}"
    row_frame.to_csv(target_path, mode='a', header=False, index=False)


In [None]:
# Create the output directory and (re)create every feature CSV with only its
# header row. The extractor functions later append data rows without headers,
# so this cell must run before the extraction loop.
# NOTE: re-running this cell overwrites any rows already written.

# to_csv does not create missing parent directories, so make sure it exists.
os.makedirs(FEATURE_OUTPUT_HOME, exist_ok=True)

# Maps each output file name to its column names.
headers = {
    'zero_crossings.csv': ['track_id', 'zero_crossings'],
    'tempo.csv': ['track_id', 'tempo'],
    'spectral_centroid.csv': ['track_id', 'spectral_centroid'],
    'spectral_rolloff.csv': ['track_id', 'spectral_rolloff'],
    'chroma_stft.csv': ['track_id'] + [f'chroma_stft_{i}' for i in range(1, 13)],
    'mfccs.csv': ['track_id'] + [f'MFCC_{i}' for i in range(1, 14)]
}

for key, value in headers.items():
    # An empty frame with named columns writes just the header line.
    pd.DataFrame(columns=value).to_csv(f"{FEATURE_OUTPUT_HOME}/{key}", index=False)

In [None]:
# Extract every feature for every audio file in AUDIO_FILE_HOME.
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the output CSVs reproducible between runs.
file_list = sorted(os.listdir(AUDIO_FILE_HOME))
failed_files = []  # files that could not be decoded, reported after the loop

for filename in file_list:
    audio_path = os.path.join(AUDIO_FILE_HOME, filename)

    # Ignore hidden entries (e.g. .DS_Store) and sub-directories.
    if filename.startswith('.') or not os.path.isfile(audio_path):
        continue

    # Drop the extension regardless of its length to obtain the track id.
    track_id = os.path.splitext(filename)[0]
    print("track_id: ", track_id)
    print("filename: ", filename)

    try:
        y, sr = lb.load(audio_path)
    except Exception as exc:
        # The exception type depends on the audio backend in use, hence the
        # broad catch. An undecodable file is recorded and skipped so that one
        # bad clip does not abort the whole run; nothing has been written for
        # this track yet, so the CSVs stay consistent with each other.
        print(f"skipping {filename}: {exc!r}")
        failed_files.append(filename)
        continue

    extract_zero_crossings(y, sr, track_id, 'zero_crossings.csv')
    extract_tempo(y, sr, track_id, 'tempo.csv')
    extract_spectral_centroid(y, sr, track_id, 'spectral_centroid.csv')
    extract_spectral_rolloff(y, sr, track_id, 'spectral_rolloff.csv')
    extract_chroma_stft(y, sr, track_id, 'chroma_stft.csv')
    extract_mfccs(y, sr, track_id, 'mfccs.csv')

if failed_files:
    print(f"{len(failed_files)} file(s) could not be loaded: {failed_files}")
        