In [1]:
import numpy as np
import pandas as pd
import librosa
import librosa.display
from pathlib import Path
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
import warnings
from PIL import Image
import gc

In [2]:
# https://queirozf.com/entries/suppressing-ignoring-warnings-in-python-reference-and-examples

# warning happens when reading mp3 files. It is routine and can be ignored
warnings.filterwarnings('ignore', message="PySoundFile failed. Trying audioread instead.")

In [3]:
def load_data_str_idx(filepath):
    """Loads csv and converts index to string."""
    df = pd.read_csv(filepath, index_col=0)
    df.index = df.index.astype('string')
    return df
    
df = load_data_str_idx('../data/features_with_genres.csv')
df.head()

Unnamed: 0_level_0,chroma_cens_kurtosis_01,chroma_cens_kurtosis_02,chroma_cens_kurtosis_03,chroma_cens_kurtosis_04,chroma_cens_kurtosis_05,chroma_cens_kurtosis_06,chroma_cens_kurtosis_07,chroma_cens_kurtosis_08,chroma_cens_kurtosis_09,chroma_cens_kurtosis_10,...,zcr_kurtosis_01,zcr_max_01,zcr_mean_01,zcr_median_01,zcr_min_01,zcr_skew_01,zcr_std_01,track_genre_top,track_title,artist_name
track_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,7.180653,5.230309,0.249321,1.34762,1.482478,0.531371,1.481593,2.691455,0.866868,1.341231,...,5.75889,0.459473,0.085629,0.071289,0.0,2.089872,0.061448,Hip-Hop,Food,AWOL
5,0.527563,-0.077654,-0.27961,0.685883,1.93757,0.880839,-0.923192,-0.927232,0.666617,1.038546,...,6.808415,0.375,0.053114,0.041504,0.0,2.193303,0.044861,Hip-Hop,This World,AWOL
10,3.702245,-0.291193,2.196742,-0.234449,1.367364,0.998411,1.770694,1.604566,0.521217,1.982386,...,21.434212,0.452148,0.077515,0.071777,0.0,3.542325,0.0408,Pop,Freeway,Kurt Vile
140,0.533579,-0.623885,-1.086205,-1.081079,-0.765151,-0.072282,-0.882913,-0.582376,-0.884749,-0.645214,...,11.052547,0.379395,0.052379,0.036621,0.001953,3.143968,0.057712,Folk,Queen Of The Wires,Alec K. Redfearn & the Eyesores
141,0.172898,-0.284804,-1.169662,-1.062855,-0.706868,-0.708281,-0.204884,0.023624,-0.64277,-0.786291,...,32.994659,0.415527,0.040267,0.034668,0.00293,4.204097,0.028665,Folk,Ohio,Alec K. Redfearn & the Eyesores


In [16]:
def generate_mfcc_spectrograms(data):
    """Generates spectrograms of the mel frequency cepstral coefficients. Mp3
    files from the free music archive should be stored in an an adjacent
    directory named 'data'. Returns a dictionary of the flattened MFCCs. To
    return the MFCCs back into a two dimensional array reshape to (20,1291).
    
    Parameters
    ----------
    data - dataframe
        - Dataframe with track ids as index and a column named 'track_genre_top'
          which contains the target genre."""
    
    data_dict = {}
    
    for i, track_id in enumerate(data.index):
        
        top_genre = data.loc[track_id]['track_genre_top']

        track_str = str(track_id).zfill(6)

        y, sr = librosa.load(f'../data/fma_small/{track_str[:3]}/{track_str}.mp3')

        mfcc = librosa.feature.mfcc(y, sr)

        data_dict[track_str] = mfcc.flatten()
        
        librosa.display.specshow(mfcc, cmap='binary')
        plt.savefig(f'../mfcc_specs/{top_genre}/{top_genre}_{track_id}.png')
        plt.close()

    #     making it verbose
        if i % 300 == 0:
            print(f'currently on round {i}, track {track_str}, only\
 {len(data.index)-i} left')
    
    return data_dict

In [17]:
mfcc_dict = generate_mfcc_spectrograms(df)

currently on round 0, track 000002, only7994 left
currently on round 300, track 006439, only7694 left
currently on round 600, track 012390, only7394 left
currently on round 900, track 021587, only7094 left
currently on round 1200, track 029738, only6794 left
currently on round 1500, track 038888, only6494 left
currently on round 1800, track 044806, only6194 left
currently on round 2100, track 052001, only5894 left
currently on round 2400, track 056640, only5594 left
currently on round 2700, track 062529, only5294 left
currently on round 3000, track 068600, only4994 left
currently on round 3300, track 073366, only4694 left
currently on round 3600, track 080755, only4394 left
currently on round 3900, track 086799, only4094 left
currently on round 4200, track 093704, only3794 left
currently on round 4500, track 099440, only3494 left
currently on round 4800, track 108305, only3194 left
currently on round 5100, track 111376, only2894 left
currently on round 5400, track 114411, only2594 left

In [20]:
counter = 0
for track_idx, mfcc_array in mfcc_dict.items():
    if len(mfcc_array) > 25820:
        counter += 1
counter

3767

In [25]:
smallest_array = 10*1000000
for track_idx, array in mfcc_dict.items():
    if array.shape[0] < smallest_array:
        smallest_array = array.shape[0]
        smallest_array_idx = track_idx
smallest_array_idx, smallest_array

('000002', 25820)

In [21]:
counter = 0
for track_idx, mfcc_array in mfcc_dict.items():
    if len(mfcc_array) > 25820:
        counter += len(mfcc_array) - 25820
counter

142340

In [28]:
# create equal track length
for track_idx, array in mfcc_dict.items():
    mfcc_dict[track_idx] = array[:25820]

In [30]:
mfcc_df = pd.DataFrame(mfcc_dict).T
mfcc_df.to_csv('../data/mfcc_df.csv')

In [6]:
def generate_spectrograms(data):
    """Generates spectrograms of the mel frequency scale. Mp3 files from the
    free music archive should be stored in an an adjacent directory named
    'data'. Returns None.
    
    Parameters
    ----------
    data - dataframe
        - Dataframe with track ids as index and a column named 'track_genre_top'
          which contains the target genre."""
    
    for i, track_id in enumerate(data.index):

        top_genre = data.loc[track_id]['track_genre_top']

        track_str = str(track_id).zfill(6)

        y, sr = librosa.load(f'../data/fma_small/{track_str[:3]}/{track_str}.mp3')

        mel_spec = librosa.feature.melspectrogram(y, sr)

        librosa.display.specshow(librosa.power_to_db(mel_spec,
                                                     ref=np.max), cmap='binary')
        plt.savefig(f'../mel_specs/{top_genre}/{top_genre}_{track_id}.png')
        plt.close()

        #     making it verbose
        if i % 300 == 0:
            print(f'currently on round {i}, track {track_str}, only {len(track_ids)-i} left');

        #     attempted memory management

        gc.collect()
            
    return None

generate_mel_specs(df.index)

NameError: name 'generate_mel_specs' is not defined

In [7]:
def generate_mel_specs(data):
    """Generates grayscale spectrograms of the mel frequency cepstral
    coefficients. Mp3 files from the free music archive should be stored in an
    an adjacent directory named 'data'. Returns None
    
    Parameters
    ----------
    data - dataframe
        - Dataframe with track ids as index and a column named 'track_genre_top'
          which contains the target genre."""
    
    for i, track_id in enumerate(data.index):

        top_genre = data.loc[track_id]['track_genre_top']

        track_str = str(track_id).zfill(6)

        y, sr = librosa.load(f'../data/fma_small/{track_str[:3]}/{track_str}.mp3')

        mel_spec = librosa.feature.melspectrogram(y, sr)

        librosa.display.specshow(librosa.power_to_db(mel_spec,
                                                     ref=np.max), cmap='binary')
        plt.savefig(f'../mel_specs/{top_genre}/{top_genre}_{track_id}.png')
        plt.close()

        rgb_img = Image.open(f'../mel_specs/{top_genre}/{top_genre}_{track_id}.png')

        grey_img = rgb_img.convert('L')

        grey_img.save(f'../mel_specs/{top_genre}/{top_genre}_{track_id}.png')


        #     making it verbose
        if i % 300 == 0:
            print(f'currently on round {i}, track {track_str}, only {len(track_ids)-i} left');

        #     memory management

        gc.collect()
            
    return None

generate_mel_specs(df.index)

currently on round 0, track 000002, only 7994 left


KeyboardInterrupt: 

above cell currently uses up memory over time and crashes. requires manual restart. figure it out