In [4]:
# import ast
# from pandas.api.types import CategoricalDtype
import ipyparallel
# import librosa
# import numpy as np
# import os
# import pandas as pd
# import pywt
# from scipy.stats import skew
import sys
MODULE_PATH = '/home/macbookretina/automatic-music-genre-classification/feature_extraction_deep_learning'
sys.path.insert(1, MODULE_PATH)
from custom_module.utilities import *

In [2]:
# FMA
# build a lookup table (indexed like tracks.csv) holding the subset and the
# top-level genre of every track that belongs to the small subset.

print('collecting track id and genres of tracks in the small subset of fma dataset')

# the two columns of the metadata table this notebook cares about
subset_key = ('set', 'subset')
genre_key = ('track', 'genre_top')

tracks = load(MOUNTED_DATASET_PATH + '/fma_metadata/tracks.csv')
fma_full = tracks[[subset_key, genre_key]]

# boolean mask selecting the rows of the small subset
small_subset = fma_full[subset_key] == 'small'
fma_small = fma_full[small_subset]

# flatten the two-level column keys into plain 'subset' / 'label' columns
fma_small = pd.DataFrame({
    'subset': fma_small[subset_key],
    'label': fma_small[genre_key],
})

print('done')

collecting track id and genres of tracks in the small subset of fma dataset
done


In [12]:
# connect this notebook to the ipyparallel cluster that is already running
cluster = ipyparallel.Client()

# report the profile we attached to and the ids of the available engines
for caption, value in (('profile:', cluster.profile), ("IDs:", cluster.ids)):
    print(caption, value)

profile: default
IDs: [0, 1, 2, 3, 4, 5, 6, 7]


In [13]:
# split the engines into two sets of 4 and get a DirectView object for each set.
# The second slice has to start at 4: `cluster[5:]` would leave engine 4 in
# neither view, so it would never receive the synced imports or the pushed
# variables even though the `%%px_last --targets 4` cells run on it.
direct_views_first_4_engines = cluster[:4]
direct_views_last_4_engines = cluster[4:]

In [14]:
# run the shared import list on the engines behind each DirectView object
for engine_view in (direct_views_first_4_engines, direct_views_last_4_engines):
    with engine_view.sync_imports():
        remote_imports()

importing ast on engine(s)
importing CategoricalDtype from pandas.api.types on engine(s)
importing ipyparallel on engine(s)
importing librosa on engine(s)
importing numpy on engine(s)
importing os on engine(s)
importing pandas on engine(s)
importing pywt on engine(s)
importing skew from scipy.stats on engine(s)


KeyboardInterrupt: 

In [None]:
# initialize shared resources for each DirectView object

# names that have to exist in the namespace of every engine
shared_resources = {
    'MOUNTED_DATASET_PATH': MOUNTED_DATASET_PATH,
    'LOCAL_MOUNTED_DATASET_PATH': LOCAL_MOUNTED_DATASET_PATH,
    'SAMPLE_FILE': SAMPLE_FILE,
    'GENRES': GENRES,
    'extract_audio_features': extract_audio_features,
    'dataframe': dataframe,
    'fma_small': fma_small,
}

# both groups of engines receive exactly the same payload
direct_views_first_4_engines.push(shared_resources)
direct_views_last_4_engines.push(shared_resources)

In [None]:
# register the parallel magics of each DirectView object under its own suffix
for engine_view, magic_suffix in (
    (direct_views_first_4_engines, '_first'),
    (direct_views_last_4_engines, '_last'),
):
    engine_view.activate(magic_suffix)

In [8]:
# GTZAN
# run the feature extraction on every file whose genre is in GENRES

print('extracting features from gtzan dataset')

gtzan_directory = MOUNTED_DATASET_PATH + '/gtzan/wavfiles'

for file in os.scandir(gtzan_directory):
    # skip anything that is not a regular file
    if not file.is_file():
        continue

    # the genre is the part of the file name in front of the first dot
    genre_label = file.name.partition('.')[0]
    if genre_label not in GENRES:
        continue

    dataframe = extract_audio_features(dataframe, file, genre_label, 'gtzan')

print('done')

extracting features from gtzan dataset
appended features extracted from hiphop.00000.wav with genre: hiphop
appended features extracted from hiphop.00001.wav with genre: hiphop
appended features extracted from hiphop.00002.wav with genre: hiphop
appended features extracted from hiphop.00003.wav with genre: hiphop


KeyboardInterrupt: 

In [9]:
# FMA
# extract features

print('extracting features from fma dataset')

for directory in os.scandir(MOUNTED_DATASET_PATH + '/fma_small'):
    if directory.is_dir():
        for file in os.scandir(directory.path):
            if file.is_file():
                # the file stem is the zero-padded track id. int() accepts the
                # leading zeros as they are; stripping them first would turn an
                # all-zero stem into '' and make int() raise.
                track_id = int(os.path.splitext(file.name)[0])
                # map track id to genre label
                genre_label = fma_small.at[track_id, 'label'].lower().replace('-', '')
                if genre_label in GENRES:
                    dataframe = extract_audio_features(dataframe, file, genre_label, 'fma')

# the original passed the undefined name `done` here, which raised NameError
print('done')

extracting features from fma dataset




appended features extracted from 000002.mp3 with genre: hiphop




appended features extracted from 000005.mp3 with genre: hiphop


KeyboardInterrupt: 

In [41]:
# check size of  log-mel / constant-Q transform in gtzan

# shape of every transform computed below. The list is created, filled and
# read under the same name; the original created `sizes_1` but appended to
# and inspected `sizes`.
sizes_1 = []

print('extracting log-mel from gtzan dataset')

for file in os.scandir(MOUNTED_DATASET_PATH + '/gtzan/wavfiles'):
    if file.is_file():

        # extract genre label (the part of the file name before the first dot)
        genre_label = str(file.name).split('.')[0]

        if genre_label in GENRES:

            # get sample rate of audio file
            sample_rate = librosa.core.get_samplerate(file.path)

            # load audio file as time series at its own sample rate;
            # `sr` is passed by keyword because newer librosa releases
            # no longer accept it positionally
            time_series, _ = librosa.core.load(file.path, sr=sample_rate)

            # compute cqt
            cqt = librosa.cqt(time_series, sr=sample_rate)

            # convert from amplitude to decibels unit; the transform is
            # complex, so take the magnitude explicitly instead of letting
            # amplitude_to_db discard the phase with a warning
            scaled_cqt = librosa.amplitude_to_db(np.abs(cqt), ref=np.max)

            # adding the size to the list
            sizes_1.append(scaled_cqt.shape)

# check if all sizes are the same
print('The sizes of all the log-mel in our data set are equal: ' +  str(len(set(sizes_1)) == 1))

# check the max size
print('The maximum size is: ' + str(max(sizes_1)))

extracting log-mel from gtzan dataset




The sizes of all the log-mel in our data set are equal: False
The maximum size is: (84, 1320)


In [42]:
# check size of mel-spectrogram in gtzan

# shape of every spectrogram computed below. The list is created, filled and
# read under the same name; the original created `sizes_2` but appended to
# and inspected `sizes`.
sizes_2 = []

print('extracting mel-spectogram from gtzan dataset')

for file in os.scandir(MOUNTED_DATASET_PATH + '/gtzan/wavfiles'):
    if file.is_file():

        # extract genre label (the part of the file name before the first dot)
        genre_label = str(file.name).split('.')[0]

        if genre_label in GENRES:

            # get sample rate of audio file
            sample_rate = librosa.core.get_samplerate(file.path)

            # load audio file as time series at its own sample rate;
            # `sr` is passed by keyword because newer librosa releases
            # no longer accept it positionally
            time_series, _ = librosa.core.load(file.path, sr=sample_rate)

            # compute spectrogram (keyword arguments for the same reason)
            mel_spect = librosa.feature.melspectrogram(y=time_series, sr=sample_rate)

            # convert spectrogram to decibels unit
            scaled_mel_spect = librosa.power_to_db(mel_spect, ref=np.max)

            # adding the size to the list
            sizes_2.append(scaled_mel_spect.shape)

# check if all sizes are the same
print('The sizes of all the log-mel in our data set are equal: ' +  str(len(set(sizes_2)) == 1))

# check the max size
print('The maximum size is: ' + str(max(sizes_2)))

extracting mel-spectogram from gtzan dataset
The sizes of all the log-mel in our data set are equal: False
The maximum size is: (128, 1320)


In [11]:
%%px_first --targets 0 --noblock
# check size of  log-mel / constant-Q transform in fma
    
# creating an empty list to store sizes in
sizes = []

print('extracting log-mel from fma dataset')

for directory in os.scandir(MOUNTED_DATASET_PATH + '/fma_small'):
    if directory.is_dir():
        for file in os.scandir(directory.path):
            if file.is_file():

                # extract track id
                track_id = int(file.name[:-4].lstrip('0'))

                # map track id to genre label
                genre_label = fma_small.at[track_id, 'label'].lower().replace('-', '')

                if genre_label in GENRES:

                    # get sample rate of audio file
                    sample_rate = librosa.core.get_samplerate(str(file.path))

                    # load audio file as time series
                    time_series, _ = librosa.core.load(file.path, sample_rate)

                    # compute cqt
                    cqt = librosa.cqt(time_series, sample_rate)

                    # convert from amplitude to decibels unit
                    scaled_cqt = librosa.amplitude_to_db(cqt, ref=numpy.max) 

                    # adding the size to the list
                    sizes.append(scaled_cqt.shape)

# check if all sizes are the same
print('The sizes of all the log-mel in our data set are equal: ' +  str(len(set(sizes)) == 1))

# check the max size
print('The maximum size is: ' + str(max(sizes)))

UsageError: Cell magic `%%px_first` not found.


In [17]:
%%px_last --targets 4 --noblock
# check size of  mel-spectogram  in fma

# creating an empty list to store sizes in
sizes = []

print('extracting mel-spectogram  from fma dataset')

for directory in os.scandir(MOUNTED_DATASET_PATH + '/fma_small'):
    if directory.is_dir():
        for file in os.scandir(directory.path):
            if file.is_file():

                # extract track id
                track_id = int(file.name[:-4].lstrip('0'))

                # map track id to genre label
                genre_label = fma_small.at[track_id, 'label'].lower().replace('-', '')

                if genre_label in GENRES:

                    # get sample rate of audio file
                    sample_rate = librosa.core.get_samplerate(file.path)

                    # load audio file as time series
                    time_series, _ = librosa.core.load(file.path, sample_rate)

                    # compute spectogram
                    mel_spect = librosa.feature.melspectrogram(time_series, sample_rate)

                    # convert spectogram to decibels unit 
                    scaled_mel_spect = librosa.power_to_db(mel_spect, ref=numpy.max)

                    # adding the size to the list
                    sizes.append(scaled_mel_spect.shape)                

# check if all sizes are the same
print('The sizes of all the log-mel in our data set are equal: ' +  str(len(set(sizes)) == 1))

# check the max size
print('The maximum size is: ' + str(max(sizes)))

<AsyncResult: execute>

In [None]:
# display the output of the most recent `%%px_first` execution
%pxresult_first

In [None]:
# display the output of the most recent `%%px_last` execution
%pxresult_last

In [None]:
# extract log-mel / constant-Q transform in gtzan

# every transform is brought to this many frames; 1320 is the largest frame
# count reported by the size check on the gtzan files
CQT_FRAMES = 1320

# one flattened transform and one genre label per processed file
cqts = []
genre_labels_1 = []

print('extracting log-mel from gtzan dataset')

for file in os.scandir(MOUNTED_DATASET_PATH + '/gtzan/wavfiles'):
    if file.is_file():
        # extract genre label (the part of the file name before the first dot)
        genre_label = str(file.name).split('.')[0]

        if genre_label in GENRES:

            # record the label only for files that also contribute a feature
            # row, so labels and features always stay aligned
            genre_labels_1.append(genre_label)

            # get sample rate of audio file
            sample_rate = librosa.core.get_samplerate(file.path)

            # load audio file as time series at its own sample rate;
            # `sr` is passed by keyword because newer librosa releases
            # no longer accept it positionally
            time_series, _ = librosa.core.load(file.path, sr=sample_rate)

            # compute cqt
            cqt = librosa.cqt(time_series, sr=sample_rate)

            # convert from amplitude to decibels unit; the transform is
            # complex, so take the magnitude explicitly instead of letting
            # amplitude_to_db discard the phase with a warning
            scaled_cqt = librosa.amplitude_to_db(np.abs(cqt), ref=np.max)

            # bring the time axis to CQT_FRAMES columns: trim extra frames and
            # zero-pad missing ones at the end of every frequency row.
            # ndarray.resize must not be used for this: it refills the buffer
            # in flattened order, which moves samples onto the wrong rows.
            # NOTE(review): with ref=np.max, 0 dB is the loudest value; the
            # zero fill is kept from the original, consider scaled_cqt.min().
            missing_frames = max(CQT_FRAMES - scaled_cqt.shape[1], 0)
            scaled_cqt = np.pad(
                scaled_cqt[:, :CQT_FRAMES],
                ((0, 0), (0, missing_frames)),
                mode='constant',
            )

            # flatten to fit into dataframe and add to the list
            cqts.append(scaled_cqt.flatten())

# stack the per-file vectors into one (files, features) array
cqts = np.array(cqts)

# create dataframe: column 0 holds the label, columns 1.. hold the features.
# The label is inserted as its own column instead of being np.hstack-ed with
# the features, which would convert every number to a string and required a
# hard-coded row count.
cqt_df = pd.DataFrame(cqts, columns=range(1, cqts.shape[1] + 1))
cqt_df.insert(0, 0, genre_labels_1)

In [None]:
# extract mel-spectrogram in gtzan

# every spectrogram is brought to this many frames; 1320 is the largest frame
# count reported by the size check on the gtzan files
MEL_FRAMES = 1320

# one flattened spectrogram and one genre label per processed file
mel_spects = []
genre_labels_2 = []

print('extracting mel-spectogram from gtzan dataset')

for file in os.scandir(MOUNTED_DATASET_PATH + '/gtzan/wavfiles'):
    if file.is_file():
        # extract genre label (the part of the file name before the first dot)
        genre_label = str(file.name).split('.')[0]

        if genre_label in GENRES:

            # record the label only for files that also contribute a feature
            # row, so labels and features always stay aligned
            genre_labels_2.append(genre_label)

            # get sample rate of audio file
            sample_rate = librosa.core.get_samplerate(file.path)

            # load audio file as time series at its own sample rate;
            # `sr` is passed by keyword because newer librosa releases
            # no longer accept it positionally
            time_series, _ = librosa.core.load(file.path, sr=sample_rate)

            # compute spectrogram (keyword arguments for the same reason)
            mel_spect = librosa.feature.melspectrogram(y=time_series, sr=sample_rate)

            # convert spectrogram to decibels unit
            scaled_mel_spect = librosa.power_to_db(mel_spect, ref=np.max)

            # bring the time axis to MEL_FRAMES columns: trim extra frames and
            # zero-pad missing ones at the end of every mel band.
            # ndarray.resize must not be used for this: it refills the buffer
            # in flattened order, which moves samples onto the wrong rows.
            # NOTE(review): with ref=np.max, 0 dB is the loudest value; the
            # zero fill is kept from the original, consider
            # scaled_mel_spect.min().
            missing_frames = max(MEL_FRAMES - scaled_mel_spect.shape[1], 0)
            scaled_mel_spect = np.pad(
                scaled_mel_spect[:, :MEL_FRAMES],
                ((0, 0), (0, missing_frames)),
                mode='constant',
            )

            # flatten to fit into dataframe and add to the list
            mel_spects.append(scaled_mel_spect.flatten())

# stack the per-file vectors into one (files, features) array
mel_spects = np.array(mel_spects)

# create dataframe: column 0 holds the label, columns 1.. hold the features.
# The label is inserted as its own column instead of being np.hstack-ed with
# the features, which would convert every number to a string and required a
# hard-coded row count.
mel_spect_df = pd.DataFrame(mel_spects, columns=range(1, mel_spects.shape[1] + 1))
mel_spect_df.insert(0, 0, genre_labels_2)

In [12]:
%%px_first --targets 0 --noblock
# sent to engine 0 (--targets 0); --noblock hands back an AsyncResult
# instead of waiting for the cell to finish
print('start 1')
import time
# pause for 10 seconds, then report the engine's local time
time.sleep(10)
print(time.localtime())

<AsyncResult: execute>

In [14]:
# display the output of the most recent `%%px_first` execution
%pxresult_first

start 1
time.struct_time(tm_year=2020, tm_mon=5, tm_mday=25, tm_hour=16, tm_min=29, tm_sec=16, tm_wday=0, tm_yday=146, tm_isdst=0)


In [13]:
%%px_last --targets 4 --noblock
# sent to engine 4 (--targets 4); --noblock hands back an AsyncResult
# instead of waiting for the cell to finish
print('start 2')
import time
# pause for 6 seconds, then report the engine's local time
time.sleep(6)
print(time.localtime())

<AsyncResult: execute>

In [15]:
# display the output of the most recent `%%px_last` execution
%pxresult_last

start 2
time.struct_time(tm_year=2020, tm_mon=5, tm_mday=25, tm_hour=16, tm_min=29, tm_sec=14, tm_wday=0, tm_yday=146, tm_isdst=0)


In [None]:
# NOTE(review): the visible cells only activate the `_first` and `_last`
# magic suffixes; confirm that a view was activated with the suffix `_`,
# otherwise `%px_` is not a registered magic.
%px_ %connect_info