In [1]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os,IPython, librosa, mir_eval
from sys import platform
from os import listdir
from os.path import isfile, join,isdir
from IPython.display import Audio
from librosa.display import waveplot,specshow
from librosa.onset import onset_strength, onset_detect
from librosa.feature import melspectrogram, mfcc
from librosa import load

from collections import defaultdict,OrderedDict
import sklearn
from sklearn.preprocessing import StandardScaler,LabelEncoder
import scipy
from pandas import HDFStore,DataFrame



# Song Extraction from fma_small

1. Locate the `fma_small` folder on the current machine (under the user's home directory)
2. Retrieve the different genre classifications
3. Identify each song by its full file path, built from its index, to guarantee a one-to-one mapping
4. Sort the songs alphabetically by genre

In [None]:
# Resolve the fma_small dataset folder (expected under <home>/Documents) and
# load its JSON metadata file into a DataFrame.
HOME_DIR = IPython.utils.path.get_home_dir()
temp = join(HOME_DIR, 'Documents')
path_to_small_fma = join(temp, 'fma_small')
print(path_to_small_fma)

# Metadata file that ships inside the dataset folder.
json_file = join(path_to_small_fma, 'fma_small.json')
df = pd.read_json(json_file)
print(json_file)


In [None]:
# Keep only the top-level genre; it is the classification label.
df = df.loc[:, ['top_genre']]

# Some genre names contain a slash (the original note cites an
# "Oldtime / Historian" naming conflict) while the folder on disk uses only the
# part before it. The separator inside the genre *name* is always "/", so it
# must not depend on the operating system's path separator (os.sep).
df['top_genre'] = df['top_genre'].str.split('/').str[0].str.strip()
complete_genre_list = df['top_genre'].unique()

# File names are derived from the index, zero-padded to at least two digits.
str_index = ["%.2d" % x for x in df.index]

# Full path of every song: <fma_small>/<genre>/<index>.mp3. os.path.join is used
# so the separators agree with code that later takes these paths apart.
df['full_path_to_song'] = [
    join(path_to_small_fma, genre, idx + ".mp3")
    for genre, idx in zip(df['top_genre'], str_index)
]

# Order the songs alphabetically by genre so each genre forms one contiguous run.
df = df.sort_values(by='top_genre')
df.head()


In [None]:
# Single-column table listing every distinct genre.
complete_genre_list_df = pd.DataFrame({'Genre': complete_genre_list})

In [None]:
# All song paths, in the current row order of df.
all_songs_path = df['full_path_to_song'].values

# Number of songs per genre, ordered by genre name.
genre_and_count = df['top_genre'].value_counts().sort_index()
print(genre_and_count)

In [None]:
# genre_and_count already holds one count per genre in index order, so the two
# ordered lists are read straight from it instead of recounting the whole
# 'top_genre' column once per genre.
ordered_genres = list(genre_and_count.index)
song_counts = genre_and_count.tolist()

# Kept from the original cell; nothing in the visible cells fills or reads it.
full_song_df = OrderedDict()

num_of_genre = len(genre_and_count)
print("The number of genres is:", num_of_genre)

# Mapping genre -> number of songs, in the same order as ordered_genres.
genre_to_song_zipped = list(zip(ordered_genres, song_counts))
genre_to_song_dict = dict(genre_to_song_zipped)

genre_to_song_dict

In [None]:
num_of_genre = np.shape(genre_and_count)[0]
all_songs_path = df['full_path_to_song'].values

# Slice boundaries of each genre inside all_songs_path: a leading 0 followed by
# the running total of songs per genre (genres may have different sizes).
# The 0 is prepended to a fresh array instead of being inserted into
# song_counts, so re-running this cell no longer shifts every boundary.
cumulative_sum = np.concatenate(([0], np.cumsum(song_counts, dtype=int)))

# Map "<genre>_paths" -> array holding that genre's slice of all_songs_path.
paths_dict = OrderedDict(
    (genre + "_paths", all_songs_path[cumulative_sum[i]:cumulative_sum[i + 1]])
    for i, genre in enumerate(ordered_genres)
)

#paths_dict
#{genre_path_name: genre_paths}
print("{'Electronic_paths:[array_of_all_electronic_paths]}")

In [None]:
%%time
num_of_songs = 3
# NOTE(review): not passed to load() below, which keeps each file's own rate
# (sr=None) — confirm 44100 is the rate of the dataset files.
sampling_rate = 44100

genre_signals_dict = OrderedDict()
# Map "<genre>_signals" -> list of waveforms of the first num_of_songs songs.
for genre_path_name, genre_paths in paths_dict.items():
    # "<genre>_paths" -> "<genre>_signals"
    genre_signals = genre_path_name[:-5] + "signals"

    # Load file by file so that one unreadable song no longer discards the
    # whole genre, and report exactly which file failed.
    loaded_signals = []
    for song_path in genre_paths[:num_of_songs]:
        try:
            loaded_signals.append(load(song_path, sr=None)[0])
        except IOError as exc:
            print("Unable to load %s: %s" % (song_path, exc))

    # A genre with no readable song is left out of the dictionary.
    if loaded_signals:
        genre_signals_dict[genre_signals] = loaded_signals

#genre_signals_dict
#{genre_signals_name:genre_signals_paths}
print("{'Electronic_signals:[array_of_all_electronic_paths]}")

In [None]:
# Plot the waveform of every loaded song: one figure per genre, one subplot
# per song.
for genre_signal_name, genre_signals in genre_signals_dict.items():
    # Open the genre's figure before drawing into it. Creating the figure
    # after the inner loop left an extra, empty figure behind the last genre.
    plt.figure()
    for i, sig_amp in enumerate(genre_signals):
        plt.subplot(1, num_of_songs, i + 1)
        # NOTE(review): waveplot is called without sr, so its default rate
        # labels the time axis — confirm it matches the loaded audio.
        waveplot(sig_amp)
        plt.ylim(-1, 1)
        plt.title(genre_signal_name)
        
    

## Testing the complete feature extraction on a single song

In [2]:
# Load one reference song for developing the feature extraction.
# The track is looked up under the current user's home directory
# (<home>/Documents/fma_small/Electronic/99289.mp3), which covers both
# machines that were previously hard-coded by absolute path, and avoids
# os.uname(), which is not available on every platform.
single_song_path = join(os.path.expanduser("~"), "Documents", "fma_small",
                        "Electronic", "99289.mp3")

if isfile(single_song_path):
    song, sampling_rate = librosa.load(single_song_path)
else:
    # Dataset not present on this machine: use librosa's bundled example clip.
    song, sampling_rate = librosa.load(librosa.util.example_audio_file())


In [3]:
# The song is cut into sections, and every section into hops.
num_of_sections = 19
num_of_hops_per_section = 7
song_length = len(song)

# Total number of hops across the whole song.
tot_num_of_hops = num_of_sections * num_of_hops_per_section

tot_num_of_hops

133

In [4]:
# Samples per hop, rounded down so that all hops fit inside the song.
hop_width = song_length // tot_num_of_hops
hop_width

4971

In [5]:
# Samples per section: a whole number of hops.
section_width = num_of_hops_per_section * hop_width
section_width

34797

In [6]:
# Drop the trailing samples so the song length is an exact multiple of the
# hop width.
song = song[:tot_num_of_hops * hop_width]

song.shape

(661143,)

In [7]:
# One row per section.
section_per_song = song.reshape(num_of_sections, section_width)
section_per_song.shape

(19, 34797)

In [8]:
# Work on the first section only.
one_song_section = section_per_song[0]
one_song_section.shape

(34797,)

In [9]:
# One row per hop of the selected section.
hop_per_section = one_song_section.reshape(num_of_hops_per_section, hop_width)
hop_per_section.shape

(7, 4971)

In [11]:
# Number of MFCC coefficients requested from librosa.feature.mfcc (n_mfcc).
num_of_mfcc = 12

In [12]:
# MFCCs of the selected section, transposed so that rows are frames and
# columns are coefficients. The FFT window spans the whole section and the
# frames advance by one hop.
mfcc_per_hop = np.transpose(
    librosa.feature.mfcc(
        y=one_song_section,
        sr=sampling_rate,
        n_mfcc=num_of_mfcc,
        n_fft=section_width,
        hop_length=hop_width,
    )
)

mfcc_per_hop.shape

(7, 12)

In [14]:
# Zero-crossing count of every hop (sum of the crossing flags along each row).
zcr_per_hop = librosa.core.zero_crossings(hop_per_section).sum(axis=1)

zcr_per_hop.shape

(7,)

In [None]:
# Per-hop feature matrix: the MFCCs with the zero-crossing count appended as
# one extra column. The cell used to reference unit_mfcc / unit_zcr, which no
# visible cell defines at notebook level; the per-hop arrays computed above
# are the matching inputs.
ind_unit_features = np.concatenate((mfcc_per_hop, zcr_per_hop[:, np.newaxis]), axis=1)
ind_unit_features.shape

In [None]:
def complete_extract_features(song,num_of_sections,num_of_mfcc,genre,sr=None):
    """Build one feature row from the first section of ``song``.

    The song is divided into ``num_of_sections`` equal parts and only the
    first part is analysed. The previous implementation looped over the parts
    but returned during the first iteration; that behaviour is kept and made
    explicit here.
    NOTE(review): confirm whether the remaining sections were meant to be used.

    Layout of the returned row, left to right:
      1. zero-crossing count of each of ``num_of_sections`` equal sub-windows
         of the section,
      2. the frame indices returned by ``librosa.onset.onset_detect``,
      3. the mean spectral centroid of the section,
      4. the normalised autocorrelation of the onset-strength envelope
         (at most ``tempogram.shape[0]`` lags),
      5. ``genre``.

    NOTE(review): part 2 holds one value per detected onset, so the row length
    presumably differs between songs — verify before stacking rows into a
    fixed-width array.

    Parameters
    ----------
    song : one-dimensional array of audio samples.
    num_of_sections : int
        Number of parts the song is divided into; also the number of
        sub-windows used for the zero-crossing counts.
    num_of_mfcc : int
        Currently unused (the MFCC step is disabled); kept so existing calls
        keep working.
    genre : scalar
        Label appended as the last value of the row.
    sr : number, optional
        Sampling rate of ``song``. When omitted, the notebook-level
        ``sampling_rate`` is used, as before.

    Returns
    -------
    numpy.matrix with a single row.
    """
    if sr is None:
        sr = sampling_rate

    # First of num_of_sections equal slices. The slice size comes from this
    # song's own length instead of the notebook-level song_length, which
    # belongs to whichever song was loaded last in the notebook.
    unit_song = song[:len(song) // num_of_sections]
    unit_song_length = unit_song.shape[0]

    # Width of one sub-window; the hop used by the spectral features is one
    # sample longer.
    sub_width = unit_song_length // num_of_sections
    window_width = sub_width + 1

    # Drop the remainder so the section reshapes into equal sub-windows.
    unit_song = unit_song[:sub_width * num_of_sections]
    split_song = np.reshape(unit_song, (num_of_sections, sub_width))
    unit_zcr = np.sum(librosa.core.zero_crossings(split_song), axis=1)

    onset_frames = librosa.onset.onset_detect(y=unit_song, sr=sr)

    cent = librosa.feature.spectral_centroid(y=unit_song, sr=sr,
                                             hop_length=window_width)[0].mean()

    oenv = librosa.onset.onset_strength(y=unit_song, sr=sr, hop_length=window_width)
    # The tempogram is only used for its first dimension, which caps the
    # number of autocorrelation lags.
    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr,
                                          hop_length=window_width)
    ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
    ac_global = librosa.util.normalize(ac_global)

    full_feat_gen = np.concatenate(
        (unit_zcr, onset_frames, [cent], ac_global, np.ravel(genre))
    )
    # Returned as a 1-row np.matrix to keep the original return type.
    return np.matrix(full_feat_gen)



In [None]:
# Try the extractor on the reference song; 10 is a placeholder genre code.
test_out = complete_extract_features(song,num_of_sections,num_of_mfcc,10)

# Only the last expression of a cell is displayed, so the shape is printed
# explicitly.
print(test_out.shape)
test_out

## Generalisation of feature extraction for all the songs in the fma_small dataset


In [None]:
tot_num_of_songs = cumulative_sum[-1]

# One array of paths per genre, in paths_dict order.
indiv_song_path = list(paths_dict.values())

# Flatten into a single 1-D array. np.concatenate copes with genres that have
# different numbers of songs, whereas np.array(...).reshape(...) only works
# when every genre contributes the same number.
if indiv_song_path:
    indiv_song_path_list = np.concatenate(indiv_song_path)
else:
    indiv_song_path_list = np.array([], dtype=object)

assert len(indiv_song_path_list) == tot_num_of_songs, \
    "number of collected paths does not match the cumulative song count"

In [None]:
# Inspect the flattened array of song paths.
indiv_song_path_list


In [None]:
# The genre of a song is the name of the folder that directly contains its
# file. os.path.dirname/basename are used instead of splitting on os.sep so
# the result does not depend on which separator the path was built with.
genres = [
    os.path.basename(os.path.dirname(song_path))
    for song_path in indiv_song_path_list
]
        

In [None]:
# Encode the genre names as integer labels. fit_transform returns the encoded
# labels themselves; fit() alone returns the encoder object, so the previous
# `encoded_genres` did not hold what its name says.
le = LabelEncoder()
encoded_genres = le.fit_transform(genres)

# Label encoding mapping: one row per song with its genre name and its code.
encoder_df = pd.DataFrame(data={'Genre': genres,
                                'Encoded_Genre': encoded_genres})
encoder_df.head()

In [None]:
# NOTE(review): looks like a leftover progress/debug marker — confirm it can be removed.
print("I AM HERE")

In [None]:
%%time
tot_num_of_songs = cumulative_sum[-1]

# One row per song, as wide as the row produced for the reference song.
# Rows start as NaN rather than 0 so a song that could not be processed stays
# recognisable instead of looking like a valid all-zero sample of genre 0.
final_d = np.full((tot_num_of_songs, test_out.shape[1]), np.nan)
print(final_d.shape)

for song_num, indiv_song_path in enumerate(indiv_song_path_list):
    try:
        # Load at the notebook-level sampling_rate, which is the rate
        # complete_extract_features assumes for its input.
        song_signal = librosa.load(indiv_song_path, sr=sampling_rate)[0]
        curr_song_genre = encoder_df['Encoded_Genre'].iat[song_num]
        # NOTE(review): the assignment needs exactly test_out.shape[1] values;
        # a row of any other length raises ValueError — confirm the extractor
        # returns a constant length for every song.
        final_d[song_num] = complete_extract_features(song_signal, num_of_sections,
                                                      num_of_mfcc, curr_song_genre)
    except IOError as exc:
        # Report which file failed; its row stays NaN.
        print("Unable to load %s: %s" % (indiv_song_path, exc))
    


            

In [None]:
# Dimensions of the feature array built by the extraction loop.
final_d.shape

In [None]:
# NOTE(review): looks like a leftover progress/debug marker — confirm it can be removed.
print("I AM HERE")

In [None]:

# Wrap the feature array in a DataFrame: rows follow final_d's rows and the
# columns are numbered 0..n_features-1.
complete_df = pd.DataFrame(final_d)

complete_df.head()

In [None]:
# Dimensions of the feature DataFrame.
complete_df.shape

In [None]:
# First rows of the genre name / encoded label table.
encoder_df.head()

In [None]:
# Dimensions of the genre name / encoded label table.
encoder_df.shape

In [None]:
# Put the feature columns and the genre label columns side by side.
final_df = pd.concat((complete_df, encoder_df), axis='columns')

In [None]:
# Dimensions of the combined feature + label table.
final_df.shape

In [None]:
# final_df was already built from complete_df and encoder_df in the cell
# above; repeating the concat here was redundant, so just preview it.
final_df.head()


In [None]:
# Dimensions of the combined feature + label table.
final_df.shape

In [None]:
# Write the combined table to a comma-separated file in the working directory.
final_df.to_csv("complete_extract_with_onset.csv")