In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os,IPython, librosa, mir_eval
from sys import platform
from os import listdir
from os.path import isfile, join,isdir
from IPython.display import Audio
from librosa.display import waveplot,specshow
from librosa.onset import onset_strength, onset_detect
from librosa.feature import melspectrogram, mfcc
from librosa import load

from collections import defaultdict,OrderedDict
import sklearn
from sklearn.preprocessing import StandardScaler,LabelEncoder
import scipy
from pandas import HDFStore,DataFrame



# Song Extraction from fma_small

1. Locate the `fma_small` song folder relative to the current user's home directory
2. Retrieve the different genre classifications
3. Identify each song by its full file path, built from its track index, to guarantee a one-to-one mapping
4. Sort the songs alphabetically by genre

In [None]:
# Resolve the fma_small dataset directory and its metadata file, both
# located under the Documents folder of the current user's home directory.
HOME_DIR = IPython.utils.path.get_home_dir()
temp = join(HOME_DIR, 'Documents')
path_to_small_fma = join(HOME_DIR, 'Documents', 'fma_small')
json_file = join(path_to_small_fma, 'fma_small.json')

# Echo the dataset directory, load the track metadata, then echo the
# metadata file that was just read.
print(path_to_small_fma)
df = pd.read_json(json_file)
print(json_file)


In [None]:
# Keep only the top-level genre, which is used as the classification label.
df = df.loc[:, ['top_genre']].copy()

# Make the genre name match the folder name in fma_small (the
# "Oldtime / Historian" conflict): keep only the text before the first "/".
# That "/" is part of the metadata text, not a filesystem separator, so it is
# split on literally instead of on os.sep (which is "\\" on Windows).
df['top_genre'] = df['top_genre'].apply(lambda y: y.split("/")[0].strip())

# Track ids (the frame index) rendered as strings with at least two digits.
str_index = ["%.2d" % x for x in df.index]
complete_genre_list = df['top_genre'].unique()

# Full path of every song: <dataset root>/<genre>/<track id>.mp3.
# Built with os.path.join so the separator matches the os.sep split that is
# later used to recover the genre from each path.
df['full_path_to_song'] = [
    join(path_to_small_fma, genre, track_id + ".mp3")
    for genre, track_id in zip(df['top_genre'], str_index)
]

# Order the songs alphabetically by genre.
df = df.sort_values(by='top_genre')
df.head()


In [None]:
# Single-column table ('Genre') listing the unique top-level genres
complete_genre_list_df = pd.DataFrame(complete_genre_list, columns = ['Genre'])

In [None]:
# Number of songs in each genre, ordered alphabetically by genre name
genre_and_count = df['top_genre'].value_counts().sort_index()
# Every song path, in the frame's current row order
all_songs_path = df['full_path_to_song'].values  
print(genre_and_count)

In [None]:
# Genre names and their song counts as two parallel, order-preserving lists.
# Both are read straight from genre_and_count (already sorted by genre name),
# so value_counts() is not recomputed over the whole frame for every genre.
ordered_genres = list(genre_and_count.index)
song_counts = [int(count) for count in genre_and_count.values]
full_song_df = OrderedDict()

num_of_genre = len(ordered_genres)
print("The number of genres is:", num_of_genre)

# Dictionary mapping each genre to its song count
genre_to_song_dict = dict(zip(ordered_genres, song_counts))

genre_to_song_dict

In [None]:
num_of_genre = np.shape(genre_and_count)[0]
all_songs_path = df['full_path_to_song'].values

paths_dict = OrderedDict()

# Slice boundaries of each genre inside all_songs_path: a leading zero followed
# by the running total of the (non-uniform) per-genre counts. The zero is
# prepended to a copy, so song_counts itself is left untouched and re-running
# this cell cannot accumulate extra leading zeros and shift the boundaries.
cumulative_sum = np.cumsum([0] + list(song_counts), dtype=int)

# "<genre>_paths" -> array of that genre's song paths. The slicing assumes the
# rows of df are grouped by genre in the same order as ordered_genres.
for i, genre in enumerate(ordered_genres):
    paths_dict[genre + "_paths"] = all_songs_path[cumulative_sum[i]:cumulative_sum[i + 1]]

#paths_dict
#{genre_path_name: genre_paths}
print("{'Electronic_paths:[array_of_all_electronic_paths]}")

In [None]:
%%time
num_of_songs = 3
sampling_rate = 44100

genre_signals_dict = OrderedDict()
#creates a dictionary of the signals in a genre and their raw file
for genre_path_name,genre_paths in paths_dict.items():
    str1=genre_path_name[:-5]
    str2 = "signals"
    genre_signals = "".join((str1,str2))       
    try:
        first_three = genre_paths[:num_of_songs]
        genre_signals_dict[genre_signals] = [
        load(p,sr=None)[0] for p in first_three]
    except IOError as exc:
        print("Unable to locate folder")
        #raise IOError("%s: %s" % (genre_paths, exc.strerror))
        
#genre_signals_dict
#{genre_signals_name:genre_signals_paths}
print("{'Electronic_signals:[array_of_all_electronic_paths]}")

In [None]:
# Plot the time series of every loaded song: one figure per genre, with that
# genre's songs side by side.
for genre_signal_name, genre_signals in genre_signals_dict.items():
    # Open the figure *before* drawing this genre's subplots, so that no empty
    # figure is left behind after the last genre.
    plt.figure()
    for i, sig_amp in enumerate(genre_signals):
        plt.subplot(1, num_of_songs, i + 1)
        waveplot(sig_amp)
        plt.ylim(-1, 1)
        plt.title(genre_signal_name)
        
    

## Testing the complete feature extraction on a single song

In [None]:
# Load one known track to exercise the feature extraction on. The track is
# looked up inside the dataset directory resolved earlier (path_to_small_fma)
# rather than through per-machine absolute paths selected with os.uname(),
# which is unavailable on Windows and only recognised two specific computers.
test_song_path = join(path_to_small_fma, 'Electronic', '99289.mp3')

if not isfile(test_song_path):
    # Dataset track not available here: fall back to librosa's example clip
    test_song_path = librosa.util.example_audio_file()

# No sr is passed, so librosa.load uses its default target rate; note that
# this overwrites the sampling_rate value set in an earlier cell.
song, sampling_rate = librosa.load(test_song_path)

song.shape


In [None]:
# Length of the test song in samples
song_length = len(song)
# Number of chunks to split the song into: sections, each made of hops
num_of_hops_per_section = 3
num_of_sections = 7
# Passed to complete_extract_features as its num_of_features argument
num_of_features = 5

# Total number of hops across the whole song
tot_num_of_hops = num_of_hops_per_section*num_of_sections

tot_num_of_hops

In [None]:
# Samples per hop, rounded down so that every hop has the same width
hop_width = int(np.floor(song_length/tot_num_of_hops))
hop_width

In [None]:
# Samples per section: each section holds num_of_hops_per_section hops
section_width = hop_width*num_of_hops_per_section
section_width

In [None]:
# Drop the trailing samples so that the song length is an exact multiple of
# hop_width (and can therefore be reshaped into equal sections and hops)
song = song[:(hop_width*tot_num_of_hops)]

song.shape

In [None]:
# sps = "sections per song": view the trimmed song as
# (1 song, num_of_sections sections, section_width samples)
section_per_song = song.reshape(1, num_of_sections, section_width)
sps_song, sps_sect, sps_width = section_per_song.shape

print("Song number: %d, Number of sections:%d, Song Width: %d" % section_per_song.shape)

section_per_song.shape

In [None]:
# ons = "one song section": take the first section of the song and give it
# explicit song and section axes -> shape (1, 1, section_width)
one_song_section = section_per_song[0, 0, :]
one_song_section = np.reshape(one_song_section, (1, 1, one_song_section.shape[0]))
ons_song, ons_sect, ons_sect_width = one_song_section.shape

# The arguments follow the order of the labels in the message (the song and
# section values used to be passed the other way round).
print("Song number: %d, Section Number:%d, Section Width: %d" % (ons_song, ons_sect, ons_sect_width))

one_song_section.shape

In [None]:
# hps = "hops per section": split the single section into its hops ->
# shape (1, 1, num_of_hops_per_section, hop_width)
hop_per_section = one_song_section.reshape(1, 1, num_of_hops_per_section, hop_width)
hps_song, hps_sect, hps_hops, hps_hop_width = hop_per_section.shape

print("Song number: %d, Section Number:%d, Number of hops: %d, Hop Width:%d" % hop_per_section.shape)
hop_per_section.shape

In [None]:
# Number of MFCC coefficients requested from librosa (its n_mfcc argument)
num_of_mfcc = 12

In [None]:
# Inspect the shape of the single-section array
one_song_section.shape

In [None]:
# Inspect the number of samples per hop
hop_width

In [None]:
# Inspect the number of samples per section
section_width

In [None]:
# One less than the per-hop sample count (section_width / num_of_hops_per_section)
int(section_width/num_of_hops_per_section) - 1 

In [None]:
# MFCCs of the section, computed with a hop of hop_width samples and an FFT
# window of hop_width + 1 samples; transposed so that rows are frames.
one_song_section_squeezed = np.squeeze(one_song_section)
complete_n_fft = hop_width + 1
mfcc_frames = librosa.feature.mfcc(
    y=one_song_section_squeezed,
    sr=sampling_rate,
    n_fft=complete_n_fft,
    n_mfcc=num_of_mfcc,
    hop_length=hop_width,
).T

# Put the song and section axes in front of (frames, coefficients)
mfcc_per_section = np.reshape(mfcc_frames, (1, 1) + mfcc_frames.shape)
mps_song, mps_sect, mps_hops, mps_mfccs = mfcc_per_section.shape

print("Song number: %d, Section Number:%d, Number of hops: %d, Number of mfccs:%d" % mfcc_per_section.shape)
mfcc_per_section.shape

In [None]:
# Inspect the shape of the per-hop view of the section
hop_per_section.shape

In [None]:
# zps = "zero crossings per section": count the zero crossings inside each hop
hops_2d = np.squeeze(hop_per_section)
zcr_per_section = librosa.core.zero_crossings(hops_2d).sum(axis=1)

# One count per hop, with the song and section axes in front
zcr_per_section = zcr_per_section.reshape(1, 1, len(zcr_per_section), 1)
zps_song, zps_sect, zps_hops, zps_per_hop = zcr_per_section.shape
print("Song number: %d, Section Number:%d, Number of hops: %d, ZCR per hop:%d" % zcr_per_section.shape)


In [None]:
# Detect the onsets of the section. units = 'samples' is requested, so despite
# the variable name the values are sample positions rather than frame indices.
onset_frames = librosa.onset.onset_detect(y=one_song_section_squeezed, sr=sampling_rate,units = 'samples')
# Number of detected onsets
onset_length = len(onset_frames)
onset_frames

In [None]:
# Hop boundaries of the section in samples: 0, hop_width, 2*hop_width, ...
# up to and including the section length
song_splitter = np.arange(0,len(one_song_section_squeezed)+1, hop_width,dtype=int)
split_length = len(song_splitter)
song_splitter

In [None]:
# Running count of the onsets located before each hop boundary, starting at 0
total_onsets = [0] + [np.sum(onset_frames < boundary) for boundary in song_splitter[1:]]

total_onsets

In [None]:
# Onsets falling inside each hop: difference between consecutive running counts
onsets_per_hop = np.diff(total_onsets)

# One count per hop, with the song and section axes of one_song_section in front
onsets_per_section = onsets_per_hop.reshape(
    one_song_section.shape[0], one_song_section.shape[1], len(onsets_per_hop), 1
)
ops_song, ops_sect, ops_hops, ops_per_hop = onsets_per_section.shape
print("Song number: %d, Section Number:%d, Number of hops: %d, Number of Onsets per hop:%d" % onsets_per_section.shape)

onsets_per_section


In [None]:
# Inspect the shape of the squeezed section signal
one_song_section_squeezed.shape

In [None]:
# Inspect the number of samples per hop
hop_width

In [None]:
# Shape of the STFT of the section for the hop length and FFT size used by the features
librosa.stft(y=one_song_section_squeezed,  hop_length= hop_width,n_fft=complete_n_fft).shape

In [None]:
# Spectral centroid of the section, computed with the same hop length and FFT
# size as the MFCCs; transposed so that rows are frames.
centroid_frames = librosa.feature.spectral_centroid(
    y=one_song_section_squeezed,
    sr=sampling_rate,
    hop_length=hop_width,
    n_fft=complete_n_fft,
).T

# Put the song and section axes in front of (frames, values per frame)
spectral_centroid_per_section = np.reshape(centroid_frames, (1, 1) + centroid_frames.shape)
scps_song, scps_sect, scps_hops, scps_per_hop = spectral_centroid_per_section.shape
print("Song number: %d, Section Number:%d, Number of hops: %d, Number of spectral centroids per hop:%d" % spectral_centroid_per_section.shape)


In [None]:
# Onset-strength envelope of the section, with the same hop length and FFT size as the other features
oenv = librosa.onset.onset_strength(y=one_song_section_squeezed, sr=sampling_rate, hop_length=hop_width, n_fft =complete_n_fft)
# Tempogram of that envelope; only its first dimension is used below
tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sampling_rate,
                              hop_length=hop_width)
# Autocorrelation of the onset envelope, with max_size set to tempogram.shape[0]
ac_global_temp = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])
# One value per row, with the song and section axes of one_song_section in front
ac_global = np.reshape(ac_global_temp, (one_song_section.shape[0],one_song_section.shape[1],len(ac_global_temp),1))
ac_global.shape

In [None]:
# Compare the shapes of all feature arrays before they are concatenated
print("MFCC Per section: ",mfcc_per_section.shape,"ZCR per section:",zcr_per_section.shape,
      "SPC Per section: ",spectral_centroid_per_section.shape,"Onsets Per section: ",onsets_per_section.shape,
      "AC global Per section: ",ac_global.shape
     )

In [None]:
# Join all per-hop features along the last axis, in a fixed order:
# MFCCs, zero crossings, onsets, spectral centroid, onset autocorrelation
per_hop_blocks = (
    mfcc_per_section,
    zcr_per_section,
    onsets_per_section,
    spectral_centroid_per_section,
    ac_global,
)
features_per_section = np.concatenate(per_hop_blocks, axis=3)

fps_song, fps_sect, fps_hops, fps_per_hop = features_per_section.shape

features_per_section.shape

In [None]:
# Flatten the (hops, features per hop) axes into a single feature axis
flat_feature_count = fps_hops * fps_per_hop
features_per_section = features_per_section.reshape(1, 1, flat_feature_count)
features_per_section.shape

In [None]:
def complete_extract_features(song,num_of_sections,num_of_hops_per_section,num_of_mfcc,num_of_features,genre,sr=None):
    """Extract a flat vector of per-hop features from one song.

    The song is trimmed to a whole number of hops and cut into
    ``num_of_sections`` equal sections of ``num_of_hops_per_section`` hops.
    For each hop of a section the MFCCs, the zero-crossing count, the onset
    count and the spectral centroid are computed, then flattened hop by hop.
    The onset-autocorrelation feature explored earlier in the notebook is not
    part of this vector.

    NOTE(review): every section is processed, but each one overwrites the
    previous result, so only the features of the LAST section are returned.
    Callers in this notebook size their output from that shape, so the
    behaviour is kept -- confirm whether all sections were meant to be kept.

    Parameters
    ----------
    song : 1-D numpy array
        Audio samples of the song.
    num_of_sections : int
        Number of equal sections the song is divided into.
    num_of_hops_per_section : int
        Number of hops in each section.
    num_of_mfcc : int
        Number of MFCC coefficients computed per hop.
    num_of_features : int
        Accepted for backward compatibility; not used to size the output.
    genre : any
        Accepted for backward compatibility; not used.
    sr : number, optional
        Sampling rate of ``song``. When omitted, the notebook-level
        ``sampling_rate`` variable is used, as before.

    Returns
    -------
    numpy array of shape (1, (num_of_mfcc + 3) * num_of_hops_per_section)
        The flattened features, or all zeros if any step raised an error
        (the error is reported on standard output).
    """
    # Width of the vector built below: per hop, the MFCCs plus one value each
    # for zero crossings, onsets and spectral centroid. The zero fallback uses
    # the same width, so both outcomes fit the same output table.
    features_per_hop = num_of_mfcc + 3
    total_num_of_features = features_per_hop * num_of_hops_per_section
    rate = sampling_rate if sr is None else sr

    tot_num_of_hops = num_of_hops_per_section * num_of_sections
    hop_width = int(np.floor(len(song) / tot_num_of_hops))
    section_width = hop_width * num_of_hops_per_section
    # Drop trailing samples so the song is an exact multiple of the hop width
    song = song[:(hop_width * tot_num_of_hops)]
    section_per_song = np.reshape(song, (1, num_of_sections, section_width))
    # FFT window of one hop plus one sample, shared by all spectral features
    complete_n_fft = hop_width + 1

    try:
        for i in range(num_of_sections):
            section = section_per_song[0, i, :]
            hops = np.reshape(section, (num_of_hops_per_section, hop_width))

            # MFCCs: rows are frames, columns are coefficients
            mfcc_frames = librosa.feature.mfcc(y=section, sr=rate, n_fft=complete_n_fft,
                                               n_mfcc=num_of_mfcc, hop_length=hop_width).T
            mfcc_per_section = np.reshape(mfcc_frames, (1, 1) + mfcc_frames.shape)

            # Zero crossings counted inside each hop
            zcr_per_hop = np.sum(librosa.core.zero_crossings(hops), axis=1)
            zcr_per_section = np.reshape(zcr_per_hop, (1, 1, len(zcr_per_hop), 1))

            # Onsets (as sample positions) counted inside each hop: running
            # count before every hop boundary, then consecutive differences
            onset_samples = librosa.onset.onset_detect(y=section, sr=rate, units='samples')
            hop_edges = np.arange(0, len(section) + 1, hop_width, dtype=int)
            onsets_before_edge = [0] + [np.sum(onset_samples < edge) for edge in hop_edges[1:]]
            onsets_per_hop = np.diff(onsets_before_edge)
            onsets_per_section = np.reshape(onsets_per_hop, (1, 1, len(onsets_per_hop), 1))

            # Spectral centroid: rows are frames
            centroid_frames = librosa.feature.spectral_centroid(y=section, sr=rate,
                                                                hop_length=hop_width,
                                                                n_fft=complete_n_fft).T
            spectral_centroid_per_section = np.reshape(centroid_frames, (1, 1) + centroid_frames.shape)

            # Join the features of each hop, then flatten hop by hop. The
            # concatenation only succeeds when every feature has one row per hop.
            feats_per_section = np.concatenate(
                (mfcc_per_section, zcr_per_section, onsets_per_section, spectral_centroid_per_section),
                axis=3)
            feats_per_section = np.reshape(feats_per_section, (1, -1))

    except Exception as exc:
        # Best effort: report the failure and hand back an all-zero vector so a
        # long batch run can continue. Exception (rather than a bare except)
        # lets KeyboardInterrupt stop the run.
        feats_per_section = np.zeros((1, total_num_of_features))
        print("There was an error whilst computing the features")
        print("  %s: %s" % (type(exc).__name__, exc))
    return feats_per_section
    

In [None]:
# Run the extractor on the test song. The last argument is the genre
# parameter, which the function accepts but does not use.
test_out = complete_extract_features(song,num_of_sections,num_of_hops_per_section,num_of_mfcc,num_of_features,10)

test_out.shape


## Generalisation of feature extraction for all the songs in the fma_small dataset


In [None]:
tot_num_of_songs = cumulative_sum[-1]

# One array of song paths per genre, in paths_dict order
indiv_song_path = list(paths_dict.values())

# Flatten into a single 1-D array of paths. np.concatenate copes with genres
# holding different numbers of songs, whereas np.array(...).reshape(...) only
# works when every genre contributes exactly the same number of paths.
if indiv_song_path:
    indiv_song_path_list = np.concatenate(indiv_song_path)
else:
    indiv_song_path_list = np.array([], dtype=object)

assert len(indiv_song_path_list) == tot_num_of_songs, "path count does not match the per-genre totals"

In [None]:
# Inspect the flattened array of song paths
indiv_song_path_list


In [None]:
# Genre of each song: the second-to-last component of its path when split on
# the platform separator, i.e. the folder that directly contains the file.
genres = [song_path.split(os.sep)[-2] for song_path in indiv_song_path_list]
        

In [None]:
# Fit an integer encoding of the genre names. encoded_genres holds whatever
# fit() returns, not the encoded labels themselves.
le = LabelEncoder()
encoded_genres = le.fit(genres)

# Label-encoding lookup: one row per song with its genre name and integer code
genre_codes = le.transform(genres)
encoder_df = pd.DataFrame(data={'Genre': genres, 'Encoded_Genre': genre_codes})
encoder_df.head()

In [None]:
# Progress marker: shows that execution reached this point of the notebook
print("I AM HERE")

In [None]:
%%time

tot_num_of_songs = cumulative_sum[-1]
final_d = np.zeros((tot_num_of_songs,test_out.shape[1]))
print(final_d.shape)

for song_num in range(len(indiv_song_path_list)):
    try:
        indiv_song_path = indiv_song_path_list[song_num]
        song_signal = librosa.load(indiv_song_path,sr=None)[0]
        curr_song_genre = encoder_df['Encoded_Genre'][song_num]
        final_d[song_num]= complete_extract_features(song_signal,num_of_sections,num_of_hops_per_section,num_of_mfcc,num_of_features,curr_song_genre)
                
        print(complete_extract_features(song_signal,num_of_sections,num_of_hops_per_section,num_of_mfcc,num_of_features,curr_song_genre).shape)
        
        
        
    except IOError as exc:
        print("Unable to locate folder")
    


            

In [None]:
# Inspect the shape of the feature table (songs x features)
final_d.shape

In [None]:
# Progress marker: shows that execution reached this point of the notebook
print("I AM HERE")

In [None]:

# Wrap the feature table in a DataFrame: one row per song, one column per
# feature, both labelled 0..n-1.
complete_df = pd.DataFrame(final_d)

complete_df.head()

In [None]:
# Inspect the shape of the feature frame
complete_df.shape

In [None]:
# Preview the genre / encoded-genre lookup table
encoder_df.head()

In [None]:
# Inspect the shape of the lookup table
encoder_df.shape

In [None]:
# Place the features and the genre labels side by side (column-wise concat)
final_df = pd.concat([complete_df,encoder_df],axis =1)
final_df.head()


In [None]:
# Inspect the shape of the combined feature/label table
final_df.shape

In [None]:
# Write the combined feature/label table to a comma-separated file
final_df.to_csv("FullSongFeatureExtractionFinallyDone.csv",sep=',')