In [1]:
import librosa
from librosa import feature
import numpy as np
import os
from getpass import getpass
import urllib
from glob import glob
import pathlib


In [14]:
genre = 'bhajan'

#directories of the genre
bhajan_dir =  "Dataset/bhajan/"
bhajan_audio_files = glob(bhajan_dir  + '*.mp3')
for i in bhajan_audio_files:
    i = pathlib.Path(i)

In [15]:
bhajan_audio_files

['Dataset/bhajan\\Anup Jalota - Ae Malik Tere Bande Hum.mp3',
 'Dataset/bhajan\\Anup Jalota - Aisi Laagi Lagan.mp3',
 'Dataset/bhajan\\Anup Jalota - Bol Pinjare Ka Tota Ram.mp3',
 'Dataset/bhajan\\Anup Jalota - Hridaya Banalo Bhakto.mp3',
 'Dataset/bhajan\\Anup Jalota - Kanha Teri Murli Ki (1).mp3',
 'Dataset/bhajan\\Anup Jalota - Kanha Teri Murli Ki.mp3',
 'Dataset/bhajan\\Anup Jalota - Payoji Maine Ram Ratan Dhan.mp3',
 'Dataset/bhajan\\Anup Jalota - Sukhkarta Dukhharta (Siddhivinayakji Ki Aarti).mp3',
 'Dataset/bhajan\\Anup Jalota - Thumak Chalat Ramchandra.mp3',
 'Dataset/bhajan\\Anuradha Paudwal - Naam Hai Tera Taranhaar.mp3',
 'Dataset/bhajan\\AR Rahman - O Paalanhaare.mp3',
 'Dataset/bhajan\\Asha Bhosle - Sancha Naam Tera - From Julie - Soundtrack Version.mp3',
 'Dataset/bhajan\\Bhaiya Krishan Das Ji - Mera Aapki Kripa Se.mp3',
 'Dataset/bhajan\\Bhakti Marga - Sriman Narayana.mp3',
 'Dataset/bhajan\\Bhimsen Joshi - Chalo Ri Murali Suniye.mp3',
 'Dataset/bhajan\\Bhimsen Joshi - J

In [16]:
print(f'Number of audios : {len(bhajan_audio_files)}')
print(type(bhajan_audio_files[0]))

Number of audios : 48
<class 'str'>


In [17]:
# Created list of all the functions
fn_list_i = [
    librosa.onset.onset_strength,              # it is spectral_flux
    feature.chroma_stft,                       # chromagram from a waveform or power spectrogram
    feature.chroma_cqt,
    feature.chroma_cens,
    feature.melspectrogram,
    feature.mfcc,
    feature.spectral_centroid,
    feature.spectral_bandwidth,
    feature.spectral_contrast,
    feature.spectral_rolloff,
    feature.tonnetz
]

fn_list_ii = [
    feature.zero_crossing_rate
]

def get_feature_vector(y,sr): 
   feat_vect_i = [ np.mean(funct(y,sr)) for funct in fn_list_i]
   feat_vect_ii = [ np.mean(funct(y)) for funct in fn_list_ii] 
   feature_vector = feat_vect_i + feat_vect_ii 
   return feature_vector

In [18]:
#build the matrix with genre audios featurized
song_features = []

for file in bhajan_audio_files:
  '''
  y is the time series array of the audio file, a 1D np.ndarray
  sr is the sampling rate, a number
  '''  
  y,sr = librosa.load(file,sr=None)   
  feature_vector = get_feature_vector(y, sr)

  song_features.append([file] + feature_vector)  
  print([file] + feature_vector) 



['Dataset/bhajan\\Anup Jalota - Ae Malik Tere Bande Hum.mp3', 1.328786, 0.36408594, 0.45517012, 0.23918957199146645, 10.937271, 2.3901787, 2900.150562788673, 3756.721747295903, 20.75060743037618, 6292.573851203501, 0.006288998593018963, 0.053945667553862986]




['Dataset/bhajan\\Anup Jalota - Aisi Laagi Lagan.mp3', 1.3471452, 0.37439203, 0.39704597, 0.23075534606134449, 6.2366743, -3.730755, 2094.4617763331703, 2174.652093081218, 22.10539835890742, 3856.137309388714, -0.021664007336277536, 0.05756259973846358]
['Dataset/bhajan\\Anup Jalota - Bol Pinjare Ka Tota Ram.mp3', 1.4377344, 0.3496862, 0.44744575, 0.24345947542366536, 3.6874547, -1.8327248, 2987.7502328778423, 3087.7459552653745, 21.459267504963936, 6066.588346387486, 0.012001312487423338, 0.07345205467307137]
['Dataset/bhajan\\Anup Jalota - Hridaya Banalo Bhakto.mp3', 1.4331586, 0.3336786, 0.43622375, 0.24144135346121737, 0.29643512, -8.927594, 2153.824651787026, 2156.3364646792256, 20.858720505358388, 4412.498834458796, -0.0031947510627920576, 0.05977361125765576]
['Dataset/bhajan\\Anup Jalota - Kanha Teri Murli Ki (1).mp3', 1.468072, 0.32697466, 0.43036333, 0.24439189755484125, 0.41431373, -8.681396, 2073.7523316688003, 2093.348514778965, 20.457710268553654, 4240.431099108104, 0.024

In [19]:
columns = ["Song_Name", "onset_strength", "chroma_stft", "chroma_cqt", "chroma_cens", "melspectrogram", "mfcc",	
           "spectral_centroid" ,"spectral_bandwidth", "spectral_contrast", "spectral_rolloff",	"tonnetz", "zero_crossing_rate"]

In [20]:
import pandas as pd
df = pd.DataFrame(song_features, columns=columns)

In [21]:
file_name = genre + '_featres.csv'
df.to_csv(file_name)   