In [1]:
import librosa
from librosa import feature
import numpy as np
import os
from getpass import getpass
import urllib
from glob import glob
import pathlib


In [2]:
genre = 'bhojpuri'

# Directory that holds the audio files of this genre.
bhajan_dir = "Dataset/bhojpuri/"

# Collect every .mp3 path in that directory as a plain string.
# glob() returns matches in arbitrary, OS-dependent order, so the result is
# sorted to keep the row order of the feature table reproducible across runs.
# The former loop that rebound its loop variable to pathlib.Path(...) was
# removed: it never modified the list, so it had no effect.
bhajan_audio_files = sorted(glob(bhajan_dir + '*.mp3'))

In [3]:
# Display the collected file paths as the cell output for a quick visual check.
bhajan_audio_files

['Dataset/bhojpuri\\Aditya - Suna Suna Sanam.mp3',
 'Dataset/bhojpuri\\Ajay Anand - Hamar Machhari.mp3',
 'Dataset/bhojpuri\\Ajay Nishad - Bhatar Bina Kuchh Na.mp3',
 'Dataset/bhojpuri\\Ankit Tiwari - Aj Fulawna Fat Jai.mp3',
 'Dataset/bhojpuri\\Ankur Singh - Joban Duno.mp3',
 'Dataset/bhojpuri\\Ankur Singh - Shikar Ho Jaibu.mp3',
 'Dataset/bhojpuri\\Ankur Singh - Tohra Pe Raji Na.mp3',
 'Dataset/bhojpuri\\Annu - Balam Ji Dihale.mp3',
 'Dataset/bhojpuri\\Annu - Tohar Jhol Piya.mp3',
 'Dataset/bhojpuri\\Arun Babua - Lad Gail Najariya.mp3',
 'Dataset/bhojpuri\\Arun Babuwa - Gail Rahi Dekhe.mp3',
 'Dataset/bhojpuri\\Ashok Sonkar - Dewareu.mp3',
 'Dataset/bhojpuri\\Ashok Sonkar - Yadav Ji Se Pahile.mp3',
 'Dataset/bhojpuri\\D K Dasrath - Tang Le Gail Gonda Ke Pardhan.mp3',
 'Dataset/bhojpuri\\Khesari Lal Yadav - Bhatijwa Ke Mausi Jindabad.mp3',
 'Dataset/bhojpuri\\Khesari Lal Yadav - Daal De Kewadi Mein Killi.mp3',
 'Dataset/bhojpuri\\Khesari Lal Yadav - Laga Ke Fair Lovely.mp3',
 'Dataset

In [4]:
# Sanity check: how many audio files were found and what type the entries are.
print(f'Number of audios : {len(bhajan_audio_files)}')

# Fail with a clear message instead of an opaque IndexError on [0] when the
# glob matched nothing (e.g. wrong working directory or dataset path).
if not bhajan_audio_files:
    raise FileNotFoundError(
        "No .mp3 files were found; check the dataset directory and the "
        "notebook's working directory."
    )
print(type(bhajan_audio_files[0]))

Number of audios : 75
<class 'str'>


In [5]:
# Feature extractors that are called with both the waveform and its sampling rate.
fn_list_i = [
    librosa.onset.onset_strength,              # used here as the spectral-flux feature
    feature.chroma_stft,                       # chromagram from a waveform or power spectrogram
    feature.chroma_cqt,
    feature.chroma_cens,
    feature.melspectrogram,
    feature.mfcc,
    feature.spectral_centroid,
    feature.spectral_bandwidth,
    feature.spectral_contrast,
    feature.spectral_rolloff,
    feature.tonnetz
]

# Feature extractors that are called with the waveform only.
fn_list_ii = [
    feature.zero_crossing_rate
]

def get_feature_vector(y, sr):
    """Summarise one audio signal as a flat list of scalar features.

    Every extractor in ``fn_list_i`` is called with ``y`` and ``sr``, every
    extractor in ``fn_list_ii`` with ``y`` only; each returned array is
    reduced to a single number with ``np.mean`` over all of its elements.

    Parameters
    ----------
    y : audio time series, as returned by ``librosa.load``.
    sr : sampling rate of ``y``.

    Returns
    -------
    list
        One mean value per extractor, ordered as ``fn_list_i`` followed by
        ``fn_list_ii``.
    """
    # Arguments are passed by keyword: recent librosa releases declare the
    # audio/sampling-rate parameters of these functions keyword-only, so the
    # positional form funct(y, sr) raises TypeError there. Keywords work on
    # older releases as well.
    feat_vect_i = [np.mean(funct(y=y, sr=sr)) for funct in fn_list_i]
    feat_vect_ii = [np.mean(funct(y=y)) for funct in fn_list_ii]
    return feat_vect_i + feat_vect_ii

In [6]:
# Featurize every audio file of the genre: one row per song.
song_features = []

for file in bhajan_audio_files:
  # y  -> time series array of the audio file, a 1D np.ndarray
  # sr -> sampling rate, a number
  # (sr=None: per the librosa.load docs, keep the file's native sampling rate)
  y, sr = librosa.load(file, sr=None)
  feature_vector = get_feature_vector(y, sr)

  # A row is the file path followed by its feature values; it is stored in
  # the matrix and echoed so progress is visible while the loop runs.
  row = [file] + feature_vector
  song_features.append(row)
  print(row)



['Dataset/bhojpuri\\Aditya - Suna Suna Sanam.mp3', 1.3608786, 0.36196494, 0.43441865, 0.24370933454429244, 6.219118, 0.36166927, 2319.4416607663115, 2010.4998261669568, 21.210967452805207, 4294.983469508086, 0.004725245019669014, 0.06644239399953672]




['Dataset/bhojpuri\\Ajay Anand - Hamar Machhari.mp3', 1.7646353, 0.3403681, 0.47006133, 0.25447786311831666, 12.16907, -0.0039419127, 2248.833614576386, 2515.081517563231, 22.31297560756813, 4188.137730280228, 0.004537903017977732, 0.049672114199532956]
['Dataset/bhojpuri\\Ajay Nishad - Bhatar Bina Kuchh Na.mp3', 1.5738455, 0.3369453, 0.5290444, 0.2562305692742866, 17.335182, 0.5208714, 3640.167010327928, 3801.143378693096, 19.947537389663346, 7873.438551480484, -0.02405863938501017, 0.07816761529079097]
['Dataset/bhojpuri\\Ankit Tiwari - Aj Fulawna Fat Jai.mp3', 1.5630691, 0.449603, 0.6026338, 0.27028426180007936, 23.569584, 6.6674047, 3055.0640410404317, 3685.290629525751, 18.91408954743999, 6546.772417069799, 0.02119260128015205, 0.05976318576303189]
['Dataset/bhojpuri\\Ankur Singh - Joban Duno.mp3', 1.697463, 0.42615145, 0.5948106, 0.26783998026011485, 19.28844, 2.7240787, 3951.0677335467267, 3599.2639887179225, 20.261552181815926, 7834.1790894011565, -0.0032632310415615876, 0.0962

In [7]:
# Column labels for the feature table. The order has to match each row built
# in the featurization loop: file path first, then the means in the order of
# fn_list_i followed by fn_list_ii.
columns = [
    "Song_Name",
    "onset_strength",
    "chroma_stft",
    "chroma_cqt",
    "chroma_cens",
    "melspectrogram",
    "mfcc",
    "spectral_centroid",
    "spectral_bandwidth",
    "spectral_contrast",
    "spectral_rolloff",
    "tonnetz",
    "zero_crossing_rate",
]

In [8]:
# NOTE(review): pandas is imported here rather than with the other imports in
# the notebook's first cell.
import pandas as pd
# Build the feature table: one row per entry of song_features, labelled with
# the names in `columns`.
df = pd.DataFrame(song_features, columns=columns)

In [9]:
# Export the feature table to "<genre>_featres.csv" (path relative to the
# current working directory).
# NOTE(review): "featres" looks like a typo for "features"; the spelling is
# kept unchanged because other code may read the file under this name.
# to_csv is called with its defaults, so the DataFrame index is written too.
file_name = f'{genre}_featres.csv'
df.to_csv(file_name)