In [1]:
import os
import pathlib
import urllib
from getpass import getpass
from glob import glob

import librosa
import numpy as np
import pandas as pd
from librosa import feature

In [2]:
genre = 'sufi'

# Directory holding this genre's audio files. Derived from `genre` so the
# folder and the output CSV name (built from `genre` later) cannot drift apart.
bhajan_dir = f"Dataset/{genre}/"

# Paths are kept as plain strings. glob() returns matches in arbitrary,
# OS-dependent order, so sort them (case-insensitively) to make the row order
# of the feature table reproducible across machines and runs.
bhajan_audio_files = sorted(glob(bhajan_dir + '*.mp3'), key=str.lower)

In [3]:
# Inspect the discovered file paths (a bare last expression is shown as the cell output).
bhajan_audio_files

['Dataset/sufi\\Abida Parveen - Aaqa-Coke Studio Season 9.mp3',
 'Dataset/sufi\\Abida Parveen - Chaap Tilak.mp3',
 'Dataset/sufi\\Abida Parveen - Dost.mp3',
 'Dataset/sufi\\Adnan Sami - Bhar Do Jholi Meri.mp3',
 'Dataset/sufi\\Ahmed Jehanzeb - Khakhi Banda.mp3',
 'Dataset/sufi\\Akhil Sachdeva - Humsafar.mp3',
 'Dataset/sufi\\Ali Sethi - Ranjish Hi Sahi.mp3',
 'Dataset/sufi\\Amit Trivedi - Iktara.mp3',
 'Dataset/sufi\\Amit Trivedi - Namo Namo.mp3',
 'Dataset/sufi\\AR Rahman - Gurus of Peace (From Vande Mataram).mp3',
 'Dataset/sufi\\AR Rahman - Kun Faya Kun.mp3',
 'Dataset/sufi\\AR Rahman - Noor-Un-Ala-Noor (From Meenaxi).mp3',
 'Dataset/sufi\\AR Rahman - Piya Milenge.mp3',
 'Dataset/sufi\\Arijit Singh - Aayat (From Bajirao Mastani).mp3',
 'Dataset/sufi\\Arijit Singh - Tujhe Kitna Chahne Lage (From Kabir Singh) (1).mp3',
 'Dataset/sufi\\Arijit Singh - Tujhe Kitna Chahne Lage (From Kabir Singh).mp3',
 'Dataset/sufi\\Atif Aslam - Jeena Jeena (From Badlapur).mp3',
 'Dataset/sufi\\Atif Asla

In [4]:
# Sanity check: how many tracks were found, and what type the list entries have.
audio_count = len(bhajan_audio_files)
print('Number of audios : {}'.format(audio_count))
first_path = bhajan_audio_files[0]
print(type(first_path))

Number of audios : 74
<class 'str'>


In [5]:
# Extractors that get_feature_vector calls with both the signal and the
# sampling rate. Order matters: the resulting values are labelled positionally
# by the `columns` list when the DataFrame is built.
fn_list_i = [
    librosa.onset.onset_strength,              # onset strength envelope (spectral flux)
    feature.chroma_stft,                       # chromagram from a waveform or power spectrogram
    feature.chroma_cqt,
    feature.chroma_cens,
    feature.melspectrogram,
    feature.mfcc,
    feature.spectral_centroid,
    feature.spectral_bandwidth,
    feature.spectral_contrast,
    feature.spectral_rolloff,
    feature.tonnetz
]

# Extractors that get_feature_vector calls with the signal only (no sampling rate).
fn_list_ii = [
    feature.zero_crossing_rate
]

def get_feature_vector(y, sr):
    """Summarise one audio signal as a flat list of scalar features.

    Every extractor in ``fn_list_i`` is called with the signal and its
    sampling rate, every extractor in ``fn_list_ii`` with the signal only.
    Each result is reduced to a single number with ``np.mean`` (no axis, so
    the mean is taken over every element of the returned array).

    Parameters
    ----------
    y : audio time series, as returned by ``librosa.load``.
    sr : sampling rate of ``y``.

    Returns
    -------
    list
        One mean per extractor: the ``fn_list_i`` entries first, followed by
        the ``fn_list_ii`` entries, each in list order.
    """
    # Pass arguments by keyword: librosa >= 0.10 makes these parameters
    # keyword-only, so the positional form funct(y, sr) raises TypeError
    # there. Keywords work on older releases as well.
    feat_vect_i = [np.mean(funct(y=y, sr=sr)) for funct in fn_list_i]
    feat_vect_ii = [np.mean(funct(y=y)) for funct in fn_list_ii]
    return feat_vect_i + feat_vect_ii

In [6]:
# Featurize every track of the genre: one row per file, holding the file path
# followed by that track's feature means.
song_features = []

for file in bhajan_audio_files:
    # y  : time series array of the audio file, a 1D np.ndarray
    # sr : sampling rate, a number (sr=None is passed through to librosa.load)
    y, sr = librosa.load(file, sr=None)
    feature_vector = get_feature_vector(y, sr)

    # Build the row once, then both store it and echo it as progress output.
    row = [file, *feature_vector]
    song_features.append(row)
    print(row)



['Dataset/sufi\\Abida Parveen - Aaqa-Coke Studio Season 9.mp3', 1.2510421, 0.39058506, 0.36450055, 0.21776501500431578, 9.428078, 1.3463618, 2620.5446624079373, 3288.9868533959248, 21.73523314698697, 6072.371896719053, 0.05862075894377129, 0.04516554174362908]




['Dataset/sufi\\Abida Parveen - Chaap Tilak.mp3', 1.2558157, 0.36084422, 0.42658275, 0.23216031578806867, 4.0587716, -1.8007448, 2603.2952421724076, 2932.5044964543604, 21.129470279046238, 5540.174223868265, -0.018676947698405073, 0.05451958097247634]
['Dataset/sufi\\Abida Parveen - Dost.mp3', 1.229554, 0.39253807, 0.3935106, 0.22512821803968008, 4.3546534, -0.466711, 2412.372440261821, 3087.5195620427785, 20.98069461174785, 5366.2392116809415, 0.004400688263564305, 0.03903210910406048]
['Dataset/sufi\\Adnan Sami - Bhar Do Jholi Meri.mp3', 1.3982103, 0.40784556, 0.47228503, 0.2515126053034044, 11.281993, 3.999861, 2905.0594032847766, 3775.0121644433802, 20.32598186374934, 6771.360094326529, -0.009060952606897725, 0.04793693247939443]
['Dataset/sufi\\Ahmed Jehanzeb - Khakhi Banda.mp3', 1.1845939, 0.4027659, 0.39722958, 0.2275109803202581, 12.295783, 2.7449985, 2576.2124978306565, 3215.1168703109056, 21.134562024273553, 5919.319149098842, 0.021494509824403533, 0.04658962505479326]
['Data

In [7]:
# Number of feature rows collected; the loop above appends one row per audio file.
print(len(song_features))

74


In [8]:
# Header for the feature table: the file path first, then one label per
# extractor, in the same order as fn_list_i followed by fn_list_ii.
columns = [
    "Song_Name",
    "onset_strength",
    "chroma_stft",
    "chroma_cqt",
    "chroma_cens",
    "melspectrogram",
    "mfcc",
    "spectral_centroid",
    "spectral_bandwidth",
    "spectral_contrast",
    "spectral_rolloff",
    "tonnetz",
    "zero_crossing_rate",
]

In [9]:
# Assemble the per-song rows into a table; `columns` supplies the header.
# pandas (`pd`) is imported in the notebook's first cell with the other imports.
df = pd.DataFrame(song_features, columns=columns)

In [10]:
# Persist the feature table as "<genre>_features.csv" in the working directory.
# to_csv is called with its defaults, so the DataFrame index is written as well.
file_name = f'{genre}_features.csv'
df.to_csv(file_name)