In [1]:
import os
import pathlib
import urllib
from getpass import getpass
from glob import glob

import librosa
import numpy as np
import pandas as pd
from librosa import feature


In [2]:
# Genre processed by this notebook; also used to name the output CSV.
genre = 'Bollywood_Rap'

# Directory holding the genre's audio, derived from `genre` so the two cannot
# drift apart. The `bhajan_` prefix is historical; the names are kept because
# later cells refer to them.
bhajan_dir = f"Dataset/{genre}/"

# Paths (as plain strings) of every .mp3 directly inside the genre directory.
# glob() gives no ordering guarantee, so sort to make the row order of the
# resulting feature table reproducible across machines and file systems.
bhajan_audio_files = sorted(glob(bhajan_dir + '*.mp3'))

In [3]:
# Echo the list of matched audio file paths (the notebook displays the value
# of a cell's last bare expression).
bhajan_audio_files

['Dataset/Bollywood_Rap\\Aditi Singh Sharma - High Rated Gabru (Female Version).mp3',
 'Dataset/Bollywood_Rap\\Amaal Mallik - Zero Hour Mashup 2015(Remix By Dj Kiran Kamath).mp3',
 'Dataset/Bollywood_Rap\\Anirudh Ravichander - Pori Pathi Vizhum.mp3',
 'Dataset/Bollywood_Rap\\Badshah - Proper Patola.mp3',
 'Dataset/Bollywood_Rap\\Badshah - She Move It Like.mp3',
 'Dataset/Bollywood_Rap\\Badshah - Tere Naal Nachna.mp3',
 'Dataset/Bollywood_Rap\\Big Dhillon - Stand Up - Spoken Word.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Aja Ni Aja.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Beparwah.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Bijlee.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Bumpin My Song.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Cadillac.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Charso Bees (420).mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Dada.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Desi Munde.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Desi Put Javaan.mp3',
 'Dataset/Bollywood_Rap\\Bohemia - Dil A

In [4]:
# Sanity check: how many files were matched, and what type the entries are.
audio_count = len(bhajan_audio_files)
print(f'Number of audios : {audio_count}')

first_entry = bhajan_audio_files[0]
print(type(first_entry))

Number of audios : 100
<class 'str'>


In [5]:
# Feature extractors that are given both the waveform and the sampling rate.
# The order here fixes the order of the values in each feature vector.
fn_list_i = [
    librosa.onset.onset_strength,   # onset strength envelope (spectral flux)
    feature.chroma_stft,            # chromagram from a waveform or power spectrogram
    feature.chroma_cqt,
    feature.chroma_cens,
    feature.melspectrogram,
    feature.mfcc,
    feature.spectral_centroid,
    feature.spectral_bandwidth,
    feature.spectral_contrast,
    feature.spectral_rolloff,
    feature.tonnetz,
]

# Feature extractors that are given the waveform only.
fn_list_ii = [feature.zero_crossing_rate]

def get_feature_vector(y, sr):
    """Summarize one audio signal as a flat list of scalar features.

    Each extractor in ``fn_list_i`` is called with the waveform and the
    sampling rate, each extractor in ``fn_list_ii`` with the waveform only,
    and every result is reduced to a single number with ``np.mean``.

    Parameters
    ----------
    y : audio time series passed to every extractor.
    sr : sampling rate passed to the extractors in ``fn_list_i``.

    Returns
    -------
    list
        One mean value per extractor: first those of ``fn_list_i`` in order,
        then those of ``fn_list_ii`` in order.
    """
    # Pass the arguments by keyword: librosa >= 0.10 makes them keyword-only,
    # so a positional call like funct(y, sr) raises TypeError there, while the
    # keyword form works on older releases as well.
    feat_vect_i = [np.mean(funct(y=y, sr=sr)) for funct in fn_list_i]
    feat_vect_ii = [np.mean(funct(y=y)) for funct in fn_list_ii]
    return feat_vect_i + feat_vect_ii

In [6]:
# Featurize every audio file of the genre: one row per song, holding the file
# path followed by its feature values.
song_features = []

for audio_path in bhajan_audio_files:
    # y  : time series array of the audio file, a 1D np.ndarray
    # sr : sampling rate, a number (sr=None keeps the file's native rate)
    y, sr = librosa.load(audio_path, sr=None)
    feature_vector = get_feature_vector(y, sr)

    # Build the row once, then both store it and print it as progress output.
    row = [audio_path] + feature_vector
    song_features.append(row)
    print(row)



['Dataset/Bollywood_Rap\\Aditi Singh Sharma - High Rated Gabru (Female Version).mp3', 1.5484107, 0.39956677, 0.4976629, 0.24897384345633433, 6.3680882, -2.0720165, 2534.6724007679686, 2779.914639649255, 21.5178052001565, 5237.502029220779, 0.01413736323265643, 0.05459478997564935]




['Dataset/Bollywood_Rap\\Amaal Mallik - Zero Hour Mashup 2015(Remix By Dj Kiran Kamath).mp3', 1.3217796, 0.42337218, 0.47426674, 0.24148642899448167, 17.11304, 4.1369166, 2695.751490982802, 3554.9718530400078, 20.75156115108796, 6223.854714161454, 0.01481815328905694, 0.04327543262691083]
['Dataset/Bollywood_Rap\\Anirudh Ravichander - Pori Pathi Vizhum.mp3', 1.4464595, 0.41080356, 0.4909693, 0.2499724965614832, 7.1243334, 0.28149822, 3440.8985030671797, 3692.01587697742, 20.331870528591306, 7711.693718085405, 0.010028612807775245, 0.0683973776987122]
['Dataset/Bollywood_Rap\\Badshah - Proper Patola.mp3', 1.5705292, 0.42516026, 0.48469463, 0.2522734135712138, 11.305251, -1.3147242, 3283.321828054424, 3618.5405023732037, 20.807283694895645, 7386.792014760269, -0.014556251154864258, 0.06668953825658763]
['Dataset/Bollywood_Rap\\Badshah - She Move It Like.mp3', 1.5868278, 0.4834765, 0.36520645, 0.22326990593802737, 13.471093, 0.7651221, 2851.5253860500316, 3128.245555149033, 21.44708776346

In [7]:
# Number of rows collected in song_features (one per featurized audio file).
print(len(song_features))

100


In [8]:
# Column headers for the feature table: the song identifier first, then one
# name per feature value, in the order the feature vector is produced.
columns = [
    "Song_Name",
    "onset_strength",
    "chroma_stft",
    "chroma_cqt",
    "chroma_cens",
    "melspectrogram",
    "mfcc",
    "spectral_centroid",
    "spectral_bandwidth",
    "spectral_contrast",
    "spectral_rolloff",
    "tonnetz",
    "zero_crossing_rate",
]

In [9]:
# Assemble the per-song rows into a table with named columns.
# (pandas is imported as `pd` with the other dependencies in the first cell.)
df = pd.DataFrame(song_features, columns=columns)

In [10]:
# Write the feature table to a CSV named after the genre.
# NOTE(review): "featres" looks like a typo for "features"; it is kept as-is
# because other code may read this file by its current name — confirm before
# renaming.
file_name = f'{genre}_featres.csv'
df.to_csv(path_or_buf=file_name)