In [1]:
import os
import pathlib
import urllib
from getpass import getpass
from glob import glob

import librosa
import numpy as np
import pandas as pd
from librosa import feature

In [2]:
genre = 'Bollywood_Romantic'

# Directory holding this genre's audio files; built from `genre` so the
# folder name and the genre label cannot drift apart.
romantic_dir = f"Dataset/{genre}/"

# Collect every .mp3 directly inside the genre directory as plain `str` paths.
# NOTE: a former loop here rebound its loop variable to pathlib.Path(i) on each
# pass; that never modified the list, so it was removed as dead code.
romantic_audio_files = glob(romantic_dir + '*.mp3')

In [3]:
# Show the matched audio file paths (a bare last expression is rendered as the cell output).
romantic_audio_files

['Dataset/Bollywood_Romantic\\Abhijeet Srivastava - Chashni (From Bharat) (1).mp3',
 'Dataset/Bollywood_Romantic\\Akhil - Duniyaa (1).mp3',
 'Dataset/Bollywood_Romantic\\Akhil - Duniyaa (From Luka Chuppi).mp3',
 'Dataset/Bollywood_Romantic\\Akhil Sachdeva - Tera Ban Jaunga (From Kabir Singh).mp3',
 'Dataset/Bollywood_Romantic\\Alka Yagnik - Agar Tum Saath Ho (From Tamasha).mp3',
 'Dataset/Bollywood_Romantic\\Alka Yagnik - Agar Tum Saath Ho.mp3',
 'Dataset/Bollywood_Romantic\\Amaal Mallik - Chale Aana (From De De Pyaar De).mp3',
 'Dataset/Bollywood_Romantic\\Amaal Mallik - Soch Na Sake.mp3',
 'Dataset/Bollywood_Romantic\\Ami Mishra - Hasi - Female Version.mp3',
 'Dataset/Bollywood_Romantic\\AR Rahman - Enna Sona (From OK Jaanu).mp3',
 'Dataset/Bollywood_Romantic\\AR Rahman - Enna Sona.mp3',
 'Dataset/Bollywood_Romantic\\Arijit Singh - Gerua.mp3',
 'Dataset/Bollywood_Romantic\\Arijit Singh - Ghungroo (From War).mp3',
 'Dataset/Bollywood_Romantic\\Arijit Singh - Itni Si Baat Hain (From Az

In [4]:
# Sanity check: how many files were matched, and what type the entries are.
n_audio = len(romantic_audio_files)
print(f'Number of audios : {n_audio}')
first_entry_type = type(romantic_audio_files[0])
print(first_entry_type)

Number of audios : 81
<class 'str'>


In [5]:
# Feature extractors, split by how get_feature_vector invokes them.

# Group i: called with both the signal and the sampling rate.
fn_list_i = [
    librosa.onset.onset_strength,  # used here as spectral flux
    feature.chroma_stft,  # chromagram from a waveform or power spectrogram
    feature.chroma_cqt,
    feature.chroma_cens,
    feature.melspectrogram,
    feature.mfcc,
    feature.spectral_centroid,
    feature.spectral_bandwidth,
    feature.spectral_contrast,
    feature.spectral_rolloff,
    feature.tonnetz,
]

# Group ii: called with the signal only.
fn_list_ii = [feature.zero_crossing_rate]

def get_feature_vector(y, sr, sr_feature_fns=None, signal_feature_fns=None):
    """Summarize one audio signal as a flat list of per-feature means.

    Every extractor's output is collapsed to a single scalar with ``np.mean``.

    Parameters
    ----------
    y : audio signal handed to every extractor.
    sr : sampling rate handed to the extractors in ``sr_feature_fns``.
    sr_feature_fns : optional iterable of callables invoked as ``fn(y=y, sr=sr)``.
        Defaults to the module-level ``fn_list_i``.
    signal_feature_fns : optional iterable of callables invoked as ``fn(y)``.
        Defaults to the module-level ``fn_list_ii``.

    Returns
    -------
    list
        Means for ``sr_feature_fns`` followed by means for
        ``signal_feature_fns``, each in the order the callables were given.
    """
    # Resolve defaults at call time so later edits to the module lists are honored.
    if sr_feature_fns is None:
        sr_feature_fns = fn_list_i
    if signal_feature_fns is None:
        signal_feature_fns = fn_list_ii

    # Pass y/sr by keyword: newer librosa releases (0.10+) declare these
    # arguments keyword-only, so the positional form funct(y, sr) raises
    # TypeError there, while the keyword form works on older releases too.
    feat_vect_i = [np.mean(funct(y=y, sr=sr)) for funct in sr_feature_fns]
    feat_vect_ii = [np.mean(funct(y)) for funct in signal_feature_fns]
    return feat_vect_i + feat_vect_ii

In [6]:
# Featurize every audio file of the genre: one row per song, made of the file
# path followed by the values returned by get_feature_vector.
song_features = []

for file in romantic_audio_files:
    # y is the audio time series and sr its sampling rate, as returned by
    # librosa.load; sr=None is forwarded so librosa does not resample
    # (see the librosa.load documentation).
    y, sr = librosa.load(file, sr=None)
    feature_vector = get_feature_vector(y, sr)

    # Build the row once, then both store it and echo it as progress output.
    row = [file] + feature_vector
    song_features.append(row)
    print(row)



['Dataset/Bollywood_Romantic\\Abhijeet Srivastava - Chashni (From Bharat) (1).mp3', 1.328582, 0.39661926, 0.4364781, 0.23906492858310646, 10.287682, -0.61434036, 2528.87445705972, 3245.8516003672194, 21.11228467468489, 5739.7231622046065, -0.038839543790200604, 0.04366493053932658]




['Dataset/Bollywood_Romantic\\Akhil - Duniyaa (1).mp3', 1.2556055, 0.39608938, 0.38324824, 0.22445143736648093, 18.535606, 3.4081435, 1931.5510892987627, 2735.88417505049, 20.95783666362502, 4043.5012419200384, 0.02809911544842699, 0.033159615715525496]
['Dataset/Bollywood_Romantic\\Akhil - Duniyaa (From Luka Chuppi).mp3', 1.2556055, 0.39608938, 0.38324824, 0.22445143736648093, 18.535606, 3.4081435, 1931.5510892987627, 2735.88417505049, 20.95783666362502, 4043.5012419200384, 0.02809911544842699, 0.033159615715525496]
['Dataset/Bollywood_Romantic\\Akhil Sachdeva - Tera Ban Jaunga (From Kabir Singh).mp3', 1.2426344, 0.37417236, 0.40534297, 0.22582398293627806, 9.814268, 0.0002744827, 2391.425242339641, 2961.8908211351877, 21.250127319790618, 5223.653171944572, -0.010489923101838832, 0.04519294820362241]
['Dataset/Bollywood_Romantic\\Alka Yagnik - Agar Tum Saath Ho (From Tamasha).mp3', 1.2199808, 0.34546527, 0.3396007, 0.19882553770182307, 7.8190928, -1.5233234, 1191.5122083511524, 1582.9

In [7]:
# Report how many feature rows were collected (one per processed audio file).
print(len(song_features))

81


In [8]:
# Headers for the feature table: the song identifier first, then one column
# per extractor, in the same order the feature values are produced.
columns = [
    "Song_Name",
    "onset_strength",
    "chroma_stft",
    "chroma_cqt",
    "chroma_cens",
    "melspectrogram",
    "mfcc",
    "spectral_centroid",
    "spectral_bandwidth",
    "spectral_contrast",
    "spectral_rolloff",
    "tonnetz",
    "zero_crossing_rate",
]

In [9]:
# pandas is imported with the other dependencies in the notebook's first cell,
# so a fresh "Restart & Run All" surfaces every dependency up front.
# Assemble the per-song rows into a table whose headers come from `columns`.
df = pd.DataFrame(song_features, columns=columns)

In [10]:
# Write the feature table to "<genre>_features.csv" in the working directory.
file_name = f'{genre}_features.csv'
df.to_csv(file_name)