In [35]:
import librosa
import numpy as np
import pandas as pd
from tqdm import tqdm
from glob import glob
from ast import literal_eval
import matplotlib.pyplot as plt
from scipy.io import wavfile as wav
from pydub import AudioSegment, silence

import warnings
warnings.filterwarnings('ignore')

In [17]:
'''
features db:

file_name
sample_rate
audio_duration
segments or onsets
features: 100

label: 0 or 1
'''

'\nfeatures db:\n\nfile_name\nsample_rate\naudio_duration\nsegments or onsets\nfeatures: 100\n\nlabel: 0 or 1\n'

In [44]:
def get_feature(audio_data, sr):
    mfccs = librosa.feature.mfcc(y = audio_data, sr = sr, n_mfcc = 100)
    mfccs_scaled = np.mean(mfccs.T, axis = 0)
    return mfccs_scaled   


def get_features_df():
    files = glob('../Data/0/*.wav')
    data_array = []    
    data_struct = {
        'file_name': '',
        'sample_rate': 0,
        'audio_duration': 0,
        'segments': 0,
        'features': [],
        'label': 0
    }

    for i, file in enumerate(tqdm(files)):    
        data = data_struct.copy()

        audio_data, sr = librosa.load(file)

        features = get_feature(audio_data, sr)
        duration = round(librosa.get_duration(y = audio_data, sr = sr), 4)

        audio_data = np.abs(audio_data)
        audio_data = audio_data[audio_data > 0.027]
        onset = librosa.onset.onset_detect(y = audio_data, sr = sr, units = 'time')

        data['file_name'] = file.split('/')[-1].split('.wav')[0]
        data['sample_rate'] = sr
        data['audio_duration'] = duration
        data['segments'] = len(onset)
        data['features'] = features.tolist()
        
        data['label'] = 1 if 'spotify' in file else 0

        data_array.append(data)

        if (i % 100 == 0):
            df = pd.DataFrame.from_dict(data_array)
            if (i == 100):
                df.to_csv('features_0.csv', index = False)
            else:
                df.to_csv('features_0.csv', mode = 'a', index = False, header = False)
            data_array = []


In [45]:
get_features_df()

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 91604/91604 [1:22:00<00:00, 18.62it/s]


In [46]:
df = pd.read_csv('features_0.csv', converters = {'features': literal_eval})
df.head()

Unnamed: 0,file_name,sample_rate,audio_duration,segments,features,label
0,seven_e5d2e09d_0,22050,1.0,1,"[-407.48779296875, 26.30790901184082, -40.1006...",0
1,34472100,22050,6.912,12,"[-416.98724365234375, 92.47309112548828, -8.72...",0
2,marvin_ff63ab0b_1,22050,0.9387,1,"[-399.8287048339844, 95.04105377197266, -17.59...",0
3,dog_4abefdf5_0,22050,1.0,1,"[-318.41986083984375, 138.63055419921875, -53....",0
4,33245550,22050,5.112,16,"[-341.4693908691406, 66.61758422851562, -2.174...",0
