In [8]:
import numpy as np
import pandas as pd
from pydub import AudioSegment
import librosa
import os
import fleep

  "class": algorithms.Blowfish,


In [2]:
def split_audio(audio, part_length):
    """Yield equally sized, consecutive slices cut from the middle of ``audio``.

    As many whole parts of ``part_length`` as fit are produced. The remainder
    that does not fill a whole part is dropped, half of it (rounded down)
    from the start and the rest from the end, so the kept region is centred.

    Args:
        audio: Any object that supports ``len()`` and slicing.
        part_length: Length of each part, in the same unit as ``len(audio)``.

    Yields:
        ``(part, index)`` tuples, where ``index`` counts up from 0.
    """
    part_count, leftover = divmod(len(audio), part_length)
    # Skip half of the unused remainder up front to centre the kept region.
    offset = leftover // 2
    for index in range(part_count):
        begin = offset + index * part_length
        yield audio[begin: begin + part_length], index

In [3]:
def convert_files(files_path, final_path, part_length):
    """Split every recognised file in ``files_path`` into WAV parts.

    Each regular file's first 128 bytes are passed to ``fleep.get`` to detect
    its format. Files for which no extension is detected are skipped. Every
    other file is loaded with ``AudioSegment.from_file``, cut into parts by
    ``split_audio`` and exported to ``final_path`` as
    ``<original name without extension>_<part index>.wav``.

    Args:
        files_path: Directory holding the source files.
        final_path: Existing directory that receives the exported WAV parts.
        part_length: Length of each part, forwarded to ``split_audio``.

    Returns:
        None. The result is the set of files written to ``final_path``.
    """
    for file_name in os.listdir(files_path):
        file_path = os.path.join(files_path, file_name)
        # os.listdir also returns sub-directories; open() would raise on them.
        if not os.path.isfile(file_path):
            continue
        with open(file_path, 'rb') as file:
            info = fleep.get(file.read(128))
        if len(info.extension) == 0:
            continue
        audio = AudioSegment.from_file(file_path, format=info.extension[0])
        # splitext keeps the whole name when there is no extension. Joining
        # split('.')[:-1] produced an empty stem in that case, so every such
        # file was exported to the same '_<i>.wav' and overwrote the others.
        stem = os.path.splitext(file_name)[0]
        for part, i in split_audio(audio, part_length):
            part_name = stem + '_' + str(i) + '.wav'
            part_path = os.path.join(final_path, part_name)
            part.export(part_path, format='wav')

In [4]:
def prepare_files(data_path, final_path, categories, part_length):
    """Convert the raw data tree into per-category folders of WAV parts.

    The i-th entry of ``categories`` is read from ``<data_path>/D_<i>``. Every
    sub-directory found there is handed to ``convert_files``, and all
    resulting parts for that category land together in
    ``<final_path>/<categories[i]>``.

    Args:
        data_path: Root directory containing the ``D_<i>`` source folders.
        final_path: Root output directory; created if missing.
        categories: Output folder names, one per ``D_<i>`` source folder.
        part_length: Length of each part, forwarded to ``convert_files``.

    Returns:
        None.
    """
    os.makedirs(final_path, exist_ok=True)
    for i, category in enumerate(categories):
        category_path = os.path.join(data_path, 'D_' + str(i))
        final_category_path = os.path.join(final_path, category)
        os.makedirs(final_category_path, exist_ok=True)
        for category_dir in os.listdir(category_path):
            instrument_path = os.path.join(category_path, category_dir)
            # Stray files next to the sub-directories (e.g. OS metadata
            # files) cannot be listed as directories; skip them.
            if not os.path.isdir(instrument_path):
                continue
            convert_files(instrument_path, final_category_path, part_length)

In [5]:
# Seven output categories, named D_0 through D_6.
categories = ['D_' + str(idx) for idx in range(7)]

# Cut everything under ./Data into WAV parts of length 30000 in ./ConvertedData.
# NOTE(review): pydub measures AudioSegment length in milliseconds, so this is
# presumably 30-second parts; confirm.
prepare_files(
    data_path='./Data',
    final_path='./ConvertedData',
    categories=categories,
    part_length=30000,
)

In [34]:
def extract_features(data, sample_rate):
    """Summarise an audio signal as a flat list of spectral statistics.

    The returned list is laid out in this fixed order:

    1. per-coefficient mean of the MFCCs, then per-coefficient variance
    2. mean zero-crossing rate
    3. mean and variance of the spectral roll-off
    4. per-bin mean of the chromagram, then per-bin variance
    5. mean and variance of the spectral centroid

    The number of MFCC coefficients and chroma bins is whatever librosa
    uses by default (presumably 20 and 12, confirm against the installed
    version).

    Args:
        data: Audio time series, passed to librosa as ``y``.
        sample_rate: Sampling rate of ``data``.

    Returns:
        list: The feature values in the order described above.
    """
    features = []
    # MFCC
    # sr must be passed explicitly; otherwise librosa falls back to its
    # default rate and the result is wrong for audio at any other rate.
    mfcc = librosa.feature.mfcc(y=data, sr=sample_rate)
    features += mfcc.mean(axis=1).tolist()
    features += mfcc.var(axis=1).tolist()

    # Zero-Crossing rate (does not depend on the sampling rate)
    zcr = librosa.feature.zero_crossing_rate(y=data)
    features.append(np.mean(zcr))

    # Spectral roll off
    spectral_rolloffs = librosa.feature.spectral_rolloff(y=data, sr=sample_rate)
    features.append(np.mean(spectral_rolloffs))
    features.append(np.var(spectral_rolloffs))

    # Chroma
    chroma = librosa.feature.chroma_stft(y=data, sr=sample_rate)
    features += chroma.mean(axis=1).tolist()
    features += chroma.var(axis=1).tolist()

    # Spectral Centroid
    spectral_centroids = librosa.feature.spectral_centroid(y=data, sr=sample_rate)
    features.append(spectral_centroids.mean())
    features.append(spectral_centroids.var())

    return features

In [33]:
def _write_feature_csv(rows, final_path, csv_index):
    """Write ``rows`` to ``<final_path>/data_<csv_index>.csv`` via pandas."""
    csv_path = os.path.join(final_path, 'data_' + str(csv_index) + '.csv')
    pd.DataFrame(rows).to_csv(csv_path)


def prepare_csv(data_path, final_path, categories, write_threshold=1000):
    """Extract features for every file per category and save them as CSVs.

    Each file under ``<data_path>/<category>`` is loaded with
    ``librosa.load`` and turned into one row: the category name followed by
    the values from ``extract_features``. Rows are written in batches to
    ``<final_path>/data_<n>.csv`` with ``n`` counting up from 0.

    Every row is written exactly once. A batch is flushed as soon as it
    holds ``write_threshold`` rows, and whatever is left after the last file
    is written to a final, smaller CSV. No file is created when there are no
    rows at all.

    Args:
        data_path: Root directory holding one sub-directory per category.
        final_path: Output directory for the CSV files; created if missing.
        categories: Names of the category sub-directories to process.
        write_threshold: Maximum number of rows per CSV file.

    Returns:
        None.
    """
    csv_count = 0
    data_mat = []
    os.makedirs(final_path, exist_ok=True)
    for category in categories:
        category_path = os.path.join(data_path, category)
        for file_name in os.listdir(category_path):
            file_path = os.path.join(category_path, file_name)
            data, sample_rate = librosa.load(file_path)
            new_row = [category]
            new_row += extract_features(data, sample_rate)
            data_mat.append(new_row)
            if len(data_mat) >= write_threshold:
                _write_feature_csv(data_mat, final_path, csv_count)
                csv_count += 1
                # Start a fresh batch so later CSVs do not repeat these rows.
                data_mat = []
    # Flush the trailing partial batch, which would otherwise be lost.
    if data_mat:
        _write_feature_csv(data_mat, final_path, csv_count)

In [None]:
# Extract features from the files under ./ConvertedData and write the CSVs to ./CSVFiles.
prepare_csv(
    data_path='./ConvertedData',
    final_path='./CSVFiles',
    categories=categories,
)