# Подготовка датасета для определения жанра музыки

1. Используемый датасет: GTZAN dataset.  
http://marsyasweb.appspot.com/download/data_sets/
2. Используемый алгоритм для извлечения признаков: MFCC (мел-частотные кепстральные коэффициенты) — подробнее в комментариях к коду ниже.  
Библиотека: python_speech_features

In [64]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

from copy import copy

import python_speech_features as psf
import sunau
import librosa as lb

In [65]:
OSTYPE = !uname
    
if OSTYPE == ['Linux']:
    tmp_file_names = !find ./ -type f | sort
    file_names = list()
    for file_name in tmp_file_names:
        if not (file_name.find('.wav') == -1):
            file_names.append(file_name) 
            
else:
    OSTYPE = ['Windows']
    tmp_file_names = !dir "genres" /B /S
    curr_dir = !echo %cd%
    curr_dir = curr_dir[0]+"\\" #Чтобы удалить текущую директорию

    file_names = list()
    for file_name in tmp_file_names:
        if not (file_name.find('.wav') == -1):
            file_name = file_name.replace(curr_dir, '')
            file_names.append(file_name)

In [66]:
# Columns of the per-file pd.DataFrame: information about the file and its contents.
columns_audio_file = [
    'file_name', 'samplerate', 'duration', 'audio_data',
    'mean', 'std', 'max', 'min', 'diff_mean', 'diff_std',
]

In [67]:
# Load every discovered audio file and build one table row per file.
#
# Rows are collected in a plain list and the DataFrame is created once at the
# end: DataFrame.append copied the whole table on each iteration and no longer
# exists in pandas 2.0.
rows = []
for file_name in file_names:
    row = {}
    # Store a single component of the discovered path as the file name
    # (second '/'-separated part on Linux, first '\\'-separated part on Windows).
    if OSTYPE == ['Linux']:
        row['file_name'] = file_name.split('/')[1]
    elif OSTYPE == ['Windows']:
        row['file_name'] = file_name.split('\\')[0]

    audio_data_librosa, samplerate = lb.load(file_name)
    row['samplerate'] = samplerate
    # Duration in seconds = number of samples / samples per second.
    # NOTE(review): under Python 2 this is integer division when both operands
    # are ints, which would explain the whole-second values in the recorded
    # output - confirm whether fractional seconds are wanted.
    row['duration'] = len(audio_data_librosa) / samplerate
    row['audio_data'] = np.array(audio_data_librosa)
    rows.append(row)

# dtype=object keeps every column able to hold arbitrary Python objects
# (the raw sample arrays here); columns missing from a row are filled with NaN.
audio_files = pd.DataFrame(rows, columns=columns_audio_file, dtype=object)

audio_files

Unnamed: 0,file_name,samplerate,duration,audio_data,mean,std,max,min,diff_mean,diff_std
0,Metallica_master_of_puppets.wav,22050,513,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",,,,,,
1,Paul_Desmond_Take_Five_novteh_co_.wav,22050,410,"[-0.139969, -0.12851, -0.132996, -0.136215, -0...",,,,,,
2,petr-chaykovskiy-lebedinoe-ozero.wav,22050,322,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...",,,,,,


In [68]:
# mfcc processes fragments of winlen=0.025 seconds; consecutive fragments start
# winstep=0.01 seconds apart. Each fragment yields numcep cepstral coefficients.
# NOTE(review): python_speech_features uses nfilt=26 filters by default, so a
# numcep larger than that presumably returns only nfilt coefficients - confirm.
numcep = 30
winlen = 0.025
winstep = 0.01

# Statistics computed over the mfcc result, one (name, function) pair each.
# The tutorial below also takes statistics of differences.
# NOTE(review): np.diff(..., axis=1) here differences along the second axis of
# the mfcc matrix (presumably adjacent coefficients, not adjacent frames) -
# confirm that this matches the tutorial.
# http://mirlab.org/jang/books/audioSignalProcessing/appNote/musicGenreClassification/html/goTutorial.html
# http://mirlab.org/jang/books/audioSignalProcessing/appNote/musicGenreClassification/mgcFeaExtract.m
statistics_index = ['name_statistic', 'func_statistic']
statistics = [
    pd.Series(data=['mean', lambda x: x.mean(axis=0)], index=statistics_index),
    pd.Series(data=['std', lambda x: x.std(axis=0)], index=statistics_index),
    pd.Series(data=['max', lambda x: x.max(axis=0)], index=statistics_index),
    pd.Series(data=['min', lambda x: x.min(axis=0)], index=statistics_index),
    pd.Series(data=['diff_mean', lambda x: np.diff(x, axis=1).mean(axis=0)], index=statistics_index),
    pd.Series(data=['diff_std', lambda x: np.diff(x, axis=1).std(axis=0)], index=statistics_index),
]

# Reset every statistics column before filling it in.
for statistic in statistics:
    audio_files.loc[:, statistic.name_statistic] = None

# range and the single-argument print call behave the same on Python 2 and 3.
for i in range(audio_files.shape[0]):
    # Work on an explicit copy of the row; it is written back in full below.
    audio_file = audio_files.loc[i, :].copy()
    mfcc = psf.mfcc(audio_file.audio_data, samplerate=audio_file.samplerate, numcep=numcep,
                    winlen=winlen, winstep=winstep, nfft=1024)
    for statistic in statistics:
        audio_file[statistic.name_statistic] = statistic.func_statistic(mfcc)
    audio_files.loc[i, :] = audio_file
    # audio_file.name is the row label of the processed file.
    print("%s processed!" % audio_file.name)

0 processed!
1 processed!
2 processed!


In [69]:
# Display the table again, now that the statistics columns have been filled in.
audio_files

Unnamed: 0,file_name,samplerate,duration,audio_data,mean,std,max,min,diff_mean,diff_std
0,Metallica_master_of_puppets.wav,22050,513,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[0.19211534152, -15.5770058252, -9.02888182216...","[1.72619580071, 9.32386595214, 10.0467098344, ...","[3.32784486129, 25.6733408542, 24.96020196, 57...","[-18.012364855, -46.4257489847, -48.4074432776...","[-15.7691211667, 6.54812400302, 29.9941062031,...","[10.0401355916, 17.2010422052, 17.9523037378, ..."
1,Paul_Desmond_Take_Five_novteh_co_.wav,22050,410,"[-0.139969, -0.12851, -0.132996, -0.136215, -0...","[-2.53881985632, -9.16718750835, 4.6666153982,...","[1.48130161415, 6.43685770581, 10.5298955711, ...","[2.31239826207, 14.256795113, 35.1147331999, 2...","[-6.13936367637, -30.3491227813, -34.74752246,...","[-6.62836765203, 13.8338029066, -5.29885193829...","[7.19261267466, 11.7577404128, 10.0575377457, ..."
2,petr-chaykovskiy-lebedinoe-ozero.wav,22050,322,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...","[-1.80808407201, 11.679711349, -15.8014731239,...","[2.23405948687, 10.5907896812, 10.1531653071, ...","[2.42248403214, 38.456991645, 23.0232171333, 2...","[-36.0436533891, -37.0053033183, -51.541852490...","[13.487795421, -27.4811844728, 15.7817322939, ...","[11.9357161773, 10.8184873307, 11.7457364616, ..."


In [70]:
# Save the dataset: drop the raw-sample column and write the rest as CSV.
audio_files_save = audio_files.drop('audio_data', axis=1)
audio_files_save.to_csv('dataset_new.csv', sep=',')