In [1]:
import os
import sys
import glob

from configparser import ConfigParser
import numpy as np
import python_speech_features
%pylab inline

from globalStorage import GlobalStorage
from log import initLog, writeLog
import file_actions
import extract_features

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Reading the config file
# config.ini is looked up in the current working directory, so the notebook
# has to be started from the folder that contains it.
config = ConfigParser()
cfile = os.path.join(os.getcwd(), "config.ini")
# ConfigParser.read() returns the list of files it parsed successfully and
# silently skips missing ones; the list displayed as this cell's output is
# the only confirmation that config.ini was actually loaded.
config.read(cfile)

['/home/aymeric/Documents/Python/MusicLearning/src/config.ini']

In [3]:
# Logs initialization
# initLog/writeLog come from the project-local `log` module. initLog receives
# the parsed config, presumably to pick up the logging settings
# (TODO confirm in log.py). writeLog takes a level name and a message.
initLog(config)
writeLog("info", "Program restarted")

# Python version info
# NOTE(review): the working directory is appended to sys.path only here, i.e.
# after the project-local imports of the first cell have already run, so it
# cannot influence those imports.
sys.path.append(os.path.abspath(os.getcwd()))
python_version = sys.version_info.major  # major version number only
writeLog("debug", "Python version: {}".format(sys.version))

# Instantiating the global storage
gs = GlobalStorage()

[96m00:49:18 - INFO - Program restarted[0m
[90m00:49:18 - DEBUG - Python version: 3.5.3 (default, May 10 2017, 15:05:55) 
[GCC 6.3.1 20161221 (Red Hat 6.3.1-1)][0m


In [4]:
# Each sub-directory of ../data/samples is one class, and its position in
# `folders` is used further down as the numeric label of its samples.
# os.listdir() returns entries in arbitrary order and also lists plain files,
# so keep directories only and sort them to make the labels reproducible.
samples_root = "../data/samples"
folders = sorted(
    d for d in os.listdir(samples_root)
    if os.path.isdir(os.path.join(samples_root, d))
)
print(folders)

['dragonforce', 'rammstein']


In [5]:
# Convert every MP3 sample to a WAV file stored next to the original.
# Files that already have a .wav counterpart are skipped, so re-running the
# cell only converts what is missing.
for f in folders:
    # glob.escape() keeps folder names containing glob metacharacters
    # ('[', '*', '?') from being interpreted as patterns.
    samplesMP3 = glob.glob("../data/samples/{}/*.mp3".format(glob.escape(f)))
    for s in samplesMP3:
        swav = os.path.splitext(s)[0] + '.wav'
        if not os.path.isfile(swav):
            file_actions.to_wav(s, swav)

In [6]:
# Load every WAV sample into memory.
# Each entry appended to `data` holds:
#   "label"  - index of the sample's folder in `folders`
#   "sound"  - numpy array built from the first item returned by
#              file_actions.convert_to_mono
#   "params" - second item returned by file_actions.extract_sound
#              (the wave parameters of the file)
#   "file"   - path of the WAV file
data = []
for i, f in enumerate(folders):
    # glob returns matches in arbitrary order; sorting makes data[k] refer to
    # the same file on every run. glob.escape() keeps folder names containing
    # glob metacharacters ('[', '*', '?') from being read as patterns.
    samples = sorted(glob.glob("../data/samples/{}/*.wav".format(glob.escape(f))))
    for s in samples:
        es = file_actions.extract_sound(s)
        esm = np.array(file_actions.convert_to_mono(es[0])[0])
        data.append({"label": i, "sound": esm, "params": es[1], "file": s})

[90m00:49:20 - TIME - Timer started for "Extracting ../data/samples/dragonforce/01 - The Game.wav"[0m
[90m00:49:23 - TIME - 3.076s for "Extracting ../data/samples/dragonforce/01 - The Game.wav"[0m
[90m00:49:25 - TIME - Timer started for "Extracting ../data/samples/dragonforce/05 - Body Breakdown.wav"[0m
[90m00:49:29 - TIME - 4.351s for "Extracting ../data/samples/dragonforce/05 - Body Breakdown.wav"[0m
[90m00:49:33 - TIME - Timer started for "Extracting ../data/samples/rammstein/03 - Rosenrot.wav"[0m
[90m00:49:36 - TIME - 2.586s for "Extracting ../data/samples/rammstein/03 - Rosenrot.wav"[0m
[90m00:49:38 - TIME - Timer started for "Extracting ../data/samples/rammstein/01 - Reise, Reise.wav"[0m
[90m00:49:41 - TIME - 2.558s for "Extracting ../data/samples/rammstein/01 - Reise, Reise.wav"[0m


In [7]:
# Sanity check on the first loaded sample: its label and path, a ten-value
# slice of the signal, the wave parameters, and the value returned by
# extract_features.file_length for those parameters.
first_sample = data[0]
first_params = first_sample["params"]
print("{} <- {}".format(first_sample["label"], first_sample["file"]))
print(first_sample["sound"][100000:100010])
print(first_params)
print(extract_features.file_length(first_params))

0 <- ../data/samples/dragonforce/01 - The Game.wav
[-7354 -8122 -5408  -700   863 -1156 -4625 -7851 -8808 -7385]
_wave_params(nchannels=2, sampwidth=2, framerate=44100, nframes=13070639, comptype='NONE', compname='not compressed')
296.38637188208617


In [8]:
def extract_features0(sound_obj, winlen=0.025, winstep=1):
    """Build the feature vector of one loaded sample.

    The vector is the file length (nframes / samplerate) followed by the
    MFCC coefficients averaged over all analysis frames.

    Parameters
    ----------
    sound_obj : dict
        Entry of ``data``. ``"sound"`` holds the raw signal and ``"params"``
        the wave parameters, read positionally (index 2 is the sample rate,
        index 3 the number of frames).
    winlen : float, optional
        Length of the MFCC analysis window, in seconds. Defaults to 0.025.
    winstep : float, optional
        Step between successive analysis windows, in seconds. Defaults to 1.

    Returns
    -------
    numpy.ndarray
        1-D array ``[file_length, mean_mfcc_0, mean_mfcc_1, ...]``.
    """
    signal = sound_obj["sound"]    # raw data
    params = sound_obj["params"]
    samplerate = params[2]
    nframes = params[3]
    file_length = nframes / samplerate

    # NOTE(review): at 44100 Hz a 0.025 s window is about 1102 samples. Some
    # python_speech_features releases default to nfft=512 and truncate longer
    # frames - confirm against the installed version before relying on these
    # coefficients.
    mfcc_frames = python_speech_features.mfcc(
        signal, samplerate=samplerate, winlen=winlen, winstep=winstep
    )
    # Collapse the per-frame coefficients into one average per coefficient.
    mfcc_mean = np.mean(mfcc_frames, axis=0)
    return np.concatenate(([file_length], mfcc_mean))

In [9]:
# Run the notebook-local prototype extractor on the first loaded sample; the
# returned feature vector is displayed as the cell output.
extract_features0(data[0])

array([  2.96386372e+02,   2.04661181e+01,  -8.65693108e+00,
        -2.50414870e+01,   1.15075690e+01,  -9.58328060e+00,
         1.28506370e+01,  -7.35448382e-01,   9.55491669e+00,
        -3.05059941e+00,   8.63517943e+00,   1.59142760e-01,
         5.92153353e+00,   1.88925204e+00])

In [10]:
# Run the project module's extractor on the same sample so its output can be
# compared with the vector produced by the notebook-local extract_features0.
extract_features.extract_all_features(data[0])

array([  2.96386372e+02,   2.04661181e+01,  -8.65693108e+00,
        -2.50414870e+01,   1.15075690e+01,  -9.58328060e+00,
         1.28506370e+01,  -7.35448382e-01,   9.55491669e+00,
        -3.05059941e+00,   8.63517943e+00,   1.59142760e-01,
         5.92153353e+00,   1.88925204e+00])