In [48]:
import numpy as np
import matplotlib.pyplot as plt
import os

import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
%matplotlib inline

from tqdm import tqdm


# Root folder of the audio corpus; each emotion in CATEGORIES is looked up as a
# sub-folder of it (path is relative to the notebook's working directory).
DATADIR = "AudioDataM"

# Emotion labels. A sample's integer class id is its label's position in this list.
CATEGORIES = [
    "anger",
    "disgust",
    "fear",
    "neutral",
    "happy",
    "sad",
    "surprise",
]

In [123]:
# Scratch check of np.vstack: three 1-element vectors stack into a (3, 1) array.
# (A stray `np.hstack([mfccs, chroma, mel, contrast])` used to follow; those names
# are never defined at notebook level, so the cell failed on a fresh kernel.)
x1 = np.random.randint([10])
x2 = np.random.randint([10])
x3 = np.random.randint([10])
x4 = np.vstack([x1, x2, x3])
x4.shape

(3, 1)

In [137]:
import numpy
# Floating-point type used for the padded stream and the returned deltas.
PARAM_TYPE = numpy.float32


def compute_delta(features,
                  win=3,
                  method='filter',
                  filt=numpy.array([.25, .5, .25, 0, -.25, -.5, -.25])):
    """Compute delta coefficients of a feature stream.

    features is a 2D-ndarray, each row of features is a frame.

    :param features: the feature frames to compute the delta coefficients
    :param win: parameter that sets the length of the computation window.
            The size of the window is (win x 2) + 1. It is also the number of
            times the first/last frame is replicated at each border.
    :param method: method used to compute the delta coefficients,
        can be 'diff' or 'filter'
    :param filt: definition of the filter to use in "filter" mode. It is
        ignored in "diff" mode. (The original notes describe the default as
        similar to SPRO4: filt=numpy.array([.2, .1, 0, -.1, -.2]).)

    :return: the delta coefficients computed on the original features, one
        row per input frame, with dtype PARAM_TYPE.

    Note: numpy.convolve reverses the kernel, so "diff" mode yields
    ``features[t - win] - features[t + win]``; that sign convention is kept
    unchanged here.
    """
    if method == 'diff':
        filt = numpy.zeros(2 * win + 1, dtype=PARAM_TYPE)
        filt[0] = -1
        filt[-1] = 1

    # First and last features are appended to the beginning and the end of the
    # stream to avoid border effects.
    x = numpy.zeros((features.shape[0] + 2 * win, features.shape[1]), dtype=PARAM_TYPE)
    x[:win, :] = features[0, :]
    x[win:-win, :] = features
    x[-win:, :] = features[-1, :]

    delta = numpy.zeros(x.shape, dtype=PARAM_TYPE)

    for i in range(features.shape[1]):
        # Convolve the *padded* stream. mode='same' keeps the output aligned
        # with x for any odd filter length, so the slice below always returns
        # exactly one row per input frame.
        delta[:, i] = numpy.convolve(x[:, i], filt, mode='same')

    # Drop the rows that correspond to the replicated border frames.
    return delta[win:-win, :]

def shifted_delta_cepstral(cep, d=1, p=3, k=7):
    """
    Compute the Shifted-Delta-Cepstral features for language identification

    :param cep: matrix of features, 1 vector (frame) per line
    :param d: represents the time advance and delay for the delta computation
    :param p: time shift between consecutive blocks.
    :param k: number of delta-cepstral blocks whose delta-cepstral
       coefficients are stacked to form the final feature vector

    :return: cepstral coefficients concatenated with shifted deltas; for an
       input of shape (n_frames, n_coef) the result has shape
       (n_frames, n_coef * (k + 1))
    """
    n_frames, n_coef = cep.shape

    # Replicate the first frame d times and the last frame k * p + d times so
    # that every shifted block of every frame lies inside the padded stream.
    # The tail length must scale with p; a fixed multiple of 3 is too short as
    # soon as p > 3.
    y = numpy.r_[numpy.resize(cep[0, :], (d, n_coef)),
                 cep,
                 numpy.resize(cep[-1, :], (k * p + d, n_coef))]

    delta = compute_delta(y, win=d, method='diff')

    # Block ii of frame ff is row d + ff + ii * p of delta. Build all row
    # indices at once (shape n_frames x k) instead of rolling a boolean mask.
    block_rows = d + numpy.arange(n_frames)[:, None] + p * numpy.arange(k)[None, :]

    # Filling a preallocated array keeps sdc in numpy's default float dtype.
    sdc = numpy.empty((n_frames, n_coef * k))
    sdc[:] = delta[block_rows, :].reshape(n_frames, -1)
    return numpy.hstack((cep, sdc))

In [124]:
# Filled by create_training_data() with [feature_vector, class_num] pairs.
training_data = []


def _extract_features(audio_path):
    """Return the 1-D feature vector for one audio file.

    The vector is the concatenation of the time-averaged MFCCs, the
    time-averaged shifted-delta-cepstral features computed from those MFCCs,
    and the time-averaged mel spectrogram.
    """
    signal, sample_rate = librosa.load(audio_path)  # convert to array

    # MFCC frames as rows; computed once and reused for both mean and SDC.
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40).T
    mfccs = np.mean(mfcc, axis=0)
    sdc = np.mean(shifted_delta_cepstral(mfcc), axis=0)

    # Pass the signal as `y=`: librosa >= 0.10 makes it keyword-only, and a
    # positional call raises TypeError there.
    mel = np.mean(librosa.feature.melspectrogram(y=signal, sr=sample_rate).T, axis=0)

    return np.hstack([mfccs, sdc, mel])


def create_training_data():
    """Populate the module-level ``training_data`` list.

    For every label in CATEGORIES, walks DATADIR/<label>/<sub-folder>/<file>
    and appends ``[features, class_num]`` for each file that can be processed,
    where ``class_num`` is the label's index in CATEGORIES.

    The list is emptied first, so calling this again does not duplicate
    samples. Files that fail to load or featurise are skipped and reported in
    a short summary instead of being silently ignored.
    """
    training_data.clear()
    failures = []

    for class_num, category in enumerate(CATEGORIES):
        category_dir = os.path.join(DATADIR, category)

        # List the sub-folders of *this* category. os.walk yields nothing for
        # a missing directory, hence the None default.
        top_level = next(os.walk(category_dir), None)
        if top_level is None:
            print("Category folder not found, skipped: {}".format(category_dir))
            continue

        for subfolder in top_level[1]:
            folder_path = os.path.join(category_dir, subfolder)
            for filename in os.listdir(folder_path):
                audio_path = os.path.join(folder_path, filename)
                try:
                    features = _extract_features(audio_path)
                except Exception as e:
                    # Best effort: keep going, but remember what went wrong.
                    failures.append((audio_path, e))
                    continue
                training_data.append([features, class_num])  # add this to our training_data

    if failures:
        print("Skipped {} file(s) that could not be processed; first few:".format(len(failures)))
        for audio_path, error in failures[:5]:
            print("  {}: {!r}".format(audio_path, error))

In [125]:
# Build the dataset: fills the global `training_data` list by reading and
# featurising every audio file found under DATADIR.
create_training_data()

In [127]:
import random

# Shuffle in place with a fixed seed so the permutation applied to the samples
# no longer changes from one fresh run to the next. A private Random instance
# is used so the global `random` state is left untouched.
random.Random(42).shuffle(training_data)

In [129]:
# Split the [features, label] pairs into two parallel lists:
# X holds the feature vectors, y the matching integer class ids.
X = [sample_features for sample_features, _ in training_data]
y = [sample_label for _, sample_label in training_data]

In [131]:
import pickle

# Persist the features and labels next to the notebook. The `with` blocks
# guarantee each file is flushed and closed even if pickling raises.
with open("X.pickle", "wb") as pickle_out:
    pickle.dump(X, pickle_out)

with open("y.pickle", "wb") as pickle_out:
    pickle.dump(y, pickle_out)

In [135]:
# Turn the Python lists into ndarrays and display both shapes as a sanity check.
X, y = np.array(X), np.array(y)
(X.shape, y.shape)

((480, 488), (480,))

In [136]:
# Display the label vector; each entry is an index into CATEGORIES.
y

array([3, 5, 0, 3, 0, 5, 3, 1, 5, 4, 0, 2, 2, 3, 0, 1, 1, 3, 5, 1, 3, 3,
       0, 0, 4, 3, 3, 5, 1, 5, 6, 2, 1, 0, 2, 6, 0, 1, 1, 2, 0, 0, 3, 3,
       6, 3, 4, 1, 0, 4, 1, 5, 3, 4, 2, 6, 3, 0, 4, 2, 6, 3, 5, 1, 6, 1,
       0, 1, 4, 6, 5, 2, 3, 4, 3, 2, 3, 4, 6, 0, 6, 3, 3, 4, 0, 2, 6, 6,
       0, 2, 0, 2, 6, 1, 3, 6, 2, 2, 4, 2, 5, 2, 6, 5, 0, 2, 3, 0, 0, 0,
       5, 3, 4, 6, 6, 5, 3, 3, 4, 0, 5, 4, 3, 3, 5, 4, 2, 6, 5, 1, 0, 1,
       2, 6, 3, 3, 0, 0, 5, 0, 1, 6, 0, 0, 5, 5, 5, 5, 1, 2, 3, 5, 3, 1,
       4, 2, 4, 0, 1, 0, 4, 6, 4, 2, 0, 6, 2, 5, 2, 5, 4, 1, 3, 1, 3, 0,
       2, 6, 0, 0, 2, 0, 3, 6, 6, 6, 2, 3, 1, 6, 0, 1, 3, 5, 5, 3, 3, 4,
       5, 3, 6, 2, 5, 1, 0, 1, 2, 1, 2, 3, 2, 3, 0, 3, 5, 5, 4, 1, 4, 3,
       3, 6, 2, 6, 5, 2, 0, 0, 0, 5, 6, 2, 3, 3, 3, 1, 2, 3, 6, 2, 3, 6,
       1, 2, 2, 3, 1, 1, 3, 4, 6, 0, 5, 3, 4, 3, 2, 1, 0, 4, 0, 1, 6, 5,
       5, 4, 3, 3, 1, 2, 6, 4, 1, 6, 3, 2, 3, 3, 6, 3, 5, 3, 2, 3, 5, 3,
       4, 4, 0, 3, 5, 5, 6, 5, 2, 6, 3, 5, 3, 3, 4,

In [138]:
import pickle

# Reload the cached features and labels, closing each file after reading.
# Only unpickle files written by this notebook: pickle.load can execute
# arbitrary code from an untrusted file.
# NOTE(review): as the notebook is ordered, the pickles are written before the
# np.array conversion, so X and y come back as plain lists here.
with open("X.pickle", "rb") as pickle_in:
    X = pickle.load(pickle_in)

with open("y.pickle", "rb") as pickle_in:
    y = pickle.load(pickle_in)