# In [None]:
import os
import sys
from typing import Tuple

import numpy as np
import scipy.io.wavfile as wav
from speechpy.feature import mfcc
import pickle
import sys
from sklearn.model_selection import train_test_split
import numpy
from sklearn.neural_network import MLPClassifier
import librosa
from pathlib import Path
# warnings.filterwarnings("ignore")
# Number of samples every signal is zero-padded or centre-cropped to before
# MFCC extraction, so that all feature vectors share one length.
mean_signal_length = 32000  # Empirically calculated for the given data set


def get_feature_vector_from_mfcc(
        file_path: str, flatten: bool,
        mfcc_len: int = 39) -> Tuple[np.ndarray, np.ndarray]:
    """Read a WAV file and build its MFCC-based feature vector.

    The signal is reduced to its first channel when it has more than one
    dimension, then zero-padded (symmetrically) or centre-cropped to
    ``mean_signal_length`` samples and handed to ``mfcc``.  Three scalar
    descriptors are each divided by 100000 and added to every coefficient:
    the value of ``Zerocross``, the value of ``Intensity`` (both measured on
    the signal before padding/cropping) and the number of non-zero
    coefficients.

    Args:
        file_path: Path of the WAV file to read.
        flatten: When true, the MFCC matrix is raveled to 1-D before the
            scalar offsets are added.
        mfcc_len: Number of cepstral coefficients requested from ``mfcc``.

    Returns:
        A ``(features, mel)`` pair: ``features`` is the (optionally
        flattened) MFCC array with the scalar offsets added, ``mel`` is the
        untouched array returned by ``mfcc``.
    """
    fs, signal = wav.read(file_path)
    if len(signal.shape) != 1:
        print("convert stereo to mono")
        # Only the first channel is kept; the others are discarded.
        signal = signal[:, 0]

    s_len = len(signal)
    # Scalar descriptors are taken from the signal at its original length.
    zx = Zerocross(signal)
    Ix = Intensity(signal)

    if s_len < mean_signal_length:
        # Pad with zeros on both sides; an odd remainder goes to the right.
        pad_len = mean_signal_length - s_len
        pad_rem = pad_len % 2
        pad_len //= 2
        signal = np.pad(signal, (pad_len, pad_len + pad_rem),
                        'constant', constant_values=0)
    elif s_len > mean_signal_length:
        # Keep the central window of the required length.
        print("Length is greater than the mean signal length defined ")
        start = (s_len - mean_signal_length) // 2
        signal = signal[start:start + mean_signal_length]

    mel = mfcc(signal, fs, num_cepstral=mfcc_len)
    mel_coefficients = np.ravel(mel) if flatten else mel

    # Only the *length* of the jitter envelope of the non-zero coefficients
    # enters the feature, and jitter_env always returns exactly one value per
    # requested point, so that length is simply the non-zero count.  Counting
    # directly avoids an element-by-element Python loop per file.
    nonzero_count = np.count_nonzero(mel_coefficients)

    features = (mel_coefficients + zx / 100000 + Ix / 100000
                + nonzero_count / 100000)
    return features, mel

def jitter_env(vPPS, iNumPuntos):
    """Compute a jitter envelope of ``iNumPuntos`` points from a sequence.

    The sequence is resampled onto ``iNumPuntos`` slots by mapping slot ``n``
    to index ``int(n * len(vPPS) / iNumPuntos)``.  Each slot holds the
    absolute difference between that sample and the next one, expressed as a
    percentage of the largest value in ``vPPS``.  When two consecutive slots
    map to the same index the previous slot's value is repeated.  The last
    slot is always left at zero.

    Args:
        vPPS: Sequence of values (indexable, supports ``len``).
        iNumPuntos: Number of points in the returned envelope.

    Returns:
        A 1-D float array of length ``iNumPuntos``.  It is all zeros when
        ``vPPS`` has fewer than two elements.
    """
    seq_len = len(vPPS)

    if seq_len < 2:
        print('Pitch sequence is too short')
        return np.zeros(iNumPuntos)

    envelope = np.zeros(iNumPuntos)
    if iNumPuntos == 0:
        # Nothing to fill; also avoids dividing by zero below.
        return envelope

    step = seq_len / iNumPuntos

    # Normalisation reference: the maximum of the sequence.
    # NOTE(review): the original comment called this the "average f0", but
    # the code has always used the maximum -- confirm which one is intended.
    peak = np.max(vPPS)

    prev_idx = -1
    for n in range(iNumPuntos - 1):
        idx = int(n * step)
        if idx == prev_idx:
            # Same source sample as the previous slot: repeat its value.
            # It is already a percentage, so it must not be rescaled.
            envelope[n] = envelope[n - 1]
        elif idx + 1 < seq_len:
            envelope[n] = 100 * np.abs(vPPS[idx + 1] - vPPS[idx]) / peak
        # else: no following sample to compare with; slot stays at zero.
        prev_idx = idx

    return envelope

def Zerocross(sig):
    """Return the base-10 logarithm of the mean squared amplitude of ``sig``.

    Despite its name, this function does not count zero crossings; it
    computes a log-energy value.

    Integer input (for example 16-bit PCM samples) is converted to float64
    before squaring, because squaring in a narrow integer dtype silently
    wraps around and yields a meaningless energy.

    Args:
        sig: Array-like of samples.

    Returns:
        ``log10(sum(|sig**2|) / len(sig))``.  An all-zero signal gives
        ``-inf``.
    """
    samples = np.asarray(sig)
    if np.issubdtype(samples.dtype, np.integer):
        samples = samples.astype(np.float64)
    energy = np.absolute(np.power(samples, 2))
    mean_energy = np.sum(energy) / len(energy)
    return np.log10(mean_energy)


def Intensity(sig):
    """Return the mean squared amplitude of ``sig``.

    Integer input (for example 16-bit PCM samples) is converted to float64
    before squaring, because squaring in a narrow integer dtype silently
    wraps around.

    Args:
        sig: Array-like of samples.

    Returns:
        ``sum(sig**2) / len(sig)``.
    """
    samples = np.asarray(sig)
    if np.issubdtype(samples.dtype, np.integer):
        samples = samples.astype(np.float64)
    return np.sum(np.square(samples)) / len(sig)

def get_data(data_path: str, flatten: bool = True, mfcc_len: int = 39,
             class_labels: Tuple = ("Neutral", "Angry", "Happy", "Sad")) -> \
        Tuple[np.ndarray, np.ndarray]:
    """Extract feature vectors and integer labels for a whole data set.

    ``data_path`` must contain one sub-directory per entry of
    ``class_labels``; every file in such a directory is passed to
    ``get_feature_vector_from_mfcc``.  The label of a sample is the index of
    its directory name within ``class_labels``.  NaN entries of a feature
    vector are replaced by zero.

    The process working directory is never changed, so a failure while
    reading a file cannot leave the caller in a different directory.  Files
    are visited in sorted name order so the sample order (and therefore any
    seeded split made afterwards) does not depend on the file system.

    Args:
        data_path: Root directory of the data set.
        flatten: Forwarded to ``get_feature_vector_from_mfcc``.
        mfcc_len: Forwarded to ``get_feature_vector_from_mfcc``.
        class_labels: Names of the class sub-directories, in label order.

    Returns:
        ``(data, labels)`` as NumPy arrays with one entry per file.
    """
    data = []
    labels = []
    cur_dir = os.getcwd()
    sys.stderr.write('curdir: %s\n' % cur_dir)
    print(os.listdir(cur_dir))
    for i, directory in enumerate(class_labels):
        sys.stderr.write("started reading folder %s\n" % directory)
        class_dir = os.path.abspath(os.path.join(data_path, directory))
        # os.listdir returns entries in arbitrary order; sort for
        # reproducibility.
        for filename in sorted(os.listdir(class_dir)):
            filepath = os.path.join(class_dir, filename)
            feature_vector, _ = get_feature_vector_from_mfcc(
                file_path=filepath, mfcc_len=mfcc_len, flatten=flatten)
            feature_vector[np.isnan(feature_vector)] = 0
            data.append(feature_vector)
            labels.append(i)
        sys.stderr.write("ended reading folder %s\n" % directory)
    return np.array(data), np.array(labels)



# Root directory of the data set; get_data expects one sub-directory per
# class label inside it.
_DATA_PATH = 'data/'
# Class sub-directory names. The position of a name in this tuple is the
# integer label assigned to the samples found in that directory.
_CLASS_LABELS = ("Neutral", "Angry", "Happy", "Sad")


def extract_data(flatten):
    """Load the data set and split it into training and test partitions.

    Features and labels come from ``get_data`` using ``_DATA_PATH`` and
    ``_CLASS_LABELS``; 20% of the samples are held out for testing with a
    fixed random seed of 42.

    Args:
        flatten: Forwarded to ``get_data``.

    Returns:
        ``(x_train, x_test, y_train, y_test, num_classes)`` where the first
        four items are NumPy arrays and ``num_classes`` is the number of
        entries in ``_CLASS_LABELS``.
    """
    features, targets = get_data(_DATA_PATH, class_labels=_CLASS_LABELS,
                                 flatten=flatten)
    partitions = train_test_split(features, targets,
                                  test_size=0.2, random_state=42)
    train_x, test_x, train_y, test_y = (np.array(part) for part in partitions)
    num_classes = len(_CLASS_LABELS)
    return train_x, test_x, train_y, test_y, num_classes


def get_feature_vector(file_path, flatten):
    """Return the features of one file using 39 cepstral coefficients.

    Thin convenience wrapper: the result is whatever
    ``get_feature_vector_from_mfcc`` returns for the same arguments with
    ``mfcc_len`` fixed to 39.
    """
    result = get_feature_vector_from_mfcc(file_path=file_path,
                                          flatten=flatten,
                                          mfcc_len=39)
    return result

