In [1]:
import numpy as np
import scipy.io.wavfile as wav
from speechpy.feature import mfcc
import tensorflow as tf

2023-10-15 03:05:47.954148: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-15 03:05:47.993905: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-10-15 03:05:47.994394: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
import os
import sys
from typing import Tuple

In [3]:
mean_signal_length = 32000
# mean_signal_length = 88500

In [4]:
def get_feature_vector_from_mfcc(file_path: str, flatten: bool, mfcc_len: int = 39) -> np.ndarray:
    """
    Make feature vector from MFCC for the given wav file.

    Args:
        file_path (str): path to the .wav file that needs to be read.
        flatten (bool) : Boolean indicating whether to flatten mfcc obtained.
        mfcc_len (int): Number of cepestral co efficients to be consider.

    Returns:
        numpy.ndarray: feature vector of the wav file made from mfcc.
    """
    fs, signal = wav.read(file_path)
    s_len = len(signal)
    print(s_len)
    # pad the signals to have same size if lesser than required
    # else slice them
    if s_len < mean_signal_length:
        pad_len = mean_signal_length - s_len
        pad_rem = pad_len % 2
        pad_len //= 2
        signal = np.pad(signal, (pad_len, pad_len + pad_rem),
                        'constant', constant_values=0)
    else:
        pad_len = s_len - mean_signal_length
        pad_len //= 2
        signal = signal[pad_len:pad_len + mean_signal_length]
    print(len(signal))
    mel_coefficients = mfcc(signal, fs, num_cepstral=mfcc_len)
    print(len(signal))
    if flatten:
        # Flatten the data
        mel_coefficients = np.ravel(mel_coefficients)
    return mel_coefficients

In [5]:
filename = './test.wav'

In [6]:
features = get_feature_vector_from_mfcc(filename, flatten=False)

132096
32000
32000


In [7]:
features.shape

(70, 39)

In [8]:
def get_data(data_path: str, flatten: bool = True, mfcc_len: int = 39,
             class_labels: Tuple = ("Neutral", "Angry", "Happy", "Sad")) -> \
        Tuple[np.ndarray, np.ndarray]:
    """Extract data for training and testing.

    1. Iterate through all the folders.
    2. Read the audio files in each folder.
    3. Extract Mel frequency cepestral coefficients for each file.
    4. Generate feature vector for the audio files as required.

    Args:
        data_path (str): path to the data set folder
        flatten (bool): Boolean specifying whether to flatten the data or not.
        mfcc_len (int): Number of mfcc features to take for each frame.
        class_labels (tuple): class labels that we care about.

    Returns:
        Tuple[numpy.ndarray, numpy.ndarray]: Two numpy arrays, one with mfcc and
        other with labels.


    """
    data = []
    labels = []
    names = []
    cur_dir = os.getcwd()
    sys.stderr.write('curdir: %s\n' % cur_dir)
    os.chdir(data_path)
    for i, directory in enumerate(class_labels):
        sys.stderr.write("started reading folder %s\n" % directory)
        os.chdir(directory)
        for filename in os.listdir('.'):
            filepath = os.getcwd() + '/' + filename
            feature_vector = get_feature_vector_from_mfcc(file_path=filepath,
                                                          mfcc_len=mfcc_len,
                                                          flatten=flatten)
            data.append(feature_vector)
            labels.append(i)
            names.append(filename)
        sys.stderr.write("ended reading folder %s\n" % directory)
        os.chdir('..')
    os.chdir(cur_dir)
    return np.array(data), np.array(labels)

In [9]:
_DATA_PATH = './dataset'
_CLASS_LABELS = ("Neutral", "Angry", "Happy", "Sad")

In [10]:
from sklearn.model_selection import train_test_split


def extract_data(flatten):
    data, labels = get_data(_DATA_PATH, class_labels=_CLASS_LABELS,
                            flatten=flatten)
    x_train, x_test, y_train, y_test = train_test_split(
        data,
        labels,
        test_size=0.2,
        random_state=42)
    return np.array(x_train), np.array(x_test), np.array(y_train), np.array(
        y_test), len(_CLASS_LABELS)


In [11]:
from keras import Sequential
from keras.layers import LSTM as KERAS_LSTM, Dense, Dropout, Conv2D, Flatten, \
    BatchNormalization, Activation, MaxPooling2D
from sklearn.metrics import accuracy_score, confusion_matrix

In [12]:
class Model(object):
    """
    Model is the abstract class which determines how a model should be.
    Any model inheriting this class should do the following.

    1.  Set the model instance variable to the corresponding model class which
        which will provide methods `fit` and `predict`.

    2.  Should implement the following abstract methods `load_model`,
        `save_model` `train` and `evaluate`. These methods provide the
        functionality to save the model to the disk, load the model from the
        disk and train the model and evaluate the model to return appropriate
        measure like accuracy, f1 score, etc.

    Attributes:
        model (Any): instance variable that holds the model.
        save_path (str): path to save the model.
        name (str): name of the model.
        trained (bool): True if model has been trained, false otherwise.
    """

    def __init__(self, save_path: str = '', name: str = 'Not Specified'):
        """
        Default constructor for abstract class Model.

        Args:
            save_path(str): path to save the model to.
            name(str): name of the model given as string.

        """
        # Place holder for model
        self.model = None
        # Place holder on where to save the model
        self.save_path = save_path
        # Place holder for name of the model
        self.name = name
        # Model has been trained or not
        self.trained = False

    def train(self, x_train: np.ndarray, y_train: np.ndarray,
              x_val: np.ndarray = None,
              y_val: np.ndarray = None) -> None:
        """
        Trains the model with the given training data.

        Args:
            x_train (numpy.ndarray): samples of training data.
            y_train (numpy.ndarray): labels for training data.
            x_val (numpy.ndarray): Optional, samples in the validation data.
            y_val (numpy.ndarray): Optional, labels of the validation data.

        """
        # This will be specific to model so should be implemented by
        # child classes
        raise NotImplementedError()

    def predict(self, samples: np.ndarray) -> Tuple:
        """
        Predict labels for given data.

        Args:
            samples (numpy.ndarray): data for which labels need to be predicted

        Returns:
            list: list of labels predicted for the data.

        """
        results = []
        for _, sample in enumerate(samples):
            results.append(self.predict_one(sample))
        return tuple(results)

    def predict_one(self, sample) -> int:
        """
        Predict label of a single sample. The reason this method exists is
        because often we might want to predict label for a single sample.

        Args:
            sample (numpy.ndarray): Feature vector of the sample that we want to
                                    predict the label for.

        Returns:
            int: returns the label for the sample.
        """
        # This need to be implemented for the child models. The reason is that
        # ML models and DL models predict the labels differently.
        raise NotImplementedError()

    def restore_model(self, load_path: str = None) -> None:
        """
        Restore the weights from a saved model and load them to the model.

        Args:
            load_path (str): Optional, path to load the weights from a given path.

        """
        to_load = load_path or self.save_path
        if to_load is None:
            sys.stderr.write(
                "Provide a path to load from or save_path of the model\n")
            sys.exit(-1)
        self.load_model(to_load)
        self.trained = True

    def load_model(self, to_load: str) -> None:
        """
        Load the weights from the given saved model.

        Args:
            to_load: path containing the saved model.

        """
        # This will be specific to model so should be implemented by
        # child classes
        raise NotImplementedError()

    def save_model(self) -> None:
        """
        Save the model to path denoted by `save_path` instance variable.
        """
        # This will be specific to model so should be implemented by
        # child classes
        raise NotImplementedError()

    def evaluate(self, x_test: np.ndarray, y_test: np.ndarray) -> None:
        """
        Evaluate the current model on the given test data.

        Predict the labels for test data using the model and print the relevant
        metrics like accuracy and the confusion matrix.

        Args:
            x_test (numpy.ndarray): Numpy nD array or a list like object
                                    containing the samples.
            y_test (numpy.ndarray): Numpy 1D array or list like object
                                    containing the labels for test samples.
        """
        predictions = self.predict(x_test)
        print(y_test)
        print(predictions)
        print('Accuracy:%.3f\n' % accuracy_score(y_pred=predictions,
                                                 y_true=y_test))
        print('Confusion matrix:', confusion_matrix(y_pred=predictions,
                                                    y_true=y_test))

In [29]:
class DNN(Model):
    """
    This class is parent class for all Deep neural network models. Any class
    inheriting this class should implement `make_default_model` method which
    creates a model with a set of hyper parameters.
    """

    def __init__(self, input_shape, num_classes, **params):
        """
        Constructor to initialize the deep neural network model. Takes the input
        shape and number of classes and other parameters required for the
        abstract class `Model` as parameters.

        Args:
            input_shape (tuple): shape of the input
            num_classes (int): number of different classes ( labels ) in the data.
            **params: Additional parameters required by the underlying abstract
                class `Model`.

        """
        super(DNN, self).__init__(**params)
        self.input_shape = input_shape
        self.model = Sequential()
        self.make_default_model()
        self.model.add(Dense(num_classes, activation='softmax'))
        self.model.compile(loss='binary_crossentropy', optimizer='adam',
                           metrics=['accuracy'])
        print(self.model.summary(), file=sys.stderr)
        self.save_path = self.save_path or self.name + '_best_model.h5'

    def load_model(self, to_load):
        """
        Load the model weights from the given path.

        Args:
            to_load (str): path to the saved model file in h5 format.

        """
        try:
            self.model.load_weights(to_load)
        except:
            sys.stderr.write("Invalid saved file provided")
            sys.exit(-1)

    def save_model(self, path, save_format=None):
        """
        Save the model weights to `save_path` provided while creating the model.
        """
        # self.model.save_weights(self.save_path)
        self.model.save(path, save_format)

    def train(self, x_train, y_train, x_val=None, y_val=None, n_epochs=50):
        """
        Train the model on the given training data.


        Args:
            x_train (numpy.ndarray): samples of training data.
            y_train (numpy.ndarray): labels for training data.
            x_val (numpy.ndarray): Optional, samples in the validation data.
            y_val (numpy.ndarray): Optional, labels of the validation data.
            n_epochs (int): Number of epochs to be trained.

        """
        best_acc = 0
        if x_val is None or y_val is None:
            x_val, y_val = x_train, y_train
        for i in range(n_epochs):
            # Shuffle the data for each epoch in unison inspired
            # from https://stackoverflow.com/a/4602224
            p = np.random.permutation(len(x_train))
            x_train = x_train[p]
            y_train = y_train[p]
            self.model.fit(x_train, y_train, batch_size=32, epochs=1)
            loss, acc = self.model.evaluate(x_val, y_val)
            if acc > best_acc:
                best_acc = acc
        self.trained = True

    def predict_one(self, sample):
        # if not self.trained:
        #     sys.stderr.write(
        #         "Model should be trained or loaded before doing predict\n")
        #     sys.exit(-1)
        return np.argmax(self.model.predict(np.array([sample])))

    def make_default_model(self) -> None:
        """
        Make the model with default hyper parameters
        """
        # This has to be implemented by child classes. The reason is that the
        # hyper parameters depends on the model.
        raise NotImplementedError()

In [30]:
class CNN(DNN):
    """
    This class handles CNN for speech emotion recognitions
    """

    def __init__(self, **params):
        params['name'] = 'CNN'
        super(CNN, self).__init__(**params)

    def make_default_model(self):
        """
        Makes a CNN keras model with the default hyper parameters.
        """
        self.model.add(Conv2D(8, (13, 13),
                              input_shape=(
                                  self.input_shape[0], self.input_shape[1], 1)))
        self.model.add(BatchNormalization(axis=-1))
        self.model.add(Activation('relu'))
        self.model.add(Conv2D(8, (13, 13)))
        self.model.add(BatchNormalization(axis=-1))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 1)))
        self.model.add(Conv2D(8, (13, 13)))
        self.model.add(BatchNormalization(axis=-1))
        self.model.add(Activation('relu'))
        self.model.add(Conv2D(8, (2, 2)))
        self.model.add(BatchNormalization(axis=-1))
        self.model.add(Activation('relu'))
        self.model.add(MaxPooling2D(pool_size=(2, 1)))
        self.model.add(Flatten())
        self.model.add(Dense(64))
        self.model.add(BatchNormalization())
        self.model.add(Activation('relu'))
        self.model.add(Dropout(0.2))


class LSTM(DNN):
    """
    This class handles CNN for speech emotion recognitions
    """

    def __init__(self, **params):
        params['name'] = 'LSTM'
        super(LSTM, self).__init__(**params)

    def make_default_model(self):
        """
        Makes the LSTM model with keras with the default hyper parameters.
        """
        self.model.add(
            KERAS_LSTM(128,
                       input_shape=(self.input_shape[0], self.input_shape[1])))
        self.model.add(Dropout(0.5))
        self.model.add(Dense(32, activation='relu'))
        self.model.add(Dense(16, activation='tanh'))

In [31]:
from keras.utils import to_categorical

In [32]:
x_train, x_test, y_train, y_test, num_labels = extract_data(
    flatten=False)
y_train = to_categorical(y_train)
y_test_train = to_categorical(y_test)
print(x_train.shape)
in_shape = x_train[0].shape
x_train = x_train.reshape(x_train.shape[0], in_shape[0], in_shape[1], 1)
print(x_train.shape)
x_test = x_test.reshape(x_test.shape[0], in_shape[0], in_shape[1], 1)
model = CNN(input_shape=x_train[0].shape,
            num_classes=num_labels)

curdir: /home/yuvraj/projects/Speak-Secure
started reading folder Neutral


29215
32000
32000
49240
32000
32000
38049
32000
32000
61105
32000
32000
32799
32000
32000
26538
32000
32000
42766
32000
32000
38227
32000
32000
30587
32000
32000
58312
32000
32000
50688
32000
32000
41162
32000
32000
22893
32000
32000
46391
32000
32000
52045
32000
32000
29375
32000
32000
32532
32000
32000
44644
32000
32000
57935
32000
32000
28915
32000
32000
24250
32000
32000
28610
32000
32000
36103
32000
32000
32113
32000
32000
43552
32000
32000
27170
32000
32000
24933
32000
32000
48888
32000
32000
43425
32000
32000
26921
32000
32000
49013
32000
32000
24545
32000
32000
28650
32000
32000
25780
32000
32000
37664
32000
32000
34121
32000
32000
35768
32000
32000
24981
32000
32000
45660
32000
32000
26944
32000
32000
26052
32000
32000
47125
32000
32000
51004
32000
32000
62387
32000
32000
27711
32000
32000
39449
32000
32000
37716
32000
32000
47713
32000
32000
33343
32000
32000
26182
32000
32000
25845
32000
32000
57072
32000
32000
35523
32000
32000
43204
32000
32000
49935
32000
32000
35244
3200

  fs, signal = wav.read(file_path)
ended reading folder Neutral
started reading folder Angry


32000
26567
32000
32000
59547
32000
32000
43181
32000
32000
44580
32000
32000
28232
32000
32000
37885
32000
32000
23969
32000
32000
60812
32000
32000
42863
32000
32000
44866
32000
32000
28879
32000
32000
35209
32000
32000
56725
32000
32000
31105
32000
32000
26485
32000
32000
57790
32000
32000
52983
32000
32000
59040
32000
32000
50649
32000
32000
38394
32000
32000
44331
32000
32000
38626
32000
32000
38577
32000
32000
57344
32000
32000
40161
32000
32000
41758
32000
32000
39072
32000
32000
39011
32000
32000
41957
32000
32000
47396
32000
32000
50945
32000
32000
36035
32000
32000
26205
32000
32000
46300
32000
32000
35198
32000
32000
37109
32000
32000
25242
32000
32000
35814
32000
32000
62793
32000
32000
44540
32000
32000
27056
32000
32000
28982
32000
32000
46153
32000
32000
64109
32000
32000
31927
32000
32000
59121
32000
32000
31802
32000
32000
28979
32000
32000
41973
32000
32000
49576
32000
32000
42398
32000
32000
42339
32000
32000
35509
32000
32000
42933
32000
32000
26704
32000
32000
2411

ended reading folder Angry
started reading folder Happy


32000
24384
32000
32000
39711
32000
32000
37795
32000
32000
23702
32000
32000
25912
32000
32000
40654
32000
32000
31427
32000
32000
42396
32000
32000
58519
32000
32000
30095
32000
32000
41336
32000
32000
53000
32000
32000
62895
32000
32000
57857
32000
32000
48585
32000
32000
39215
32000
32000
26907
32000
32000
42128
32000
32000
24595
32000
32000
38829
32000
32000
28565
32000
32000
32702
32000
32000
50059
32000
32000
47396
32000
32000
33291
32000
32000
51939
32000
32000
49839
32000
32000
31409
32000
32000
28659
32000
32000
30372
32000
32000
40879
32000
32000
37940
32000
32000
50267
32000
32000
40252
32000
32000
32100
32000
32000
55071
32000
32000
43139
32000
32000
29818
32000
32000
56696
32000
32000
40964
32000
32000
28533
32000
32000
56029
32000
32000
30964
32000
32000
57798
32000
32000
28810
32000
32000
38929
32000
32000
33542
32000
32000
45395
32000
32000
36796
32000
32000
45981
32000
32000
34537
32000
32000
61277
32000
32000
32199
32000
32000
26026
32000
32000
62823
32000
32000
2929

ended reading folder Happy
started reading folder Sad


32000
108023
32000
32000
52216
32000
32000
35071
32000
32000
64252
32000
32000
87957
32000
32000
37467
32000
32000
36267
32000
32000
35609
32000
32000
62272
32000
32000
84789
32000
32000
95610
32000
32000
85457
32000
32000
31908
32000
32000
54251
32000
32000
33017
32000
32000
56010
32000
32000
143652
32000
32000
55965
32000
32000
122871
32000
32000
71222
32000
32000
89943
32000
32000
60243
32000
32000
27771
32000
32000
33032
32000
32000
74605
32000
32000
62538
32000
32000
62233
32000
32000
65492
32000
32000
31293
32000
32000
57668
32000
32000
94480
32000
32000
29820
32000
32000
34678
32000
32000
48745
32000
32000
59672
32000
32000
62947
32000
32000
82030
32000
32000
46956
32000
32000
85764
32000
32000
63154
32000
32000
99447
32000
32000
50651
32000
32000
45640
32000
32000
86622
32000
32000
59800
32000
32000
63927
32000
32000
64377
32000
32000
58475
32000
32000
55404
32000
32000
53343
32000
32000
61395
32000
32000
108608
32000
32000
70044
32000
32000
(271, 198, 39)
(271, 198, 39, 1)
Mod

ended reading folder Sad
None


In [88]:
features2 = np.expand_dims(features, axis=2)

In [89]:
features2.shape

(198, 39, 1)

In [33]:
model.load_model('./sentiment_models/best_model_CNN_13.h5')

In [34]:
model.save_model("./sentiment_models/sentiment_classifier.h5", save_format='h5')

  saving_api.save_model(


In [91]:
_CLASS_LABELS[model.predict_one(features2)]



'Sad'

In [93]:
tf.keras.saving.save_model(
    model, "./sentiment_models/sentiment_classifier.h5", save_format="h5")

  tf.keras.saving.save_model(


AttributeError: 'CNN' object has no attribute 'outputs'