In [17]:
import pandas as pd
import numpy as np
import librosa

In [18]:
# Load the pre-computed feature table: every column except the last goes
# into the feature matrix X, and the final column becomes the label array y.
dataset = pd.read_csv('features.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

In [19]:
# Print the feature matrix for a quick visual inspection.
print(X)

[[2.20300745e-01 6.23627007e-01 6.38570786e-01 ... 4.71014682e-05
  1.41795917e-05 8.68814823e-07]
 [3.13761393e-01 7.06200879e-01 7.22757806e-01 ... 9.66208682e-05
  5.96556994e-05 4.68411033e-05]
 [1.71527778e-01 6.51568830e-01 6.04887307e-01 ... 9.97042662e-06
  3.80867436e-06 1.89324098e-07]
 ...
 [1.48202402e-02 5.41738391e-01 4.94506985e-01 ... 4.72475449e-07
  3.79017251e-07 2.64470543e-07]
 [2.22122758e-02 6.42237711e-01 6.16769111e-01 ... 2.25081128e-03
  2.25411704e-03 2.23426394e-03]
 [1.30208333e-02 5.73239028e-01 5.25233388e-01 ... 9.04421782e-08
  4.67206007e-08 1.34750602e-08]]


In [20]:
# Show the raw label array, then wrap it in a DataFrame and write it to
# 'val.csv' so the labels can be inspected outside the notebook.
print(y)
val = pd.DataFrame(y)
val.to_csv('val.csv')

['sad' 'sad' 'sad' ... 'neutral' 'neutral' 'neutral']


In [21]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the labels. The encoder needs 2-D input, so the labels are
# first reshaped into a single column; .toarray() then converts the encoder's
# output into a dense NumPy array. `encoder` is kept so predictions can be
# mapped back to label strings later.
encoder = OneHotEncoder()
y = encoder.fit_transform(np.asarray(y).reshape(-1, 1)).toarray()

In [22]:
# Print the label array again, now in its one-hot encoded form.
print(y)

[[0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]]


In [23]:
# Write the current label matrix `y` to 'emotion.csv' via a DataFrame.
res = pd.DataFrame(y)
res.to_csv('emotion.csv')

In [24]:
from sklearn.model_selection import train_test_split

# Shuffled hold-out split with a fixed seed so the partition is repeatable.
# No test_size is passed, so scikit-learn's default proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, shuffle=True, random_state=9
)
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((27355, 162), (27355, 8), (9119, 162), (9119, 8))

In [25]:
from sklearn.preprocessing import StandardScaler

# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so no test-set statistics leak into training.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((27355, 162), (27355, 8), (9119, 162), (9119, 8))

In [26]:
# The Conv1D model is declared with input_shape=(162, 1), so insert a
# single-channel axis at position 2 of each feature matrix.
X_train = X_train[:, :, np.newaxis]
X_test = X_test[:, :, np.newaxis]
tuple(part.shape for part in (X_train, y_train, X_test, y_test))

((27355, 162, 1), (27355, 8), (9119, 162, 1), (9119, 8))

In [27]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten, Dropout, Activation
from tensorflow.keras.layers import Conv1D, MaxPooling1D

In [28]:
# 1-D CNN classifier: three Conv1D -> ReLU -> Dropout stages (the first two
# each followed by 4x max-pooling), then a flattened dense layer with 8
# outputs and a softmax activation.
model = Sequential([
    Conv1D(64, 5, padding='same', input_shape=(162, 1)),
    Activation('relu'),
    Dropout(0.1),
    MaxPooling1D(pool_size=4),
    Conv1D(128, 5, padding='same'),
    Activation('relu'),
    Dropout(0.1),
    MaxPooling1D(pool_size=4),
    Conv1D(256, 5, padding='same'),
    Activation('relu'),
    Dropout(0.1),
    Flatten(),
    Dense(8),
    Activation('softmax'),
])

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d_3 (Conv1D)           (None, 162, 64)           384       
                                                                 
 activation_4 (Activation)   (None, 162, 64)           0         
                                                                 
 dropout_3 (Dropout)         (None, 162, 64)           0         
                                                                 
 max_pooling1d_2 (MaxPooling  (None, 40, 64)           0         
 1D)                                                             
                                                                 
 conv1d_4 (Conv1D)           (None, 40, 128)           41088     
                                                                 
 activation_5 (Activation)   (None, 40, 128)           0         
                                                      

In [15]:
# Configure training: categorical cross-entropy loss, the Adam optimizer,
# and accuracy as the reported metric.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [16]:
# Train for up to 100 epochs with 2048-sample batches; the value returned by
# fit() is kept in `history`.
# NOTE(review): the test split is also passed as validation data, so accuracy
# later measured on X_test/y_test is not an independent estimate if these
# validation curves are used for tuning. Consider a separate validation split.
history = model.fit(X_train, y_train, batch_size=2048, epochs=100, validation_data=(X_test, y_test))

Epoch 1/100


KeyboardInterrupt: 

In [None]:
# Element 1 of evaluate()'s result is reported as the accuracy (presumably the
# result is [loss, accuracy], matching the single 'accuracy' metric given to
# compile); it is scaled to a percentage for display.
print("Accuracy of our model on test data : " , model.evaluate(X_test,y_test)[1]*100 , "%")

In [None]:
# Persist the trained model to 'result.h5' so it can be reloaded for inference.
model.save('result.h5')

In [1]:
from keras.models import load_model

In [2]:
def get_features(path):
    """Load the audio clip at `path` and return its feature vector.

    Only a 2.5-second window starting 0.6 seconds into the file is read; the
    original author chose this window to skip the silence at the start and
    end of each recording. No augmentation is applied.

    Returns a NumPy array built from the output of `extract_features`.
    """
    signal, rate = librosa.load(path, offset=0.6, duration=2.5)
    return np.array(extract_features(signal, rate))
def extract_features(data, sample_rate):
    """Build one flat feature vector for an audio signal.

    Each descriptor is averaged over the first axis of its transposed librosa
    output, and the per-descriptor means are concatenated in this order:
    zero-crossing rate, chroma (computed from the STFT magnitude), MFCC,
    root-mean-square value, mel spectrogram.

    Args:
        data: audio samples, passed to librosa as `y` / to `librosa.stft`.
        sample_rate: sampling rate forwarded to the sr-dependent descriptors.

    Returns:
        1-D NumPy array holding the concatenated descriptor means.
    """
    def averaged(descriptor):
        # Transpose, then average over axis 0: one mean per descriptor row.
        return np.mean(descriptor.T, axis=0)

    magnitude = np.abs(librosa.stft(data))

    parts = [
        # Empty float seed: keeps the result dtype identical to accumulating
        # onto np.array([]) one descriptor at a time.
        np.array([]),
        averaged(librosa.feature.zero_crossing_rate(y=data)),
        averaged(librosa.feature.chroma_stft(S=magnitude, sr=sample_rate)),
        averaged(librosa.feature.mfcc(y=data, sr=sample_rate)),
        averaged(librosa.feature.rms(y=data)),
        averaged(librosa.feature.melspectrogram(y=data, sr=sample_rate)),
    ]
    return np.hstack(parts)

In [3]:
# Extract the features of a single clip and lay them out as rows of 162
# values, the input width the model was declared with.
# NOTE(review): the path is an empty string; fill in the audio file to classify.
feature = np.array(get_features('')).reshape(-1, 162)
feature.shape

NameError: name 'librosa' is not defined

In [None]:
# Load a previously saved Keras model from disk and print its architecture.
# NOTE(review): the training section saves to 'result.h5', whereas this loads
# 'result_73_61.h5' (presumably a renamed checkpoint). Confirm the file exists.
loadedModel = load_model('result_73_61.h5')
loadedModel.summary()

In [None]:
# Apply the same preprocessing the network saw during training before
# predicting: standardise with the scaler fitted on the training split (`sc`),
# then add the channel axis so the input has shape (samples, 162, 1).
# Feeding the raw, unscaled feature vector would give the model inputs on a
# different scale from anything it was trained on.
# NOTE(review): `sc` and `encoder` must be the objects fitted in the training
# section of this notebook. Confirm they are still in the kernel namespace
# (or persist and reload them) when running inference on its own.
feature_ready = np.expand_dims(sc.transform(feature), axis=2)

result = loadedModel.predict(feature_ready, batch_size=1)
# Map each row of class probabilities back to its emotion label string.
prediction = encoder.inverse_transform(result)
print(prediction)