# <center> Emotion recognition task

In [4]:
import pandas as pd
import numpy as np
import os

In [5]:
emotion_metadata = os.listdir('Annotations_by_emotions')
emotion_metadata

['data_Angry.csv',
 'data_Disgusted.csv',
 'data_Domination.csv',
 'data_Happy.csv',
 'data_Neutral.csv',
 'data_Sad.csv',
 'data_Scared.csv',
 'data_Shame.csv',
 'data_Submission.csv',
 'data_Surprised.csv',
 'data_Tiredness.csv']

## Первый подход (без stft)

In [6]:
def features_extractor(filename):
    audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
    
    return mfccs_scaled_features

In [7]:
features_extractor('Audio/10dec_D11_1_mic.wav')

array([-3.4626126e+02,  9.1232590e+01,  1.8996464e+01,  2.1748775e+01,
        2.5303898e+00,  1.3351003e+00, -4.1439075e+00, -4.4578829e+00,
       -6.6111913e+00, -1.2234282e-01, -4.7692361e+00,  4.1236606e+00,
       -4.3782949e+00,  1.3337648e+00,  1.0647517e+00,  5.6903768e-02,
       -1.6698180e+00,  2.5723994e+00, -1.1487240e+00, -1.4735566e-01,
        3.1428200e-01, -4.2726949e-01, -7.7109551e-01,  1.0621809e+00,
        1.2280817e+00,  1.6288308e+00,  3.0913405e+00,  2.8090508e+00,
        3.7402315e+00,  2.2184596e+00,  8.2098657e-01, -5.3912725e-02,
        2.6544949e-02,  6.9597429e-01,  1.4695151e+00,  9.1858304e-01,
        3.0254650e-01, -3.1394064e-01,  3.4233758e-01,  1.1811378e+00],
      dtype=float32)

In [8]:
emotion_dict = dict(zip(emotion_metadata, list(map(lambda x: x[5:-4], emotion_metadata))))
emotion_dict

{'data_Angry.csv': 'Angry',
 'data_Disgusted.csv': 'Disgusted',
 'data_Domination.csv': 'Domination',
 'data_Happy.csv': 'Happy',
 'data_Neutral.csv': 'Neutral',
 'data_Sad.csv': 'Sad',
 'data_Scared.csv': 'Scared',
 'data_Shame.csv': 'Shame',
 'data_Submission.csv': 'Submission',
 'data_Surprised.csv': 'Surprised',
 'data_Tiredness.csv': 'Tiredness'}

In [9]:
def make_dataset(emotion_dict, emotion_path, audio_path):
    small_files = 0
    files_lost = 0
    error = 0
    result_data = []
    for table, classname in emotion_dict.items():
        data = pd.read_csv(emotion_path + '/' + table)
        for i, row in data.iterrows():
            if row['End'] - row['Start'] > 3:
                try:
                    try:
                        file = AudioSegment.from_file(audio_path + '/' + row['File'] + '_mic.wav')
                    except FileNotFoundError:
                        files_lost += 1
                        continue
                    new_file = file[row['Start'] * 1000 : row['End'] * 1000]
                    new_file.export('test.wav', format='wav')
                    features = features_extractor('test.wav')
                    result_data.append([features, classname])
                except:
                    error += 1
                    continue
            else:
                small_files += 1
            if i % 100 == 0:
                print(f'Class {classname}, Step {i}')
    print(f'Small files: {small_files}, Lost_files: {files_lost}, Errors: {error}')
    return result_data

In [46]:
ds = make_dataset(emotion_dict, 'Annotations_by_emotions', 'Audio')

Class Angry, Step 0
Class Angry, Step 100
Class Angry, Step 200
Class Angry, Step 300
Class Angry, Step 400
Class Angry, Step 500
Class Angry, Step 600
Class Angry, Step 700
Class Angry, Step 800
Class Angry, Step 900
Class Angry, Step 1000
Class Angry, Step 1100
Class Angry, Step 1200
Class Angry, Step 1300
Class Angry, Step 1400
Class Angry, Step 1500
Class Disgusted, Step 0
Class Disgusted, Step 100
Class Disgusted, Step 200
Class Disgusted, Step 300
Class Disgusted, Step 400
Class Disgusted, Step 500
Class Disgusted, Step 600
Class Disgusted, Step 700
Class Disgusted, Step 800
Class Disgusted, Step 900
Class Disgusted, Step 1000
Class Disgusted, Step 1100
Class Domination, Step 0
Class Domination, Step 100
Class Domination, Step 200
Class Domination, Step 300
Class Domination, Step 400
Class Domination, Step 500
Class Domination, Step 600
Class Domination, Step 700
Class Domination, Step 800
Class Domination, Step 900
Class Domination, Step 1000
Class Domination, Step 1100
Class Do

In [53]:
X = np.array([ds[i][0] for i in range(len(ds))])

In [55]:
X.shape

(12872, 40)

In [56]:
y = np.array([ds[i][1] for i in range(len(ds))])

In [57]:
y.shape

(12872,)

In [58]:
y=np.array(pd.get_dummies(y))

In [60]:
y.shape

(12872, 11)

In [90]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.1,random_state=0)

In [44]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
from keras.layers.recurrent import LSTM

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [92]:
num_labels=y.shape[1]

### Модель

In [140]:
model=Sequential()
###first layer
model.add(Dense(100,input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.2))
###second layer
model.add(Dense(200))
model.add(Activation('relu'))
model.add(Dropout(0.2))
###third layer
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dropout(0.2))

###final layer
model.add(Dense(num_labels))
model.add(Activation('softmax'))

In [141]:
opt = Adam(learning_rate=0.001)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy", "AUC"])

In [142]:
import tensorflow as tf

In [143]:
# model.compile(loss='mse',metrics=[tf.keras.metrics.AUC()], optimizer='adam')

In [146]:
## Trianing my model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 200
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/200

Epoch 00001: val_loss improved from inf to 1.68835, saving model to saved_models\audio_classification.hdf5
Epoch 2/200

Epoch 00002: val_loss improved from 1.68835 to 1.67651, saving model to saved_models\audio_classification.hdf5
Epoch 3/200

Epoch 00003: val_loss improved from 1.67651 to 1.65924, saving model to saved_models\audio_classification.hdf5
Epoch 4/200

Epoch 00004: val_loss did not improve from 1.65924
Epoch 5/200

Epoch 00005: val_loss did not improve from 1.65924
Epoch 6/200

Epoch 00006: val_loss did not improve from 1.65924
Epoch 7/200

Epoch 00007: val_loss did not improve from 1.65924
Epoch 8/200

Epoch 00008: val_loss did not improve from 1.65924
Epoch 9/200

Epoch 00009: val_loss did not improve from 1.65924
Epoch 10/200

Epoch 00010: val_loss did not improve from 1.65924
Epoch 11/200

Epoch 00011: val_loss did not improve from 1.65924
Epoch 12/200

Epoch 00012: val_loss did not improve from 1.65924
Epoch 13/200

Epoch 00013: val_loss did not improve f


Epoch 00035: val_loss did not improve from 1.64768
Epoch 36/200

Epoch 00036: val_loss did not improve from 1.64768
Epoch 37/200

Epoch 00037: val_loss did not improve from 1.64768
Epoch 38/200

Epoch 00038: val_loss did not improve from 1.64768
Epoch 39/200

Epoch 00039: val_loss did not improve from 1.64768
Epoch 40/200

Epoch 00040: val_loss did not improve from 1.64768
Epoch 41/200

Epoch 00041: val_loss did not improve from 1.64768
Epoch 42/200

Epoch 00042: val_loss did not improve from 1.64768
Epoch 43/200

Epoch 00043: val_loss did not improve from 1.64768
Epoch 44/200

Epoch 00044: val_loss did not improve from 1.64768
Epoch 45/200

Epoch 00045: val_loss did not improve from 1.64768
Epoch 46/200

Epoch 00046: val_loss did not improve from 1.64768
Epoch 47/200

Epoch 00047: val_loss did not improve from 1.64768
Epoch 48/200

Epoch 00048: val_loss did not improve from 1.64768
Epoch 49/200

Epoch 00049: val_loss did not improve from 1.64768
Epoch 50/200

Epoch 00050: val_loss di


Epoch 00071: val_loss did not improve from 1.64768
Epoch 72/200

Epoch 00072: val_loss did not improve from 1.64768
Epoch 73/200

Epoch 00073: val_loss did not improve from 1.64768
Epoch 74/200

Epoch 00074: val_loss did not improve from 1.64768
Epoch 75/200

Epoch 00075: val_loss did not improve from 1.64768
Epoch 76/200

Epoch 00076: val_loss did not improve from 1.64768
Epoch 77/200

Epoch 00077: val_loss did not improve from 1.64768
Epoch 78/200

Epoch 00078: val_loss did not improve from 1.64768
Epoch 79/200

Epoch 00079: val_loss did not improve from 1.64768
Epoch 80/200

Epoch 00080: val_loss did not improve from 1.64768
Epoch 81/200

Epoch 00081: val_loss did not improve from 1.64768
Epoch 82/200

Epoch 00082: val_loss did not improve from 1.64768
Epoch 83/200

Epoch 00083: val_loss did not improve from 1.64768
Epoch 84/200

Epoch 00084: val_loss did not improve from 1.64768
Epoch 85/200

Epoch 00085: val_loss did not improve from 1.64768
Epoch 86/200

Epoch 00086: val_loss di


Epoch 00107: val_loss did not improve from 1.64768
Epoch 108/200

Epoch 00108: val_loss did not improve from 1.64768
Epoch 109/200

Epoch 00109: val_loss did not improve from 1.64768
Epoch 110/200

Epoch 00110: val_loss did not improve from 1.64768
Epoch 111/200

Epoch 00111: val_loss did not improve from 1.64768
Epoch 112/200

Epoch 00112: val_loss did not improve from 1.64768
Epoch 113/200

Epoch 00113: val_loss did not improve from 1.64768
Epoch 114/200

Epoch 00114: val_loss did not improve from 1.64768
Epoch 115/200

Epoch 00115: val_loss did not improve from 1.64768
Epoch 116/200

Epoch 00116: val_loss did not improve from 1.64768
Epoch 117/200

Epoch 00117: val_loss did not improve from 1.64768
Epoch 118/200

Epoch 00118: val_loss did not improve from 1.64768
Epoch 119/200

Epoch 00119: val_loss did not improve from 1.64768
Epoch 120/200

Epoch 00120: val_loss did not improve from 1.64768
Epoch 121/200

Epoch 00121: val_loss did not improve from 1.64768
Epoch 122/200

Epoch 001


Epoch 00143: val_loss did not improve from 1.64768
Epoch 144/200

Epoch 00144: val_loss did not improve from 1.64768
Epoch 145/200

Epoch 00145: val_loss did not improve from 1.64768
Epoch 146/200

Epoch 00146: val_loss did not improve from 1.64768
Epoch 147/200

Epoch 00147: val_loss did not improve from 1.64768
Epoch 148/200

Epoch 00148: val_loss did not improve from 1.64768
Epoch 149/200

Epoch 00149: val_loss did not improve from 1.64768
Epoch 150/200

Epoch 00150: val_loss did not improve from 1.64768
Epoch 151/200

Epoch 00151: val_loss did not improve from 1.64768
Epoch 152/200

Epoch 00152: val_loss did not improve from 1.64768
Epoch 153/200

Epoch 00153: val_loss did not improve from 1.64768
Epoch 154/200

Epoch 00154: val_loss did not improve from 1.64768
Epoch 155/200

Epoch 00155: val_loss did not improve from 1.64768
Epoch 156/200

Epoch 00156: val_loss did not improve from 1.64768
Epoch 157/200

Epoch 00157: val_loss did not improve from 1.64768
Epoch 158/200

Epoch 001


Epoch 00179: val_loss did not improve from 1.64768
Epoch 180/200

Epoch 00180: val_loss did not improve from 1.64768
Epoch 181/200

Epoch 00181: val_loss did not improve from 1.64768
Epoch 182/200

Epoch 00182: val_loss did not improve from 1.64768
Epoch 183/200

Epoch 00183: val_loss did not improve from 1.64768
Epoch 184/200

Epoch 00184: val_loss did not improve from 1.64768
Epoch 185/200

Epoch 00185: val_loss did not improve from 1.64768
Epoch 186/200

Epoch 00186: val_loss did not improve from 1.64768
Epoch 187/200

Epoch 00187: val_loss did not improve from 1.64768
Epoch 188/200

Epoch 00188: val_loss did not improve from 1.64768
Epoch 189/200

Epoch 00189: val_loss did not improve from 1.64768
Epoch 190/200

Epoch 00190: val_loss did not improve from 1.64768
Epoch 191/200

Epoch 00191: val_loss did not improve from 1.64768
Epoch 192/200

Epoch 00192: val_loss did not improve from 1.64768
Epoch 193/200

Epoch 00193: val_loss did not improve from 1.64768
Epoch 194/200

Epoch 001

In [163]:
model.evaluate(X_train, y_train)



[1.1675776243209839, 0.5694923996925354, 0.9389387965202332]

In [164]:
model.evaluate(X_test, y_test)



[1.6886274814605713, 0.3928571343421936, 0.8564711809158325]

In [192]:
list(map(np.argmax, model.predict(X_test)[:20]))

[3, 2, 0, 8, 0, 0, 3, 3, 4, 4, 2, 4, 1, 4, 1, 3, 4, 0, 6, 0]

In [193]:
list(map(np.argmax, y_test[:20]))

[9, 8, 1, 6, 0, 9, 3, 3, 8, 4, 9, 8, 7, 4, 1, 3, 4, 2, 2, 0]

In [194]:
from sklearn.metrics import f1_score

In [199]:
f1_score(list(map(np.argmax, model.predict(X_test))), list(map(np.argmax, y_test)), average=None)

array([0.46931408, 0.40875912, 0.34061135, 0.51578947, 0.55769231,
       0.20512821, 0.11904762, 0.22222222, 0.40169133, 0.27488152,
       0.        ])

In [202]:
emotion_dict.values()

dict_values(['Angry', 'Disgusted', 'Domination', 'Happy', 'Neutral', 'Sad', 'Scared', 'Shame', 'Submission', 'Surprised', 'Tiredness'])

In [203]:
f1_score(list(map(np.argmax, model.predict(X_train))), list(map(np.argmax, y_train)), average=None)

array([0.63456311, 0.6247191 , 0.51572976, 0.6793238 , 0.63405527,
       0.43949045, 0.46376812, 0.50424929, 0.54701286, 0.46542393,
       0.19354839])

In [207]:
tf.random.normal([32, 10, 8])[0].shape

TensorShape([10, 8])

In [208]:
model.save('my_model_1.hdf5')

## Второй подход (с stft и audio_crop)

In [17]:
def features_extractor_fft(filename):
    audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=13, n_fft=512, hop_length=512)
#     mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
    return mfccs_features.T

In [18]:
features_extractor_fft('test.wav').shape

(130, 13)

In [19]:
def make_dataset_fft(emotion_dict, emotion_path, audio_path):
    small_files = 0
    files_lost = 0
    error = 0
    result_data = []
    for table, classname in emotion_dict.items():
        data = pd.read_csv(emotion_path + '/' + table)
        for i, row in data.iterrows():
            if row['End'] - row['Start'] > 3:
                try:
                    try:
                        file = AudioSegment.from_file(audio_path + '/' + row['File'] + '_mic.wav')
                    except FileNotFoundError:
                        files_lost += 1
                        continue
#                     new_file = file[row['Start'] * 1000 : row['End'] * 1000]
#                     new_file.export('test.wav', format='wav')
#                     features = features_extractor('test.wav')
#                     result_data.append([features, classname])
                    start = row['Start'] * 1000
                    end = row['Start'] + 3000
                    while end < row['End'] * 1000:
                        new_file = file[start: end]
                        new_file.export('test.wav', format='wav')
                        features = features_extractor_fft('test.wav')
                        result_data.append([features, classname])
                        start = end
                        end += 3000
                except:
                    error += 1
                    continue
            else:
                small_files += 1
            if i % 100 == 0:
                print(f'Class {classname}, Step {i}')
    print(f'Small files: {small_files}, Lost_files: {files_lost}, Errors: {error}')
    return result_data

In [20]:
ds_fft = make_dataset_fft(emotion_dict, 'Annotations_by_emotions', 'Audio')

Class Angry, Step 500
Class Angry, Step 1200
Class Angry, Step 1300
Class Disgusted, Step 700
Class Disgusted, Step 900
Class Domination, Step 100




Class Domination, Step 200
Class Domination, Step 400




Class Domination, Step 600
Class Domination, Step 700
Class Domination, Step 800
Class Domination, Step 1000




Class Domination, Step 1100
Class Domination, Step 1500
Class Domination, Step 1600
Class Domination, Step 1700




Class Domination, Step 1900
Class Domination, Step 2000
Class Domination, Step 2100
Class Happy, Step 500
Class Happy, Step 600
Class Happy, Step 1000
Class Happy, Step 1400
Class Happy, Step 1500
Class Happy, Step 1600
Class Happy, Step 1700
Class Happy, Step 1800
Class Happy, Step 1900
Class Happy, Step 2100
Class Happy, Step 2400
Class Neutral, Step 0
Class Neutral, Step 200
Class Neutral, Step 400
Class Neutral, Step 500




Class Neutral, Step 600
Class Neutral, Step 1000
Class Neutral, Step 1200
Class Sad, Step 300
Class Sad, Step 800
Class Scared, Step 100




Class Scared, Step 500




Class Scared, Step 700
Class Scared, Step 800
Class Scared, Step 900
Class Scared, Step 1000
Class Scared, Step 1100
Class Submission, Step 100
Class Submission, Step 200




Class Submission, Step 700
Class Submission, Step 900
Class Submission, Step 1000
Class Submission, Step 1400
Class Submission, Step 1500
Class Submission, Step 1900
Class Submission, Step 2000
Class Surprised, Step 100
Class Surprised, Step 400
Class Surprised, Step 600
Class Surprised, Step 700
Class Surprised, Step 1000
Class Surprised, Step 1100
Class Surprised, Step 1200
Class Surprised, Step 1300
Class Surprised, Step 1500
Class Surprised, Step 1600
Class Tiredness, Step 0
Small files: 2886, Lost_files: 0, Errors: 10007


In [21]:
len(ds_fft)

28387

In [33]:
ds_fft[1][1]

'Angry'

In [34]:
x_data = []
y_data = []
for i in range(len(ds_fft)):
    if ds_fft[i][0].shape == (130, 13):
        x_data.append(ds_fft[i][0])
        y_data.append(ds_fft[i][1])

In [174]:
X = np.array(x_data)
y = np.array(y_data)
y=np.array(pd.get_dummies(y))

In [175]:
X.shape, y.shape

((26167, 130, 13), (26167, 11))

In [177]:
# pd.Series(y).value_counts()

In [178]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.15,random_state=0)

In [179]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten, Bidirectional, Attention,ELU, Input
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
import tensorflow as tf
from keras.layers.recurrent import LSTM
from tensorflow.compat.v1.keras.layers import CuDNNLSTM
# from attention_keras.layers.attention import AttentionLayer
# from keras_self_attention import SeqSelfAttention
from tensorflow.keras import initializers
from tensorflow.keras import regularizers
from tensorflow.keras import constraints
from keras.layers.merge import Multiply

In [180]:
y

array([[1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 0, 1]], dtype=uint8)

### Модель

In [183]:
input_shape = (130, 13)
optimizer  = Adam()
n_classes = 11

model = Sequential()
model.add(LSTM(units=128, dropout=0.05, recurrent_dropout=0.35, return_sequences=True, input_shape=input_shape))
model.add(LSTM(units=32,  dropout=0.05, recurrent_dropout=0.35, return_sequences=False))
model.add(Dense(units=n_classes, activation="softmax"))
# model.add(Dense(n_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['acc'])

In [186]:
## Trianing my model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 2
num_batch_size = 32

# checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification11.hdf5', 
#                                verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/2
Epoch 2/2
Training completed in time:  0:14:13.261720


In [187]:
num_epochs = 10
num_batch_size = 32

# checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification11.hdf5', 
#                                verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Training completed in time:  1:19:41.323812


In [190]:
# model.save('my_model_2.hdf5') Не работает сохранение модели

## Модель CNN

In [222]:
X1 = X.reshape((26167, 130, 13, 1))

In [223]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X1,y,test_size=0.15,random_state=0)

In [224]:
opt = tf.keras.optimizers.RMSprop(lr=0.0005, decay=1e-6)

In [225]:
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
import pandas as pd
import numpy as np

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same',
                 input_shape=(130, 13, 1)))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(1, 1)))
model.add(Dropout(0.5))
model.add(Conv2D(128, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Conv2D(128, (3, 3)))
model.add(Activation('relu'))
# model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(11, activation='softmax'))
model.compile(optimizer = 'rmsprop', loss="categorical_crossentropy", metrics=["accuracy"])
model.summary()

Model: "sequential_24"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_119 (Conv2D)          (None, 130, 13, 32)       320       
_________________________________________________________________
activation_125 (Activation)  (None, 130, 13, 32)       0         
_________________________________________________________________
conv2d_120 (Conv2D)          (None, 128, 11, 64)       18496     
_________________________________________________________________
activation_126 (Activation)  (None, 128, 11, 64)       0         
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 64, 5, 64)         0         
_________________________________________________________________
dropout_66 (Dropout)         (None, 64, 5, 64)         0         
_________________________________________________________________
conv2d_121 (Conv2D)          (None, 64, 5, 64)       

In [226]:
num_epochs = 10
num_batch_size = 32

# checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification11.hdf5', 
#                                verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/10
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('model',), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('model',), but source function had ()
Please report this to t

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: Unable to locate the source code of <bound method Reduce.result of <keras.metrics.Mean object at 0x000001C1B0EF5E50>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: lineno is out of bounds
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('model',), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full 

In [227]:
num_epochs = 10
num_batch_size = 32

# checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification11.hdf5', 
#                                verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
 26/696 [>.............................] - ETA: 2:22 - loss: 1.6577 - accuracy: 0.3966

KeyboardInterrupt: 

## CNN + RNN

In [233]:
from keras.layers import Conv1D, MaxPooling1D

In [235]:
model = Sequential()
model.add(Conv1D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=(130, 13)))
model.add(MaxPooling1D(pool_size=2))
model.add(LSTM(units=128, dropout=0.05, recurrent_dropout=0.35, return_sequences=True))
model.add(LSTM(units=32,  dropout=0.05, recurrent_dropout=0.35, return_sequences=False))
model.add(Dense(units=11, activation="softmax"))
# model.add(Dense(n_classes, activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])

In [236]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.15,random_state=0)

In [None]:
X.shape, y.shape

In [None]:
num_epochs = 100
num_batch_size = 32

# checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification11.hdf5', 
#                                verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/100
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('model',), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('model',), but source function had ()
Please report this to 