In [57]:
from tensorflow import keras
import tensorflow as tf
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score, f1_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional
from tensorflow.keras.optimizers import SGD
from tensorflow.random import set_seed

set_seed(2024)
np.random.seed(2024)


import csv
import librosa
import librosa.display
import matplotlib.pyplot as plt
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

train_dir = os.path.abspath('../data/train/train')
test_dir = os.path.abspath('../data/test/test')

classes = os.listdir(train_dir + '/audio')
classes.remove("_background_noise_")

X_train = np.load(train_dir + "/X_train.npy")
y_train = np.load(train_dir + "/y_train.npy")

X_val = np.load(train_dir + "/X_val.npy")
y_val = np.load(train_dir + "/y_val.npy")

X_train = X_train.reshape((-1, X_train.shape[1], X_train.shape[2]))
X_val = X_val.reshape((-1, X_val.shape[1], X_val.shape[2]))

X_test = np.load(test_dir + '/X_test.npy')
X_files = np.loadtxt(test_dir + '/X_files.txt', delimiter=" ", dtype='str')

def plot_loss(history_df, name, idx):
    plt.figure()
    plt.plot(history_df['loss'])
    plt.plot(history_df['val_loss'])
    plt.title(f'{name}: loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper right')
    path = f'train_history/{name}/{idx}_loss.png'
    plt.savefig(path)
    print(f'Loss plot is saved to: {path}')
    plt.close()


In [83]:
input_shape = (122, 85)
epochs = 100
batch_size = 32


model_lstm = Sequential()
model_lstm.add(LSTM(units=125, activation="tanh", input_shape=input_shape))
model_lstm.add(Dense(units=len(classes)))
# Compiling the model
model_lstm.compile(optimizer="RMSprop", loss="mse")

print(model_lstm.summary())

# Model training
history = model_lstm.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val))

history_df = pd.DataFrame(history.history) 
hist_csv_file = 'train_history/' + 'LSTM' + '/' + str(0) + '_history.csv'
with open(hist_csv_file, mode='w') as f:
    history_df.to_csv(f)

plot_loss(history_df, 'LSTM', str(0))

# Check validation accuracy and f1
y_pred = np.argmax(model_lstm.predict(X_val), axis=-1)
accuracy = accuracy_score(y_val, y_pred)

f1 = f1_score(y_val, y_pred, average='macro')
metrics_df = pd.DataFrame({'accuracy': [accuracy], 'f1': [f1]}) 
metrics_csv_file = 'train_history/' + 'LSTM' + '/' + str(0) + '_metrics.csv'
with open(metrics_csv_file, mode='w') as f:
    metrics_df.to_csv(f)
print(metrics_df)

# Prepare submission
submission_df = pd.DataFrame({'fname': [], 'label': []}) 
for ind, test_file in enumerate(X_files):
    y_pred = np.argmax(model_lstm.predict(np.array([X_test[ind]])), axis=-1)
    submission_df.loc[len(submission_df.index)] = [X_files[ind], classes[y_pred[0]]]
submission_csv_file = 'train_history/' + 'LSTM' + '/' + str(0) + '_submission.csv'
with open(submission_csv_file, mode='w') as f:
    submission_df.to_csv(f, index=False, lineterminator='\n')
print('Submission saved.')

Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_6 (LSTM)               (None, 125)               105500    
                                                                 
 dense_6 (Dense)             (None, 30)                3780      
                                                                 
Total params: 109,280
Trainable params: 109,280
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/2
Epoch 2/2
Loss plot is saved to: train_history/LSTM/0_loss.png
   accuracy        f1
0  0.030891  0.028384
Submission saved.


In [85]:
input_shape = (122, 85)
epochs = 100
batch_size = 32


model_gru = Sequential()
model_gru.add(GRU(units=125, activation="tanh", input_shape=input_shape))
model_gru.add(Dense(units=len(classes)))
# Compiling the model
model_gru.compile(optimizer="RMSprop", loss="mse")

print(model_gru.summary())

# Model training
history = model_gru.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_val, y_val))

history_df = pd.DataFrame(history.history) 
hist_csv_file = 'train_history/' + 'GRU' + '/' + str(0) + '_history.csv'
with open(hist_csv_file, mode='w') as f:
    history_df.to_csv(f)

plot_loss(history_df, 'GRU', str(0))

# Check validation accuracy and f1
y_pred = np.argmax(model_gru.predict(X_val), axis=-1)
accuracy = accuracy_score(y_val, y_pred)

f1 = f1_score(y_val, y_pred, average='macro')
metrics_df = pd.DataFrame({'accuracy': [accuracy], 'f1': [f1]}) 
metrics_csv_file = 'train_history/' + 'GRU' + '/' + str(0) + '_metrics.csv'
with open(metrics_csv_file, mode='w') as f:
    metrics_df.to_csv(f)
print(metrics_df)

# Prepare submission
submission_df = pd.DataFrame({'fname': [], 'label': []}) 
for ind, test_file in enumerate(X_files):
    y_pred = np.argmax(model_gru.predict(np.array([X_test[ind]])), axis=-1)
    submission_df.loc[len(submission_df.index)] = [X_files[ind], classes[y_pred[0]]]
submission_csv_file = 'train_history/' + 'GRU' + '/' + str(0) + '_submission.csv'
with open(submission_csv_file, mode='w') as f:
    submission_df.to_csv(f, index=False, lineterminator='\n')
print('Submission saved.')

Loss plot is saved to: train_history/GRU/0_loss.png
   accuracy        f1
0  0.052368  0.044722
Submission saved.
