In [1]:
import numpy as np
import pandas as pd
import librosa
import os

In [2]:
# Read the training manifest and derive each clip's .wav path from its ID,
# keeping only the two columns the rest of the notebook uses.
train_manifest = pd.read_csv('train.csv')
wav_paths = '/Competition/train/train_new/train_' + train_manifest['ID'].astype(str) + '.wav'
trn_dl = train_manifest.assign(Audio=wav_paths)[['Audio', 'Label']]

In [3]:
max_pad_len = 150
def extract_features(file_name):
    """Load an audio file and return a fixed-width MFCC matrix.

    The clip is loaded at 8 kHz and converted to 40 MFCC coefficients per
    frame. The result is then forced to exactly ``max_pad_len`` frames:
    shorter clips are zero-padded on the right, longer clips are truncated.

    Parameters
    ----------
    file_name : str
        Path of the audio file to read.

    Returns
    -------
    numpy.ndarray or None
        Array of shape ``(40, max_pad_len)``, or ``None`` when the file could
        not be loaded or processed (a message naming the file is printed).
    """
    try:
        audio, sample_rate = librosa.load(file_name, sr=8000)
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    except Exception as exc:
        # Best-effort by design: report the failure and let the caller decide
        # what to do with None. Catching Exception (not a bare except) keeps
        # Ctrl-C / kernel interrupts working during long extraction loops.
        print("Error while extracting features from", file_name, "-", exc)
        return None

    # Truncate first so the pad width below can never be negative; previously a
    # clip longer than max_pad_len made np.pad raise and the clip was dropped.
    mfccs = mfccs[:, :max_pad_len]
    pad_width = max_pad_len - mfccs.shape[1]
    if pad_width > 0:
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')
    return mfccs

In [4]:
# Extract MFCC features for every training clip listed in trn_dl and collect
# them, together with their labels, into featuresdf.
path = '/Competition/train/train_new/train_'  # NOTE(review): not read in this cell; kept in case another cell uses it
features = []
failed_files = []
for audio_path, class_labels in zip(trn_dl["Audio"], trn_dl["Label"]):
    # The stored audio paths begin with '/', so prefixing '..' resolves them
    # relative to the parent of the working directory.
    file_name = '..' + str(audio_path)
    data = extract_features(file_name)
    if data is None:
        # A None feature would turn the stacked feature array into a ragged
        # object array and break the later reshape, so skip it and report it.
        failed_files.append(file_name)
        continue
    features.append([data, class_labels])

if failed_files:
    print("Skipped", len(failed_files), "file(s) whose features could not be extracted, e.g.:", failed_files[:10])

featuresdf = pd.DataFrame(features, columns=['feature','label'])

In [5]:
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
# Stack the per-clip feature matrices into one array and one-hot encode the labels.
X = np.array(featuresdf['feature'].tolist())
y = np.array(featuresdf['label'].tolist())

# `le` is kept so predicted class indices can be mapped back to the original labels.
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

In [6]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the clips for validation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

In [7]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# Each clip is treated as a single-channel "image" of 40 MFCC rows by 150 frames.
num_rows = 40
num_columns = 150
num_channels = 1
input_shape = (num_rows, num_columns, num_channels)

# Add the trailing channel axis that Conv2D expects.
x_train = x_train.reshape((x_train.shape[0],) + input_shape)
x_test = x_test.reshape((x_test.shape[0],) + input_shape)

num_labels = yy.shape[1]
filter_size = 2

# Four identical conv -> max-pool -> dropout stages with a doubling filter
# count, followed by global average pooling and a softmax classifier.
model = Sequential()
for stage, n_filters in enumerate((16, 32, 64, 128)):
    # Only the first layer needs to be told the input shape.
    first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
    model.add(Conv2D(filters=n_filters, kernel_size=filter_size, activation='relu', **first_layer_kwargs))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))
model.add(GlobalAveragePooling2D())

model.add(Dense(num_labels, activation='softmax'))

In [8]:
# One-hot targets with a softmax output: categorical cross-entropy loss, Adam optimizer, accuracy reported.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
from keras.callbacks import ModelCheckpoint 
from datetime import datetime

num_epochs = 100
num_batch_size = 100

# Save the model to disk each time the validation loss improves.
# NOTE(review): after fit() returns, `model` still holds the final-epoch
# weights; the best checkpoint only exists on disk unless it is reloaded.
checkpointer = ModelCheckpoint(
    filepath='D:/anaconda3/5420 Assignments/Competition',
    save_best_only=True,
    verbose=1,
)

# Time the whole training run.
start = datetime.now()
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=num_epochs,
    batch_size=num_batch_size,
    callbacks=[checkpointer],
    verbose=1,
)
duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 0.26025, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 2/100
Epoch 00002: val_loss improved from 0.26025 to 0.16254, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 3/100
Epoch 00003: val_loss improved from 0.16254 to 0.12803, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 4/100
Epoch 00004: val_loss improved from 0.12803 to 0.10001, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 5/100
Epoch 00005: val_loss improved from 0.10001 to 0.08230, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/

Epoch 00022: val_loss improved from 0.03434 to 0.03381, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 23/100
Epoch 00023: val_loss improved from 0.03381 to 0.03118, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 24/100
Epoch 00024: val_loss improved from 0.03118 to 0.02998, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 25/100
Epoch 00025: val_loss did not improve from 0.02998
Epoch 26/100
Epoch 00026: val_loss improved from 0.02998 to 0.02815, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 27/100
Epoch 00027: val_loss did not improve from 0.02815
Epoch 28/100
Epoch 00028: val_loss did no

Epoch 47/100
Epoch 00047: val_loss improved from 0.02044 to 0.01989, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 48/100
Epoch 00048: val_loss improved from 0.01989 to 0.01987, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 49/100
Epoch 00049: val_loss did not improve from 0.01987
Epoch 50/100
Epoch 00050: val_loss did not improve from 0.01987
Epoch 51/100
Epoch 00051: val_loss did not improve from 0.01987
Epoch 52/100
Epoch 00052: val_loss did not improve from 0.01987
Epoch 53/100
Epoch 00053: val_loss did not improve from 0.01987
Epoch 54/100
Epoch 00054: val_loss did not improve from 0.01987
Epoch 55/100
Epoch 00055: val_loss did not improve from 0.01987
Epoch 56/100
Epoch 00056: val_loss did not improve from 0.01987
Epoch 57/100
Epoch 00057: val_loss did not improve from 0.01987
Ep

Epoch 00073: val_loss improved from 0.01630 to 0.01513, saving model to D:/anaconda3/5420 Assignments\Competition
INFO:tensorflow:Assets written to: D:/anaconda3/5420 Assignments\Competition\assets
Epoch 74/100
Epoch 00074: val_loss did not improve from 0.01513
Epoch 75/100
Epoch 00075: val_loss did not improve from 0.01513
Epoch 76/100
Epoch 00076: val_loss did not improve from 0.01513
Epoch 77/100
Epoch 00077: val_loss did not improve from 0.01513
Epoch 78/100
Epoch 00078: val_loss did not improve from 0.01513
Epoch 79/100
Epoch 00079: val_loss did not improve from 0.01513
Epoch 80/100
Epoch 00080: val_loss did not improve from 0.01513
Epoch 81/100
Epoch 00081: val_loss did not improve from 0.01513
Epoch 82/100
Epoch 00082: val_loss did not improve from 0.01513
Epoch 83/100
Epoch 00083: val_loss did not improve from 0.01513
Epoch 84/100
Epoch 00084: val_loss did not improve from 0.01513
Epoch 85/100
Epoch 00085: val_loss did not improve from 0.01513
Epoch 86/100
Epoch 00086: val_loss

Training completed in time:  6:46:49.521968


In [10]:
# Predict a label for every test clip and collect (clip id, predicted label)
# rows in `res`.
NUM_TEST_FILES = 24750
TEST_FILE_PREFIX = "D:/anaconda3/5420 Assignments/Competition/test/test_new/test_"
PREDICT_CHUNK_SIZE = 1024  # clips per model.predict call; bounds memory use

res = np.zeros((NUM_TEST_FILES, 2))
res[:, 0] = np.arange(NUM_TEST_FILES)

# Calling model.predict once per file is dominated by per-call overhead, so
# the clips are featurised in chunks and each chunk is predicted in one call.
for chunk_start in range(0, NUM_TEST_FILES, PREDICT_CHUNK_SIZE):
    chunk_stop = min(chunk_start + PREDICT_CHUNK_SIZE, NUM_TEST_FILES)
    chunk_features = []
    for i in range(chunk_start, chunk_stop):
        file_name = TEST_FILE_PREFIX + str(i) + ".wav"
        prediction_feature = extract_features(file_name)
        if prediction_feature is None:
            # Fail with the offending file name instead of an AttributeError
            # from calling .reshape on None.
            raise RuntimeError("Feature extraction failed for " + file_name)
        chunk_features.append(prediction_feature)

    prediction_batch = np.stack(chunk_features).reshape(
        len(chunk_features), num_rows, num_columns, num_channels
    )
    predicted_vector = model.predict(prediction_batch, verbose=0)
    classes_x = np.argmax(predicted_vector, axis=1)
    # Map class indices back to the original label values.
    predicted_class = le.inverse_transform(classes_x)
    res[chunk_start:chunk_stop, 1] = predicted_class

In [11]:
# Write the (clip id, predicted label) rows as comma-separated values.
# NOTE(review): no fmt or header is passed, so NumPy's default float format is
# used and no header row is written; confirm the submission format accepts this.
np.savetxt(fname="sub_2_ver2.csv", X=res, delimiter=",")