In [None]:
# Retrieve the preprocessed data persisted by a previous notebook via
# IPython's %store magic (-r restores a stored variable into this kernel).
# NOTE(review): x_train, x_test, y_train, y_test, yy and le are all reassigned
# further down in this notebook (label-encoding / train_test_split cell), so
# on a top-to-bottom run these restored values appear to be overwritten —
# confirm whether this cell is still needed.

%store -r x_train 
%store -r x_test 
%store -r y_train 
%store -r y_test 
%store -r yy 
%store -r le

In [None]:
import numpy as np
import pandas as pd
import os
from python_speech_features import mfcc
import scipy.io.wavfile as wav
from scipy import signal
# Target size of axis 1 of the matrix returned by extract_features: the MFCC
# matrix is zero-padded along that axis until it is this wide.
max_pad_len = 174

def extract_features(file_name):
    """Load a WAV file and turn it into a fixed-size MFCC matrix.

    The clip is read, scaled, down-mixed to mono, resampled to a fixed number
    of samples, converted to 20 MFCCs per frame and finally zero-padded along
    axis 1 up to ``max_pad_len``.

    Parameters
    ----------
    file_name : str
        Path of the WAV file to process.

    Returns
    -------
    numpy.ndarray or None
        The padded MFCC matrix, or ``None`` when the file cannot be read or
        processed. In that case the error is printed rather than raised, so a
        batch loop over many files can keep going.
    """
    try:
        # The file's own sample rate is not used: the signal is resampled to a
        # fixed number of samples below.
        _, audio = wav.read(file_name)

        # NOTE(review): dividing by 32768 assumes 16-bit PCM samples; files
        # stored with another sample format end up on a different scale —
        # confirm whether that matters for this dataset.
        audio = audio.astype(np.float32, order='C') / 32768.00

        # Down-mix multi-channel audio by averaging the channels. For stereo
        # this is the same (left + right) / 2 as before; mono input is 1-D and
        # passes through untouched. An explicit ndim check replaces the former
        # bare `except:` so unrelated errors are no longer silently rerouted.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # scipy's second argument is a sample COUNT, not a rate: every clip is
        # stretched/compressed to exactly 22050 samples, which keeps the number
        # of MFCC frames constant across files.
        f = signal.resample(audio, 22050)

        mfccs = mfcc(f, samplerate=22050, numcep=20, nfilt=26, nfft=1024, appendEnergy=False)

        # NOTE(review): python_speech_features returns (frames, numcep), so
        # this pads the coefficient axis (20 -> max_pad_len) rather than the
        # time axis — confirm this is intended. A matrix wider than
        # max_pad_len makes np.pad raise, which is reported below.
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    except Exception as e:
        print("Error encountered while parsing file: ", file_name)
        print(e)
        return None

    return mfccs
        

In [None]:
# Location of the UrbanSound8K audio folds and of its metadata table.
# NOTE(review): these are machine-specific absolute paths; the notebook only
# runs where this exact directory layout exists.
fulldatasetpath = 'C:/Users/lglar/Documents/2021/TCC/SOM/UrbanSound8K/audio/'

metadata = pd.read_csv('C:/Users/lglar/Documents/2021/TCC/SOM/UrbanSound8K/metadata/UrbanSound8K.csv')

# One [feature_matrix, class_label] pair per metadata row. The feature entry
# is None for files that extract_features could not process.
features = []
for fold, slice_name, class_label in zip(metadata["fold"],
                                         metadata["slice_file_name"],
                                         metadata["class"]):
    # Each clip lives in <audio root>/fold<N>/<slice_file_name>
    file_name = fulldatasetpath + 'fold' + str(fold) + '/' + str(slice_name)
    features.append([extract_features(file_name), class_label])

# Collect everything into a two-column pandas DataFrame
featuresdf = pd.DataFrame(features, columns=['feature', 'class'])

print('Finished feature extraction from ', len(featuresdf), ' files')
    

In [None]:

# Report how many files produced no features, then keep only the rows that
# did (extract_features returns None on failure).
missing_feature = featuresdf.feature.isna()
print(missing_feature.sum())
featuresdf = featuresdf[~missing_feature]

In [None]:
import numpy as np 
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

from sklearn.model_selection import train_test_split

# Stack the per-file feature matrices and collect the matching class names
X = np.array(list(featuresdf['feature']))
y = np.array(list(featuresdf['class']))

# Class names -> integer ids -> one-hot rows
le = LabelEncoder()
encoded_labels = le.fit_transform(y)
yy = to_categorical(encoded_labels)

# Hold out 20% of the examples for testing (fixed seed for repeatability).
# NOTE(review): this is a random split across all folds; UrbanSound8K ships
# predefined folds and slices of one recording may land on both sides —
# verify this evaluation protocol is intended.
x_train, x_test, y_train, y_test = train_test_split(
    X, yy, random_state=42, test_size=0.2
)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 

# Shape of a single input example as fed to the network: rows x columns x channels
num_rows, num_columns, num_channels = 99, 174, 1

# Add the trailing channel axis expected by Conv2D
x_train = x_train.reshape((len(x_train), num_rows, num_columns, num_channels))
x_test = x_test.reshape((len(x_test), num_rows, num_columns, num_channels))

# Number of output classes = width of the one-hot label matrix
num_labels = yy.shape[1]
# NOTE(review): filter_size is not referenced by the model cell below, which
# passes kernel_size=2 literally.
filter_size = 2

In [None]:
# CNN: five identical Conv2D -> MaxPooling2D -> Dropout stages that differ only
# in their filter count, followed by global average pooling and a softmax head.
model = Sequential()

for stage_index, n_filters in enumerate((16, 32, 64, 128, 64)):
    conv_kwargs = dict(filters=n_filters, kernel_size=2, activation='relu')
    if stage_index == 0:
        # Only the first layer declares the input shape
        conv_kwargs['input_shape'] = (num_rows, num_columns, num_channels)
    model.add(Conv2D(**conv_kwargs))
    model.add(MaxPooling2D(pool_size=2))
    model.add(Dropout(0.2))

model.add(GlobalAveragePooling2D())

# One softmax output per class
model.add(Dense(num_labels, activation='softmax'))

In [None]:
# Multi-class setup: one-hot targets with categorical cross-entropy, Adam
# optimizer, accuracy tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer architecture
model.summary()

# Baseline: score the still-untrained network on the test split.
# score holds [loss, accuracy], matching the metrics passed to compile().
score = model.evaluate(x_test, y_test, verbose=1)
accuracy = score[1] * 100

print("Pre-training accuracy: %.4f%%" % accuracy)

In [None]:
import os
from datetime import datetime

from keras.callbacks import ModelCheckpoint

num_epochs = 72
num_batch_size = 256

# Keep only the best weights seen so far (save_best_only=True).
checkpoint_path = 'saved_models/weights.best.basic_cnn.hdf5'
# Depending on the Keras version the callback may not create a missing target
# directory, in which case the first save would fail — make sure it exists.
os.makedirs(os.path.dirname(checkpoint_path), exist_ok=True)

checkpointer = ModelCheckpoint(filepath=checkpoint_path,
                               verbose=1, save_best_only=True)

# NOTE(review): the test split doubles as validation data here, so the
# checkpoint is selected using the same data the final accuracy is reported
# on — confirm this is acceptable.
start = datetime.now()

history = model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_test, y_test), callbacks=[checkpointer], verbose=1)

duration = datetime.now() - start
print("Training completed in time: ", duration)

In [None]:
# Evaluate the trained model on both splits; score[1] is the accuracy metric.
for label, inputs, targets in (
    ("Training Accuracy: ", x_train, y_train),
    ("Testing Accuracy: ", x_test, y_test),
):
    score = model.evaluate(inputs, targets, verbose=0)
    print(label, score[1])

In [None]:
# Show which per-epoch series the fit() call recorded; the plotting cell below
# reads 'accuracy', 'val_accuracy', 'loss' and 'val_loss' from this dict.
history.history.keys()

In [None]:
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
fig = plt.figure(figsize=(16,6))

print ("History keys:", (history.history.keys()))

# Two side-by-side panels — accuracy on the left, loss on the right — each
# showing the training curve and the validation curve per epoch.
panels = (
    (1, 'accuracy', 'val_accuracy', 'Model Accuracy'),
    (2, 'loss', 'val_loss', 'Model Loss'),
)
for position, train_key, val_key, panel_title in panels:
    plt.subplot(1, 2, position)
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(panel_title)
    plt.ylabel(train_key)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc='upper left')

plt.show()

In [None]:
def print_prediction(file_name):
    """Classify one audio file and print the result.

    Prints the most likely class followed by the probability the model
    assigns to every class known to the label encoder.

    Parameters
    ----------
    file_name : str
        Path of the WAV file to classify.

    Returns
    -------
    None
        Output is printed. Nothing is predicted when feature extraction
        fails (extract_features returns None in that case).
    """
    prediction_feature = extract_features(file_name)
    if prediction_feature is None:
        # extract_features has already reported the underlying error
        print("No prediction made: could not extract features from", file_name)
        return

    prediction_feature = prediction_feature.reshape(1, num_rows, num_columns, num_channels)

    # One forward pass yields the class probabilities; the predicted class is
    # their argmax. This replaces predict_classes / predict_proba, which are
    # no longer available in recent Keras releases.
    predicted_proba = model.predict(prediction_feature, verbose=0)[0]
    predicted_index = int(np.argmax(predicted_proba))

    predicted_class = le.inverse_transform([predicted_index])
    print("The predicted class is:", predicted_class[0], '\n')

    # le.classes_[i] is the name the encoder maps to id i
    for category, probability in zip(le.classes_, predicted_proba):
        print(category, "\t\t : ", format(probability, '.32f'))

In [None]:
from sklearn.metrics import confusion_matrix

# Turn predicted probabilities and one-hot targets into class ids, then
# tabulate true (rows) versus predicted (columns) classes.
class_probabilities = model.predict(x_test)
y_test_ = np.argmax(y_test, axis=1)
y_pred = np.argmax(class_probabilities, axis=1)

conf = confusion_matrix(y_test_, y_pred)
print(conf)

In [None]:
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

# Label both axes of the confusion matrix with the class names and draw it
# as an annotated heatmap.
class_names = le.classes_
df_cm = pd.DataFrame(conf, index=class_names, columns=class_names)

plt.figure(figsize=(10,7))
sn.set(font_scale=1.4)  # axis label size
sn.heatmap(df_cm, annot=True, annot_kws={"size": 12})  # cell annotation size

In [None]:
from matplotlib import pyplot

# Plot the per-epoch training accuracy recorded by fit(). The per-epoch
# series live in the History object's .history dict (the object itself
# cannot be subscripted); the two calls are separate statements.
pyplot.plot(history.history['accuracy'])
pyplot.show()

In [None]:
# Class: street music
# NOTE(review): in the UrbanSound8K naming scheme the second field of the file
# name is the class id (8 here); check the metadata CSV to confirm this clip
# really is street music.

# The path needs the .wav extension to point at the actual file, like the
# other example in this notebook.
filename = "C:/Users/lglar/Documents/2021/TCC/SOM/UrbanSound8K/audio/fold2/66996-8-1-0.wav" 
print_prediction(filename)

In [None]:
# Class: dog
# Run the trained model on a single clip and print the predicted class
# together with the per-class probabilities.

filename = "C:/Users/lglar/Documents/2021/TCC/SOM/UrbanSound8K/audio/fold2/18581-3-1-1.wav" 
print_prediction(filename)

In [None]:
from keras.models import load_model

# Persist the trained model to an HDF5 file in the current working directory.
model.save('tcc_v3.h5') 

In [None]:
import numpy 
# Save the label encoder's class names next to the model so predicted class
# ids can be mapped back to names without refitting the encoder.
numpy.save('classes_v3.npy', le.classes_)