In [None]:
!pip install librosa --user

In [None]:
!pip install pandas


In [None]:
!pip install tqdm

In [None]:
!pip install tensorflow --user

In [None]:
!pip install daal==2021.4.0

In [None]:
!pip install numpy==1.22

In [None]:
# TensorFlow is imported here because this cell runs before the notebook's later
# `import tensorflow as tf` cell; without it a fresh kernel raises NameError.
import tensorflow as tf

# tf.test.is_gpu_available() is deprecated; report whether any GPU device is visible instead.
print(len(tf.config.list_physical_devices('GPU')) > 0)

In [None]:
import numpy as np

In [None]:

import matplotlib.pyplot as plt
%matplotlib inline
import IPython.display as ipd
import librosa
import librosa.display
import pandas as pd
import os

In [None]:
np.__version__

In [None]:
filename = 'UrbanSound8K/dog_bark.wav'


In [None]:
# Plot the waveform of the sample clip and embed an audio player for it.
plt.figure(figsize=(15,6))
# librosa.load returns the decoded samples together with the sample rate they are at.
librosa_data, sample_rate = librosa.load(filename)
librosa.display.waveshow(librosa_data, sr=sample_rate)
# Last expression of the cell, so the notebook renders the player as the cell output.
ipd.Audio(filename)

In [None]:
print(librosa_data.shape)
librosa_data

In [None]:
sample_rate

In [None]:
from scipy.io import wavfile as wav

# Read the same file with SciPy's WAV reader (presumably to compare its raw samples and
# sample rate with what librosa returned earlier — confirm intent).
wav_sample_rate, wav_data = wav.read(filename)
wav_data


In [None]:
wav_sample_rate

In [None]:
plt.figure(figsize=(15,6))
plt.plot(wav_data)

In [None]:
metadata = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')

metadata

In [None]:
name = metadata.index[metadata['slice_file_name']=='34050-7-5-0.wav']
name

In [None]:
# NOTE(review): 6133 is a hard-coded row position, presumably the index returned by the
# '34050-7-5-0.wav' lookup in the previous cell — confirm, or index with `name` instead.
metadata.iloc[6133]

In [None]:
metadata['class'].unique()

In [None]:
metadata['class'].value_counts()

# Feature extraction

In [None]:
# Compute 40 MFCCs for the sample clip loaded above and show the resulting array shape.
mfccs = librosa.feature.mfcc(y=librosa_data, sr=sample_rate, n_mfcc=40)  # MFCC = Mel-Frequency Cepstral Coefficients
mfccs.shape

In [None]:
mfccs

In [None]:
metadata

In [None]:
def feature_extractor(filename, n_mfcc=40):
    """Return one fixed-length MFCC feature vector for an audio file.

    The file is decoded with ``librosa.load`` (library defaults), its MFCC
    matrix is computed, and the coefficients are averaged so that clips of
    any duration map to a vector of the same length.

    Parameters
    ----------
    filename : str or path-like
        Path of the audio file to read.
    n_mfcc : int, optional
        Number of MFCCs to compute. Defaults to 40, the value the rest of
        the notebook assumes for the model input width.

    Returns
    -------
    numpy.ndarray
        Per-coefficient mean of the MFCC matrix (expected length ``n_mfcc``).
        Note this is a plain mean; no scaling or normalisation is applied.
    """
    audio_data, audio_sample_rate = librosa.load(filename)
    mfccs_features = librosa.feature.mfcc(y=audio_data, sr=audio_sample_rate, n_mfcc=n_mfcc)
    # Transpose so that axis 0 runs over frames, then average the frames away.
    return np.mean(mfccs_features.T, axis=0)

In [None]:
audio_dataset_path = 'UrbanSound8K/audio/'

In [None]:
from tqdm import tqdm

# Build one [feature_vector, class_label] record per row of the metadata table.
extracted_features = []
# iterrows() is a generator without a length, so pass the total explicitly;
# otherwise tqdm can only show a running count with no percentage or ETA.
for idx, row in tqdm(metadata.iterrows(), total=len(metadata)):
    # Clips are stored as <audio_dataset_path>/fold<N>/<slice_file_name>; let
    # os.path.join supply the separators instead of concatenating '/' by hand.
    filename = os.path.join(
        os.path.abspath(audio_dataset_path),
        'fold' + str(row['fold']),
        str(row['slice_file_name']),
    )
    class_label = row['class']
    data = feature_extractor(filename)
    extracted_features.append([data, class_label])

In [None]:
extracted_features_df = pd.DataFrame(extracted_features, columns=['features', 'class'])
extracted_features_df

In [None]:
X = np.array(extracted_features_df['features'].tolist())
y= np.array(extracted_features_df['class'].tolist())

In [None]:
X.shape

In [None]:
y.shape

In [None]:
#y = pd.get_dummies(y)

from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder



In [None]:
# Preview the integer codes the encoder assigns to the class names.
# The encoder is instantiated here because `labelencoder` is otherwise first created
# in a later cell, which makes this cell raise NameError on a fresh kernel.
labelencoder = LabelEncoder()
a = labelencoder.fit_transform(y)
a = pd.DataFrame(a)
a[0].unique()

In [None]:
# Encode the class names as integer codes and expand them with to_categorical.
labelencoder = LabelEncoder()
# NOTE: `y` is overwritten here, so re-running this cell would feed the already-encoded
# array back into the encoder; rebuild `y` from the features frame before re-running.
y = to_categorical(labelencoder.fit_transform(y))

In [None]:
y

In [None]:
total_labels = y.shape[1]

In [None]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2, random_state = 321)

In [None]:
# Split the training portion a second time: the *_train_tuner arrays and X_val / y_val
# are both drawn from X_train / y_train and are the ones passed to the tuner search.
X_train_tuner,X_val,y_train_tuner,y_val = train_test_split(X_train,y_train,test_size=0.2, random_state = 321)

In [None]:
X_train_tuner.shape

In [None]:
y_train_tuner.shape

In [None]:
y_test.shape

In [None]:
X_val.shape

In [None]:
import tensorflow as tf


In [None]:
!pip install -U keras-tuner

In [None]:
from tensorflow.keras import layers

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
import keras_tuner
from keras_tuner import RandomSearch

In [None]:
hp = keras_tuner.HyperParameters()


In [None]:
def build_model(hp):
    """Assemble and compile a dense classifier from tuner-supplied hyperparameters.

    Hyperparameters requested from ``hp`` (in this order):
      * ``num_layers``  - number of hidden Dense layers, 1 to 10.
      * ``units_<i>``   - width of hidden layer ``i``, 32 to 512 in steps of 32.
      * ``dropout_<i>`` - whether a Dropout(0.25) follows hidden layer ``i``.
      * ``ls``          - Adam learning rate, log-sampled between 1e-5 and 1e-3.

    The output layer has ``total_labels`` softmax units (module-level variable).

    Returns the compiled model (categorical cross-entropy loss, accuracy metric).
    """
    network = Sequential()
    network.add(layers.Flatten())

    depth = hp.Int("num_layers", 1, 10)
    for layer_idx in range(depth):
        width = hp.Int(f'units_{layer_idx}', min_value=32, max_value=512, step=32)
        network.add(layers.Dense(width, activation='relu'))

        use_dropout = hp.Boolean(f'dropout_{layer_idx}')
        if use_dropout:
            network.add(layers.Dropout(0.25))

    # One softmax unit per class.
    network.add(layers.Dense(units=total_labels, activation='softmax'))

    step_size = hp.Float('ls', min_value=0.00001, max_value=0.001, sampling='log')
    network.compile(
        optimizer=Adam(learning_rate=step_size),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return network

In [None]:
# Random search over the space defined by build_model: 10 trials, ranked by validation accuracy.
tuner = RandomSearch(
    hypermodel=build_model,
    objective='val_accuracy',
    max_trials=10,
    # Relative directory keeps the notebook portable; the previous value "F:" only
    # made sense on a machine that has that drive letter.
    directory="keras_tuner_results",
    project_name="tuned_ANN_speech"
    )

In [None]:
tuner.search_space_summary()

In [None]:
tuner.search(X_train_tuner, y_train_tuner, epochs=50, validation_data=(X_val, y_val), verbose=1)

In [None]:
tuner.results_summary()

In [None]:
best_hp = tuner.get_best_hyperparameters()[0]
model = tuner.hypermodel.build(best_hp)

In [None]:
model.build(input_shape=(None, 40))
model.summary()

In [None]:
# Train the model built from the best hyperparameters on the full training split.
# NOTE(review): the test split is passed as validation data, so the val_* metrics logged
# per epoch are computed on the same data that is later used for the final evaluation.
model.fit(X_train, y_train, batch_size=64, epochs=100, validation_data=(X_test, y_test))

In [None]:
# best epoch
# Epoch 94/100
# 110/110 [==================] - 1s 5ms/step - loss: 0.0183 - accuracy: 0.9930 - val_loss: 0.3895 - val_accuracy: 0.9393

In [None]:
model.save('ANN_hypertuned_model')

In [None]:
# NOTE: despite its name, this holds everything evaluate() returns; the model is compiled
# with metrics=['accuracy'], so presumably [loss, accuracy] — use index 1 for the accuracy.
test_accuracy = model.evaluate(X_test, y_test, verbose=1)

In [None]:
"""model = Sequential()
model.add(layers.Flatten())

for i in range(0,7):
    model.add(layers.Dense(units= 280 + (i*10), activation='relu'))
    model.add(layers.Dropout(0.25))
    

model.add(layers.Dense(units=total_labels, activation='softmax'))


model.compile(optimizer=Adam(learning_rate=6.023588532722681e-05), loss='categorical_crossentropy', metrics=['accuracy'])"""

In [None]:
'''model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_test,y_test))'''

In [None]:
# Run the trained model on one standalone audio file and map the result back to a class name.
filenames = 'UrbanSound8K/mixkit-laser-weapon-shot-1681.wav'
audio_data = feature_extractor(filenames)
# Reshape the 1-D feature vector into a single-row 2-D array (a batch of one).
audio_data = audio_data.reshape(1,-1)
# Index of the highest score in the model output.
predicted_label = np.argmax(model.predict(audio_data), axis=-1)
print(predicted_label)
# Translate the integer code back to the original class string.
predicted_class = labelencoder.inverse_transform(predicted_label)
predicted_class

In [None]:
'''Trial 03 summary
Hyperparameters:
num_layers: 7
units: 352
dropout: False
ls: 6.023588532722681e-05
Score: 0.9126700162887573'''

In [None]:
'''model = Sequential([
    
    Dense(units=100, activation= 'relu', input_shape = (40,)),
    Dropout(0.5),
    Dense(units=200, activation= 'relu'),
    Dropout(0.5),
    Dense(units=100, activation= 'relu'),
    Dropout(0.5),
    Dense(units=total_labels, activation= 'softmax')
])'''

In [None]:
'''model.summary()'''

In [None]:
'''model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')'''

In [None]:
'''from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime'''

In [None]:

'''
num_epochs = 100
num_batch = 32

checkpoint = ModelCheckpoint(filepath='saved_models/audio_classification.hdf5', verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch, epochs=num_epochs, validation_data=(X_test,y_test), callbacks=[checkpoint])

duration = datetime.now() - start
print("Total Training Time: ",duration)
'''

In [None]:
'''test_accuracy = model.evaluate(X_test, y_test, verbose=0)
test_accuracy[1]'''

In [None]:
'''filenames = 'UrbanSound8K/mixkit-laser-weapon-shot-1681.wav'
audio_data = feature_extractor(filenames)
audio_data = audio_data.reshape(1,-1)
predicted_label = np.argmax(model.predict(audio_data), axis=-1)
print(predicted_label)
predicted_class = labelencoder.inverse_transform(predicted_label)
predicted_class'''