# Audio Classification Data Preprocessing

In [None]:
### Load one sample clip with librosa so the decoded waveform can be inspected
import librosa

audio_file_path = 'UrbanSound8K/100263-2-0-3.wav'

# librosa.load hands back a pair: the sample array and the sample rate
librosa_audio_data, librosa_sample_rate = librosa.load(audio_file_path)

In [None]:
# Print the sample array that librosa decoded from the clip
print(librosa_audio_data)

In [None]:
### Plot the waveform as decoded by librosa
import matplotlib.pyplot as plt

# librosa output: a single (mono) channel, drawn on a wide 12x4 figure
plt.figure(figsize=(12, 4)).gca().plot(librosa_audio_data)

# Observation
Here Librosa converts the signal to mono, meaning the number of channels will always be 1.

In [None]:
### Read the same clip with scipy for comparison
from scipy.io import wavfile as wav

# wav.read returns the sample rate first, then the sample array
wave_sample_rate, wave_audio = wav.read(audio_file_path)

In [None]:
# Display the sample array exactly as scipy read it
wave_audio

In [None]:
import matplotlib.pyplot as plt

# Waveform as read by scipy (the original note describes this clip as
# having 2 channels); same 12x4 figure size as the librosa plot
plt.figure(figsize=(12, 4)).gca().plot(wave_audio)

# Extract Features
Here we will be using Mel-Frequency Cepstral Coefficients (MFCC) extracted from the audio samples. The MFCC summarises the frequency distribution across the window size, so it is possible to analyse both the frequency and time characteristics of the sound. These audio representations will allow us to identify features for classification.

In [None]:
# Extract MFCC features for the single sample clip loaded earlier.
mfccs = librosa.feature.mfcc(
    y=librosa_audio_data,
    sr=librosa_sample_rate,
    n_mfcc=40,
)
print(mfccs.shape)

In [None]:
# Display the MFCC matrix computed for the sample clip
mfccs

In [None]:
#### Extracting MFCCs for every audio file: locate the dataset and load its metadata
import os

import librosa
import pandas as pd

# Root folder of the audio clips, and the CSV describing each clip
audio_dataset_path = 'UrbanSound8K/audio/'
metadata = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')

metadata.head()

In [None]:
# Helper that turns one audio file into a fixed-length MFCC feature vector.
def features_extractor(file):
    """Return the time-averaged MFCC feature vector of one audio file.

    Parameters
    ----------
    file : str or path-like
        Path of the audio file; passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        1-D array holding one value per MFCC coefficient (40 values, which
        matches the ``input_shape=(40,)`` of the model defined later).
    """
    # Load from the function's own argument rather than a global variable.
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    # Average the transposed matrix over axis 0 so clips of any length
    # collapse to a single value per coefficient.
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    return mfccs_scaled_features

In [None]:
# Iterate over every row of the metadata table, build the path of the clip it
# describes, and collect [mfcc_feature_vector, class_label] pairs.
import numpy as np
from tqdm import tqdm

### Extract Mel-Frequency Cepstral Coefficients for every audio file
dataset_root = os.path.abspath(audio_dataset_path)
extracted_features = []
# iterrows() is a generator with no length, so pass total= for a real progress bar
for index_num, row in tqdm(metadata.iterrows(), total=len(metadata)):
    # Do not pass '/' as a component: os.path.join discards everything before
    # an absolute component, which would reduce the path to '/<slice_file_name>'.
    file_name = os.path.join(dataset_root,
                             'fold' + str(row['fold']),
                             str(row['slice_file_name']))
    final_class_labels = row['class']
    data = features_extractor(file_name)
    extracted_features.append([data, final_class_labels])
    

In [None]:
### Turn the collected [feature, class] pairs into a two-column DataFrame
extracted_features_df = pd.DataFrame(
    data=extracted_features,
    columns=['feature', 'class'],
)
extracted_features_df.head()

In [None]:
### Separate the independent (features) and dependent (labels) variables
feature_column = extracted_features_df['feature']
class_column = extracted_features_df['class']

X = np.array(feature_column.tolist())
y = np.array(class_column.tolist())

In [None]:
### Label encoding
# (An alternative noted by the author: y = np.array(pd.get_dummies(y)).)
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Step 1: LabelEncoder maps each distinct class name in y to an integer,
# e.g. 'cat', 'dog', 'bird' would become integers such as 0, 1, 2.
labelencoder = LabelEncoder()
integer_labels = labelencoder.fit_transform(y)

# Step 2: to_categorical expands those integers into one-hot vectors.
y = to_categorical(integer_labels)

In [None]:
### Hold out 20% of the samples for testing (fixed seed for repeatability)
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Display the training feature matrix
X_train

In [None]:
# Display the one-hot encoded label matrix
y

In [None]:
# Shape of the training feature matrix
X_train.shape

In [None]:
# Shape of the held-out feature matrix
X_test.shape

In [None]:
# Shape of the training label matrix
y_train.shape

In [None]:
# Shape of the held-out label matrix
y_test.shape

# Model Creation

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [None]:
 ### Number of classes = width of the one-hot label matrix
num_labels = y.shape[1]

In [None]:
model = Sequential()

### Three hidden stages, each: Dense -> ReLU -> Dropout(0.5)
hidden_units = (100, 200, 100)
for layer_idx, units in enumerate(hidden_units):
    if layer_idx == 0:
        # only the first layer declares the input: one 40-value vector per sample
        model.add(Dense(units, input_shape=(40,)))
    else:
        model.add(Dense(units))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

### Output layer: one unit per class, softmax over the classes
model.add(Dense(num_labels))
model.add(Activation('softmax'))

In [None]:
# Print the layer-by-layer summary of the network
model.summary()

In [None]:
# Labels are one-hot encoded, hence categorical cross-entropy; track accuracy
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
## Training the model

# ModelCheckpoint saves the model to disk while training runs. With
# save_best_only=True a save only happens when the callback's monitored
# quantity improves.
# NOTE(review): newer Keras releases may reject the '.hdf5' extension -- confirm
# against the installed version before changing the path.
# NOTE(review): validation_data is the test split, so checkpoint selection sees
# the test data -- consider a separate validation split.

from datetime import datetime
from tensorflow.keras.callbacks import ModelCheckpoint

num_epochs = 100
num_batch_size = 32

checkpointer = ModelCheckpoint(
    filepath='saved_models/audio_classification.hdf5',
    verbose=1,
    save_best_only=True,
)

# Wall-clock timing around the fit call
start = datetime.now()
model.fit(
    X_train,
    y_train,
    batch_size=num_batch_size,
    epochs=num_epochs,
    validation_data=(X_test, y_test),
    callbacks=[checkpointer],
    verbose=1,
)
duration = datetime.now() - start

print("Training completed in time: ", duration)

In [None]:
# evaluate() yields the loss followed by the compiled metrics, so index 1
# is the accuracy
test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(test_accuracy[1])

In [None]:
filename = "UrbanSound8K/dog_bark.wav"
prediction_feature = features_extractor(filename)

# The model expects a batch of samples, i.e. a 2-D array. reshape(1, -1) turns
# the single feature vector into a batch of one row, with the column count
# inferred from the vector's length.
prediction_feature = prediction_feature.reshape(1, -1)

# Sequential.predict_classes() has been removed from tf.keras. Taking the argmax
# of the softmax probabilities returned by predict() gives the same class index.
np.argmax(model.predict(prediction_feature), axis=-1)

In [None]:
# List the distinct class names present in the metadata table
metadata['class'].unique()

# Testing Some Test Audio Data
Steps

Preprocess the new audio data


predict the classes


Inverse-transform your predicted label

In [None]:
filename = "UrbanSound8K/drilling_1.wav"

# Step 1: preprocess the new clip the same way as the training data:
# 40 MFCCs, averaged over time into a single feature vector.
audio, sample_rate = librosa.load(filename, res_type='kaiser_fast')
mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)

print(mfccs_scaled_features)
# The model expects a batch, so reshape the vector into a single-row 2-D array
mfccs_scaled_features = mfccs_scaled_features.reshape(1, -1)
print(mfccs_scaled_features)
print(mfccs_scaled_features.shape)

# Step 2: predict the class index. Sequential.predict_classes() has been removed
# from tf.keras; the argmax of the softmax output is the equivalent.
predicted_label = np.argmax(model.predict(mfccs_scaled_features), axis=-1)
print(predicted_label)

# Step 3: map the integer index back to the original class name
prediction_class = labelencoder.inverse_transform(predicted_label)
prediction_class