In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    # Emit the full path of every file found directly in this directory.
    for filename in filenames:
        input_path = os.path.join(dirname, filename)
        print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Libraries for Classification and building Models
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, MaxPool2D, Dropout
from tensorflow.keras.utils import to_categorical 

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
import IPython.display as ipd
# One Red-billed Oxpecker recording; `filepath` is reused by the loading cells below.
filepath = "/kaggle/input/oxpeckersounds/OxpeckerSoundClassification/OxpeckerSounds/29XC645943 - Red-billed Oxpecker - Buphagus erythrorynchus.wav"
# Last expression of the cell: displays the audio object for this file.
ipd.Audio(filepath)

In [None]:
# Audio waveplot: load the example recording and draw its time-domain waveform
import librosa
import librosa.display
# librosa.load returns the samples together with the sampling rate it used;
# both `data` and `sample_rate` are reused by later cells.
data, sample_rate = librosa.load(filepath)
plt.figure(figsize=(12, 5))
librosa.display.waveshow(data, sr=sample_rate)

In [None]:
# Shape of the loaded signal, followed by the sample values themselves
print(data.shape, data, sep='\n')

In [None]:
# Sampling rate returned by librosa.load for the example file (author's note: librosa sr = 22050)
sample_rate

In [None]:
# Read the same file with scipy for comparison (author's note: scipy sr = 44100)
from scipy.io import wavfile as wav
# wav.read returns (sampling rate, samples) as stored in the file
wave_sample_rate, wave_audio = wav.read(filepath)
print(wave_sample_rate)
print(wave_audio)
import matplotlib.pyplot as plt
# Plot the samples exactly as scipy returned them
plt.figure(figsize=(12, 4))
plt.plot(wave_audio)

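When we print the data loaded with librosa, the samples are already normalized floating-point values (and the signal is resampled to 22050 Hz), whereas reading the same file with scipy returns the raw, un-normalized samples at the file's original sampling rate (44100 Hz).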

## Check for Class Imbalance

In [None]:
def count_files(root_dir):
    """Return the number of files found anywhere under ``root_dir``.

    Every file is counted regardless of its extension. A directory that does
    not exist yields 0, because ``os.walk`` produces no entries for it.
    """
    return sum(len(filenames) for _, _, filenames in os.walk(root_dir))


# One count per class; the following cells compare them to check for class imbalance.
num_oxpecker_sounds = count_files('/kaggle/input/oxpeckersounds/OxpeckerSoundClassification/OxpeckerSounds')
num_traffic_sounds = count_files('/kaggle/input/oxpeckersounds/OxpeckerSoundClassification/traffic')

In [None]:
# Report how many recordings were found for each class
print(
    f'Number of Oxpecker Sounds: {num_oxpecker_sounds}',
    f'Number of Traffic Sounds: {num_traffic_sounds}',
    sep='\n',
)

- The number of samples is equal for both classes. Therefore, there is no class imbalance in this case.

In [None]:
# Bar chart comparing the number of recordings in each class
counts = [num_oxpecker_sounds, num_traffic_sounds]
class_names = ["Oxpecker", "Traffic"]
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x=class_names, y=counts, ax=ax)
ax.set_title("Count of records in each class")
plt.show()

## Data Preprocessing

### Spectrogram

In [None]:
# Load one Oxpecker recording and display its linear-frequency spectrogram in dB.
data1, sample_rate1 = librosa.load('/kaggle/input/oxpeckersounds/OxpeckerSoundClassification/OxpeckerSounds/31XC718396 - Red-billed Oxpecker - Buphagus erythrorynchus17.wav')
plt.figure(figsize=(20, 10))
# STFT magnitude converted to decibels relative to the loudest bin
D = librosa.amplitude_to_db(np.abs(librosa.stft(data1)), ref=np.max)
plt.subplot(4, 2, 1)
# Pass the sampling rate that was actually loaded so the axes are scaled from
# this recording instead of from specshow's default, and label the time axis.
librosa.display.specshow(D, sr=sample_rate1, x_axis='time', y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.title('Linear-frequency power spectrogram - Oxpecker')

In [None]:
# Load one traffic recording and display its linear-frequency spectrogram in dB.
data1, sample_rate1 = librosa.load('/kaggle/input/oxpeckersounds/OxpeckerSoundClassification/traffic/sound_405.wav')
plt.figure(figsize=(20, 10))
# STFT magnitude converted to decibels relative to the loudest bin
D = librosa.amplitude_to_db(np.abs(librosa.stft(data1)), ref=np.max)
plt.subplot(4, 2, 1)
# Pass the sampling rate that was actually loaded so the axes are scaled from
# this recording instead of from specshow's default, and label the time axis.
librosa.display.specshow(D, sr=sample_rate1, x_axis='time', y_axis='linear')
plt.colorbar(format='%+2.0f dB')
plt.title('Linear-frequency power spectrogram - Traffic')

## **Mel-frequency cepstral coefficients (MFCCs)**

**MFCCs** – The MFCC summarizes the frequency distribution across the window size. So, it is possible to analyze both the frequency and time characteristics of the sound.

In [None]:
# 40 MFCCs for the example recording loaded earlier (`data`, `sample_rate`)
mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=40)
print(mfccs.shape, mfccs, sep='\n')

In [None]:
# Mean of the first row of the MFCC matrix
print(mfccs[0].mean())
# The first row itself, shown as the cell output
mfccs[0]

In [None]:
# Average each coefficient over all columns of `mfccs` (mean of the transpose along axis 0)
mfccs.T.mean(axis=0)

In [None]:
# Shape of the averaged feature vector (one entry per MFCC coefficient)
mfccs.T.mean(axis=0).shape

In [None]:
#We define a function to extract the mfcc features from the audio
def extract_mfcc_features(filename, n_mfcc=40):
    """Return one fixed-length MFCC feature vector for an audio file.

    The file is loaded with ``librosa.load`` (library defaults), its MFCC
    matrix is computed, and every coefficient is averaged over the matrix
    columns so recordings of any length map to a vector of ``n_mfcc`` values.

    Parameters
    ----------
    filename : path of the audio file to load.
    n_mfcc : number of MFCC coefficients to compute. Defaults to 40, which is
        what the rest of the notebook relies on (the CNN input is 10 x 4).

    Returns
    -------
    numpy.ndarray with ``n_mfcc`` entries.
    """
    # Load audio file
    signal, signal_rate = librosa.load(filename)
    # Extract the MFCC matrix (one row per coefficient)
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=signal_rate, n_mfcc=n_mfcc)
    # Collapse the matrix to a single vector: mean of the transpose along axis 0
    return np.mean(mfcc_matrix.T, axis=0)

In [None]:
# Extract the MFCC feature vector and the class label of every audio file we have
features = []
labels = []
data_dirs = ['OxpeckerSounds', 'traffic'] #Directories with the wav files
for my_data_dir in data_dirs:
    # Every file below the 'OxpeckerSounds' directory is an Oxpecker sample; anything else is Traffic
    class_label = 'Oxpecker' if my_data_dir == 'OxpeckerSounds' else 'Traffic'
    class_root = f'/kaggle/input/oxpeckersounds/OxpeckerSoundClassification/{my_data_dir}'
    for dirname, _, filenames in os.walk(class_root):
        for filename in filenames:
            my_filename = os.path.join(dirname, filename)
            features.append(extract_mfcc_features(my_filename))
            labels.append(class_label)

In [None]:
# Both lists must have the same length: one label per feature vector
print(
    f'Num Features: {len(features)}',
    f'Num Labels: {len(labels)}',
    sep='\n',
)

In [None]:
# Convert the collected Python lists to NumPy arrays for the model pipeline
x, y = np.asarray(features), np.asarray(labels)

In [None]:
# Shapes of the feature matrix and of the label vector
for arr in (x, y):
    print(arr.shape)

In [None]:
# Feature vector of the first sample
x[0]

In [None]:
# Label of the first sample
y[0]

## Dummy Variable Creation for Y

In [None]:
# Build the frame from the raw `labels` list rather than from `y`: this cell
# rebinds `y` at the end, so reading `y` here made the cell depend on its own
# output and break when it was run a second time.
y_df = pd.DataFrame({'y': labels})

# One indicator column per class; dropping 'y_Traffic' leaves the single
# column 'y_Oxpecker' (set for Oxpecker samples, unset for Traffic samples).
y = pd.get_dummies(y_df).drop(columns='y_Traffic')
y.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
# Shared encoder instance: fitted in the next cell and reused by classify_audio
# to map predicted indices back to labels.
labelencoder=LabelEncoder()

In [None]:
# Fit the encoder on the original string labels so that labelencoder.classes_
# holds the class names and inverse_transform returns 'Oxpecker' / 'Traffic'.
# Fitting it on the dummy frame `y` made the classes the 0/1 indicator values,
# so the predicted class could never be reported by name. Reading `labels`
# also keeps this cell re-runnable, since it no longer encodes its own output.
y = to_categorical(labelencoder.fit_transform(labels))
y[:5] #View the first 5 samples

In [None]:
# Shape of the one-hot label matrix produced by to_categorical
print(y.shape)

In [None]:
# Train/test split: hold out 20% of the samples, with a fixed seed for reproducibility
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Feature matrix shapes after the split
print(X_train.shape, X_test.shape, sep='\n')

In [None]:
# Turn each 40-value MFCC vector into a 10 x 4 single-channel "image" for the CNN.
# -1 lets NumPy infer the number of samples, so the cell keeps working when the
# dataset size or the test_size of the split changes.
X_train = X_train.reshape(-1, 10, 4, 1)
X_test = X_test.reshape(-1, 10, 4, 1)

In [None]:
# Per-sample input shape of the CNN; matches the (10, 4, 1) reshape applied to X_train / X_test
input_dim = (10, 4, 1)

## **CNN Model Training**

In [None]:
# Empty Sequential container; the layers are added in the next cell
model = Sequential()

In [None]:
# Two convolution + max-pooling stages, light dropout, then a dense head that
# ends in a 2-unit softmax (one unit per class).
cnn_layers = [
    Conv2D(64, (3, 3), padding="same", activation="tanh", input_shape=input_dim),
    MaxPool2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), padding="same", activation="tanh"),
    MaxPool2D(pool_size=(2, 2)),
    Dropout(0.1),
    Flatten(),
    Dense(1024, activation="tanh"),
    Dense(2, activation="softmax"),
]
for cnn_layer in cnn_layers:
    model.add(cnn_layer)

In [None]:
# Print the layer-by-layer summary of the model
model.summary()

In [None]:
# Visualizing the model architecture as a diagram
# NOTE(review): pydot is imported but not referenced directly; presumably plot_model needs it installed - confirm
import pydot
tf.keras.utils.plot_model(model)

In [None]:
# Adam optimizer with an explicit learning rate of 1e-4
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-4)

In [None]:
# Categorical cross-entropy matches the one-hot labels and the 2-unit softmax output; accuracy is tracked as a metric
model.compile(optimizer = optimizer, loss = 'categorical_crossentropy', metrics = ['accuracy'])


In [None]:
# Create a ModelCheckpoint callback that saves the best model based on validation accuracy.
# The model is compiled with metrics=['accuracy'], so the validation metric is logged as
# 'val_accuracy'; monitoring 'val_acc' meant the quantity was never available and no
# checkpoint was written. mode='max' states explicitly that higher accuracy is better.
from tensorflow.keras.callbacks import ModelCheckpoint
checkpoint = ModelCheckpoint(filepath='best_model.h5', monitor='val_accuracy', mode='max', save_best_only=True)

In [None]:
# Train for 30 epochs in batches of 50, with the checkpoint callback attached.
# NOTE(review): the test split is also passed as validation data, so the checkpoint is selected on the same data later used for evaluation - confirm this is intended
history = model.fit(X_train, y_train, epochs = 30, batch_size = 50, validation_data = (X_test, y_test), callbacks=[checkpoint])

In [None]:
# Plot every series stored in history.history on a single set of axes
curves_ax = pd.DataFrame(history.history).plot(figsize=(8, 5))
curves_ax.grid(True)
curves_ax.set_ylim(-0.2, 1.2)
curves_ax.set_title('Training Curves')
plt.show()

### **Evaluating Model Performance**

In [None]:
# Softmax outputs of the model for every test sample
predictions = model.predict(X_test)
# evaluate returns the loss followed by the metrics given to compile (here: accuracy)
score = model.evaluate(X_test, y_test)
print(score)

In [None]:
# Index of the highest-probability class for each test sample
preds = predictions.argmax(axis=1)
preds

In [None]:
# Recover the integer class index from each one-hot test label
y1 = y_test.argmax(axis=1)

In [None]:
from sklearn.metrics import accuracy_score
# accuracy_score is documented as accuracy_score(y_true, y_pred): ground truth first.
# The value is the same either way, but the documented order keeps the call
# correct if it is later swapped for a metric where the order matters.
test_accuracy = accuracy_score(y1, preds)
print(f'Test Accuracy: {test_accuracy*100}%')

### **Making Predictions With the Model**

In [None]:
def classify_audio(filename):
    """Classify one audio file with the trained CNN and print the result.

    The file goes through the same preprocessing as the training data
    (``extract_mfcc_features`` followed by a reshape to ``(1, 10, 4, 1)``),
    is passed to ``model.predict``, and the predicted class together with
    its probability is printed. Nothing is returned.

    Parameters
    ----------
    filename : path of the audio file to classify.
    """
    # Reuse the training-time feature extractor so both code paths cannot drift apart,
    # then add the batch and channel axes the CNN input expects.
    mfccs_scaled_features = extract_mfcc_features(filename).reshape(1, 10, 4, 1)
    x_predict = model.predict(mfccs_scaled_features)
    predicted_label = np.argmax(x_predict, axis=1)
    prediction_class = labelencoder.inverse_transform(predicted_label)[0]
    if not isinstance(prediction_class, str):
        # The encoder was fitted on the 'y_Oxpecker' indicator column, so its
        # classes are the 0/1 (or False/True) indicator values rather than the
        # class names: a truthy value means Oxpecker, a falsy one means Traffic.
        prediction_class = 'Oxpecker' if prediction_class else 'Traffic'
    # Report the probability of the class that was actually predicted. Indexing
    # by the predicted label is correct for either ordering of the output units.
    print(f'Prediction Probability: {x_predict[0][predicted_label[0]]}')
    print(f'Predicted Class: {prediction_class}')

In [None]:
# Try the classifier on a recording from the OxpeckerSounds directory
classify_audio('/kaggle/input/oxpeckersounds/OxpeckerSoundClassification/OxpeckerSounds/10XC240612 - Red-billed Oxpecker - Buphagus erythrorynchus2.wav')

In [None]:
# Try the classifier on a recording from the traffic directory
classify_audio('/kaggle/input/oxpeckersounds/OxpeckerSoundClassification/traffic/sound_403.wav')