## Bird Sound Classification Using Siamese Networks
<img align="center" alt="Python" width="20%" src="https://media.giphy.com/media/MYxWqhObg2EqVGkF0W/giphy.gif" />


### Import libraries

In [13]:
import os, sys, cv2, matplotlib.pyplot as plt, numpy as np, shutil, itertools, pickle, pandas as pd, seaborn as sn
from random import seed, random, randint
from scipy.spatial import distance
import random

import tensorflow as tf
from keras import backend as K
from keras.models import Model, load_model, Sequential
from keras.layers import Input, Dense, Masking, Conv1DTranspose, Conv1D, Flatten, Reshape, LSTM, TimeDistributed

from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import normalize
from mpl_toolkits.mplot3d import Axes3D

import librosa 
import librosa.display


### Read audio files

In [14]:
# Collect the .wav recordings under ./BirdData/, split them into two classes,
# keep the complete lists for testing, and draw a random training subset.

# Maximum number of recordings drawn per class for training.
TRAIN_SAMPLES_PER_CLASS = 40

bird1 = []  # paths whose full name contains "Class_Chickadee"
bird2 = []  # every other .wav path found under the data directory

# NOTE: `dir` shadows the builtin of the same name; the name is left unchanged
# because cells outside this one may still read it.
dir = os.getcwd() + "/BirdData/"

# Read all audio files from directory
for root, dirs, files in os.walk(dir, topdown=False):
    for name in files:
        # Match on the extension only, so names such as "x.wav.bak" are skipped.
        if not name.endswith(".wav"):
            continue
        fullname = os.path.join(root, name)
        if "Class_Chickadee" in fullname:
            bird1.append(fullname)
        else:
            bird2.append(fullname)


# Put all samples in test data (copies, so the test lists are independent of
# the training subsets drawn below; the test lists include the training files).
test_bird1 = list(bird1)
test_bird2 = list(bird2)


# Train on a random subset of each class. The size is clamped to the number of
# available recordings so a small (or missing) class does not raise ValueError.
bird1 = random.sample(bird1, min(TRAIN_SAMPLES_PER_CLASS, len(bird1)))
bird2 = random.sample(bird2, min(TRAIN_SAMPLES_PER_CLASS, len(bird2)))

### Extract spectrograms using librosa

In [15]:
def _normalized_spectrogram(signal):
    """Return the peak-normalised magnitude STFT of *signal*, transposed.

    The STFT is computed with ``n_fft=512``, its magnitude is taken, and the
    result is transposed and divided by its maximum value. A spectrogram whose
    maximum is 0 (e.g. a silent clip) is returned unscaled instead of being
    divided by zero, which would fill it with NaN.
    """
    spectrogram = np.abs(librosa.stft(signal, n_fft=512)).T
    peak = np.amax(spectrogram)
    if peak > 0:
        spectrogram = spectrogram / peak
    return spectrogram


X1 = []  # spectrograms of the bird1 recordings
X2 = []  # spectrograms of the bird2 recordings
y = []   # one label per recording: 1 for bird1, 0 for bird2

# Extract spectrograms of bird 1 (Chickadee)
for fname in bird1:
    y1, sr1 = librosa.load(fname)
    S1 = _normalized_spectrogram(y1)

    X1.append(S1)
    y.append(1)  # Set target label to 1


# Extract spectrograms of bird 2 (Titmouse)
for fname in bird2:
    y1, sr1 = librosa.load(fname)
    S1 = _normalized_spectrogram(y1)

    X2.append(S1)
    y.append(0)  # Set target label to 0


y = np.array(y)

### We will use a Siamese network for classification, so each input is a pair of spectrograms. If both members of a pair belong to the same class, the target output is set to 1; otherwise, it is set to 0.

<img align="center" alt="Python" width="40%" src="https://miro.medium.com/max/1400/1*I7a9aVN2poHUtiHSq2q44Q.png" />



In [16]:
# Build the training pairs for the siamese network from the two lists of
# recordings: same-class pairs are positives, cross-class pairs are negatives.

# Generate positive samples of chickadee (every unordered pair within bird1)
positive_chickadee = list(itertools.combinations(bird1, 2))

# Generate positive samples of titmouse (every unordered pair within bird2)
positive_titmouse = list(itertools.combinations(bird2, 2))


positive = positive_chickadee + positive_titmouse

# Generate negative samples: start from every (bird1, bird2) cross pair, then
# keep a random subset as large as the positive set. The size is taken from
# `positive` itself (not chickadee * 2) so the two sets stay balanced when the
# classes differ in size, and is clamped to the pool size so random.sample
# cannot raise ValueError.
negative = list(itertools.product(bird1, bird2))
negative = random.sample(negative, min(len(positive), len(negative)))

print("Positive pair ", len(positive))
print("Negative pair ", len(negative))


Positive pair  1560
Negative pair  1560
