In [8]:
pip install --upgrade protobuf

Collecting protobuf
  Using cached protobuf-4.22.1-cp39-cp39-win_amd64.whl (420 kB)
Installing collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 3.19.0
    Uninstalling protobuf-3.19.0:
      Successfully uninstalled protobuf-3.19.0
Successfully installed protobuf-4.22.1
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
mediapipe 0.8.11 requires protobuf<4,>=3.11, but you have protobuf 4.22.1 which is incompatible.
tensorflow-gpu 2.5.0 requires absl-py~=0.10, but you have absl-py 1.4.0 which is incompatible.
tensorflow-gpu 2.5.0 requires flatbuffers~=1.12.0, but you have flatbuffers 23.3.3 which is incompatible.
tensorflow-gpu 2.5.0 requires grpcio~=1.34.0, but you have grpcio 1.53.0 which is incompatible.
tensorflow-gpu 2.5.0 requires numpy~=1.19.2, but you have numpy 1.23.5 which is incompatible.
tensorflow-gpu 2.5.0 requires six~=1.15.0, but you have six 1.16.0 which is incompatible.
tensorflow-gpu 2.5.0 requires tensorflow-estimator<2.6.0,>=2.5.0rc0, but you have tensorflow-estimator 2.12.0 which is incompatible.
tensorflow-gpu 2.5.0 requires typing-extensions~=3.7.4, but you have typing-extensions 4.4.0 which is incompatible

In [9]:
import os
from keras.models import Sequential,Model
from keras.layers import Conv2D,MaxPool2D,GlobalMaxPool2D,Flatten,Dense,Dropout,Input,Lambda,BatchNormalization
from keras.callbacks import ModelCheckpoint,EarlyStopping, ReduceLROnPlateau
import keras.backend as K
import librosa
import numpy as np
import random
import string
import matplotlib.pyplot as plt
import librosa.display
from sklearn.utils import shuffle
import cv2

In [10]:
#convert song to mel spectogram as siamese network doesn't work on sound directly
def create_spectrogram(clip,sample_rate,save_path):
    plt.interactive(False)
    fig=plt.figure(figsize=[0.72,0.72])
    ax=fig.add_subplot(111)
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    ax.set_frame_on(False)
    S=librosa.feature.melspectrogram(y=clip,sr=sample_rate)
    librosa.display.specshow(librosa.power_to_db(S,ref=np.max))
    fig.savefig(save_path,dpi=400,bbox_inches='tight',pad_inches=0)
    plt.close()
    fig.clf()
    plt.close(fig)
    plt.close('all')
    del save_path,clip,sample_rate,fig,ax,S

In [11]:
#encoder of siamese network
# def get_encoder(input_size):
#     #convolutional neural network layers
#     model=Sequential()
#     model.add(Conv2D(32,(3,3),input_shape=(150,150,3),activation='relu'))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.5))
#     model.add(Conv2D(64,(3,3),activation='relu'))
#     model.add(MaxPool2D(2,2))
#     model.add(Dropout(0.5))

#     model.add(Conv2D(64,(3,3),activation='relu'))
#     model.add(BatchNormalization())
#     model.add(Dropout(0.5))
#     model.add(Conv2D(64,(3,3),activation='relu'))
#     model.add(MaxPool2D(2,2))
#     model.add(Dropout(0.5))


#     model.add(GlobalMaxPool2D())

#     return model

In [12]:
from keras.applications import VGG16
def get_encoder(input_size):
    # Use VGG16 as the encoder
    base_model = VGG16(weights='imagenet', include_top=False, input_shape=input_size)
    for layer in base_model.layers:
        layer.trainable = False
    x = base_model.output
    x = GlobalMaxPool2D()(x)
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.5)(x)
    encoder = Model(inputs=base_model.input, outputs=x)
    return encoder

In [13]:
def get_siamese_network(encoder,input_size):
    
    #define tensors of 2 input
    input1=Input(input_size)
    input2=Input(input_size)

    #generate encoding i.e (feature vector) of the 2 imgs
    encoder_l=encoder(input1)
    encoder_r=encoder(input2)
    
    #add customized layer to compute absolute diff between encoding
    L1_layer = Lambda(lambda tensors:K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoder_l, encoder_r])
        
    # Add a dense layer with a sigmoid unit to generate the similarity score
    output=Dense(1,activation='sigmoid')(L1_distance)
    
    #connect inputs with output
    siam_model=Model(inputs=[input1,input2],outputs=output)
    return siam_model

def accuracy(y_true, y_pred):
    """
    Custom metric function to calculate accuracy.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    false_negatives = K.sum(K.round(K.clip(y_true * (1-y_pred), 0, 1)))
    true_negatives = K.sum(K.round(K.clip((1-y_true) * (1-y_pred), 0, 1)))
    false_positives = K.sum(K.round(K.clip((1-y_true) * y_pred, 0, 1)))

    accuracy = (true_positives + true_negatives) / (true_positives + true_negatives + false_positives + false_negatives + K.epsilon())
    return accuracy

encoder=get_encoder((150,150,3))
siamese_net=get_siamese_network(encoder,(150,150,3))
siamese_net.compile(loss='binary_crossentropy',optimizer='adam',metrics=[accuracy])

In [14]:
from keras.preprocessing.image import ImageDataGenerator
# Use data augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True)

In [15]:
#tar= target var
def different_label_index(X):
    idx1=0
    idx2=0
    while idx1==idx2:
        idx1=np.random.randint(0,len(X))
        idx2=np.random.randint(0,len(X))
    return idx1,idx2

def load_img(path):
    img=cv2.imread(path)
    img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
    img=cv2.resize(img,(150,150))
    return img


def batch_generator(X,batch_size):
    while True:
        data=[np.zeros((batch_size,150,150,3)) for i in range(2)]
        tar=[np.zeros(batch_size,)]

        #Generating same pairs.
        for i in range(0,batch_size//2):
            idx1=np.random.randint(0,len(X))
            img1=load_img(X[idx1])
            img1=img1/255

            data[0][i,:,:,:]=img1
            data[1][i,:,:,:]=img1
            tar[0][i]=1

        #Generating different pairs.
        for k in range(batch_size//2,batch_size):
            idx1,idx2=different_label_index(X)
            img1=load_img(X[idx1])
            img1=img1/255
            img2=load_img(X[idx2])
            img2=img2/255

            data[0][k,:,:,:]=img1
            data[1][k,:,:,:]=img2
            tar[0][k]=0
        np.delete(data[0],np.where(~data[0].any(axis=1))[0], axis=0) #Remove the data points in case they have zero value.
        np.delete(data[1],np.where(~data[1].any(axis=1))[0], axis=0) 
        yield data,tar

In [16]:
import scipy
# Lists all the files in the folder.
songs_list = [f for f in os.listdir('C:/Users/nisar/sem6 project/Siamese Network/seismese_net_songs') if not f.startswith('.')]



counter = 1
def get_spec_name(song_name):
    global counter
    spec_name = f"{song_name}_{counter}.png"
    counter += 1
    return spec_name

for song in songs_list:
    print(song)
    songfile, sr = librosa.load('C:/Users/nisar/sem6 project/Siamese Network/seismese_net_songs/'+song)
    duration = librosa.get_duration(y=songfile, sr=sr)
    # Apply pre-emphasis filter
    preemphasis_coeff = 0.07
    preemphasis_filter = np.array([1, -preemphasis_coeff])
    songfile = scipy.signal.lfilter(preemphasis_filter, [1], songfile.ravel())
    song_name = os.path.splitext(song)[0]
    prev = 0
    for i in range(1, int((duration // 10) + 1)):
        if i == int((duration // 10)):
            """Since we are dividing the song in 10s segment there might be case that after taking 10
            fragments also few more seconds are left so in this case extra becomes extra=extra+(10-extra) 
            from the previous segment."""
            extra = int((int(duration) / 10 - int(int(duration) / 10)) * 10) 
            st = (sr * i * 10) - (10 - extra)
            end = st + 10
            songfrag = np.copy(songfile[st:end])
        else:
            songfrag = np.copy(songfile[prev:(sr * i * 10)])
        
            specname = get_spec_name(song_name)
            create_spectrogram(songfrag, sr, 'C:/Users/nisar/sem6 project/Siamese Network/test_spect/' + specname)
        
        prev = sr * i * 10



#Eminem, KXNG Crooked, Royce Da 5_9##, Joell Ortiz# - #I Will (feat. KXNG Crooked, Royce Da 5_9## _ Joell Ortiz)#.mp3
#Eminem, Royce Da 5_9##, Black Thought, Q-Tip, Denaun# - #Yah Yah (feat. Royce Da 5_9##, Black Thought, Q-Tip _ Denaun)#.mp3
#Eminem, Royce Da 5_9##, White Gold# - #You Gon’ Learn (feat. Royce Da 5_9## _ White Gold)#.mp3
A.R. Rahman,Arijit Singh - Enna Sona.mp3
Aditi Singh Sharma,Amitabh Bhattacharya - Offo.mp3
Amit Trivedi - Naina Da Kya Kasoor.mp3
Amit Trivedi - Namo Namo.mp3
Anuv Jain - Alag Aasmaan.mp3
AP Dhillon,Gurinder Gill,Intense - Excuses.mp3
Eminem, Juice WRLD - Godzilla (feat. Juice WRLD).mp3
Imagine Dragons - Bones.mp3
Indila - Tourner Dans Le Vide.mp3
Mohit Chauhan - Masakali.mp3
Sanam - Gulabi Aankhen.mp3
Shaan,KK - Dus Bahane.mp3
Shankar-Ehsaan-Loy,Shankar Mahadevan - Aaj Kal Zindagi.mp3
Sohail Sen,Benny Dayal,Aditi Singh Sharma,Irshad Kamil - Choomantar.mp3
Sohail Sen,Rahat Fateh Ali Khan,Irshad Kamil - Isq Risk.mp3
Sonu Nigam,Jayesh Gandhi,Amrita Kak -

In [18]:
batch_size=10
specfilelist=os.listdir('C:/Users/nisar/sem6 project/test_spect/')
specfilelist=['C:/Users/nisar/sem6 project/test_spect/'+filename for filename in specfilelist]
specfilelist=shuffle(specfilelist)

X_train=specfilelist[0:int(0.80*len(specfilelist))]
X_test=specfilelist[int(0.80*len(specfilelist)):]
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, min_lr=1e-6)
es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10, min_delta=0.0001) 
mc = ModelCheckpoint('embdmodel_1.hdf5', monitor='val_loss', verbose=1, save_best_only=True, mode='min')
history=siamese_net.fit_generator(batch_generator(X_train,batch_size),steps_per_epoch=len(X_train)//batch_size,epochs=80,validation_data=batch_generator(X_test,batch_size),
 validation_steps=len(X_test)//batch_size,callbacks=[es,mc],shuffle=True)

Epoch 1/80


  history=siamese_net.fit_generator(batch_generator(X_train,batch_size),steps_per_epoch=len(X_train)//batch_size,epochs=80,validation_data=batch_generator(X_test,batch_size),


Epoch 1: val_loss improved from inf to 0.34454, saving model to embdmodel_1.hdf5
Epoch 2/80
Epoch 2: val_loss improved from 0.34454 to 0.33845, saving model to embdmodel_1.hdf5
Epoch 3/80
Epoch 3: val_loss did not improve from 0.33845
Epoch 4/80

KeyboardInterrupt: 