In [5]:
# Switch the notebook's working directory so the clone in the next cell is
# created under /content/drive/MyDrive.
# NOTE(review): presumably this is the Colab Google Drive mount and must be
# mounted before this cell runs — confirm.
%cd /content/drive/MyDrive

/content/drive/MyDrive


In [6]:
# Fetch the project repository (the training cell reads audio from its
# mydata/ folder) into the current working directory.
# NOTE(review): the recorded run ends with
# "cannot exec ... .git/hooks/post-checkout: Permission denied"; presumably the
# hook script is not executable on this mount — confirm the checkout is complete.
!git clone https://github.com/Amahseyn/Cat_Dog_Detection


Cloning into 'Cat_Dog_Detection'...
remote: Enumerating objects: 834, done.[K
remote: Counting objects: 100% (640/640), done.[K
remote: Compressing objects: 100% (633/633), done.[K
remote: Total 834 (delta 1), reused 640 (delta 1), pack-reused 194 (from 1)[K
Receiving objects: 100% (834/834), 56.04 MiB | 14.15 MiB/s, done.
Resolving deltas: 100% (19/19), done.
Updating files: 100% (453/453), done.
Filtering content: 100% (449/449), 160.28 MiB | 9.32 MiB/s, done.
fatal: cannot exec '/content/drive/MyDrive/Cat_Dog_Detection/.git/hooks/post-checkout': Permission denied


In [9]:
import os
import numpy as np
import librosa
from skimage.transform import resize
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout, BatchNormalization, Input,MaxPool2D,Conv2D
from tensorflow.keras.applications import VGG16
from sklearn.model_selection import train_test_split
import tensorflow as tf
def time_stretch(audio, rate=1.1):
    """Time-stretch a waveform by delegating to ``librosa.effects.time_stretch``.

    Args:
        audio: Waveform samples, forwarded to librosa as ``y``.
        rate: Stretch factor forwarded unchanged to librosa (default 1.1).

    Returns:
        Whatever ``librosa.effects.time_stretch`` returns for these arguments.
    """
    stretched = librosa.effects.time_stretch(y=audio, rate=rate)
    return stretched

def pitch_shift(audio, sr, n_steps=2):
    """Pitch-shift a waveform by delegating to ``librosa.effects.pitch_shift``.

    Args:
        audio: Waveform samples, forwarded to librosa as ``y``.
        sr: Sampling rate of ``audio``, forwarded unchanged.
        n_steps: Shift amount forwarded unchanged to librosa (default 2).

    Returns:
        Whatever ``librosa.effects.pitch_shift`` returns for these arguments.
    """
    shifted = librosa.effects.pitch_shift(y=audio, sr=sr, n_steps=n_steps)
    return shifted

def add_noise(audio, noise_factor=0.005):
    """Return ``audio`` with scaled standard-normal noise added to every sample.

    Args:
        audio: 1-D sequence of samples; its length sets how many noise values
            are drawn from NumPy's global random state.
        noise_factor: Multiplier applied to the noise before it is added.

    Returns:
        ``audio + noise_factor * noise`` where ``noise`` comes from
        ``np.random.randn(len(audio))``.
    """
    return audio + noise_factor * np.random.randn(len(audio))

def normalize(feature):
    """Standardise ``feature`` to zero mean and unit standard deviation.

    The mean and standard deviation are computed over all elements
    (``np.mean`` / ``np.std`` with no axis argument).

    Args:
        feature: Array-like of numbers.

    Returns:
        Array of the same shape as ``feature``. When every element is equal
        (standard deviation of zero) an all-zero array is returned instead of
        dividing by zero and producing NaN/inf values.
    """
    feature = np.asarray(feature)
    centered = feature - np.mean(feature)
    spread = np.std(feature)
    if spread == 0:
        # Constant input: there is no variation to scale, so avoid 0/0 -> NaN.
        return np.zeros_like(centered)
    return centered / spread

def _extract_features(audio, sr, size):
    """Build the stacked feature image for one audio clip.

    Computes a 128-band mel spectrogram, 40 MFCCs and the spectral contrast of
    ``audio``, resizes each to ``(size, size)`` with a trailing channel axis and
    concatenates them along that axis in the order mel, MFCC, contrast.
    NOTE(review): the result is expected to be ``(size, size, 3)``; this relies
    on ``resize`` keeping the trailing channel axis — confirm.
    """
    mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128)
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
    spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr)
    channels = [
        resize(np.expand_dims(feature, axis=-1), (size, size))
        for feature in (mel_spectrogram, mfccs, spectral_contrast)
    ]
    return np.concatenate(channels, axis=-1)


def load_data(main_directory,files,augment=False,size = 64):
    """Load the selected ``.wav`` files and turn them into feature images.

    Every visible sub-directory of ``main_directory`` is treated as one class.
    Class folders are sorted by name before labels are assigned, so the label
    of a class is stable across calls and machines (for example ``cat`` -> 0,
    ``other`` -> 1). Plain files and hidden directories (names starting with
    ``.``) are ignored and never consume a label index.

    Args:
        main_directory: Directory containing one sub-directory per class.
        files: Iterable of bare file names to include. A name is matched in
            every class folder, so identical names in two folders are both
            loaded.
        augment: When true, each clip independently gets a random time stretch
            (probability 0.5) and additive noise (probability 0.5), drawn from
            NumPy's global random state. Pitch-shift augmentation and
            per-feature normalisation are currently not applied.
        size: Side length each feature map is resized to.

    Returns:
        ``(data, labels)`` as NumPy arrays: one feature stack per loaded file
        and the integer class index of its folder. Both are empty when no file
        matches.
    """
    wanted = set(files)  # O(1) membership instead of scanning a list per file
    class_folders = sorted(
        entry
        for entry in os.listdir(main_directory)
        if not entry.startswith(".")
        and os.path.isdir(os.path.join(main_directory, entry))
    )

    data = []
    labels = []
    for label, folder in enumerate(class_folders):
        folder_path = os.path.join(main_directory, folder)
        # Sorted so sample order (and the augmentation draws) is deterministic.
        for file in sorted(os.listdir(folder_path)):
            if not (file.endswith(".wav") and file in wanted):
                continue
            # sr=None keeps the file's native sampling rate.
            audio, sr = librosa.load(os.path.join(folder_path, file), sr=None)

            if augment:
                if np.random.random() > 0.5:
                    audio = time_stretch(audio, rate=np.random.uniform(0.8, 1.2))
                if np.random.random() > 0.5:
                    audio = add_noise(audio, noise_factor=np.random.uniform(0.001, 0.01))

            data.append(_extract_features(audio, sr, size))
            labels.append(label)

    return np.array(data), np.array(labels)


# Seed Python, NumPy and TensorFlow so the split, the augmentation draws inside
# load_data and the weight initialisation are repeatable across runs.
tf.keras.utils.set_random_seed(42)

main_directory = "/content/drive/MyDrive/Cat_Dog_Detection/mydata"
# Sorted listings: os.listdir order is arbitrary, and a seeded split is only
# reproducible when its input order is.
cat_files = sorted(os.listdir(os.path.join(main_directory, "cat")))
other_files = sorted(os.listdir(os.path.join(main_directory, "other")))
files = cat_files + other_files
file_classes = ["cat"] * len(cat_files) + ["other"] * len(other_files)
# NOTE(review): load_data selects by bare file name in every class folder, so a
# name that exists in both folders is loaded from both for whichever split it
# lands in — confirm the two folders share no file names.

# 60 % train / 20 % validation / 20 % test, stratified so each split keeps the
# cat/other ratio of the full dataset.
train_sample, test_sample, train_classes, test_classes = train_test_split(
    files, file_classes, test_size=0.2, stratify=file_classes, random_state=42)
train_sample, val_sample = train_test_split(
    train_sample, test_size=0.25, stratify=train_classes, random_state=42)
print(len(train_sample),len(test_sample),len(val_sample))

# Only the training set is augmented; validation and test use the raw clips.
x_train,y_train = load_data(main_directory,train_sample,augment=True)
x_val,y_val = load_data(main_directory,val_sample,augment=False)
x_test,y_test = load_data(main_directory,test_sample,augment=False)

# One-hot encode the integer class indices for the 2-unit softmax output.
y_train = to_categorical(y_train, num_classes=2)
y_val = to_categorical(y_val, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

# Small CNN: two Conv+BatchNorm stages, then a dropout-regularised dense head
# ending in a 2-way softmax.
input_shape = x_train[0].shape
input_layer = Input(shape=input_shape)

x = input_layer
for n_filters in (16, 32):
    x = Conv2D(filters=n_filters, kernel_size=3, padding='same', activation='relu')(x)
    x = BatchNormalization()(x)

# Classification head
x = Flatten()(x)
x = Dropout(0.5)(x)
x = Dense(units=32, activation='relu')(x)
output = Dense(units=2, activation='softmax')(x)

model = Model(inputs=input_layer, outputs=output)



model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# The recorded run shows val_loss lowest in the first epoch while the training
# loss keeps falling towards zero, i.e. the model overfits quickly. Stop once
# val_loss has not improved for `patience` epochs and roll back to the best
# weights, so evaluation below does not use an overfitted final epoch.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=20, restore_best_weights=True)

# epochs=250 is now only an upper bound; early stopping usually ends sooner.
history = model.fit(x_train, y_train, epochs=250, batch_size=16, validation_data=(x_val, y_val), callbacks=[early_stopping])

test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test accuracy: {test_acc:.4f}")


269 90 90
Epoch 1/250
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 204ms/step - accuracy: 0.7071 - loss: 3.1622 - val_accuracy: 0.8889 - val_loss: 0.2981
Epoch 2/250
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 190ms/step - accuracy: 0.9345 - loss: 0.6068 - val_accuracy: 0.8222 - val_loss: 0.3548
Epoch 3/250
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 243ms/step - accuracy: 0.9800 - loss: 0.0632 - val_accuracy: 0.7778 - val_loss: 0.4638
Epoch 4/250
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 192ms/step - accuracy: 0.9896 - loss: 0.0172 - val_accuracy: 0.8000 - val_loss: 0.4893
Epoch 5/250
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 184ms/step - accuracy: 0.9931 - loss: 0.0155 - val_accuracy: 0.7889 - val_loss: 0.5856
Epoch 6/250
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 320ms/step - accuracy: 1.0000 - loss: 8.1575e-04 - val_accuracy: 0.8556 - val_loss: 0.5139
Epoch 7/