In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/audio-cats-and-dogs/utils.py
/kaggle/input/audio-cats-and-dogs/train_test_split.csv
/kaggle/input/audio-cats-and-dogs/cats_dogs/dog_barking_102.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_43.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/dog_barking_29.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_47.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/dog_barking_50.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/dog_barking_83.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_83.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_84.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_129.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_74.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_124.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/dog_barking_30.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_89.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/cat_52.wav
/kaggle/input/audio-cats-and-dogs/cats_dogs/dog_barking_98.wav
/kaggle/input/audio-cats-a

In [2]:
import librosa
import librosa.display
import os
import pylab
from pathlib import Path
import numpy as np
!pip install split-folders
import splitfolders 
from tensorflow.keras.applications import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import  Dense, Flatten, Conv2D, MaxPooling2D
from keras.callbacks import ModelCheckpoint,EarlyStopping ,TensorBoard
from tensorflow.keras import regularizers
import tensorflow as tf
import datetime
from keras.models import load_model
import glob
import random
from PIL import Image
from IPython.display import display
from ipywidgets import widgets, HBox

Collecting split-folders
  Downloading split_folders-0.4.3-py3-none-any.whl (7.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.4.3


In [3]:
def audio_to_spectogam(path):
        y, sr = librosa.load(path, mono=True)
        mel=librosa.feature.melspectrogram(y=y, sr=sr)
        pylab.figure(figsize=(3,3))
        pylab.axis('off') 
        pylab.axes([0., 0., 1., 1.], frameon=False, xticks=[], yticks=[]) 
        log_power = librosa.power_to_db(mel, ref=np.max)
        librosa.display.specshow(log_power)
        filename=Path(path).stem
        if filename[0]=='c':
            folder_path = './data/cats'
        else: folder_path = './data/dogs'
        pylab.savefig(os.path.join(folder_path,filename) + ".jpg", bbox_inches=None, pad_inches=0)
        pylab.close()

In [4]:
if not os.path.exists("./data"):
    os.makedirs("./data")
if not os.path.exists("./data/cats"):
    os.makedirs("./data/cats")
if not os.path.exists("./data/dogs"):
    os.makedirs("./data/dogs")

for i in os.scandir('../input/audio-cats-and-dogs/cats_dogs'):
    if os.path.splitext(i)[1] == '.wav':
        audio_to_spectogam(i)

In [5]:
dogs_folder = "./data/dogs"
cats_folder = "./data/cats"
n_samples=4
cats_images=random.sample([os.path.join(cats_folder, f) for f in os.listdir(cats_folder)],n_samples)
dogs_images=random.sample([os.path.join(dogs_folder, f) for f in os.listdir(dogs_folder)],n_samples)

cat_images=[]
for i in range(len(cats_images)):
    cat_images.append(widgets.Image(value=open(cats_images[i], 'rb').read()))
hbox1 = HBox(cat_images)

dog_images=[]
for i in range(len(dogs_images)):
    dog_images.append(widgets.Image(value=open(dogs_images[i], 'rb').read()))
hbox2 = HBox(dog_images)

print("Cats")
display(hbox1)
print("Dogs")
display(hbox2)


Cats


HBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x…

Dogs


HBox(children=(Image(value=b'\xff\xd8\xff\xe0\x00\x10JFIF\x00\x01\x01\x01\x00H\x00H\x00\x00\xff\xdb\x00C\x00\x…

In [6]:
#split the combined folder into test, train, validation folders
splitfolders.ratio("./data", output="./dataset", seed=1337, ratio=(.7, .15, .15), group_prefix=None)

Copying files: 277 files [00:00, 6269.07 files/s]


In [7]:
# build the train generator to load and augment the images
train_datagen=ImageDataGenerator(rescale=1./255,
                                 horizontal_flip=False,
                                 preprocessing_function=preprocess_input)
train_generator=train_datagen.flow_from_directory("./dataset/train",
                                                  target_size=(150,150),color_mode="rgb",
                                                  batch_size=8,class_mode="categorical")

# build the validation generator 
val_datagen=ImageDataGenerator(rescale=1./255,preprocessing_function=preprocess_input)
val_generator=val_datagen.flow_from_directory("./dataset/val",
                                                  target_size=(150,150),color_mode="rgb",
                                                  batch_size=8,class_mode="categorical")

Found 193 images belonging to 2 classes.
Found 40 images belonging to 2 classes.


In [8]:
# using transfer learning on the VGG16 network with imagenet weights
base_model=VGG16(weights="imagenet",include_top=False,input_shape=(150,150,3))
#calculate the step per epoch size
step_size_train=train_generator.n//train_generator.batch_size
print("step size train:",step_size_train)
step_size_val=val_generator.n//val_generator.batch_size
print("step size test:",step_size_val)

VGG16_self=Sequential()
VGG16_self.add(base_model)
VGG16_self.add(Flatten())
VGG16_self.add(Dense(2,activation="softmax"))
# only the added layers going to train 
base_model.trainable=False
VGG16_self.compile(loss="categorical_crossentropy",optimizer="adam",metrics=["accuracy"])
# callbacks for saving the best model, early stopping and tensorboard
callbacks = [
ModelCheckpoint(str(datetime.datetime.now())+"_vgg16.h5",
                monitor='val_accuracy', verbose=1, save_best_only=True,
                save_weights_only=False, mode='auto', save_freq='epoch'),
EarlyStopping(monitor='val_accuracy', min_delta=0, patience=5, verbose=1, mode='auto')]

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
step size train: 24
step size test: 5


In [9]:
# train the model
VGG16_self.fit(train_generator,
                    steps_per_epoch=step_size_train,
                    epochs=10,
                    validation_data=val_generator,
                    validation_steps=step_size_val,
                     callbacks=callbacks)

Epoch 1/10

Epoch 00001: val_accuracy improved from -inf to 0.80000, saving model to 2021-02-25 11:12:46.399438_vgg16.h5
Epoch 2/10

Epoch 00002: val_accuracy improved from 0.80000 to 0.82500, saving model to 2021-02-25 11:12:46.399438_vgg16.h5
Epoch 3/10

Epoch 00003: val_accuracy improved from 0.82500 to 0.85000, saving model to 2021-02-25 11:12:46.399438_vgg16.h5
Epoch 4/10

Epoch 00004: val_accuracy did not improve from 0.85000
Epoch 5/10

Epoch 00005: val_accuracy did not improve from 0.85000
Epoch 6/10

Epoch 00006: val_accuracy did not improve from 0.85000
Epoch 7/10

Epoch 00007: val_accuracy did not improve from 0.85000
Epoch 8/10

Epoch 00008: val_accuracy did not improve from 0.85000
Epoch 00008: early stopping


<tensorflow.python.keras.callbacks.History at 0x7fd49532e350>

In [10]:
# build the the test generator
test_datagen=ImageDataGenerator(rescale=1./255,preprocessing_function=preprocess_input)
test_generator=test_datagen.flow_from_directory("./dataset/test",
                                                  target_size=(150,150),color_mode="rgb",
                                                  batch_size=1,class_mode="categorical")
best_acc=0
best_loss=100
# load the best model
models = (glob.glob('./*.h5'))
for a_model in models:
          model = load_model(a_model)
          # evaluate the model on a unseen data
          scoreSeg = model.evaluate(test_generator)
          if scoreSeg[1] > best_acc and scoreSeg[0]<best_loss:
                best_acc = scoreSeg[1]
                best_loss = scoreSeg[0]
                best_model = a_model
                
print('best loss:',best_loss)
print('best accuracy:',best_acc)                

Found 44 images belonging to 2 classes.
best loss: 0.181899294257164
best accuracy: 0.9318181872367859


In [11]:
print(best_model)

./2021-02-25 11:12:46.399438_vgg16.h5
