In [2]:
from tensorflow import keras
  
# load the VGG16 network *pre-trained* on the ImageNet dataset

base_model = keras.applications.VGG16(weights="imagenet",
                   input_shape=(224, 224, 3),
                  include_top=False)

In [3]:
base_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [4]:
# freeze
base_model.trainable = False

In [5]:
inputs = keras.Input(shape=(224, 224, 3))

# Separately from setting trainable on the model, we set training to False 

x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)

# A Dense classifier with 8 unites (categorical)
outputs = keras.layers.Dense(8)(x)
model = keras.Model(inputs, outputs)

In [6]:
model.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_3 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
vgg16 (Model)                (None, 7, 7, 512)         14714688  
_________________________________________________________________
global_average_pooling2d (Gl (None, 512)               0         
_________________________________________________________________
dense (Dense)                (None, 8)                 4104      
Total params: 14,718,792
Trainable params: 4,104
Non-trainable params: 14,714,688
_________________________________________________________________


In [7]:
# Important to use categorical crossentropy and categorical accuracy as we now have a categorical classification problem
# Compile the model

model.compile(loss=keras.losses.CategoricalCrossentropy(from_logits = True), metrics=[keras.metrics.CategoricalAccuracy()])

In [8]:
import os
import shutil

# Find the hidden file

os.listdir("dataset/train/")
os.listdir("dataset/valid/")

# Remove it

shutil.rmtree("dataset/train/.ipynb_checkpoints")
shutil.rmtree("dataset/valid/.ipynb_checkpoints")

FileNotFoundError: [Errno 2] No such file or directory: 'dataset/train/.ipynb_checkpoints'

In [10]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# create a data generator

datagen = ImageDataGenerator(
        rescale = 1./224 ,
        samplewise_center=True,  # set each sample mean to 0
        rotation_range=10,  # randomly rotate images in the range (degrees, 0 to 180)
        zoom_range = 0.1, # Randomly zoom image 
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False) # we don't expect Bo to be upside-down so we will not flip vertically

In [11]:
# load and iterate training dataset

train_ds = datagen.flow_from_directory('dataset/train/', 
                                       target_size=(224, 224), 
                                       color_mode='rgb', 
                                       class_mode='categorical',
                                       batch_size=32)
# load and iterate validation dataset

valid_ds = datagen.flow_from_directory('dataset/valid/', 
                                      target_size=(224, 224), 
                                      color_mode='rgb', 
                                      class_mode='categorical',
                                      batch_size=32)

Found 9833 images belonging to 8 classes.
Found 4195 images belonging to 8 classes.


In [None]:
# Train the model

model.fit(train_ds , steps_per_epoch=12 , batch_size = 32 , validation_data = valid_ds , validation_steps=8 , epochs=30)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30

In [None]:
# Unfreeze the base model

base_model.trainable = True

# Re-Compile model

model.compile( optimizer=keras.optimizers.SGD(learning_rate = 0.002),  # learning rate
              loss=keras.losses.CategoricalCrossentropy(from_logits = True),
              metrics=[keras.metrics.CategoricalAccuracy()])
# Train again

# Vaiualize accuracy/loss history

history = model.fit(
    train_ds, steps_per_epoch = 10 , batch_size = 32 , validation_data=valid_ds, validation_steps=4, epochs=30)

In [None]:
# Validate model as a part of fitting process

import matplotlib.pyplot as plt
    
validation_data=(valid_ds)

# Visualize loss history
# Get training and test loss histories
    
training_acc = history.history["categorical_accuracy"]
test_acc = history.history['val_categorical_accuracy']

# Create count of the number of epochs

epoch_count = range(1, len(training_acc) + 1)

# Visualize loss history

plt.plot(epoch_count, training_acc, 'r--')
plt.plot(epoch_count, test_acc, 'b-')
plt.legend(['Training acc', 'Valid acc'])
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.grid(True)
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg

def show_image(image_path):
    image = mpimg.imread(image_path)
    plt.imshow(image)    

show_image('download.jpeg')

from tensorflow.keras.preprocessing import image as image_utils
from tensorflow.keras.applications.vgg16 import preprocess_input

def load_and_process_image(image_path):
    # Print image's original shape, for reference
    print('Original image shape: ', mpimg.imread(image_path).shape)
    
    # Load in the image with a target size of 224, 224
    
    image = image_utils.load_img(image_path, target_size=(224, 224))
    
    # Convert the image from a PIL format to a numpy array
    image = image_utils.img_to_array(image)
    # Add a dimension for number of images, in our case 1
    image = image.reshape(1,224,224,3)
    
    # Preprocess image to align with original ImageNet dataset
    
    image = preprocess_input(image)
    # Print image's shape after processing
    print('Processed image shape: ', image.shape)
    return image

import numpy as np
from IPython.display import Audio

def voice(image_path):
    show_image(image_path)
    image = load_and_process_image(image_path)
    return model.predict(image)

a = voice('download.jpeg')

if 151 <= np.argmax(a) <= 268:
    print("Dog!")
    Audio('sound/dog.mp3')
elif 281 <= np.argmax(a) <= 285:
    print("Cat!")
    Audio('sound/cat.mp3')
elif 7 <= np.argmax(a) <= 8:
    print("Chicken!")
    Audio('sound/chicken.mp3')
elif 345 <= np.argmax(a) <= 347:
    print("Cow!")
    Audio('sound/cow.mp3')
elif 101 == np.argmax(a) :
    print("Elephant!")
    Audio('sound/elephant.mp3')
elif 385 <= np.argmax(a) <= 386 :
    print("Elephant!")
    Audio('sound/elephant.mp3')
elif 339 == np.argmax(a):
    print("Horse!")
    Audio('sound/horse.mp3')
elif 348 <= np.argmax(a) <= 353 :
    print("Sheep!")
    Audio('sound/sheep.mp3')
elif 335  == np.argmax(a) :
    print("Squirrel!")
    Audio('sound/squirrel.mp3')

In [None]:
print(np.argmax(a)
     )

In [None]:
# Clear GPU

import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)