# Animal Recognition with Neural Networks

## Libraries import

In [50]:
#                                                   LIBRARIES

import os

# Dataset
from kaggle.api.kaggle_api_extended import KaggleApi
from datasets import load_dataset

# Image preprocessing / processing
import cv2
from PIL import Image
import numpy as np

# Neural Network libraries
import tensorflow as tf
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Conv2D,MaxPooling2D,Dense,Flatten,Dropout

## Get Dataset

We use a Kaggle dataset that contains images of many different animals:
https://www.kaggle.com/datasets/antoreepjana/animals-detection-images-dataset

- Dog
- Cat
- Zebra
- Lion
- Leopard
- Cheetah
- Tiger
- Bear
- Brown Bear
- Butterfly
- Canary
- Crocodile
- Polar Bear
- Bull
- Camel
- Crab
- Chicken
- Centipede
- Cattle
- Caterpillar
- Duck
....
- 70+ more classes (we will only use a reduced subset of them, loaded by the "reduced" function below)

In [51]:
#                                               PREPARE DATASET
#                                            get images from kaggle
# kaggle datasets download -d antoreepjana/animals-detection-images-dataset

def download_dataset(path=".", unzip=False):
    """Download the animals-detection-images dataset from Kaggle.

    No credentials are set or stored here. The Kaggle client looks them up
    itself (``KAGGLE_USERNAME`` / ``KAGGLE_KEY`` environment variables or the
    ``kaggle.json`` config file), so configure one of those outside the
    notebook. Writing placeholder values into ``os.environ`` would override
    real credentials already present in the environment, and a key pasted
    into a notebook is leaked as soon as the notebook is shared.

    Args:
        path: Directory the dataset archive is downloaded into.
        unzip: Forwarded to ``KaggleApi.dataset_download_files``; pass True to
            extract the archive after the download.
    """
    api = KaggleApi()
    api.authenticate()

    api.dataset_download_files(
        'antoreepjana/animals-detection-images-dataset',
        path=path,
        unzip=unzip,
    )

# load dataset
def load_dataset():
    """Load the 'train' split from ./raw_data/ with the `datasets` library.

    Returns:
        Whatever ``datasets.load_dataset`` returns for the requested split.
    """
    # This function has the same name as datasets.load_dataset and therefore
    # shadows the module-level import. Calling `load_dataset(...)` from here
    # would call this zero-argument function itself and fail with TypeError,
    # so the library function is imported locally under a different name.
    from datasets import load_dataset as hf_load_dataset

    # NOTE(review): "arxiv_dataset" looks unrelated to the animal images used
    # in the rest of the notebook - confirm this is the intended dataset.
    return hf_load_dataset("arxiv_dataset", data_dir='./raw_data/', split='train', ignore_verifications=True)

In [52]:
# Download dataset if it is not downloaded, it will take +- 40 min (10 GB)
# download_dataset()

## Prepare Dataset

Transform images to the right format for the model

2 arrays:
- data: array of images converted to numpy array
- labels: corresponding labels
    1. Dog
    2. Cat
    ...
    - Zebra
    - Lion
    - Leopard
    - Cheetah
    - Tiger
    - Bear
    - Brown Bear
    - Butterfly
    - Canary
    - Crocodile
    - Polar Bear
    - Bull
    - Camel
    - Crab
    - Chicken
    - Centipede
    - Cattle
    - Caterpillar
    - Duck

In [53]:
# Containers for the full dataset: the images and, because training is
# supervised, the label that belongs to each image.
data, labels = [], []

In [54]:
#                                           Transform images into an array
def load_and_save_all_data(set):
    """Convert every image of one dataset split into arrays and cache them.

    Each sub-directory of ``animals-detection-images-dataset/<set>`` is treated
    as one class. Every readable image file directly inside it is resized to
    50x50 and stored together with the integer label of its class. The result
    is written to ``animals.npy`` and ``labels.npy`` in the working directory.

    Args:
        set: Name of the split directory to read (e.g. "train" or "test").
            The name shadows the builtin ``set`` but is kept for compatibility.

    Returns:
        Tuple ``(animals, labels)`` of numpy arrays; ``animals[i]`` is the
        image whose class id is ``labels[i]``.
    """
    dataset_dir = os.path.join("animals-detection-images-dataset", set)

    # os.listdir returns entries in arbitrary order; sorting keeps the
    # label <-> animal mapping identical between runs and machines.
    animals_list = sorted(
        entry for entry in os.listdir(dataset_dir)
        if os.path.isdir(os.path.join(dataset_dir, entry))
    )

    l_data = []
    # supervised -> labels are needed
    l_labels = []

    for label, animal in enumerate(animals_list):
        # List the images of the requested split (not always "train")
        animal_dir = os.path.join(dataset_dir, animal)
        num_images = 0
        num_unreadable = 0

        for image in sorted(os.listdir(animal_dir)):
            img_path = os.path.join(animal_dir, image)
            # Sub-directories inside a class folder are not images
            if not os.path.isfile(img_path):
                continue

            # cv2.imread returns None (it does not raise) when the file
            # cannot be decoded as an image; skip such files.
            img = cv2.imread(img_path)
            if img is None:
                num_unreadable += 1
                continue

            # cv2 delivers channels in BGR order; they are passed on unchanged,
            # which is also what convert_to_array does at prediction time.
            # The CNN needs a fixed input size -> 50x50
            resized_image = Image.fromarray(img, 'RGB').resize((50, 50))

            l_data.append(np.array(resized_image))
            l_labels.append(label)
            num_images += 1

        print("Label " + str(label) + " = " + str(animal) + " with " + str(num_images) + " images")
        if num_unreadable:
            print("  skipped " + str(num_unreadable) + " unreadable file(s)")

    # Convert and save once, after all classes were read (doing this inside the
    # loop rebuilt and rewrote the whole array for every class).
    animals = np.array(l_data)
    labels = np.array(l_labels)

    # save numpy arrays so the conversion does not need to be done again
    np.save("animals", animals)
    np.save("labels", labels)

    return animals, labels

def load_data_and_labels():
    """Load the cached full-dataset arrays from the working directory.

    Returns:
        Tuple ``(animals, labels)`` read from ``animals.npy`` and
        ``labels.npy``. Previously the arrays were only bound to local names
        and discarded, so callers received nothing.
    """
    animals = np.load("animals.npy")
    labels = np.load("labels.npy")
    return animals, labels


#### Reduced:
Label 0 = Bear with 87 images
Label 1 = Chicken with 388 images
Label 2 = Duck with 542 images
Label 3 = Frog with 588 images
Label 4 = Sea turtle with 239 images
Label 5 = Squirrel with 367 images

In [55]:
# Images of the reduced train and test splits
train_animals_reduced, test_animals_reduced = [], []
# Supervised learning: the label of every image in the lists above
train_labels_reduced, test_labels_reduced = [], []

In [56]:
#                                           Transform images into an array

def load_and_save_data_reduced(set_tt):
    """Convert the images of the six selected animals into arrays and cache them.

    Reads ``animals-detection-images-dataset/<set_tt>/<animal>`` for each
    wanted animal, resizes every readable image to 50x50 and stores the result
    in the module-level ``<set_tt>_animals_reduced`` / ``<set_tt>_labels_reduced``
    variables as well as in ``<set_tt>_animals.npy`` / ``<set_tt>_labels.npy``,
    which are the file names ``load_data_and_labels_reduced`` reads.

    Args:
        set_tt: Split to process, either "train" or "test".

    Returns:
        Tuple ``(animals, labels)`` of numpy arrays for the processed split.

    Raises:
        ValueError: If ``set_tt`` is neither "train" nor "test".
    """
    global train_animals_reduced, train_labels_reduced, test_animals_reduced, test_labels_reduced

    # `assert set_tt == "test" or "train"` was always true because the
    # non-empty string "train" is truthy; validate explicitly instead.
    if set_tt not in ("train", "test"):
        raise ValueError("set_tt must be 'train' or 'test', got " + repr(set_tt))

    folder_directory = os.path.join("animals-detection-images-dataset", set_tt)
    print("============= " + set_tt + " =============")

    animals_wanted = ["Bear", "Chicken", "Duck", "Frog", "Sea turtle", "Squirrel"]

    l_data = []
    # supervised -> labels are needed
    l_labels = []

    # The label is the position in animals_wanted. This does not depend on the
    # order os.listdir reports or on which folders exist, so both splits use
    # the same ids and they agree with get_animal_name.
    for label, animal in enumerate(animals_wanted):
        animal_dir = os.path.join(folder_directory, animal)
        if not os.path.isdir(animal_dir):
            print("Label " + str(label) + " = " + str(animal) + " not found in " + folder_directory)
            continue

        num_images = 0
        num_unreadable = 0

        for image in sorted(os.listdir(animal_dir)):
            img_path = os.path.join(animal_dir, image)
            # Sub-directories inside a class folder are not images
            if not os.path.isfile(img_path):
                continue

            # cv2.imread returns None (it does not raise) when the file
            # cannot be decoded as an image; skip such files.
            img = cv2.imread(img_path)
            if img is None:
                num_unreadable += 1
                continue

            # cv2 delivers channels in BGR order; they are passed on unchanged,
            # which is also what convert_to_array does at prediction time.
            # The CNN needs a fixed input size -> 50x50
            resized_image = Image.fromarray(img, 'RGB').resize((50, 50))

            l_data.append(np.array(resized_image))
            l_labels.append(label)
            num_images += 1

        print("Label " + str(label) + " = " + str(animal) + " with " + str(num_images) + " images")
        if num_unreadable:
            print("  skipped " + str(num_unreadable) + " unreadable file(s)")

    # Convert and save once, after all animals were read
    animals = np.array(l_data)
    labels = np.array(l_labels)

    # save numpy arrays so the conversion does not need to be done again
    if set_tt == "test":
        test_animals_reduced = animals
        test_labels_reduced = labels
        np.save("test_animals", animals)
        np.save("test_labels", labels)
    else:
        train_animals_reduced = animals
        train_labels_reduced = labels
        # Saved without the "_reduced" suffix so that
        # load_data_and_labels_reduced finds the files.
        np.save("train_animals", animals)
        np.save("train_labels", labels)

    return animals, labels

def load_data_and_labels_reduced():
    """Restore the cached reduced train/test arrays into the module globals.

    Reads ``train_animals.npy``, ``train_labels.npy``, ``test_animals.npy`` and
    ``test_labels.npy`` from the working directory, in that order, and binds
    them to the corresponding ``*_reduced`` global variables.
    """
    cached_arrays = (
        ("train_animals_reduced", "train_animals.npy"),
        ("train_labels_reduced", "train_labels.npy"),
        ("test_animals_reduced", "test_animals.npy"),
        ("test_labels_reduced", "test_labels.npy"),
    )
    namespace = globals()
    for variable_name, file_name in cached_arrays:
        namespace[variable_name] = np.load(file_name)

In [57]:
# if its not saved yet:
#load_and_save_data_reduced()

# if its saved:
#load_data_and_labels_reduced()



In [58]:
# load_and_save_data_reduced("train")
# load_and_save_data_reduced("test")

#### ============= train =============
Label 0 = Bear with 87 images
Label 1 = Chicken with 388 images
Label 2 = Duck with 542 images
Label 3 = Frog with 588 images
Label 4 = Sea turtle with 239 images
Label 5 = Squirrel with 367 images

#### ============= test =============
Label 0 = Bear with 39 images
Label 1 = Chicken with 137 images
Label 2 = Duck with 88 images
Label 3 = Frog with 77 images
Label 4 = Sea turtle with 87 images
Label 5 = Squirrel with 68 images

In [59]:
load_data_and_labels_reduced()

In [60]:
# Shuffle each split. Images and labels are indexed with the same permutation
# so every image keeps its label. A seeded generator makes the resulting order
# identical on every "Restart & Run All".
shuffle_rng = np.random.default_rng(42)

s = shuffle_rng.permutation(train_animals_reduced.shape[0])
train_animals_reduced = train_animals_reduced[s]
train_labels_reduced = train_labels_reduced[s]

s = shuffle_rng.permutation(test_animals_reduced.shape[0])
test_animals_reduced = test_animals_reduced[s]
test_labels_reduced = test_labels_reduced[s]

In [61]:
# Count the distinct labels of each split; both splits must contain the same
# number of classes before that number is used as `num_classes`.
num_classes_train = len(np.unique(train_labels_reduced))
num_classes_test = len(np.unique(test_labels_reduced))
assert num_classes_train == num_classes_test
num_classes = num_classes_train

# Number of images per split and in total
train_data_length = len(train_animals_reduced)
test_data_length = len(test_animals_reduced)
data_length = train_data_length + test_data_length
print("Train data length:", train_data_length)
print("Test data length:", test_data_length)

Train data length: 2211
Test data length: 496


Train data length: 2211
Test data length: 496

In [39]:
# Scale the pixel values by 1/255. predict_animal scales its input the same
# way, so this cell has to run before the model is trained.
# The integer-dtype check makes the cell safe to re-run: once the arrays are
# float32 they are left alone instead of being divided by 255 a second time.
if np.issubdtype(train_animals_reduced.dtype, np.integer):
    train_animals_reduced = train_animals_reduced.astype('float32') / 255
if np.issubdtype(test_animals_reduced.dtype, np.integer):
    test_animals_reduced = test_animals_reduced.astype('float32') / 255

In [66]:
# One-hot encode the integer labels. Both splits use the shared `num_classes`
# so their encodings always have the same width.
# The ndim check makes the cell safe to re-run: labels that are already
# one-hot encoded (2-D) are not encoded a second time.
if train_labels_reduced.ndim == 1:
    train_labels_reduced = keras.utils.to_categorical(train_labels_reduced, num_classes)
if test_labels_reduced.ndim == 1:
    test_labels_reduced = keras.utils.to_categorical(test_labels_reduced, num_classes)

## Making Keras Model

Think about hyperparameters such as the filter size, the number of filters, the type of padding, and the activation functions to use.

In [67]:
# Build the CNN as one Sequential stack.
#
# Feature extractor: three Conv2D + MaxPooling2D pairs with a growing number
# of filters (16, 32, 64). Each pooling layer halves the spatial size, which
# also keeps training time down.
# Classifier: dropout, flatten, one hidden dense layer, dropout, and a final
# softmax layer with one node per animal class, giving a score between 0 and 1
# for each class.
model = Sequential([
    # Pair 1 (16 filters) - also declares the 50x50 three-channel input
    Conv2D(filters=16, kernel_size=2, padding="same", activation="relu", input_shape=(50, 50, 3)),
    MaxPooling2D(pool_size=2),

    # Pair 2 (32 filters)
    Conv2D(filters=32, kernel_size=2, padding="same", activation="relu"),
    MaxPooling2D(pool_size=2),

    # Pair 3 (64 filters)
    Conv2D(filters=64, kernel_size=2, padding="same", activation="relu"),
    MaxPooling2D(pool_size=2),

    # Dropout layers reduce overfitting
    Dropout(0.2),
    Flatten(),

    Dense(500, activation="relu"),
    Dropout(0.2),

    # Output layer: num_classes nodes = categories of animals in the set
    Dense(num_classes, activation="softmax"),
])
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 50, 50, 16)        208       
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 25, 25, 16)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 25, 25, 32)        2080      
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 12, 12, 32)       0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 12, 12, 64)        8256      
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 6, 6, 64)        

In [68]:
# Compile the model with the Adam optimizer and categorical cross-entropy,
# the loss that goes with one-hot labels and a softmax output.
# (For a binary problem use binary cross-entropy with a sigmoid output.)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [75]:
# Train for 50 epochs in mini-batches of 50 images
model.fit(
    train_animals_reduced,
    train_labels_reduced,
    batch_size=50,
    epochs=50,
    verbose=1,
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x23b86bd49d0>

In [76]:
# Evaluate on the test split; index 1 of the result is the accuracy metric
# requested when the model was compiled.
score = model.evaluate(
    test_animals_reduced,
    test_labels_reduced,
    verbose=1,
)
print('\n', 'Test accuracy:', score[1])


 Test accuracy: 0.4818548262119293


In [77]:
# Save the trained model as two files in the working directory:
# the architecture as JSON and the weights as HDF5.

# serialize the model architecture to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize the weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


In [78]:
def convert_to_array(img):
    """Read an image file and return it as a 50x50 numpy array.

    Args:
        img: Path of the image file to read.

    Returns:
        numpy array of the image resized to 50x50.

    Raises:
        ValueError: If the file is missing or cannot be decoded as an image.
    """
    im = cv2.imread(img)
    if im is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside PIL.
        raise ValueError("Could not read image file: " + str(img))

    # cv2 delivers channels in BGR order; they are passed on unchanged, which
    # matches how the training images were loaded in this notebook.
    resized = Image.fromarray(im, 'RGB').resize((50, 50))
    return np.array(resized)

def get_animal_name(label):
    """Translate a class id (0-5) into the animal's display name.

    Returns None when ``label`` does not equal any known class id.
    """
    names_by_label = ("Bear", "Chicken", "Duck", "Frog", "Sea Turtle", "Squirrel")
    for class_id, animal_name in enumerate(names_by_label):
        if label == class_id:
            return animal_name
    return None

def predict_animal(file):
    """Classify one image file with the trained model and print the result.

    Args:
        file: Path of the image to classify.

    Returns:
        Tuple ``(animal, acc)``: the predicted animal name (None if the class
        id is unknown to get_animal_name) and the highest score returned by
        the model for this image.
    """
    print("Predicting .................................")
    # Same 1/255 scaling as applied to the training data
    ar = convert_to_array(file) / 255
    # The model expects a batch, so wrap the single image in a batch of one
    batch = np.expand_dims(ar, axis=0)

    score = model.predict(batch, verbose=1)
    print(score)
    label_index = np.argmax(score)
    print(label_index)
    # Highest score of the softmax output: the model's confidence in this one
    # prediction, not an accuracy measured over a dataset.
    acc = np.max(score)
    animal = get_animal_name(label_index)
    print(animal)
    # str(animal) keeps the message printable when the class id is unknown
    print("The predicted Animal is a "+str(animal)+" with accuracy =    "+str(acc))
    return animal, acc

In [87]:
# Classify a single image from the dataset's prediction folder
file = "animals-detection-images-dataset/prediction/duck.jpg"
predict_animal(file)

Predicting .................................
[[0.12157457 0.11814366 0.1955372  0.19213589 0.21696585 0.1556429 ]]
4
Sea Turtle
The predicted Animal is a Sea Turtle with accuracy =    0.21696585
