### Importing Libraries

In [3]:
import os
from os import listdir
import shutil

import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import array_to_img, img_to_array, load_img

## Organizing Dataset for One vs Rest Classification

### Defining Paths
#### The dataset has already been downloaded from the provided link, so the paths below point to the copy stored on my system.

In [2]:
# Dataset locations. The defaults are the original local paths; set the
# ANIMALS_DATA_DIR / ANIMALS_OVR_DIR environment variables to run the notebook
# on another machine without editing this cell.
path = os.environ.get(  # dataset root: one sub-folder per animal class
    "ANIMALS_DATA_DIR", r"C:\Users\rudra\OneDrive\Desktop\animals\animals"
)
save = os.environ.get(  # output root for the One-vs-Rest folder tree
    "ANIMALS_OVR_DIR", r"C:\Users\rudra\OneDrive\Desktop\animals\OvR3"
)

#### Listing the 90 classes provided by the dataset

In [3]:
# Every class is a sub-folder of `path`. Keep directories only, so that a stray
# file in the dataset root (e.g. desktop.ini or .DS_Store) is not mistaken for a
# class and later handed to listdir(); sort because os.listdir() returns
# entries in arbitrary order.
animals = sorted(
    entry for entry in listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
print(animals)
print("number of classes:", len(animals))

['antelope', 'badger', 'bat', 'bear', 'bee', 'beetle', 'bison', 'boar', 'butterfly', 'cat', 'caterpillar', 'chimpanzee', 'cockroach', 'cow', 'coyote', 'crab', 'crow', 'deer', 'dog', 'dolphin', 'donkey', 'dragonfly', 'duck', 'eagle', 'elephant', 'flamingo', 'fly', 'fox', 'goat', 'goldfish', 'goose', 'gorilla', 'grasshopper', 'hamster', 'hare', 'hedgehog', 'hippopotamus', 'hornbill', 'horse', 'hummingbird', 'hyena', 'jellyfish', 'kangaroo', 'koala', 'ladybugs', 'leopard', 'lion', 'lizard', 'lobster', 'mosquito', 'moth', 'mouse', 'octopus', 'okapi', 'orangutan', 'otter', 'owl', 'ox', 'oyster', 'panda', 'parrot', 'pelecaniformes', 'penguin', 'pig', 'pigeon', 'porcupine', 'possum', 'raccoon', 'rat', 'reindeer', 'rhinoceros', 'sandpiper', 'seahorse', 'seal', 'shark', 'sheep', 'snake', 'sparrow', 'squid', 'squirrel', 'starfish', 'swan', 'tiger', 'turkey', 'turtle', 'whale', 'wolf', 'wombat', 'woodpecker', 'zebra']
number of classes: 90


### Initializing preprocessing using Keras
#### The augmented samples are generated with the `ImageDataGenerator` class provided by the `keras.preprocessing.image` module

In [5]:
# Random augmentations applied to every primary-class image.
augmentation_options = {
    "rotation_range": 40,     # random rotation, range given in degrees
    "zoom_range": 0.2,        # random zoom in/out by up to 20 %
    "horizontal_flip": True,  # randomly mirror images left-right
    "fill_mode": "nearest",   # pixels exposed by a transform copy the nearest edge pixel
}
datagen = ImageDataGenerator(**augmentation_options)

### Loading the images and Augmenting the samples of required Class
This task needs image augmentation (rotation, zoom and horizontal flip) because sample scarcity causes a class imbalance between the primary class and the 'rest' class in One-vs-Rest (OvR) classification. Without augmentation, the ratio of samples between the primary class and the 'rest' class would not make a viable training dataset. For example, for OvR classification on the class 'antelope', the 'antelope' class originally contains only 60 image samples, whereas the 'rest' class would contain about 5340 image samples (89 classes × 60), which is a severe class imbalance.

Hence, to address the class imbalance, I augmented the samples of the primary class 30-fold, i.e. for every sample we create 30 different augmented samples, which artificially increases the primary class to approximately 1800 samples. At the same time, we select 20 samples from each remaining class (89 × 20 = 1780), so that the 'primary' and 'rest' classes have a roughly equal number of training samples.

In [7]:
AUGMENTATIONS_PER_IMAGE = 30       # augmented copies written per primary-class image
REST_SAMPLES_PER_CLASS = 20        # images borrowed from every other class for "rest"
SEED = 42                          # makes the random augmentations repeatable
JPEG_SUFFIXES = (".jpg", ".jpeg")  # matched case-insensitively


def _list_jpegs(folder):
    """Return the JPEG file names found in `folder`, sorted.

    os.listdir() order is arbitrary, so sorting keeps the "first N samples"
    selection stable between runs.
    """
    return sorted(
        name for name in listdir(folder)
        if name.lower().endswith(JPEG_SUFFIXES)
    )


def _augment_to_dir(image_path, out_dir, stem, copies=AUGMENTATIONS_PER_IMAGE):
    """Write `copies` randomly augmented JPEG variants of one image into `out_dir`.

    Files are named `<stem>_augNN.jpeg`. `datagen.flow(..., save_to_dir=...)`
    is deliberately not used: with an empty prefix it names every file
    `_0_<small random number>.jpeg`, so augmented images silently overwrite
    each other and the class ends up with fewer samples than intended.
    """
    pixels = img_to_array(load_img(image_path))
    for k in range(copies):
        augmented = datagen.random_transform(pixels)
        # scale=True rescales to 0-255 before saving, as flow(save_to_dir=...) does.
        out_file = os.path.join(out_dir, f"{stem}_aug{k:02d}.jpeg")
        array_to_img(augmented, scale=True).save(out_file)


print("[INFO] Loading images ...")
rest = "rest"
np.random.seed(SEED)  # ImageDataGenerator draws its random parameters from np.random

# List each class folder once, up front, instead of once per
# (primary class, folder) pair; entries that are not directories are ignored.
images_by_class = {
    name: _list_jpegs(os.path.join(path, name))
    for name in animals
    if os.path.isdir(os.path.join(path, name))
}

for animal in images_by_class:
    primary_dir = os.path.join(save, animal, animal)
    rest_dir = os.path.join(save, animal, rest)
    # exist_ok lets the cell be re-run without failing on folders it already created.
    os.makedirs(primary_dir, exist_ok=True)
    os.makedirs(rest_dir, exist_ok=True)
    print(f"[INFO] Processing {animal} ...")
    for image_folder, image_list in images_by_class.items():
        source_dir = os.path.join(path, image_folder)
        if image_folder == animal:
            # Primary class: every image is expanded into augmented copies.
            for sample in image_list:
                stem = os.path.splitext(sample)[0]
                _augment_to_dir(os.path.join(source_dir, sample), primary_dir, stem)
        else:
            # Rest class: the list is already filtered to JPEGs, so the slice
            # really yields up to REST_SAMPLES_PER_CLASS images. The folder
            # name is prefixed so equal file names from different classes
            # cannot overwrite each other.
            for sample in image_list[:REST_SAMPLES_PER_CLASS]:
                shutil.copyfile(
                    os.path.join(source_dir, sample),
                    os.path.join(rest_dir, f"{image_folder}_{sample}"),
                )

## Organizing Dataset for 5-Class Classification

### Defining Paths

In [4]:
# Dataset locations for the 5-class split. The defaults are the original local
# paths; set ANIMALS_DATA_DIR / ANIMALS_FIVE_CLASS_DIR to run the notebook on
# another machine without editing this cell.
path = os.environ.get(  # dataset root: one sub-folder per animal class
    "ANIMALS_DATA_DIR", r"C:\Users\rudra\OneDrive\Desktop\animals\animals"
)
save = os.environ.get(  # output root: one sub-folder per niche class
    "ANIMALS_FIVE_CLASS_DIR", r"C:\Users\rudra\OneDrive\Desktop\animals\5_Class"
)

### Defining Classification Schema for condensing and organizing 90 different classes into 5 classes
Here, the 90 different animals are grouped into 5 classes, each reflecting a different ecological niche: Terrestrial, Aquatic, Aerial, Arboreal and Burrowing.


In [5]:
# Ecological-niche groups; each entry is the name of an animal folder in the dataset.
Terrestrial = [
    'antelope', 'bear', 'cat', 'cow', 'deer', 'dog', 'bison',
    'boar', 'cockroach', 'donkey', 'elephant', 'goat', 'hippopotamus', 'horse',
    'hyena', 'kangaroo', 'leopard', 'lion', 'okapi', 'ox', 'penguin',
    'pig', 'reindeer', 'rhinoceros', 'sheep', 'tiger', 'wolf', 'zebra',
]

Aquatic = [
    'dolphin', 'crab', 'goldfish', 'duck', 'jellyfish', 'lobster',
    'octopus', 'otter', 'oyster', 'seahorse', 'seal', 'shark',
    'squid', 'starfish', 'swan', 'whale', 'turtle',
]

Aerial = [
    'crow', 'eagle', 'flamingo', 'fly', 'goose', 'hornbill',
    'hummingbird', 'mosquito', 'moth', 'owl', 'parrot', 'pelecaniformes',
    'pigeon', 'sandpiper', 'sparrow', 'turkey', 'woodpecker',
]

Arboreal = [
    'bat', 'chimpanzee', 'bee', 'beetle', 'butterfly',
    'caterpillar', 'dragonfly', 'gorilla', 'grasshopper', 'koala',
    'ladybugs', 'orangutan', 'panda', 'squirrel', 'lizard',
]

Burrowing = [
    'badger', 'coyote', 'fox', 'hamster', 'hare', 'hedgehog', 'mouse',
    'porcupine', 'possum', 'raccoon', 'rat', 'wombat', 'snake',
]

# Niche label -> member list. Insertion order defines the order of the two
# parallel lists derived below, so label k always matches member list k.
_niche_members = {
    'Terrestrial': Terrestrial,
    'Aquatic': Aquatic,
    'Aerial': Aerial,
    'Arboreal': Arboreal,
    'Burrowing': Burrowing,
}
class_name = list(_niche_members)
classes = list(_niche_members.values())

In [6]:
JPEG_SUFFIXES = (".jpg", ".jpeg")  # matched case-insensitively

print("[INFO] Loading images ...")
# List the dataset root once; only folders that actually exist on disk are processed.
dataset_folders = sorted(listdir(path))
for label, members in zip(class_name, classes):
    class_dir = os.path.join(save, label)
    # exist_ok lets the cell be re-run without failing on folders it already created.
    os.makedirs(class_dir, exist_ok=True)
    print(f"[INFO] Processing {label} ...")
    for image_folder in dataset_folders:
        if image_folder not in members:
            continue  # belongs to another niche (or is not an animal folder)
        source_dir = os.path.join(path, image_folder)
        for sample in sorted(listdir(source_dir)):
            if not sample.lower().endswith(JPEG_SUFFIXES):
                continue
            # Several animal folders are merged into one class folder, so the
            # animal name is prefixed to keep equal file names from different
            # animals from overwriting each other.
            shutil.copyfile(
                os.path.join(source_dir, sample),
                os.path.join(class_dir, f"{image_folder}_{sample}"),
            )

[INFO] Loading images ...
[INFO] Processing Terrestrial ...
[INFO] Processing Aquatic ...
[INFO] Processing Aerial ...
[INFO] Processing Arboreal ...
[INFO] Processing Burrowing ...
