# **Animal Classification**
* About 28,000 medium-quality animal images
* 10 categories: dog, cat, horse, spider, butterfly, chicken, sheep, cow, squirrel, elephant

The number of images per category varies between roughly 2,000 and 5,000.

The data can be downloaded **[here](https://www.kaggle.com/alessiocorrado99/animals10)**.

[Data Publisher](https://www.kaggle.com/alessiocorrado99)

### Data Preparation

In [None]:
import os 
import numpy as np
import cv2
import matplotlib.pyplot as plt

# When True, the data-preparation cell below rebuilds the dataset from the raw
# images and overwrites the saved .npy file; set to False to skip that step.
REFRESH_DATA = True

class animals_class():
    """Build a shuffled ``[image, label]`` dataset from per-category image folders.

    Every sub-directory of ``img_path`` is treated as one category. Its label is
    the sub-directory's position in the *sorted* directory listing, so the
    label -> category mapping is the same on every run and every machine.
    Each readable image is resized to ``img_size`` x ``img_size`` and stored
    together with its label.
    """

    img_size = 50  # side length (pixels) of the square images that are stored
    img_path = "/data/raw-img/" # download the 'raw-img' folder from the link given at the top
    save_path = "/data/training_data_RGB.npy"  # file written by create_training_data()

    # Class-level defaults, kept so the attributes exist before the first build.
    # create_training_data() rebinds all of them on the instance, so the list is
    # never shared (and never grows) across instances or repeated runs.
    training_data = []
    count = 0    # images accepted in the category currently being processed
    total = 0    # images accepted across all categories
    skipped = 0  # files that cv2.imread could not decode

    def create_training_data(self):
        """Read, resize, label, shuffle and save every image under ``img_path``.

        Side effects:
            * prints each category name with its label, the number of images
              kept per category, and the overall total;
            * replaces ``self.training_data`` with the shuffled list of
              ``[image, label]`` pairs;
            * writes that data to ``save_path`` as an ``(N, 2)`` object array
              (load it back with ``np.load(..., allow_pickle=True)``).

        Files that OpenCV cannot decode are counted in ``self.skipped`` and
        left out of the dataset; any other error is allowed to propagate.
        """
        # Start from a clean per-instance state so that re-running the cell
        # does not append a second copy of the dataset.
        self.training_data = []
        self.count = 0
        self.total = 0
        self.skipped = 0

        # os.listdir() order is arbitrary, so sort it to keep labels stable.
        # Stray files next to the category folders are ignored.
        categories = sorted(
            entry for entry in os.listdir(self.img_path)
            if os.path.isdir(os.path.join(self.img_path, entry))
        )

        for idx, animal in enumerate(categories):

            print(animal, idx)
            path = os.path.join(self.img_path, animal)

            for file_name in sorted(os.listdir(path)):
                image = cv2.imread(os.path.join(path, file_name))
                if image is None:
                    # cv2.imread signals an unreadable / non-image file by
                    # returning None rather than raising.
                    self.skipped += 1
                    continue

                image = cv2.resize(image, (self.img_size, self.img_size))
                self.training_data.append([image, idx])
                self.count += 1
                self.total += 1

            print(self.count)
            self.count = 0  # per-category counter: reset before the next folder

        np.random.shuffle(self.training_data)

        # Build the (N, 2) object array explicitly: recent NumPy versions refuse
        # to create one implicitly from a ragged [ndarray, int] list.
        dataset = np.empty((len(self.training_data), 2), dtype=object)
        for row, (image, label) in enumerate(self.training_data):
            dataset[row, 0] = image
            dataset[row, 1] = label
        np.save(self.save_path, dataset)

        print(self.total)
        if self.skipped:
            print(f"skipped {self.skipped} unreadable file(s)")

# Rebuild the dataset from the raw images only when explicitly requested.
if REFRESH_DATA:
    ac = animals_class()
    ac.create_training_data()
    # NOTE: `count` is the per-category counter, which create_training_data()
    # resets to 0 after each category, so it always prints 0 here; `total` is
    # the number of images that were actually stored.
    print(f'count = {ac.count}, total = {ac.total}')

### With GrayScale data

In [None]:
# Cached datasets: object arrays of [image, label] pairs.
GRAYSCALE_CACHE = "/data/training_data_GRYSCL.npy"
RGB_CACHE = "/data/training_data_RGB.npy"

if os.path.exists(GRAYSCALE_CACHE):
    # allow_pickle=True is required for object arrays; only load files you
    # created yourself, because unpickling can execute arbitrary code.
    training_data = np.load(GRAYSCALE_CACHE, allow_pickle=True)
else:
    # The data-preparation cell only writes the colour cache, so on a fresh
    # run the grayscale file does not exist yet. Derive it from the colour
    # images (cv2.imread stores channels in BGR order) and cache the result.
    colour_pairs = np.load(RGB_CACHE, allow_pickle=True)
    training_data = np.empty((len(colour_pairs), 2), dtype=object)
    for row, (image, label) in enumerate(colour_pairs):
        training_data[row, 0] = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        training_data[row, 1] = label
    np.save(GRAYSCALE_CACHE, training_data)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers


# --- Grayscale experiment: build the arrays, train the CNN, evaluate it ------

IMG_SIZE = 50        # must match the size the images were resized to
NUM_CHANNELS = 1     # grayscale
NUM_CLASSES = 10     # one output logit per animal category
TEST_FRACTION = 0.20

# Separate the [image, label] pairs into parallel feature / label sequences.
x = [feature for feature, _ in training_data]
y = [label for _, label in training_data]

split_size = int(len(x) * TEST_FRACTION)

# Scale pixels to [0, 1] and add the explicit channel axis Conv2D expects.
x = (np.array(x).astype("float32") / 255.0).reshape(-1, IMG_SIZE, IMG_SIZE, NUM_CHANNELS)
y = np.array(y)

# The data was shuffled when it was built, so a positional split is enough:
# the first 20% is held out for testing and the rest is used for training.
x_train = x[split_size:]
y_train = y[split_size:]

x_test = x[:split_size]
y_test = y[:split_size]

assert len(x_train) + len(x_test) == len(x), "train/test split lost samples"
assert len(x) == len(y), "features and labels are misaligned"

# Three Conv -> MaxPool stages with dropout in between, then a dense head.
model = keras.Sequential([
    keras.Input(shape=(IMG_SIZE, IMG_SIZE, NUM_CHANNELS)),

    layers.Conv2D(54, 3, activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Dropout(.5),
    layers.Conv2D(108, 3, activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Dropout(.5),
    layers.Conv2D(216, 3, activation="relu"),  #, kernel_regularizer=regularizers.l2(0.0125)
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Flatten(),
    layers.Dense(216, activation="relu"),
    layers.Dropout(.6),
    layers.Dense(NUM_CLASSES),  # raw logits; softmax is folded into the loss
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

model.compile(
    # Labels are integer class ids and the last layer emits logits.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # `learning_rate` replaces the deprecated `lr` alias, which current Keras rejects.
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=["accuracy"]
)

model.fit(x_train, y_train, batch_size=64, epochs=30, verbose=2)

# Returns [loss, accuracy] on the held-out split.
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

Model: "sequential_76"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_253 (Conv2D)          (None, 48, 48, 54)        540       
_________________________________________________________________
max_pooling2d_208 (MaxPoolin (None, 24, 24, 54)        0         
_________________________________________________________________
dropout_254 (Dropout)        (None, 24, 24, 54)        0         
_________________________________________________________________
conv2d_254 (Conv2D)          (None, 22, 22, 108)       52596     
_________________________________________________________________
max_pooling2d_209 (MaxPoolin (None, 11, 11, 108)       0         
_________________________________________________________________
dropout_255 (Dropout)        (None, 11, 11, 108)       0         
_________________________________________________________________
conv2d_255 (Conv2D)          (None, 9, 9, 216)       

[1.0670278072357178, 0.6510028839111328]

### With RGB data

In [None]:
# Load the colour dataset written by the data-preparation cell. From here on
# `training_data` refers to the colour pairs, replacing the grayscale ones.
# allow_pickle=True is needed because the file holds Python objects
# ([image, label] pairs); only load files you created yourself, since
# unpickling can execute arbitrary code.
training_data = np.load("/data/training_data_RGB.npy", allow_pickle=True)

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, regularizers


# --- Colour experiment: build the arrays and train the CNN -------------------

IMG_SIZE = 50        # must match the size the images were resized to
NUM_CHANNELS = 3     # colour images as read by OpenCV
NUM_CLASSES = 10     # one output logit per animal category
TEST_FRACTION = 0.20

# Separate the [image, label] pairs into parallel feature / label sequences.
x = [feature for feature, _ in training_data]
y = [label for _, label in training_data]

split_size = int(len(x) * TEST_FRACTION)

# Scale pixels to [0, 1]; the reshape also validates the expected image shape.
x = (np.array(x).astype("float32") / 255.0).reshape(-1, IMG_SIZE, IMG_SIZE, NUM_CHANNELS)
y = np.array(y)

# The data was shuffled when it was built, so a positional split is enough:
# the first 20% is held out and the rest is used for training.
x_train = x[split_size:]
y_train = y[split_size:]

x_test = x[:split_size]
y_test = y[:split_size]

assert len(x_train) + len(x_test) == len(x), "train/test split lost samples"
assert len(x) == len(y), "features and labels are misaligned"

# Same architecture as the grayscale experiment, with a 3-channel input.
model = keras.Sequential([
    keras.Input(shape=(IMG_SIZE, IMG_SIZE, NUM_CHANNELS)),

    layers.Conv2D(54, 3, activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Dropout(.5),
    layers.Conv2D(108, 3, activation="relu"),
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Dropout(.5),
    layers.Conv2D(216, 3, activation="relu"),  #, kernel_regularizer=regularizers.l2(0.0125)
    layers.MaxPooling2D(pool_size=(2, 2)),

    layers.Flatten(),
    layers.Dense(216, activation="relu"),
    layers.Dropout(.6),
    layers.Dense(NUM_CLASSES),  # raw logits; softmax is folded into the loss
])
# summary() prints the table itself and returns None, so it is not wrapped in print().
model.summary()

model.compile(
    # Labels are integer class ids and the last layer emits logits.
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    # `learning_rate` replaces the deprecated `lr` alias, which current Keras rejects.
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=["accuracy"]
)

# NOTE(review): the held-out split doubles as validation data here, so the
# per-epoch validation metrics and the final evaluation use the same samples.
# Carve out a separate validation split if these curves drive model selection.
model.fit(x_train, y_train, batch_size=64, epochs=40, verbose=2,
          validation_data=(x_test, y_test))

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 48, 48, 54)        1512      
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 24, 24, 54)        0         
_________________________________________________________________
dropout (Dropout)            (None, 24, 24, 54)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 22, 22, 108)       52596     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 11, 11, 108)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 11, 11, 108)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 9, 9, 216)         2

<tensorflow.python.keras.callbacks.History at 0x7f9191fa5da0>

In [None]:
# Final [loss, accuracy] of the colour model on the held-out 20% split.
# These are the same samples that were passed as validation_data during fit.
model.evaluate(x_test, y_test, batch_size=64, verbose=2)

82/82 - 0s - loss: 0.9066 - accuracy: 0.7163


[0.9066025018692017, 0.7163323760032654]