# Bird Species Prediction

In [None]:
import pandas as pd
import numpy as np
from PIL import Image
import cv2 as cv
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import random
import shutil

from keras.models import Sequential
from keras.layers import Conv2D, MaxPool2D, Dense, Dropout, Flatten
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

## Getting Data from Kaggle

In [None]:
# ! mkdir ~/.kaggle/
# ! cp ./kaggle.json ~/.kaggle/
# ! chmod 600 ~/.kaggle/kaggle.json

In [None]:
# ! kaggle datasets download -d veeralakrishna/200-bird-species-with-11788-images

In [None]:
# ! unzip 200-bird-species-with-11788-images.zip

In [None]:
# shutil.unpack_archive("./CUB_200_2011.tgz", "./train")

In [None]:
# ! rm 200-bird-species-with-11788-images 

## Data Preprocessing

In [None]:
# Folder of a single sample class, used below to preview a few images.
path = '/content/train/CUB_200_2011/images/001.Black_footed_Albatross'

In [None]:
## Checking the shape of images
# w = []
# h = []
# for i in range(0, 100):
#   img_path = os.path.join(path, random.choice(os.listdir(path)))
#   img = plt.imread(img_path)
#   h.append(img.shape[0])
#   w.append(img.shape[1])


In [None]:

# Preview nine randomly chosen images (with replacement) from the sample
# class folder in a 3x3 grid.
image_files = os.listdir(path)  # list the folder once, not once per image
plt.figure(figsize=(10, 10))
for i in range(1, 10):
  plt.subplot(3, 3, i)  # nine images fill a 3x3 grid exactly
  img_path = os.path.join(path, random.choice(image_files))
  img = plt.imread(img_path)
  plt.imshow(img)
plt.tight_layout()  # adjust spacing once, after every subplot exists
plt.show()

In [None]:
# TRAIN_PATH = "/content/train/CUB_200_2011/images"
# TEST_PATH = "/content/test/segmentations"

In [None]:
# train = caer.preprocess_from_dir(TRAIN_PATH, CLASS_NAMES, IMG_SIZE=(255, 255), channels=3)

In [None]:
# path = '/content/train/CUB_200_2011/images'
# image_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255, validation_split=0.2)
# train_dataset = image_generator.flow_from_directory(batch_size=128,
#                                                  directory=path,
#                                                  shuffle=True,
#                                                  target_size=(224, 224), 
#                                                  classes=CLASS_NAMES[:5],
#                                                  subset="training",
#                                                  color_mode='grayscale')

# validation_dataset = image_generator.flow_from_directory(batch_size=128,
#                                                  directory=path,
#                                                  classes=CLASS_NAMES[:5],
#                                                  shuffle=True,
#                                                  target_size=(224, 224), 
#                                                  subset="validation",
#                                                  color_mode='grayscale')

In [None]:
%%time
path = "/content/train/CUB_200_2011/images"

# Load the first 100 class folders as 224x224 grayscale arrays.
# Folders are sorted so the same classes are selected on every run
# (os.listdir returns entries in arbitrary order).
class_dirs = sorted(
    d for d in os.listdir(path) if os.path.isdir(os.path.join(path, d))
)[:100]

x = []  # image arrays
y = []  # class-folder name for each image, kept aligned with x
for i in class_dirs:
  img_class = os.path.join(path, i)
  for j in os.listdir(img_class):
    img_path = os.path.join(img_class, j)
    img_array = cv.imread(img_path)
    if img_array is None:
      # cv.imread returns None for files it cannot decode; skip them instead
      # of crashing in cvtColor.
      continue
    gray = cv.cvtColor(img_array, cv.COLOR_BGR2GRAY)
    resized_img = cv.resize(gray, (224, 224))
    x.append(resized_img)
    # Record the label only after the image loaded, so x and y stay in step.
    y.append(i)
    

In [None]:
# Sanity check: display the loaded image at index 200 with a gray colormap.
plt.imshow(x[200], cmap='gray')

In [None]:
# Turn the Python lists of images and labels into NumPy arrays.
x, y = np.array(x), np.array(y)


In [None]:
# Encode the class-folder names in y as integer ids in Y.
label_encoder = LabelEncoder()
Y = label_encoder.fit_transform(y)

In [None]:
# Normalize pixel values by dividing by 255. Casting to float32 first keeps
# the result at half the memory of the float64 array a plain division yields.
X = x.astype(np.float32) / 255.0

In [None]:
# Drop the raw arrays and the loader's leftover loop variables.
del x, y
del img_class, img_path, img_array

In [None]:
# Unpack the three dimensions of the image stack: (count, axis 1, axis 2).
# NOTE(review): axis 1 of an image array is rows (height), so `w` and `h` are
# swapped in name; harmless here because images were resized to 224x224.
num, w, h = X.shape

In [None]:
# Add a trailing single-channel axis, giving shape (num, w, h, 1).
target_shape = (num, w, h, 1)
X = np.reshape(X, target_shape)

In [None]:
# Display the array shape after the reshape above.
X.shape

In [None]:
# Splitting data: hold out 20% of the samples.
# stratify=Y keeps each class's proportion the same in both splits, and a
# fixed random_state makes the split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=42)

In [None]:
# The unsplit arrays are no longer referenced below; remove the names.
del X
del Y

In [None]:
# Number of distinct class ids present in the training labels.
len(np.unique(y_train))

# Building model

In [None]:
# Start with an empty Sequential model; layers are added in the next cell.
model = Sequential()

In [None]:
# Adding layers: four conv -> max-pool -> dropout stages, then a dense head.

# (filters, kernel_size, dropout_rate) for each convolutional stage, in order.
conv_stages = [
    (128, (2, 2), 0.2),
    (128, (3, 3), 0.3),
    (64, (4, 4), 0.3),
    (32, (3, 3), 0.3),
]

for stage_idx, (n_filters, kernel, drop_rate) in enumerate(conv_stages):
  # Only the first layer declares the input shape (224x224, one channel).
  first_layer_args = {'input_shape': (224, 224, 1)} if stage_idx == 0 else {}
  model.add(Conv2D(filters=n_filters, kernel_size=kernel, padding='same',
                   activation='relu', **first_layer_args))
  model.add(MaxPool2D(pool_size=(2, 2)))
  model.add(Dropout(drop_rate))

# Classifier head: flatten, one hidden dense layer, 100-way softmax output.
model.add(Flatten())
model.add(Dense(units=256, activation='relu'))
model.add(Dropout(0.4))
model.add(Dense(units=100, activation='softmax'))

In [None]:
# Compile: Adam with learning rate 0.005, sparse categorical cross-entropy
# loss, and accuracy as the reported metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.005),
    loss="sparse_categorical_crossentropy",
    metrics=['accuracy'],
)

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# Fit the model for 100 epochs, using the held-out split as validation data;
# the returned History object is kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    epochs=100,
    validation_data=(x_test, y_test),
)