# Data preparation 

Let's first classify only 30 dog breeds from the Stanford Dogs dataset.
These are the breeds with the most images available (number of images above the third quartile).

In [None]:
%load_ext autoreload
%autoreload 2
import os 
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf 
import cv2 

from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import img_to_array, array_to_img, load_img
from sklearn.model_selection import train_test_split

from common_params import data_dir, img_dir


: 

: 

In [None]:
# Print the installed TensorFlow version (handy when checking which APIs are available).
print(tf.__version__)

: 

In [None]:
# Load the table of selected breeds from the CSV in the current working directory.
# Later cells read its 'folder_name' and 'dog_breed' columns.
data = pd.read_csv("selected_breed.csv")


: 

In [None]:
# Remove the 'Unnamed: 0' column (presumably a stale index written by an
# earlier `to_csv` call — confirm against the CSV).
# Re-assigning instead of using `inplace=True`, together with
# `errors='ignore'`, keeps the cell idempotent: running it a second time no
# longer raises a KeyError because the column is already gone.
data = data.drop(columns='Unnamed: 0', errors='ignore')
data.head()

: 

### Load images 

Let's begin with the imbalanced dataset.
I chose to use `image_dataset_from_directory` instead of `ImageDataGenerator` because of the speed benchmark [here](https://towardsdatascience.com/what-is-the-best-input-pipeline-to-train-image-classification-models-with-tf-keras-eb3fe26d3cc5).

In [None]:
def image_cropping():
    """Placeholder for the image-cropping preprocessing step.

    The cell previously contained only a bare signature without a colon or a
    body, which is a SyntaxError and stops "Restart & Run All" at this cell.
    Until the cropping logic is written, the function is defined as an
    explicit stub so the notebook keeps running and any accidental call fails
    loudly.

    Raises:
        NotImplementedError: always, because no cropping logic exists yet.
    """
    raise NotImplementedError("image_cropping() has not been implemented yet")

: 

In [None]:
# Side length (in pixels) of the square images requested from the dataset loader.
IMG_SIZE = 256

: 

In [None]:
# Build a batched tf.data pipeline from the class sub-folders of `img_dir`;
# every image is resized to IMG_SIZE x IMG_SIZE while loading.
keras_ds = tf.keras.preprocessing.image_dataset_from_directory(
    img_dir,
    batch_size=32,
    image_size=(IMG_SIZE, IMG_SIZE),
)

# Read the class list *before* prefetching: `prefetch` returns a plain
# Dataset that no longer exposes the `class_names` attribute.
class_names = keras_ds.class_names

# Count the classes the loader actually found. `len(os.listdir(img_dir))`
# would also count stray non-folder entries (e.g. `.DS_Store`, `Thumbs.db`).
num_classes = len(class_names)

# `tf.data.AUTOTUNE` is the stable name of `tf.data.experimental.AUTOTUNE`.
keras_ds = keras_ds.prefetch(buffer_size=tf.data.AUTOTUNE)

: 

In [None]:
# Iterate over the dataset as NumPy arrays and pull one batch for inspection.
data_iterator = keras_ds.as_numpy_iterator()
batch = next(data_iterator)

: 

In [None]:
# `batch` is an (images, labels) pair whose two arrays have different shapes,
# so `np.array(batch)` cannot build a regular array from it and fails.
# Inspect the shape of each part instead of dumping the raw values.
batch_images, batch_labels = batch
batch_images.shape, batch_labels.shape

: 

In [None]:
# Preview the second image of one batch with matplotlib (already imported), so
# the cell also runs outside Google Colab. `take(1)` limits the preview to a
# single batch; looping over all of `keras_ds` rendered one figure per batch
# of the entire dataset.
for batch_x, batch_y in keras_ds.take(1):
    # The loader yields float RGB pixels in the 0-255 range; cast to uint8 so
    # imshow interprets the values correctly.
    x = batch_x[1].numpy().astype("uint8")  # access second image of batch
    fig, ax = plt.subplots()
    ax.imshow(x)
    ax.set_title(f"label index: {batch_y[1].numpy()}")
    ax.axis("off")
    plt.show()

: 

In [None]:
# First element of the first component of `batch` — presumably the first image,
# since the dataset loader yields (images, labels) pairs (confirm).
batch[0][0]

: 

In [None]:

# The dataset loader already returns RGB data, so no OpenCV colour conversion
# is needed. The previous call passed `cv2.IMREAD_ANYCOLOR` — an `imread`
# flag, not a `cvtColor` code — whose integer value coincides with
# `cv2.COLOR_BGR2RGB`, so it silently swapped the red and blue channels.
# Casting the 0-255 float pixels to uint8 also stops imshow from clipping
# them to the [0, 1] range (which renders an almost white image).
plt.imshow(batch[0][0].astype("uint8"));

: 

In [None]:
# Load every image of the selected breeds into memory together with its label.
data_imgs = []
data_labels = []
for folder_name, breed in zip(data['folder_name'], data['dog_breed']):
    images_path = os.path.join(img_dir, folder_name)
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and therefore the seeded train/test split) reproducible.
    for file_name in sorted(os.listdir(images_path)):
        image_path = os.path.join(images_path, file_name)
        img = cv2.imread(image_path)  # returns None when the file cannot be decoded
        if img is None:
            # Skip unreadable / non-image files so images and labels stay aligned.
            continue
        # Store the decoded image (the list used to be appended to itself here).
        data_imgs.append(img)
        data_labels.append(breed)


: 

In [None]:


# Hold out 33% of the samples for testing. `stratify` keeps the breed
# proportions the same in both splits, which matters because the classes are
# imbalanced; `random_state` makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data_imgs,
    data_labels,
    test_size=0.33,
    random_state=42,
    stratify=data_labels,
)

: 

In [None]:
# Report the size of each split, in the order X_train, X_test, y_train, y_test.
for split in (X_train, X_test, y_train, y_test):
    print(len(split))

: 

# Data augmentation 

: 

In [None]:
# Ask TensorFlow which GPUs it can see, via the stable (non-experimental) API.
physical_devices = tf.config.list_physical_devices('GPU')
print(f"Number of GPU available : {len(physical_devices)}")

: 

In [None]:
# Sample image used to preview the augmentation. Each path component is passed
# to os.path.join separately instead of embedding a Windows-only "\\"
# separator, so the path is also valid on Linux, macOS and Colab.
test_img = os.path.join(img_dir, "n02085620-Chihuahua", "n02085620_7.jpg")

: 

In [None]:


# Random transformations applied to every generated sample.
datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True)

img = load_img(test_img)  # this is a PIL image
x = img_to_array(img)  # NumPy array; (height, width, 3) with the default channels-last setting
x = x.reshape((1,) + x.shape)  # add a leading batch axis: (1,) + image shape

# Write the previews into the same folder the display cell reads from
# (os.path.join(data_dir, "generated/")) and make sure it exists before saving.
generated_dir = os.path.join(data_dir, "generated")
os.makedirs(generated_dir, exist_ok=True)

# the .flow() command below generates batches of randomly transformed images
# and saves the results to `generated_dir`
i = 0
# The loop variable is deliberately not called `batch`: that name holds the
# dataset batch inspected in earlier cells and must not be overwritten here.
for augmented_batch in datagen.flow(x, batch_size=1, save_to_dir=generated_dir, save_prefix='dog', save_format='jpeg'):
    i += 1
    if i > 20:
        break  # otherwise the generator would loop indefinitely (21 images are produced)

: 

In [None]:
from common_plots import display_images

: 

In [None]:
# Collect the path of every file found in the augmentation output folder.
directory = os.path.join(data_dir, "generated/")
img_list = [os.path.join(directory, name) for name in os.listdir(directory)]
len(img_list)

: 

In [None]:
# Show the generated images with the project's plotting helper.
# NOTE(review): the arguments 2 and 4 are presumably the grid rows and columns —
# confirm against common_plots.display_images.
display_images(img_list,2,4)

: 

# Convolutional neural network 

In [None]:
# Import from `tensorflow.keras` (as the rest of the notebook does through `tf`)
# so layers and model come from the same Keras implementation.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense

# Number of breeds to predict; must equal the number of distinct labels in the
# training data.
NUM_BREEDS = 30

model = Sequential()
# The input size follows IMG_SIZE so the network accepts the images produced by
# the dataset loader (a hard-coded 224 did not match the 256-pixel images).
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE, IMG_SIZE, 3)))  # kernel = 3*3
model.add(MaxPooling2D((2, 2), strides=(2, 2), padding="same"))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(512, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(30, activation='relu'))
model.add(Dropout(0.5))
# Each image belongs to exactly one breed, so the output is a softmax
# distribution over the classes (sigmoid + binary cross-entropy treats every
# class as an independent yes/no problem and reports a misleading accuracy).
model.add(Dense(NUM_BREEDS, activation='softmax'))

# `sparse_categorical_crossentropy` expects integer class indices, which is
# what `image_dataset_from_directory` yields by default. Switch to
# `categorical_crossentropy` if the labels are one-hot encoded instead.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

: 

In [None]:
# Print the layer-by-layer summary of the model defined above.
model.summary()

: 