## Library Import

In [None]:
# Print the version of the `python` executable found on the shell PATH.
!python --version

In [None]:
import os
import sys
import json
from collections import Counter
import cv2
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
from matplotlib import pyplot as plt
from PIL import Image
from sklearn.model_selection import train_test_split

#from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img 
# tensorflow.keras.utils provides array_to_img, img_to_array and load_img, whereas keras.preprocessing.image provides ImageDataGenerator.
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import array_to_img, img_to_array, load_img
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout,Flatten, Dense
from keras.utils import to_categorical

## Exploring the dataset

### Counting the images in the dataset

In [None]:
# List the contents of the dataset directory (the cells below read labels.csv and the files/ folder from it).
!ls ../input/flower-classification-dataset/

In [None]:
# Read the label table that accompanies the image files and preview its last five rows.
path = "../input/flower-classification-dataset/"
labels_csv = os.path.join(path, "labels.csv")
df = pd.read_csv(labels_csv)
df.tail()

#### Loading the dataset

In [None]:
# Load every labelled image as a 128x128 RGB array, keeping X and y aligned:
# an image that cannot be read is reported and skipped together with its label.
X = []
y = []
for image, label in tqdm(zip(df.image_id.values, df.category.values), total=len(df)):
    image_path = os.path.join(path, f"files/{image}.jpg")
    try:
        # The context manager closes the file handle as soon as the pixels are copied.
        # convert("RGB") guarantees three channels, so every array has the same
        # (128, 128, 3) shape and np.array(X) below can stack them.
        with Image.open(image_path) as img:
            xt = np.array(img.convert("RGB").resize((128, 128)))
    except OSError as err:
        # Missing, truncated or unidentifiable image files surface as OSError.
        # Catching only that (instead of a bare except) keeps KeyboardInterrupt
        # and genuine programming errors visible.
        print(f"{image_path}: {err!r}")
        continue
    X.append(xt)
    y.append(label)

X = np.array(X)
y = np.array(y)
X.shape, y.shape

<p>Here there are two columns in the dataset, the image_id contains the image name and the category contains the image class.</p>

**Note**
The next question is how many images there are in each class, so that we get an understanding of the image distribution.
If the image distribution is imbalanced, the classifier model can end up biased towards the over-represented classes.

### Exploring the dataset

<p>Checking the total number of images present in the files folder</p>

In [None]:
# Count the entries in the image folder.
image_dir = os.path.join(path, "files")
files = os.listdir(image_dir)
n_files = len(files)
print(f"The total number of images in the dataset are: {n_files}")

<p>Checking whether the number of images in the dataset matches the number of labels in the dataset</p>

In [None]:
# Number of rows in the label table; compare it with the number of files in the image folder.
print(f"The total number of points in the label are: {len(df)}")

## Splitting the dataset

In [None]:
# Hold out 20% of the images for validation. A fixed random_state makes the split,
# and therefore every metric computed from it, reproducible across kernel restarts.
train_X, test_X, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# One-hot encode the labels. num_classes is derived once from the full label array so
# both splits get the same number of columns even when the highest class id happens to
# be absent from one of them. It must equal the width of the model's output layer.
# NOTE: this cell overwrites train_y/test_y, so re-run the split cell before re-running it.
num_classes = int(y.max()) + 1
train_y = to_categorical(train_y, num_classes=num_classes)
test_y = to_categorical(test_y, num_classes=num_classes)
train_y.shape, test_y.shape # Returning the shape of train_y and test_y

In [None]:
# Checking the shape of train_X, test_X
train_X.shape, test_X.shape

## Exploring the images in the dataset

In [None]:
# Show the first nine training images in a 3x3 grid.
plt.figure(figsize=(12, 12))
for cell in range(9):
    # Subplot positions are 1-based: row-major slot cell + 1 of the 3x3 grid.
    plt.subplot(3, 3, cell + 1)
    # Draw the raw pixel data of this training image.
    plt.imshow(train_X[cell])
plt.show()

#### Let's explore the class distribution for now

In [None]:
# Bar chart of the number of images per category, most frequent category first.
plt.figure(figsize=(17, 7))
class_counts = df["category"].value_counts()
class_counts.plot.bar()

In [None]:
# Collect the height and width of every file in the image folder.
heights = []
widths = []
image_dir = os.path.join(path, "files")
for image in tqdm(os.listdir(image_dir)):
    # Opening is enough to read the size; the context manager closes the file again.
    with Image.open(os.path.join(image_dir, image)) as img:
        # PIL reports size as (width, height), in that order.
        wt, ht = img.size
    heights.append(ht)
    widths.append(wt)

In [None]:
# Tally how many images share each height and each width.
Counter(heights), Counter(widths) 

<p>Counter() counts the number of occurrences of each element, in this case the heights and widths of the images.</p>
<p>The output shows that there are 18540 images with a height of 500 pixels and a width of 500 pixels.
That means all the images in the dataset have the same height and width.</p>

## Understanding Image transformation

In [None]:
# Start from an empty preview/ directory: delete any existing one, then recreate it.
!rm -rf preview
!mkdir preview

<p>ImageDataGenerator is a Keras class that is used to preprocess and augment images in order to improve model performance.</p>

In [None]:
# Augmentation settings used only to illustrate what ImageDataGenerator produces.
datagen = ImageDataGenerator(
        rotation_range = 40, # random rotation, in degrees, within -40 to +40
        width_shift_range = 0.2, # random horizontal shift, as a fraction of the total width
        height_shift_range = 0.2, # random vertical shift, as a fraction of the total height
        shear_range = 0.2, # random shear, which distorts the shape of the image
        zoom_range = 0.2, # random zoom in or out, with a factor between 0.8 and 1.2
        horizontal_flip = True, # randomly flips the image horizontally
        fill_mode = "nearest") # pixels exposed by a transform are filled from the nearest existing pixel

img = load_img(os.path.join(path, "files/0.jpg")) # loads one sample image from the files folder
x = img_to_array(img) # NumPy array of shape (height, width, channels) under the default channels_last format
x = np.expand_dims(x, axis=0) # add a leading batch axis -> (1, height, width, channels)

# flow() keeps yielding augmented batches indefinitely, so the loop has to stop itself.
# Each batch that is drawn is also written to preview/ as an "f"-prefixed jpg;
# the loop draws 21 batches before breaking.
i = 0
for batch in datagen.flow(x, batch_size=1,
                          save_to_dir="preview", save_prefix="f", save_format="jpg"):
    i += 1
    if i > 20:
        break

In [None]:
# List the augmented images that were written to preview/.
!ls preview

In [None]:
# Read the augmented previews back from disk, resized to 128x128 for display.
x = [
    np.array(Image.open(os.path.join("preview", name)).resize((128, 128)))
    for name in os.listdir("preview")
]

# Show the first nine previews in a 3x3 grid.
plt.figure(figsize=(12, 12))
for slot in range(9):
    plt.subplot(3, 3, slot + 1)
    plt.imshow(x[slot])
plt.show()

## Building our Classifier model

In [None]:
# Four Conv -> ReLU -> MaxPool stages with 16, 32, 64 and 128 filters,
# followed by a dense classifier head with a 103-way softmax output.
model = Sequential()
for stage, n_filters in enumerate((16, 32, 64, 128)):
    # Only the very first layer declares the input shape (128x128 RGB images).
    first_layer_kwargs = {"input_shape": (128, 128, 3)} if stage == 0 else {}
    model.add(Conv2D(n_filters, (3, 3), padding='same', **first_layer_kwargs))
    model.add(Activation("relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))

# Classifier head: flatten the feature maps, one hidden layer with dropout, then softmax.
model.add(Flatten())
model.add(Dense(256))
model.add(Activation("relu"))
model.add(Dropout(0.5))
model.add(Dense(103))
model.add(Activation("softmax"))

model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

## Training and evaluating the model

In [None]:
batch_size = 16

# Training images are multiplied by 1/255 and randomly sheared, zoomed and flipped.
augmentation_options = dict(shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Held-out images only get the same 1/255 rescaling, with no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Shuffled, augmented batches of `batch_size` training samples.
train_generator = train_datagen.flow(train_X, train_y, batch_size=batch_size, shuffle=True)

# Held-out samples in their original order; no batch_size is passed, so flow()'s default applies.
validation_generator = test_datagen.flow(test_X, test_y, shuffle=False)

In [None]:
# Train for 50 epochs, validating on the held-out generator after each epoch.
# The object returned by fit() is kept in `gen`; its .history is plotted below.
fit_options = {"epochs": 50, "validation_data": validation_generator}
gen = model.fit(train_generator, **fit_options)

## Plotting the Performance of the model

In [None]:
# Accuracy per epoch: training curve first, then the validation curve.
for metric_key in ('accuracy', 'val_accuracy'):
    plt.plot(gen.history[metric_key])
plt.title('model accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(['train', 'test'], loc='upper left')
plt.show()

In [None]:
# Loss per epoch: training curve first, then the validation curve.
for metric_key in ('loss', 'val_loss'):
    plt.plot(gen.history[metric_key])
plt.title('model loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.legend(['train', 'test'], loc='upper left')
plt.show()