# In [None]:
# Import necessary libraries for data manipulation, visualization, and machine learning
import matplotlib.pyplot as plt
import numpy as np
import cv2
import os
import PIL
import tensorflow as tf

# Import specific components from TensorFlow and Keras
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

# Remote archive holding the flower photographs used throughout this script.
dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"

# pathlib is the standard-library module for working with filesystem paths.
import pathlib

# Fetch the archive (cache_dir='.' keeps it under the current directory),
# unpack it (untar=True), and wrap the location returned by get_file in a
# pathlib.Path so it can be globbed later on.
data_dir = pathlib.Path(
    tf.keras.utils.get_file(
        'flower_photos',
        origin=dataset_url,
        cache_dir='.',
        untar=True,
    )
)
# Bare expression: echoes the path when run as a notebook cell.
data_dir

# A pathlib.Path can be globbed, e.g. to collect every file ending in .jpg
# that sits one directory level below data_dir.
jpg_pattern = "*/*.jpg"
list(data_dir.glob(jpg_pattern))

# Number of .jpg images found
image_count = sum(1 for _ in data_dir.glob(jpg_pattern))
image_count

# Globbing a single sub-directory yields the files belonging to one label.
roses = list(data_dir.glob("roses/*"))
roses[:5]

# PIL (the Python Imaging Library) opens an image directly from its path.
# Open the first rose image.
first_rose = str(roses[0])
PIL.Image.open(first_rose)

# Same for the first tulip image.
tulips = list(data_dir.glob("tulips/*"))
first_tulip = str(tulips[0])
PIL.Image.open(first_tulip)

# Map every flower category to the list of paths found in its sub-directory
# of data_dir.
flowers_images_dict = {
    category: list(data_dir.glob(category + '/*'))
    for category in ('roses', 'daisy', 'dandelion', 'sunflowers', 'tulips')
}

# Look up the paths collected for one category
flowers_images_dict["roses"]

# Integer class id for each flower category; ids follow the listing order,
# starting at 0.
flowers_labels_dict = {
    category: class_id
    for class_id, category in enumerate(
        ('roses', 'daisy', 'dandelion', 'sunflowers', 'tulips')
    )
}

# Path of the first image listed under "roses"
flowers_images_dict["roses"][0]

# cv2.imread loads an image file into an OpenCV image (a NumPy array).
# It is handed a plain string here, so the Path object is converted with
# str() before the call.
first_rose_path = str(flowers_images_dict["roses"][0])
img = cv2.imread(first_rose_path)
img

# Dimensions of the loaded array
img.shape

# The photos come in different sizes, while training needs every image in
# the same dimensions; cv2.resize produces a 180x180 version of the image.
target_size = (180, 180)
resized_preview = cv2.resize(img, target_size)
resized_preview.shape

# Show how many files were collected for each flower category
for flowers_name, images in flowers_images_dict.items():
    print(flowers_name)
    print(len(images))

# Build the sample list (x) and the label list (y), resizing every image to
# 180x180 so that all samples share one shape.
x, y = [], []

for flowers_name, images in flowers_images_dict.items():
    # flowers_images_dict and flowers_labels_dict use the same keys (the
    # category names such as 'roses', 'daisy', ...), so the name coming from
    # one dictionary can be used to look up the numeric label in the other.
    label = flowers_labels_dict[flowers_name]
    for image in images:
        img = cv2.imread(str(image))
        # cv2.imread does not raise on an unreadable or non-image file; it
        # returns None. The glob above matches every file in the folder, so
        # skip such entries instead of letting cv2.resize fail on None.
        if img is None:
            print("Skipping unreadable image:", image)
            continue
        resized_img = cv2.resize(img, (180, 180))
        x.append(resized_img)
        y.append(label)

# The list-based x and y are turned into NumPy arrays for the steps below.
x, y = np.array(x), np.array(y)

# Hold out part of the data for testing; random_state=0 keeps the split
# reproducible between runs.
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)
# First sample of the test split
x_test[0]

# The pixel data is unscaled (0-255), so divide by 255 to bring it into
# the [0, 1] range before training.
# Cast to float32 first: dividing an integer array by 255 yields float64,
# which needs twice the memory of float32 for these large image arrays.
# NOTE(review): Keras layers compute in float32 by default, so the extra
# float64 precision should not be needed - confirm if a dtype policy is set.
x_train_scaled = x_train.astype(np.float32) / 255.0
x_test_scaled = x_test.astype(np.float32) / 255.0

# Inspect one scaled sample
x_train_scaled[0]

# Build the convolutional neural network (CNN) with the Keras Sequential API.
# There are 5 flower categories, hence 5 output classes.
num_classes = 5

model = Sequential()

# Three convolution + max-pooling stages with 16, 32 and 64 filters
for n_filters in (16, 32, 64):
    model.add(layers.Conv2D(n_filters, 3, padding="same", activation="relu"))
    model.add(layers.MaxPooling2D())

# Flatten the convolutional output to 1D so it can feed the Dense layers
model.add(layers.Flatten())
model.add(layers.Dense(128, activation="relu"))
# Output layer: one value per class, created without an activation
model.add(layers.Dense(num_classes))

# from_logits=True tells the loss that the model output has not been passed
# through softmax; the loss works on the raw predictions directly, which can
# be better for numerical stability and efficiency during training.
sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer="adam", loss=sparse_ce, metrics=["accuracy"])

model.fit(x_train_scaled, y_train, epochs=30)

# Measure loss and accuracy on the held-out test split
model.evaluate(x_test_scaled, y_test)

# Run the trained model on the scaled test images
predictions = model.predict(x_test_scaled)
predictions

# The final Dense layer was created without an activation (Keras then uses a
# linear activation), so each row of predictions holds raw scores rather than
# probabilities. Softmax converts the scores of the second test image into
# probability scores.
second_image_logits = predictions[1]
score = tf.nn.softmax(second_image_logits)
score
# Index of the highest-probability class for that image
np.argmax(score)

# True label of the same image, to compare against the predicted index
y_test[1]

# The original notes report only about 50% test accuracy for the model above;
# data augmentation is introduced here to improve on that.
# NOTE(review): layers.experimental.preprocessing is the older location of
# these layers; recent TensorFlow releases expose them directly as
# layers.RandomZoom etc. - confirm against the installed version.
data_augmentation = keras.Sequential([
    layers.experimental.preprocessing.RandomZoom(0.9),
])

# The images in x were loaded with cv2.imread, which stores channels in BGR
# order, whereas matplotlib interprets arrays as RGB. Convert before
# displaying so the colours are shown correctly.

# Original Image
plt.axis('off')
plt.imshow(cv2.cvtColor(x[0], cv2.COLOR_BGR2RGB))

# Newly generated training sample using data augmentation.
# Only a one-image batch (x[:1]) is augmented: just the first result is shown,
# so there is no need to push the whole dataset through the layer.
augmented = data_augmentation(x[:1])[0].numpy().astype("uint8")
plt.axis('off')
plt.imshow(cv2.cvtColor(augmented, cv2.COLOR_BGR2RGB))

# The original notes observe that RandomZoom shows the image zoomed out here.

# There are different data augmentations like contrast, rotate, etc.
# This variant uses a smaller zoom factor; data_augmentation is redefined
# again further below before it is used for training.
data_augmentation = keras.Sequential([
    layers.experimental.preprocessing.RandomZoom(0.3),
])

# Augmentation pipeline applied to the training data: random horizontal flip,
# random rotation and random zoom, built with the Keras Sequential API.
img_height = img_width = 180
preprocessing = layers.experimental.preprocessing

data_augmentation = keras.Sequential()
data_augmentation.add(
    preprocessing.RandomFlip("horizontal", input_shape=(img_height, img_width, 3))
)
data_augmentation.add(preprocessing.RandomRotation(0.1))
data_augmentation.add(preprocessing.RandomZoom(0.1))

# Second model: same CNN as before, preceded by the augmentation pipeline and
# with a Dropout layer after the convolutional stages.
num_classes = 5

model = Sequential()
model.add(data_augmentation)
for n_filters in (16, 32, 64):
    model.add(layers.Conv2D(n_filters, 3, padding='same', activation='relu'))
    model.add(layers.MaxPooling2D())
model.add(layers.Dropout(0.2))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(num_classes))

# Loss works on raw model outputs (from_logits=True); Adam optimizer;
# accuracy is tracked as the evaluation metric.
sparse_ce = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=sparse_ce, metrics=['accuracy'])

# Train on the scaled training data for 30 epochs
model.fit(x_train_scaled, y_train, epochs=30)

# Evaluate on the held-out test data; compare with the earlier model's result
# to judge the effect of augmentation and dropout.
model.evaluate(x_test_scaled, y_test)
