<div style="background-color: #c3e8fb; padding: 10px; color: #144d84;">
<b>Exercise 3) Flower Classification</b><br>
Using the flower dataset, write a classifier to identify these flowers.

The network structure should be similar to VGG, meaning each block should have twice as many filters as the previous one, all convolutional layers should be 3x3 with 'same' padding, and all pooling layers should be 2x2.
Use batch normalization.
During training, use the ReduceLROnPlateau callback with a patience of 5 epochs and the EarlyStopping callback with a patience of 10 epochs.
Use the restore_best_weights=True parameter in the EarlyStopping callback. What does this parameter do?

</div>

# Phase 1: Data loading and preprocessing

In [1]:
# Alternative download via tf.keras.utils.get_file, left disabled;
# the archive is fetched with wget in the next cell instead.
# import tensorflow as tf

# flowers_root = tf.keras.utils.get_file(
#     'flower_photos',
#     'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
#     untar=True)

In [2]:
!wget https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz

--2024-10-27 21:29:26--  https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.24.207, 142.251.10.207, 142.251.12.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.24.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 228813984 (218M) [application/x-compressed-tar]
Saving to: ‘flower_photos.tgz’


2024-10-27 21:29:38 (20.2 MB/s) - ‘flower_photos.tgz’ saved [228813984/228813984]



In [3]:
!tar  -xzf flower_photos.tgz

In [4]:
import os
# Show the top-level entries of the extracted archive
# (the class folders plus any stray files).
os.listdir('flower_photos')

['dandelion', 'LICENSE.txt', 'sunflowers', 'tulips', 'daisy', 'roses']

In [5]:
!rm flower_photos/LICENSE.txt

In [6]:
# Number of images per class, printed in alphabetical class order
for class_name in ('daisy', 'dandelion', 'roses', 'sunflowers', 'tulips'):
    print(len(os.listdir(f'./flower_photos/{class_name}')))

633
898
641
699
799


In [7]:
# renaming with "type name" and "numbers"
# eg. daisy_67
# eg. roses_600

import os
import random

# Set the base directory
original_flower_dir = "./flower_photos"

# Prefix for the intermediate names used while a folder is being renamed
_TEMP_PREFIX = "__renaming__"


def _rename_without_overwrite(src_path, dst_path):
    """Rename ``src_path`` to ``dst_path``, refusing to replace an existing file.

    ``os.rename`` silently replaces an existing destination file on POSIX,
    which would destroy an image; raise ``FileExistsError`` instead.
    """
    if src_path != dst_path and os.path.exists(dst_path):
        raise FileExistsError(f"refusing to overwrite {dst_path}")
    os.rename(src_path, dst_path)


def rename_images_sequentially(base_dir):
    """Rename the files of every class folder to ``<class>_<index>.<extension>``.

    Each sub-directory of ``base_dir`` is treated as one class. Its files are
    taken in sorted order and numbered from 0, so the result is deterministic.
    Entries of ``base_dir`` that are not directories are ignored.

    The cell is safe to re-run:
    * a folder whose files already carry the target names is left untouched;
    * otherwise files are first moved to temporary names and only then to the
      final names, so a final name can never collide with a file that has not
      been renamed yet.

    Raises:
        FileExistsError: if a rename would replace an existing file.
    """
    for folder_name in sorted(os.listdir(base_dir)):
        folder_path = os.path.join(base_dir, folder_name)

        # Check if the item is a directory
        if not os.path.isdir(folder_path):
            continue

        filenames = sorted(os.listdir(folder_path))
        new_filenames = [
            f"{folder_name}_{index}.{filename.split('.')[-1]}"
            for index, filename in enumerate(filenames)
        ]

        # Already renamed: numbering again would only shuffle the indices
        if set(filenames) == set(new_filenames):
            continue

        # Phase 1: move every file out of the way to a unique temporary name
        temp_filenames = [_TEMP_PREFIX + filename for filename in filenames]
        for filename, temp_filename in zip(filenames, temp_filenames):
            _rename_without_overwrite(
                os.path.join(folder_path, filename),
                os.path.join(folder_path, temp_filename),
            )

        # Phase 2: give every file its final sequential name
        for temp_filename, new_filename in zip(temp_filenames, new_filenames):
            _rename_without_overwrite(
                os.path.join(folder_path, temp_filename),
                os.path.join(folder_path, new_filename),
            )


rename_images_sequentially(original_flower_dir)


In [9]:
# Build the folder tree of my flower dataset:
#   my_flower_photos/train/<class>/ and my_flower_photos/test/<class>/

my_flower_dir = "./my_flower_photos"
train_dir = os.path.join(my_flower_dir, 'train')
test_dir = os.path.join(my_flower_dir, 'test')

# Dataset root and the two split folders
for directory in (my_flower_dir, train_dir, test_dir):
    os.makedirs(directory, exist_ok=True)

# One sub-folder per entry of flower_photos inside each split
for split_dir in (train_dir, test_dir):
    for class_name in os.listdir('flower_photos'):
        os.makedirs(os.path.join(split_dir, class_name), exist_ok=True)

In [11]:
# Fill the train split with images number 0..499 of every class

import shutil

original_flower_dir = './flower_photos'
my_flower_dir = "./my_flower_photos"
my_flower_train_dir = os.path.join(my_flower_dir, 'train')

for folder_name in os.listdir(original_flower_dir):
    class_src_dir = os.path.join(original_flower_dir, folder_name)   # eg. flower_photos/daisy
    class_dst_dir = os.path.join(my_flower_train_dir, folder_name)   # eg. my_flower_photos/train/daisy
    for index in range(500):
        image_name = f'{folder_name}_{index}.jpg'                    # eg. daisy_41.jpg
        shutil.copyfile(
            os.path.join(class_src_dir, image_name),
            os.path.join(class_dst_dir, image_name),
        )

In [12]:
# Fill the test split with images number 500..599 of every class

import shutil

original_flower_dir = './flower_photos'
my_flower_dir = "./my_flower_photos"
my_flower_test_dir = os.path.join(my_flower_dir, 'test')

for folder_name in os.listdir(original_flower_dir):
    class_src_dir = os.path.join(original_flower_dir, folder_name)   # eg. flower_photos/daisy
    class_dst_dir = os.path.join(my_flower_test_dir, folder_name)    # eg. my_flower_photos/test/daisy
    for index in range(500, 600):
        image_name = f'{folder_name}_{index}.jpg'                    # eg. daisy_540.jpg
        shutil.copyfile(
            os.path.join(class_src_dir, image_name),
            os.path.join(class_dst_dir, image_name),
        )

# Phase 2: Training

In [14]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

my_flower_dir = "./my_flower_photos"
my_flower_train_dir = os.path.join(my_flower_dir, 'train')
my_flower_test_dir = os.path.join(my_flower_dir, 'test')

batch_size = 32
img_height = 114
img_width = 114

# The dataset is small, so the training images are augmented on the fly
# with random rotations, shifts, shear, zoom and horizontal flips.
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Test images are only rescaled to [0, 1]; no augmentation is applied
test_datagen = ImageDataGenerator(rescale=1./255)

# Both generators resize to the same target size and yield categorical labels
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=batch_size,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_directory(my_flower_train_dir, **flow_options)

test_generator = test_datagen.flow_from_directory(my_flower_test_dir, **flow_options)

Found 2500 images belonging to 5 classes.
Found 500 images belonging to 5 classes.


### Modifying the model for faster training
The network is made shallower (the 512-filter block is dropped), and global average pooling (GAP) is used instead of Flatten.  
(This is just practice, so high accuracy is not required. Using all the layers would likely give higher accuracy.)

In [15]:
from tensorflow.keras import layers, models

def create_vgg_model(input_shape=None, num_classes=5, block_filters=(64, 128, 256)):
    """Build a small VGG-style CNN classifier.

    Every block is two 3x3 'same'-padded ReLU convolutions, each followed by
    batch normalization, and ends with 2x2 max pooling. The classifier head is
    global average pooling -> Dense(50, relu) -> Dense(num_classes, softmax);
    global average pooling is used instead of Flatten to reduce the number of
    parameters.

    Args:
        input_shape: (height, width, channels) of the input images. Defaults to
            (img_height, img_width, 3), read from the notebook globals.
        num_classes: number of output classes (units of the softmax layer).
        block_filters: number of filters of each block, in order. The default
            keeps three blocks for faster training; pass (64, 128, 256, 512)
            to add the fourth block.

    Returns:
        The uncompiled Sequential model.
    """
    if input_shape is None:
        input_shape = (img_height, img_width, 3)

    model = models.Sequential()

    # The Input layer alone defines the input shape. Passing input_shape to the
    # first Conv2D as well is redundant and makes Keras emit a UserWarning.
    model.add(layers.Input(shape=input_shape))

    for filters in block_filters:
        for _ in range(2):
            model.add(layers.Conv2D(filters, (3, 3), padding='same', activation='relu'))
            model.add(layers.BatchNormalization())
        model.add(layers.MaxPooling2D((2, 2)))

    model.add(layers.GlobalAveragePooling2D())
    model.add(layers.Dense(50, activation='relu'))
    model.add(layers.Dense(num_classes, activation='softmax'))

    return model

model = create_vgg_model()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
# Adam optimizer selected by name; categorical cross-entropy goes with the
# class_mode='categorical' generators and the softmax output layer.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print the layer-by-layer summary of the model
model.summary()

In [17]:
from tensorflow.keras.callbacks import ReduceLROnPlateau, EarlyStopping

# Multiply the learning rate by 0.5 once val_loss has not improved for 5 epochs
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5)
# Stop training once val_loss has not improved for 10 epochs. Per the Keras
# documentation, restore_best_weights=True puts back the weights of the epoch
# with the best monitored value (here: lowest val_loss) when training stops,
# instead of keeping the weights of the last epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

In [18]:
# Train for at most 100 epochs; early_stopping may end training before that.
# NOTE(review): the test split is used as validation data, so the learning-rate
# schedule and early stopping are driven by the same images that would be used
# to report test accuracy. Consider holding out a separate validation split.
history = model.fit(
    train_generator,
    validation_data=test_generator,
    epochs=100,
    callbacks=[reduce_lr, early_stopping]
)

Epoch 1/100
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 283ms/step - accuracy: 0.3801 - loss: 1.5379 - val_accuracy: 0.2020 - val_loss: 1.6309 - learning_rate: 0.0010
Epoch 2/100
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 199ms/step - accuracy: 0.4443 - loss: 1.4560 - val_accuracy: 0.2100 - val_loss: 1.6381 - learning_rate: 0.0010
Epoch 3/100
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 200ms/step - accuracy: 0.4905 - loss: 1.4031 - val_accuracy: 0.2240 - val_loss: 1.6235 - learning_rate: 0.0010
Epoch 4/100
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 202ms/step - accuracy: 0.4762 - loss: 1.3560 - val_accuracy: 0.3740 - val_loss: 1.4558 - learning_rate: 0.0010
Epoch 5/100
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 213ms/step - accuracy: 0.4856 - loss: 1.3327 - val_accuracy: 0.4700 - val_loss: 1.3384 - learning_rate: 0.0010
Epoch 6/100
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [19]:
# Save the trained model to the working directory as an .h5 file
model.save('./vgg_flower_2.h5')



In [20]:
# Save a second copy under drive/MyDrive.
# NOTE(review): presumably a Google Drive mounted in Colab; no mount cell is
# visible in this notebook, so confirm the folder exists before running.
model.save('drive/MyDrive/vgg_flower_2.h5')

