# Importing Libraries

In [1]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
import joblib
import time
from sklearn.model_selection import train_test_split
from keras.preprocessing.image import ImageDataGenerator

# Loading Data Set

In [2]:
DATADIR="/content/drive/MyDrive/Ai Training MetaPi/Week_6_Deep_Learning/W6_D3_CNN/flowers"

In [3]:
IMG_SIZE=180

In [4]:
categories=['daisy','dandelion','rose','sunflower','tulip']

In [5]:
from tqdm import tqdm
data=[]
def create_data():
  for category in categories:
    path=os.path.join(DATADIR,category)
    class_num=categories.index(category)
    print (path)
    for img in tqdm(os.listdir(path)):
      try:
        img_array=cv2.imread(os.path.join(path,img))
        new_array=cv2.resize(img_array,(IMG_SIZE,IMG_SIZE))
        data.append([new_array, class_num])
      except Exception as e:
        pass
create_data()

/content/drive/MyDrive/Ai Training MetaPi/Week_6_Deep_Learning/W6_D3_CNN/flowers/daisy


100%|██████████| 769/769 [00:10<00:00, 72.57it/s] 


/content/drive/MyDrive/Ai Training MetaPi/Week_6_Deep_Learning/W6_D3_CNN/flowers/dandelion


100%|██████████| 1055/1055 [00:16<00:00, 63.03it/s] 


/content/drive/MyDrive/Ai Training MetaPi/Week_6_Deep_Learning/W6_D3_CNN/flowers/rose


100%|██████████| 784/784 [00:10<00:00, 71.59it/s] 


/content/drive/MyDrive/Ai Training MetaPi/Week_6_Deep_Learning/W6_D3_CNN/flowers/sunflower


100%|██████████| 734/734 [00:10<00:00, 72.64it/s] 


/content/drive/MyDrive/Ai Training MetaPi/Week_6_Deep_Learning/W6_D3_CNN/flowers/tulip


100%|██████████| 984/984 [00:14<00:00, 66.08it/s] 


In [6]:
np.shape(data)

  result = asarray(a).shape


(4323, 2)

In [7]:
X=[]
Y=[]
for features,labels in tqdm(data):
  X.append(features)
  Y.append(labels)

100%|██████████| 4323/4323 [00:00<00:00, 2026371.95it/s]


# Converting data into numpy array

In [8]:
X=np.array(X)
Y=np.array(Y)

# Building CNN Model with (Kernels=32,64,96,96)

In [9]:
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Activation
from tensorflow.keras.models import Sequential

# Define the CNN model
model = Sequential()

# First convolutional layer
model.add(Conv2D(filters=32,kernel_size=(5,5),padding='Same', activation='relu', input_shape=(IMG_SIZE,IMG_SIZE,3)))
model.add(MaxPooling2D(pool_size=(2, 2)))

# Second convolutional layer

model.add(Conv2D(filters=64,kernel_size=(3,3),padding='Same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))

# Third convolutional layer
model.add(Conv2D(filters=96,kernel_size=(3,3),padding='Same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))
# Fourth convolutional layer
model.add(Conv2D(filters=96,kernel_size=(3,3),padding='Same', activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2),strides=(2,2)))

# Flatten the output from the convolutional layers
model.add(Flatten())

# Fully connected layers
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dense(5, activation='softmax'))

# Build the model
model.build(input_shape=(None, 128, 128, 3))

# Compile the model
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Print model summary
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 180, 180, 32)      2432      
                                                                 
 max_pooling2d (MaxPooling2  (None, 90, 90, 32)        0         
 D)                                                              
                                                                 
 conv2d_1 (Conv2D)           (None, 90, 90, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 45, 45, 64)        0         
 g2D)                                                            
                                                                 
 conv2d_2 (Conv2D)           (None, 45, 45, 96)        55392     
                                                                 
 max_pooling2d_2 (MaxPoolin  (None, 22, 22, 96)        0

# Splitting Data

In [14]:
x_train, x_test, y_train, y_test,= train_test_split(X,Y, test_size=0.15,random_state=100)

# Data Augmentation

In [15]:
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=False,
    rotation_range=15,
    width_shift_range=0.3,
    height_shift_range=0.4,
    horizontal_flip=True,
    validation_split=0.15
    )

# Training with Data Augmentation

In [20]:
S=time.time()
history=model.fit(datagen.flow(x_train, y_train, batch_size=128),epochs=10,
                  validation_data=datagen.flow(x_test, y_test),steps_per_epoch=x_train.shape[0] // 128)
E=time.time()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


*Accuracy with data augmentation*

In [21]:
test_loss,test_acc=model.evaluate(x_test,y_test)
print (f"Test accuracy:{test_acc}")
print ("Total Time taken",E-S)

Test accuracy:0.7134052515029907
Total Time taken 375.90352988243103


# Training **without** Data Augmentation

In [18]:
S=time.time()
model.fit(x_train,y_train,epochs=10,batch_size=128,validation_data=(x_test,y_test))
E=time.time()

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


*Accuracy without data augmentation*

In [19]:
test_loss,test_acc=model.evaluate(x_test,y_test)
print (f"Test accuracy:{test_acc}")
print ("Total Time taken",E-S)

Test accuracy:0.6178736686706543
Total Time taken 41.84006094932556


# Insights

- In this experiment, we applied data augmentation techniques to boost the performance of a convolutional neural network (CNN) for image classification.
- The results demonstrated a substantial accuracy improvement compared to training without data augmentation, highlighting its critical role in enhancing CNNs' generalization abilities.
- Techniques such as rotation, shifting, and flipping introduced diversity into the training dataset, mitigating overfitting and enhancing adaptability to new data, particularly valuable when working with limited training samples.

# Conclusion

While data augmentation offers clear benefits, it does come with the
drawback of increased training time due to the generation of augmented data. Thus, practitioners must consider the trade-off between performance improvement and extended training durations. To harness data augmentation effectively, selecting augmentation strategies tailored to the dataset and task is crucial. Furthermore, combining data augmentation with hyperparameter tuning and model architecture optimization can yield additional performance enhancements. In summary, this experiment underscores the vital role of data augmentation in bolstering CNNs' robustness and accuracy in image classification, along with the significance of thoughtful parameter tuning for optimal outcomes.