# Flower Image Classification

In [None]:
#import libraries
from google.colab import drive #for mounting Google drive
import os #for interacting with files
import matplotlib.pyplot as plt # for plotting images and results
import numpy as np # for numeric operations
import cv2 #for image processing
from sklearn.model_selection import train_test_split #for splitting data
from PIL import Image # for manipulating images
import shutil

#augmentation library
import albumentations as A # for image augmentaion
from albumentations.augmentations.transforms import *
from albumentations.pytorch import ToTensorV2 #to convert image into tensor format for Pytorch
from tqdm import tqdm

#model related imports
import tensorflow as tf
from tensorflow import keras
from sklearn.metrics import classification_report, confusion_matrix,ConfusionMatrixDisplay
import seaborn as sns
from tensorflow.keras.applications import DenseNet121

# Load Data

In [None]:
# Mount Google Drive at /content/drive; the dataset path used below lives under it.
drive.mount('/content/drive')

In [None]:
# Notebook-wide settings: image edge length, dataset root on Drive, class names.
img_size = 240
data_path = '/content/drive/MyDrive/Flower_dataset/train'
# One sub-folder per class is expected under data_path.
categories = [
    'daisy',
    'dandelion',
    'rose',
    'sunflower',
    'tulip',
]

In [None]:
# Report how many files each class folder contains.
def count_images(directory):
    """Print the number of directory entries for every class in ``categories``.

    directory: folder that holds one sub-folder per class name.
    Every entry returned by ``os.listdir`` is counted, whatever its type.
    """
    for class_name in categories:
        class_dir = os.path.join(directory, class_name)
        entry_count = len(os.listdir(class_dir))
        print(f'{class_name} : {entry_count} images')


count_images(data_path)

In [None]:
# Preview: one figure per class showing three randomly drawn samples.
for category in categories:
    path = os.path.join(data_path, category)
    images = os.listdir(path)

    # One row of three axes, titled with the class name.
    fig, ax = plt.subplots(nrows=1, ncols=3, figsize=(15, 3))
    fig.suptitle(f'{category}', fontsize=18)

    for i, axis in enumerate(ax):
        # Random pick with replacement, so the same image can appear twice.
        img_name = images[np.random.randint(0, len(images))]
        img_path = os.path.join(path, img_name)
        img_array = cv2.imread(img_path)

        # OpenCV loads images as BGR; convert to RGB before plotting.
        img_rgb = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)

        axis.imshow(img_rgb)
        axis.axis('off')

# Preprocessing

In [None]:
# Class preview (three random samples per class), shown again before preprocessing.
for category in categories:
    path = os.path.join(data_path, category)
    images = os.listdir(path)

    fig, ax = plt.subplots(1, 3, figsize=(15, 3))
    fig.suptitle(f'{category}', fontsize=18)

    for i in (0, 1, 2):
        # Draw a random index into the folder listing (repeats are possible).
        random_index = np.random.randint(0, len(images))
        img_name = images[random_index]
        img_path = os.path.join(path, img_name)

        # Read as BGR, then reorder the channels to RGB for display.
        img_array = cv2.imread(img_path)
        img_rgb = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)

        panel = ax[i]
        panel.imshow(img_rgb)
        panel.axis('off')

In [None]:
#split data into train, validation and test sets

In [None]:
# Root folder that receives the train/val/test copies of the dataset.
base_dir = '/content/split_data'  # New folder to store split data

In [None]:
# Make sure <base_dir>/<split>/<category> exists for every split/class pair.
for split in ['train', 'val', 'test']:
    split_root = os.path.join(base_dir, split)
    for category in categories:
        # exist_ok=True keeps this cell safe to run more than once.
        os.makedirs(os.path.join(split_root, category), exist_ok=True)

In [None]:
# Split each category into roughly 70% train / 15% val / 15% test and copy the files.
for category in categories:
    img_dir = os.path.join(data_path, category)
    # os.listdir returns entries in arbitrary order. Sorting first makes the
    # random_state=42 split reproducible, so re-running this cell cannot put
    # the same image into a different split than before.
    images = sorted(os.listdir(img_dir))
    train_val, test = train_test_split(images, test_size=0.15, random_state=42)
    train, val = train_test_split(train_val, test_size=0.176, random_state=42)  # 0.176 x 85% ≈ 15%

    for split, split_data in zip(['train', 'val', 'test'], [train, val, test]):
        for img_name in split_data:
            src = os.path.join(img_dir, img_name)
            dst = os.path.join(base_dir, split, category, img_name)
            # copy2 also preserves file metadata such as timestamps.
            shutil.copy2(src, dst)

In [None]:
# Sanity check: number of files per class in each split folder.
splits = ['train', 'val', 'test']
for split in splits:
    header = f"\n{split.upper()} DATA:"
    print(header)
    for category in categories:
        folder_path = os.path.join(base_dir, split, category)
        folder_entries = os.listdir(folder_path)
        num_images = len(folder_entries)
        print(f"  {category} : {num_images} images")

In [None]:
#Augmentation on train data only

In [None]:
# Augmentation pipeline: each step is applied with its own probability `p`,
# and the final resize brings every output to img_size x img_size.
augmentation_steps = [
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.5),
    A.RandomRotate90(p=0.5),
    A.Rotate(limit=25, p=0.7),
    A.RandomBrightnessContrast(p=0.5),
    A.Resize(img_size, img_size),  # Ensure size remains compatible
]
transform = A.Compose(augmentation_steps)

# Training split folder (the only split that gets augmented) and its class folders.
data_dir = '/content/split_data/train'
categories = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

In [None]:
# Function to augment and save images
def augment_and_save_images(data_dir, categories, transform, num_augments=1):
    """Write augmented copies of every image next to its original.

    Args:
        data_dir: Folder containing one sub-folder per category.
        categories: Names of the category sub-folders to process.
        transform: Callable invoked as ``transform(image=rgb_array)`` that
            returns a mapping whose ``'image'`` entry is the augmented array.
        num_augments: Number of augmented copies written per source image.

    Outputs are named ``<stem>_aug<i>.jpg``. Files that already carry such a
    suffix are skipped, so running the function again overwrites the earlier
    outputs instead of augmenting them a second time. Files that OpenCV
    cannot decode are skipped as well.
    """
    image_exts = ('.jpg', '.jpeg', '.png')
    for category in categories:
        path = os.path.join(data_dir, category)
        for filename in tqdm(os.listdir(path), desc=f'Augmenting {category}'):
            stem, ext = os.path.splitext(filename)
            # Case-insensitive match so files such as 'IMG.JPG' are not ignored.
            if ext.lower() not in image_exts:
                continue

            # A stem ending in '_aug<digits>' is the output of an earlier run.
            _, marker, index = stem.rpartition('_aug')
            if marker and index.isdigit():
                continue

            image = cv2.imread(os.path.join(path, filename))
            if image is None:
                continue
            # The transform receives RGB; OpenCV reads and writes BGR.
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

            for i in range(num_augments):
                augmented_image = transform(image=image)['image']
                save_path = os.path.join(path, f"{stem}_aug{i}.jpg")
                cv2.imwrite(save_path, cv2.cvtColor(augmented_image, cv2.COLOR_RGB2BGR))

In [None]:
# Run the augmentation: one augmented copy per training image, written into
# the same class folder as its source image.
augment_and_save_images(data_dir, categories, transform, num_augments=1)

In [None]:
# Check the number of images per split and class after augmentation.
base_path = '/content/split_data'  # base directory

splits = ['train', 'val', 'test']
categories = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

for split in splits:
    print(f"\n{split.upper()} DATA:")
    for category in categories:
        folder_path = os.path.join(base_path, split, category)
        folder_entries = os.listdir(folder_path)
        num_images = len(folder_entries)
        print(f"  {category} : {num_images} images")

In [None]:
#image data needs to be converted into ndarrays with the proper labels

In [None]:
import os
import cv2
import numpy as np

IMG_SIZE = 240
categories = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']
DATADIR_TRAIN = '/content/split_data/train'
DATADIR_VAL = '/content/split_data/val'
DATADIR_TEST = '/content/split_data/test'

# Function to load data
def load_data(DATADIR):
    """Load every image below ``DATADIR`` as a resized RGB array with its label.

    Args:
        DATADIR: Folder containing one sub-folder per entry of ``categories``.

    Returns:
        A list of ``[image_array, label]`` pairs. ``image_array`` is the RGB
        image resized to ``IMG_SIZE`` x ``IMG_SIZE`` and ``label`` is the
        index of the category in ``categories``.

    Files that OpenCV cannot decode or process are skipped. The number of
    skipped files is printed instead of being silently ignored.
    """
    data = []  # Will hold image arrays and labels
    skipped = []  # Paths that could not be loaded
    for label, category in enumerate(categories):
        path = os.path.join(DATADIR, category)  # Path to category folder
        # Sorted so the sample order is the same on every run.
        for img_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, img_name)
            img_array = cv2.imread(img_path)
            if img_array is None:
                # cv2.imread signals an unreadable file by returning None.
                skipped.append(img_path)
                continue
            try:
                img_array = cv2.cvtColor(img_array, cv2.COLOR_BGR2RGB)  # Convert to RGB
                img_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))  # Resize image
            except cv2.error:
                skipped.append(img_path)
                continue
            data.append([img_array, label])  # Append image and label
    if skipped:
        print(f'Skipped {len(skipped)} unreadable file(s) under {DATADIR}')
    return data

# Load training, validation, and test data
train_data = load_data(DATADIR_TRAIN)
val_data = load_data(DATADIR_VAL)
test_data = load_data(DATADIR_TEST)


In [None]:
# Split each list of [image, label] pairs into an image array and a label array.
X_train = np.array([pixels for pixels, _ in train_data])
y_train = np.array([label for _, label in train_data])

X_val = np.array([pixels for pixels, _ in val_data])
y_val = np.array([label for _, label in val_data])

X_test = np.array([pixels for pixels, _ in test_data])
y_test = np.array([label for _, label in test_data])

In [None]:
# Give every image array the (N, IMG_SIZE, IMG_SIZE, 3) layout the CNN is built for.
cnn_input_shape = (-1, IMG_SIZE, IMG_SIZE, 3)
X_train = X_train.reshape(cnn_input_shape)
X_val = X_val.reshape(cnn_input_shape)
X_test = X_test.reshape(cnn_input_shape)

In [None]:
# Normalization: cast to float32 and divide by 255 for all three splits.
X_train = X_train.astype(np.float32) / 255.0
X_val = X_val.astype(np.float32) / 255.0
X_test = X_test.astype(np.float32) / 255.0


In [None]:
# Number of training samples = length of the first axis of X_train.
total_train_images = len(X_train)
print(f'Total number of training images: {total_train_images}')


# Model building

In [None]:
# Step 1: Load DenseNet121 (ImageNet weights, no classification top).
# The input size follows IMG_SIZE so it always matches the arrays built above.
base_model = tf.keras.applications.DenseNet121(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_SIZE, IMG_SIZE, 3)
)

# Step 2: Freeze the base model; setting `trainable` on the model applies to
# all of its layers, so only the new head below is trained.
base_model.trainable = False

# Step 3: Build the classifier head with Dropout.
# The output layer has one unit per entry in `categories`.
model = keras.Sequential([
    base_model,
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(1024, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(512, activation='relu'),
    keras.layers.Dropout(0.3),
    keras.layers.Dense(len(categories), activation='softmax')
])

# Step 4: Compile. The sparse loss matches the integer class labels in y_train.
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()

In [None]:
# Train for 20 epochs in batches of 32, evaluating on the validation arrays
# after every epoch; `history` keeps the per-epoch metrics for the plots below.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=20,
    batch_size=32,
)

In [None]:
# 1. Predict class probabilities on the validation set
y_pred_probs = model.predict(X_val)

# 2. The model ends in a multi-class softmax, so the predicted label is the
#    index of the highest probability in each row.
y_pred = np.argmax(y_pred_probs, axis=1)

# 3. y_val already holds integer class indices (built from `categories`).
y_true = y_val

# 4. Generate the confusion matrix. Passing `labels` fixes the matrix to one
#    row/column per class even if a class is missing from this split, and
#    `display_labels` shows the class names instead of bare indices.
cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(categories)))
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=categories)
disp.plot(cmap='Blues')
plt.title("Confusion Matrix")
plt.show()


In [None]:
# Save the trained model to a .keras file in the current working directory.
model.save("flower_classifier_model.keras")

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# 1. Predictions (class probabilities) for every split
y_train_pred_probs = model.predict(X_train)
y_val_pred_probs = model.predict(X_val)
y_test_pred_probs = model.predict(X_test)

# 2. Convert probabilities to class labels
y_train_pred = np.argmax(y_train_pred_probs, axis=1)
y_val_pred = np.argmax(y_val_pred_probs, axis=1)
y_test_pred = np.argmax(y_test_pred_probs, axis=1)

# 3. Ground truth (already in integer format)
y_train_true = y_train
y_val_true = y_val
y_test_true = y_test

# 4. Compute metrics
def print_metrics(y_true, y_pred, dataset_name):
    """Print accuracy and macro-averaged precision, recall and F1.

    Args:
        y_true: Integer ground-truth class labels.
        y_pred: Integer predicted class labels, aligned with ``y_true``.
        dataset_name: Name of the split, used only in the printed header.
    """
    print(f"\n📊 Metrics for {dataset_name} Set:")
    print("Accuracy :", accuracy_score(y_true, y_pred))
    print("Precision:", precision_score(y_true, y_pred, average='macro'))
    print("Recall   :", recall_score(y_true, y_pred, average='macro'))
    print("F1 Score :", f1_score(y_true, y_pred, average='macro'))

# Validation and test are reported separately: the validation split was
# monitored during training, whereas the test split was not used until now.
print_metrics(y_train_true, y_train_pred, "Train")
print_metrics(y_val_true, y_val_pred, "Validation")
print_metrics(y_test_true, y_test_pred, "Test")


In [None]:
# Training vs. validation accuracy per epoch; the figure is also saved to disk.
accuracy_curves = (
    ('accuracy', 'Train Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
)
for history_key, curve_label in accuracy_curves:
    plt.plot(history.history[history_key], label=curve_label)

plt.title('Model Accuracy over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)
# Save before plt.show() so the written file contains the drawn figure.
plt.savefig('accuracy_plot.png', dpi=300)
plt.show()

In [None]:
# Training vs. validation loss per epoch; the figure is also saved to disk.
loss_curves = {
    'Train Loss': history.history['loss'],
    'Validation Loss': history.history['val_loss'],
}
for curve_label, loss_values in loss_curves.items():
    plt.plot(loss_values, label=curve_label)

plt.title('Model Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
# Save before plt.show() so the written file contains the drawn figure.
plt.savefig('loss_plot.png', dpi=300)
plt.show()

In [None]:
#check on unseen data

In [None]:
from tensorflow.keras.models import load_model

# Reload the model from the file written by the earlier model.save(...) cell.
model = load_model("flower_classifier_model.keras")


In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.densenet import preprocess_input  # imported but intentionally not applied, see below
import numpy as np

# Load and preprocess image
img_path = '/content/split_data/test/sunflower/12471443383_b71e7a7480_m.jpg'  # change this to your actual image path
img = image.load_img(img_path, target_size=(240, 240))
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
# The model was trained on pixels scaled to [0, 1] (X / 255.0). The DenseNet
# preprocess_input helper applies a different normalization, so using it here
# would feed the model inputs unlike anything it saw during training.
img_array = img_array.astype('float32') / 255.0


In [None]:
# Class names indexed by predicted label; same order as the `categories`
# list whose indices were used as training labels.
class_names = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']

In [None]:
# Predict the single image and print the name of the highest-scoring class.
pred = model.predict(img_array)
# pred holds one row per image; take the arg-max of the first (only) row.
predicted_class = pred.argmax(axis=1)[0]
print("Predicted class:", class_names[predicted_class])