In [19]:
# Import the required libraries.
import os
import cv2
import albumentations as A
import xml.etree.ElementTree as ET
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

In [20]:
# Define the paths to the images and annotations folders
images_folder = 'C:/Users/haris/test-installation/Data/Face Mask/images'
annotations_folder = 'C:/Users/haris/test-installation/Data/Face Mask/annotations'
# output_images_folder = 'C:/Users/haris/test-installation/Data/Face Mask/images'
# output_annotations_folder = 'C:/Users/haris/test-installation/Data/Face Mask/annotations'

# Create the output folders if they don't exist
# os.makedirs(output_images_folder, exist_ok=True)
# os.makedirs(output_annotations_folder, exist_ok=True)

# List all image file names in the folder
image_files = os.listdir(images_folder)

# Define the augmentation pipeline
transform = A.Compose([
    # A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
])

In [21]:
# Loop through each image file
for image_file in image_files:
    # Read the image
    image_path = os.path.join(images_folder, image_file)
    image = cv2.imread(image_path)

    # Read the corresponding annotation XML file
    annotation_file = os.path.splitext(image_file)[0] + '.xml'
# os.path.splitext(image_file): This function splits the image_file into its base filename and extension. For example, if image_file is 'example.jpg', the function will return ('example', '.jpg').
# [0]: The [0] indexing is used to access the first element of the returned tuple, which is the base filename. In the example above, it will return 'example'.
# + '.xml': The + operator concatenates the base filename obtained in the previous step with the string '.xml'. This is done to create the filename of the corresponding XML annotation file. For example, if the base filename is 'example', the resulting annotation_file will be 'example.xml'.
# So, the line of code combines the base filename of an image file with the extension .xml to generate the corresponding filename for the XML annotation file associated with that image.
    # Read the corresponding annotation XML file
    annotation_path = os.path.join(annotations_folder, annotation_file)
    with open(annotation_path, 'r') as f:
        annotation_content = f.read()

    # Apply the augmentation pipeline to the image and annotation
    augmented = transform(image=image, xml=annotation_content)

    # Get the augmented image and annotation
    augmented_image = augmented['image']
    augmented_annotation = augmented['xml']

    # Save the augmented image
    output_image_file = os.path.splitext(image_file)[0] + 'A.png'
    output_image_path = os.path.join(images_folder, output_image_file)
    cv2.imwrite(output_image_path, augmented_image)

    # Save the augmented annotation
    output_annotation_file = os.path.splitext(image_file)[0] + 'A.xml'
    output_annotation_path = os.path.join(annotations_folder, output_annotation_file)
    with open(output_annotation_path, 'w') as f:
        f.write(augmented_annotation)

In [24]:
# Load the VGG16 model without the top layers
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Freeze the pre-trained layers in the base model
for layer in base_model.layers:
    layer.trainable = False

# Add custom fully connected layers on top of the base model
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
predictions = Dense(2, activation='softmax')(x)

# Create the final model by combining the base model and custom layers
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model with an appropriate optimizer and loss function
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Set the batch size and image dimensions
batch_size = 32
image_width, image_height = 224, 224

# Create an ImageDataGenerator for training data
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(
    images_folder,
    target_size=(image_width, image_height),
    batch_size=batch_size,
    class_mode='categorical')

# Create an ImageDataGenerator for augmented data
augmented_datagen = ImageDataGenerator(rescale=1./255)
augmented_generator = augmented_datagen.flow_from_directory(
    output_images_folder,
    target_size=(image_width, image_height),
    batch_size=batch_size,
    class_mode='categorical')

# Create a function to extract annotations from XML files
def extract_annotations(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()
    annotations = []
    for obj in root.findall('object'):
        name = obj.find('name').text
        annotations.append(name)
    return annotations

# Generate training data by combining original and augmented images
train_data = []
train_labels = []
for image_file in train_generator.filenames:
    image_path = os.path.join(images_folder, image_file)
    annotation_file = os.path.splitext(image_file)[0] + '.xml'
    annotation_path = os.path.join(annotations_folder, annotation_file)
    
    annotations = extract_annotations(annotation_path)
    
    image = cv2.imread(image_path)
    train_data.append(image)
    train_labels.append(annotations)

for image_file in augmented_generator.filenames:
    image_path = os.path.join(output_images_folder, image_file)
    annotation_file = os.path.splitext(image_file)[0] + '_aug.xml'
    annotation_path = os.path.join(output_annotations_folder, annotation_file)
    
    annotations = extract_annotations(annotation_path)
    
    image = cv2.imread(image_path)
    train_data.append(image)
    train_labels.append(annotations)
num_classes = 3

# Convert the training data and labels to numpy arrays
train_data = np.array(train_data)
train_labels = np.array(train_labels)

# Perform one-hot encoding on the labels
train_labels_encoded = np.zeros((len(train_labels), num_classes))
for i, labels in enumerate(train_labels):
    for label in labels:
        label_index = class_mapping[label]
        train_labels_encoded[i, label_index] = 1


# Train the model
model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // batch_size,
    epochs=10,
    validation_data=val_generator,
    validation_steps=val_generator.n // batch_size)

Found 0 images belonging to 0 classes.
Found 0 images belonging to 0 classes.


NameError: name 'val_generator' is not defined

<!-- Bounding boxes are rectangles that mark objects on an image.
There are multiple formats of bounding boxes annotations.
Each format uses its specific representation of bouning boxes coordinates.
Albumentations supports four formats: pascal_voc, albumentations, coco, and yolo . -->

<!-- RandomSizedBBoxSafeCrop crops a random part of the image. It ensures that the cropped part will contain all bounding boxes from the original image. Then the transform rescales the crop to height and width specified by the respective parameters. The erosion_rate parameter controls how much area of the original bounding box could be lost after cropping. erosion_rate = 0.2 means that the augmented bounding box's area could be up to 20% smaller than the area of the original bounding box. -->
<!-- Let's look at an example:
transform = A.Compose([
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
])
In the example, Compose receives a list with three augmentations: A.RandomCrop, A.HorizontalFlip, and A.RandomBrighntessContrast. You can find the full list of all available augmentations in the GitHub repository and in the API Docs. A demo playground that demonstrates how augmentations will transform the input image is available at https://demo.albumentations.ai.

To create an augmentation, you create an instance of the required augmentation class and pass augmentation parameters to it. A.RandomCrop receives two parameters, height and width. A.RandomCrop(width=256, height=256) means that A.RandomCrop will take an input image, extract a random patch with size 256 by 256 pixels from it and then pass the result to the next augmentation in the pipeline (in this case to A.HorizontalFlip).

A.HorizontalFlip in this example has one parameter named p. p is a special parameter that is supported by almost all augmentations. It controls the probability of applying the augmentation. p=0.5 means that with a probability of 50%, the transform will flip the image horizontally, and with a probability of 50%, the transform won't modify the input image.

A.RandomBrighntessContrast in the example also has one parameter, p. With a probability of 20%, this augmentation will change the brightness and contrast of the image received from A.HorizontalFlip. And with a probability of 80%, it will keep the received image unchanged. --><!-- Let's look at an example:
transform = A.Compose([
    A.RandomCrop(width=256, height=256),
    A.HorizontalFlip(p=0.5),
    A.RandomBrightnessContrast(p=0.2),
])
In the example, Compose receives a list with three augmentations: A.RandomCrop, A.HorizontalFlip, and A.RandomBrighntessContrast. You can find the full list of all available augmentations in the GitHub repository and in the API Docs. A demo playground that demonstrates how augmentations will transform the input image is available at https://demo.albumentations.ai.

To create an augmentation, you create an instance of the required augmentation class and pass augmentation parameters to it. A.RandomCrop receives two parameters, height and width. A.RandomCrop(width=256, height=256) means that A.RandomCrop will take an input image, extract a random patch with size 256 by 256 pixels from it and then pass the result to the next augmentation in the pipeline (in this case to A.HorizontalFlip).

A.HorizontalFlip in this example has one parameter named p. p is a special parameter that is supported by almost all augmentations. It controls the probability of applying the augmentation. p=0.5 means that with a probability of 50%, the transform will flip the image horizontally, and with a probability of 50%, the transform won't modify the input image.

A.RandomBrighntessContrast in the example also has one parameter, p. With a probability of 20%, this augmentation will change the brightness and contrast of the image received from A.HorizontalFlip. And with a probability of 80%, it will keep the received image unchanged. -->

<!-- Bounding boxes can be stored on the disk in different serialization
formats: JSON, XML, YAML, CSV, etc. So the code to read bounding boxes
depends on the actual format of data on the disk.

From the provided XML file, the following information can be inferred:

Folder: The folder where the image file is located is "images".

Filename: The name of the image file is "maksssksksss3.png".

Size: The size of the image is specified with the following attributes:

Width: 400 pixels
Height: 271 pixels
Depth: 3 channels (indicating a color image with RGB channels)
Segmented: The segmented value is 0, indicating that the image is not segmented.

Objects: There are multiple objects present in the image, each represented by an "object" tag. The objects have the following attributes:

Name: All objects are labeled as "with_mask".
Pose: The pose is specified as "Unspecified" for all objects.
Truncated, Occluded, Difficult: These attributes are set to 0, indicating that the objects are not truncated, occluded, or considered difficult.
Bounding Box (Bndbox): Each object has a bounding box defined by four coordinates:
xmin: The minimum x-coordinate of the bounding box.
ymin: The minimum y-coordinate of the bounding box.
xmax: The maximum x-coordinate of the bounding box.
ymax: The maximum y-coordinate of the bounding box.
In summary, the XML file contains information about the image file, its size, the presence of objects, and their corresponding bounding boxes. This information is typically used for tasks like object detection or annotation in computer vision applications. -->