# **What's in here? Imports ⚛**

In [1]:
# All imports are collected here, at the top of the notebook ;)
import os
import pandas as pd
import numpy as np

from PIL import Image
import matplotlib.pyplot as plt

from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image

from PIL import Image

In [2]:
# Mount Google Drive at /content/drive so the dataset stored there can be read.
# Colab asks for authorization through a pop-up when this cell runs.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Source dataset folder on the mounted Drive: the cropping step reads the .txt
# metadata files and the .png images they reference from this directory.
path = "/content/drive/MyDrive/Colab Notebooks/data/NistFormatted"

# **Data Augmentation** ⛓

**Removing the 32 rows of white space at the bottom of each image**

In [5]:
# Collapse the 5 original class labels into 3: "T" is folded into "A" and "R" into "L";
# every other label initial is kept as-is.
label_mapping = {
    "T": "A",
    "R": "L"
}

cropped_img_paths = []
cropped_labels = []
crop_path = "/content/drive/MyDrive/Colab Notebooks/cropped_images"

# Image.save() does not create missing folders, so make sure the target exists first.
os.makedirs(crop_path, exist_ok=True)


def _non_white_row_bounds(image_array):
    """Return ``(top, bottom)`` row indices bracketing the non-white part of an image.

    A row counts as non-white when its pixel sum is below ``255 * width``
    (evaluated per channel when the array has a channel axis). ``top`` is the
    first non-white row and ``bottom`` is one past the last non-white row, so
    the pair can be used directly as the vertical part of a crop box.

    If every row is white the full range ``(0, height)`` is returned, so the
    caller keeps the image unchanged instead of failing on an empty selection.
    """
    white_row_sum = 255 * image_array.shape[1]
    non_white_rows = np.where(image_array.sum(axis=1) < white_row_sum)[0]
    if non_white_rows.size == 0:
        return 0, image_array.shape[0]
    return int(non_white_rows[0]), int(non_white_rows[-1]) + 1


# os.listdir() returns entries in arbitrary order; sorting keeps the sample order
# (and therefore everything derived from list positions) reproducible between runs.
for file in sorted(os.listdir(path)):
    if not file.endswith('.txt'):
        continue

    # Read the metadata and close the text file before doing any image work.
    with open(os.path.join(path, file), 'r') as t:
        content = t.readlines()

    # Third line: the second space-separated token names the image file; its last
    # four characters (presumably a 3-letter extension) are replaced by '.png'.
    img_name = content[2].rsplit(' ')[1][:-4] + '.png'
    # Second line: the first character of the second token is the class label.
    label_initial = content[1].rsplit(' ')[1][0]
    label = label_mapping.get(label_initial, label_initial)

    img_path = os.path.join(path, img_name)  # the actual image sits next to its .txt file
    cropped_file_path = os.path.join(crop_path, img_name)

    # The context manager releases the image's file handle once the crop is saved,
    # instead of leaving one open handle per image.
    with Image.open(img_path) as img:
        top_boundary, bottom_boundary = _non_white_row_bounds(np.array(img))
        # Keep the full width, drop the white rows above and below the content,
        # and save the result under the same file name.
        cropped_img = img.crop((0, top_boundary, img.width, bottom_boundary))
        cropped_img.save(cropped_file_path)

    # Record the cropped image path together with its (remapped) label.
    cropped_img_paths.append(cropped_file_path)
    cropped_labels.append(label)

In [6]:
# One row per cropped image: where it was saved and which class it belongs to.
cropped_columns = {
    'CROPPED_IMAGE_PATH': cropped_img_paths,
    'CROPPED_LABEL': cropped_labels,
}
df = pd.DataFrame(cropped_columns)

In [7]:
# Preview the first 20 path/label rows of the cropped dataset.
df.head(20)

Unnamed: 0,CROPPED_IMAGE_PATH,CROPPED_LABEL
0,/content/drive/MyDrive/Colab Notebooks/cropped...,A
1,/content/drive/MyDrive/Colab Notebooks/cropped...,L
2,/content/drive/MyDrive/Colab Notebooks/cropped...,A
3,/content/drive/MyDrive/Colab Notebooks/cropped...,W
4,/content/drive/MyDrive/Colab Notebooks/cropped...,L
5,/content/drive/MyDrive/Colab Notebooks/cropped...,A
6,/content/drive/MyDrive/Colab Notebooks/cropped...,A
7,/content/drive/MyDrive/Colab Notebooks/cropped...,L
8,/content/drive/MyDrive/Colab Notebooks/cropped...,A
9,/content/drive/MyDrive/Colab Notebooks/cropped...,A


In [8]:
# Show the last rows of the table to check where the cropped dataset ends.
df.tail()

Unnamed: 0,CROPPED_IMAGE_PATH,CROPPED_LABEL
3995,/content/drive/MyDrive/Colab Notebooks/cropped...,L
3996,/content/drive/MyDrive/Colab Notebooks/cropped...,A
3997,/content/drive/MyDrive/Colab Notebooks/cropped...,W
3998,/content/drive/MyDrive/Colab Notebooks/cropped...,A
3999,/content/drive/MyDrive/Colab Notebooks/cropped...,L


In [17]:
# Augmentation settings, collected in one place and handed to the generator below.
# NOTE(review): horizontal flipping mirrors the image; it looks label-safe here only
# because label_mapping merges "R" into "L" — confirm before un-merging those classes.
augmentation_settings = dict(
    rotation_range=20,       # random rotation of up to 20 degrees
    width_shift_range=0.2,   # random horizontal shift
    height_shift_range=0.2,  # random vertical shift
    shear_range=0.2,         # shear transformations
    zoom_range=0.2,          # random zoom
    horizontal_flip=True,    # random horizontal flipping
    fill_mode='nearest',     # how newly created pixels are filled
)

# Build the ImageDataGenerator from the settings above.
datagen = ImageDataGenerator(**augmentation_settings)

In [40]:
# Generate a fixed number of randomly augmented variants of every cropped image,
# save them to Drive, and record their paths and labels.
aug_ind = []                # label of each augmented image, parallel to augmented_image_paths
augmented_image_paths = []
output_dir = "/content/drive/MyDrive/Colab Notebooks/Augmented_Images"  # Change this to your desired output directory

# exist_ok=True creates the folder when needed and is a no-op when it already exists.
os.makedirs(output_dir, exist_ok=True)

num_augmented_images = 6   # augmented variants generated per input image
target_size = (150, 150)   # size every input is resized to before augmentation

# NOTE(review): ImageDataGenerator appears to draw its random transforms from NumPy's
# global RNG when no seed is passed, so seeding it here should make the augmented set
# reproducible between runs — confirm against the installed Keras version.
np.random.seed(42)

# Only augment images that are not already inside output_dir. If the master lists
# were extended with augmented samples by a later cell, re-running this cell would
# otherwise augment the augmented images as well.
source_images = [
    (source_path, source_label)
    for source_path, source_label in zip(cropped_img_paths, cropped_labels)
    if os.path.dirname(source_path) != output_dir
]

for image_i, (source_path, source_label) in enumerate(source_images):
    # convert() returns a new in-memory image, so the file handle can be released
    # as soon as the conversion is done.
    with Image.open(source_path) as source_img:
        img = source_img.convert('RGB').resize(target_size)
    x = np.array(img)
    x = x.reshape((1,) + x.shape)  # Reshape to (1, height, width, channels)

    # datagen.flow() yields batches endlessly; zip() stops as soon as range() is
    # exhausted, so exactly num_augmented_images batches are generated per image.
    for aug_count, batch in zip(range(num_augmented_images), datagen.flow(x, batch_size=1)):
        augmented_image = batch[0].astype(np.uint8)

        # Construct the augmented image filename manually: source index + variant index
        augmented_image_filename = f'aug_{image_i}_{aug_count}.png'
        augmented_image_path = os.path.join(output_dir, augmented_image_filename)

        # Save the augmented image manually using PIL
        pil_augmented_image = Image.fromarray(augmented_image)
        pil_augmented_image.save(augmented_image_path)

        # Each augmented image inherits the label of the image it was derived from.
        augmented_image_paths.append(augmented_image_path)
        aug_ind.append(source_label)


In [41]:
# Append the augmented samples to the master lists exactly once. The lists are
# mutated in place, so without this guard every re-run of the cell would append
# the same augmented samples again and duplicate rows in the dataset.
if cropped_img_paths[-len(augmented_image_paths):] != augmented_image_paths:
    cropped_img_paths.extend(augmented_image_paths)
    cropped_labels.extend(aug_ind)

In [46]:
# Rebuild the table from the master lists, which at this point also hold the
# augmented samples appended after the original crops.
df = pd.DataFrame(
    data=dict(
        CROPPED_IMAGE_PATH=cropped_img_paths,
        CROPPED_LABEL=cropped_labels,
    )
)

In [48]:
# Inspect the last 20 rows, which should now point at augmented images.
df.tail(20)

Unnamed: 0,CROPPED_IMAGE_PATH,CROPPED_LABEL
27980,/content/drive/MyDrive/Colab Notebooks/Augment...,A
27981,/content/drive/MyDrive/Colab Notebooks/Augment...,A
27982,/content/drive/MyDrive/Colab Notebooks/Augment...,W
27983,/content/drive/MyDrive/Colab Notebooks/Augment...,W
27984,/content/drive/MyDrive/Colab Notebooks/Augment...,W
27985,/content/drive/MyDrive/Colab Notebooks/Augment...,W
27986,/content/drive/MyDrive/Colab Notebooks/Augment...,W
27987,/content/drive/MyDrive/Colab Notebooks/Augment...,W
27988,/content/drive/MyDrive/Colab Notebooks/Augment...,A
27989,/content/drive/MyDrive/Colab Notebooks/Augment...,A


# **Data Normalization** ⚓

In [None]:
# TODO: implement data normalization.