### Lo scopo di questo codice è prendere il dataset iniziale e trasformarlo tramite metodi di data augmentation in modo da migliorare le prestazioni della CNN.

### Caratteristiche del dataset iniziale: 1000 immagini di risoluzione 83x84.
### Caratteristiche del dataset finale: 1000 x (n+1) immagini di risoluzione 84x84 divise in 50 classi (n è il numero di copie aumentate per immagine, passato come argomento alla funzione perform_augmentation).

### Alle immagini iniziali è stato aggiunto un padding di un pixel (nero) in modo da ottenere la risoluzione di 84x84. Successivamente sono state applicate, con parametri casuali, una rotazione tra -20 e 20 gradi, una traslazione (entro ±10 pixel in orizzontale e ±8 pixel in verticale) e un blur gaussiano con kernel di dimensione tra 3 e 5.

### Anche le immagini iniziali sono contenute in questo dataset.

In [1]:
from pathlib import Path
from matplotlib import pyplot as plt
from PIL import Image, ImageEnhance, ImageOps, ImageChops, ImageDraw, ImageFilter, ImageTransform
import albumentations as A
import random
import shutil
import numpy as np
import cv2
import glob
import os

In [2]:
# Folder holding the untouched original hiragana images
src_dir = r"C:\Users\andma\OneDrive\Documenti\hiragana images\hiragana_images_original\hiragana_images"
# Working folder that receives the per-class copies and the augmented images
dst_dir = r"C:\Users\andma\OneDrive\Documenti\hiragana images\hiragana_images"
# The working folder again, as a pathlib.Path (written with forward slashes)
# for the cells that browse it with iterdir()
destination = Path('C:/Users/andma/OneDrive/Documenti/hiragana images/hiragana_images')

In [3]:
def _class_name(file_name):
    """Return the class label embedded in an image file name.

    The label is made of the alphabetic characters found at positions 4-6,
    i.e. right after the 4-character "kana" prefix
    (``"kanaBA3.jpg"`` -> ``"BA"``). Digits and the extension dot are
    dropped, so ``"kanaA0.jpg"`` gives ``"A"`` instead of ``"A."`` (a folder
    name that only works on Windows, which strips trailing dots). Slicing is
    used so names shorter than 7 characters do not raise ``IndexError``.
    """
    return "".join(ch for ch in file_name[4:7] if ch.isalpha())


# Only regular files of src_dir are treated as images (sub-folders are ignored).
source_files = [
    entry for entry in os.listdir(src_dir)
    if os.path.isfile(os.path.join(src_dir, entry))
]

# Create the class folders if they don't already exist
os.makedirs(dst_dir, exist_ok=True)
for file in source_files:
    os.makedirs(os.path.join(dst_dir, _class_name(file)), exist_ok=True)

# Delete all the images in the folders contained in dst_dir, so that each run
# starts from a clean copy of the originals (previous augmentations are dropped)
for folder in os.listdir(dst_dir):
    folder_path = os.path.join(dst_dir, folder)
    if not os.path.isdir(folder_path):
        # A loose file directly inside dst_dir is not a class folder
        continue
    for file in os.listdir(folder_path):
        file_path = os.path.join(folder_path, file)
        if os.path.isfile(file_path):
            os.remove(file_path)

# Copy all the images from src_dir to their class folder in dst_dir
for file in source_files:
    shutil.copy(
        os.path.join(src_dir, file),
        os.path.join(dst_dir, _class_name(file)),
    )

### Codice scritto da me in precedenza (le 2 celle sotto)

In [4]:
# Build the flat list of every .jpg path found in the class folders

# Class folders directly under the working directory
subdirect = list(filter(lambda entry: entry.is_dir(), destination.iterdir()))

# Same folders as plain strings, as expected by glob
subdirect_string = list(map(str, subdirect))

# One list of matching .jpg paths per class folder
source_images = [glob.glob(folder + '/*.jpg') for folder in subdirect_string]

# Flatten the per-folder lists into a single list of image paths
images_path = [image for matches in source_images for image in matches]

In [5]:
# Pad, in place, every .jpg image of every class folder with one extra column.
subdirect = [x for x in destination.iterdir() if x.is_dir()]

subdirect_string = [str(path) for path in subdirect]

for subfolder in subdirect_string:

    for image in glob.glob(subfolder + '/*.jpg'):

        kana = cv2.imread(image)
        if kana is None:
            # cv2.imread signals an unreadable/corrupt file by returning None
            # instead of raising; skip it rather than crash in copyMakeBorder.
            print("Skipping unreadable image:", image)
            continue

        # copyMakeBorder takes (top, bottom, left, right): this adds a
        # 1-pixel column on the LEFT side of the image (not on the bottom),
        # i.e. the width grows by one pixel. No border value is passed, so
        # OpenCV's default constant colour is used.
        padded_image = cv2.copyMakeBorder(kana, 0, 0, 1, 0, cv2.BORDER_CONSTANT)

        # Overwrite the original file with the padded version
        cv2.imwrite(image, padded_image)

In [6]:
def augment_image(image):
    """Return a randomly rotated, shifted and blurred version of ``image``.

    Args:
        image: image array, e.g. as returned by ``cv2.imread``.

    Returns:
        The augmented image produced by the albumentations pipeline.
    """
    # Draw the geometric parameters once per call: a rotation angle in
    # [-20, 20] degrees and an integer pixel shift on each axis.
    angle = np.random.uniform(-20, 20)
    trans = {
        'x': int(np.random.uniform(-10, 10)),
        'y': int(np.random.uniform(-8, 8)),
    }

    # Both transforms must always run. p=1 is used instead of
    # always_apply=True, which recent albumentations releases deprecate;
    # p=1 gives the same behaviour on older releases as well.
    aug = A.Compose([
        A.Affine(translate_px=trans, rotate=angle, p=1),
        A.GaussianBlur(blur_limit=(3, 5), sigma_limit=0, p=1),
    ])

    # Compose returns a dict; the transformed picture is under 'image'
    return aug(image=image)['image']

def perform_augmentation(dir, subdir, file, n):
    """Write ``n`` augmented variants of one image next to the original.

    Each variant is saved in ``subdir`` as ``<stem>_aug<k>.jpg`` with ``k``
    running from 1 to ``n``.

    Args:
        dir: unused; kept so existing callers keep working.
        subdir: folder that contains the image and receives the variants.
        file: file name of the image inside ``subdir``.
        n: number of augmented copies to create.
    """
    # Load the image
    image = cv2.imread(os.path.join(subdir, file))
    if image is None:
        # cv2.imread returns None for files it cannot decode (e.g. a stray
        # non-image file); skip them instead of failing inside the pipeline.
        print("Skipping unreadable image:", os.path.join(subdir, file))
        return

    # splitext removes only the final extension, so names containing extra
    # dots keep their full stem and cannot collide with each other.
    stem = os.path.splitext(file)[0]

    for index in range(1, n + 1):
        # Perform data augmentation
        augmented_image = augment_image(image)

        # Save augmented image with a different name
        new_file_name = stem + "_aug" + str(index) + ".jpg"
        cv2.imwrite(os.path.join(subdir, new_file_name), augmented_image)

# Iterate through all subdirectories and image files of the working folder
for subdir, dirs, files in os.walk(dst_dir):
    for file in files:
        # Files produced by a previous run already carry the "_aug" marker;
        # skipping them keeps the dataset at 1 original + n copies per image
        # when this cell is executed more than once.
        if "_aug" in file:
            continue

        # perform_augmentation loads the image itself, so it is not read here.
        # Provide the directory list, the subdirectory where the image lives
        # and the file name; one augmented copy is created per image.
        perform_augmentation(dirs, subdir, file, 1)

#### Tranne per le due celle segnalate prima, che sono state scritte interamente da me (e si vede, aggiungerei), per le altre mi sono fatto aiutare da ChatGPT e poi ho aggiunto le modifiche necessarie.