<a href="https://colab.research.google.com/github/Rad-Monk/TF-Notebooks/blob/main/M_webpage_element_identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import shutil

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive so files stored there can be read from this runtime.
drive.mount('/content/drive')

In [None]:
# Root folder of the raw dataset on the mounted Drive; later cells list its
# sub-folders and treat each one as a class.
dataset_dir = "/content/drive/MyDrive/ui_elements_datasets"

In [None]:
# Show the entries found directly under the dataset root.
os.listdir(dataset_dir)

In [None]:
# Class-folder name -> integer id, numbered in the order the folders are listed here.
# NOTE(review): Keras `flow_from_directory` numbers classes by sorting folder
# names alphanumerically, which is not this order — confirm before using this
# mapping to interpret generator labels or model outputs.
labels_numerical = {
    folder: index
    for index, folder in enumerate((
        "main_breadcrumbs",
        "main_icons",
        "main_container",
        "main_modals",
        "main_logos",
        "main_text",
        "main_input_fields",
        "main_radio",
    ))
}

In [None]:
# Integer class id -> human-readable class name (folder name without the
# "main_" prefix, underscores shown as spaces).
label_categorical = {
    0: "breadcrumbs",
    1: "icons",
    2: "container",
    3: "modals",
    4: "logos",
    5: "text",
    6: "input fields",
    # Was "main_radio": the only entry that kept the folder prefix.
    7: "radio",
}

In [None]:
# Augmenting generator: rescales pixel values by 1/255 and applies random
# geometric and colour perturbations with the ranges given below.
image_generator = ImageDataGenerator(
    rescale=1.0 / 255,
    # Geometric perturbations.
    rotation_range=10,
    shear_range=0.1,
    zoom_range=0.1,
    width_shift_range=0.5,
    height_shift_range=0.5,
    horizontal_flip=True,
    vertical_flip=False,
    # Colour perturbations.
    brightness_range=[0.7, 1.3],
    channel_shift_range=100.0,
)

# Plain generator: only rescales pixel values by 1/255, no augmentation.
rescale_generator = ImageDataGenerator(rescale=1.0 / 255)

In [None]:
# Print, for every entry of the dataset root, how many items it contains
# followed by its name.
for folder in os.listdir(dataset_dir):
  folder_path = os.path.join(dataset_dir, folder)
  n_entries = len(os.listdir(folder_path))
  print(n_entries, folder)

In [None]:
from PIL import Image
# Sanity check: confirm that an image from the icons class can be opened by PIL.
class_dir = os.path.join(dataset_dir, "main_icons")

# List all files in the directory
files = os.listdir(class_dir)
print("Found files:", files)

if not files:
    # An empty folder would otherwise fail with IndexError on files[0].
    print("No files found in", class_dir)
else:
    # Sample image file path
    image_path = os.path.join(class_dir, files[0])
    try:
        # The context manager closes the underlying file once we are done with it.
        with Image.open(image_path) as img:
            img.show()  # This will display the image if possible
            print("Image loaded successfully!")
    except OSError:
        # IOError is an alias of OSError; PIL raises it for missing or unreadable images.
        print("Image could not be opened!")

In [None]:
def image_generator_batch_size(folder, folder_path, prefix, batch_size, tot_img):
  """Draw augmented batches from one class folder and save them as PNG files.

  Images are read from the `folder` sub-directory of `dataset_dir`, passed
  through the module-level `image_generator`, resized to 224x224 and written
  to `folder_path` as each batch is produced.

  Args:
    folder: Name of the class sub-folder of `dataset_dir` to read from.
    folder_path: Directory the generated images are saved to.
    prefix: Filename prefix given to the saved images.
    batch_size: Number of images per generated batch.
    tot_img: Number of batches to draw. Despite the name, this counts
      batches, not individual images.
  """
  image_gen = image_generator.flow_from_directory(
      f'{dataset_dir}/',
      classes=[folder],
      batch_size=batch_size,
      target_size=(224, 224),
      save_to_dir=folder_path,
      save_prefix=prefix,
      save_format="png",
      class_mode=None,
  )
  for _ in range(tot_img):
    # Use the builtin next(): it works on every Keras version, whereas the
    # iterator's own `.next()` method is not available on newer releases.
    next(image_gen)

def rescale_generator_batch_size(folder, folder_path, batch_size, prefix, tot_img):
  """Save rescaled, 224x224 PNG copies of one class folder in a single batch.

  Images are read from the `folder` sub-directory of `dataset_dir` through the
  module-level `rescale_generator`; one batch of `tot_img` images is drawn and
  written to `folder_path`.

  Args:
    folder: Name of the class sub-folder of `dataset_dir` to read from.
    folder_path: Directory the generated images are saved to.
    batch_size: Not used; kept so existing calls keep working. The batch
      size actually passed to the generator is `tot_img`.
    prefix: Filename prefix given to the saved images.
    tot_img: Number of images requested in the single batch that is drawn.
  """
  rescale_flow = rescale_generator.flow_from_directory(
      f'{dataset_dir}/',
      classes=[folder],
      batch_size=tot_img,
      target_size=(224, 224),
      save_to_dir=folder_path,
      save_prefix=prefix,
      save_format="png",
      class_mode=None,
  )
  # Use the builtin next(): it works on every Keras version, whereas the
  # iterator's own `.next()` method is not available on newer releases.
  next(rescale_flow)



#for folder in os.listdir(dataset_dir):
 # if folder in labels_numerical:
  #  folder_path = os.path.join(dataset_dir, folder)
#
 #   tot_img = len(os.listdir(folder_path))
#
 #   if folder == "main_breadcrumbs":
  #    image_generator_batch_size(folder,folder_path,"aug_", 2, tot_img*16)
   # if folder == "main_container":
    #  image_generator_batch_size(folder,folder_path,"aug_", 2, tot_img*73)
    #if folder == "main_icons":
    #  rescale_generator_batch_size(folder,folder_path, 1, "", tot_img)
    #if folder == "main_input_fields":
    #  image_generator_batch_size(folder,folder_path,"", tot_img, 1)
    #if folder == "main_logos":
    #  rescale_generator_batch_size(folder ,folder_path, 1, "aug_", tot_img)
    #if folder == "main_modals":
    #  image_generator_batch_size(folder,folder_path,"aug_", 24, tot_img)
    #if folder == "main_radio":
    #  image_generator_batch_size(folder,folder_path,"aug_", 32, tot_img )
    #if folder == "main_text":
    #  image_generator_batch_size(folder,folder_path, "aug_",12, tot_img)



In [None]:

import random

def _delete_random_files(class_folder, count=200):
  """Delete up to `count` randomly chosen entries from a class folder of `dataset_dir`.

  Args:
    class_folder: Name of the sub-folder of `dataset_dir` to thin out.
    count: Maximum number of entries to delete; fewer are deleted when the
      folder holds fewer than `count` entries.
  """
  folder_path = os.path.join(dataset_dir, class_folder)
  filenames = os.listdir(folder_path)
  # Shuffle so the deleted subset is random, then take the first `count` names.
  random.shuffle(filenames)
  for filename in filenames[:count]:
    os.remove(os.path.join(folder_path, filename))


def breadcrumb():
  """Delete up to 200 random files from the "main_breadcrumbs" class folder."""
  _delete_random_files("main_breadcrumbs")


def icons():
  """Delete up to 200 random files from the "main_icons" class folder."""
  _delete_random_files("main_icons")


def input_fields():
  """Delete up to 200 random files from the "main_input_fields" class folder."""
  _delete_random_files("main_input_fields")

In [None]:
# Local working directory that will hold the train/val/test copies of the dataset.
dataset_path = "/content/dataset"
# exist_ok lets this cell be re-run without failing when the directory is already there.
os.makedirs(dataset_path, exist_ok = True)

In [None]:
# Create one sub-directory per split; directories that already exist are left alone.
for split_name in ("train", "val", "test"):
  os.makedirs(f"{dataset_path}/{split_name}", exist_ok = True)

In [None]:
# Paths of the three split directories under dataset_path.
train_ds_path, val_ds_path, test_ds_path = (
    os.path.join(dataset_path, split) for split in ("train", "val", "test")
)

In [None]:
# Show the entries now present in the local dataset directory.
os.listdir(dataset_path)

In [None]:
# Fractions of each class copied to train / val; whatever remains goes to test
# (the third value is implied by the remainder).
ratios = [.7,.1,.2]

for folder_name in os.listdir(dataset_dir):
  folder_path = os.path.join(dataset_dir, folder_name)
  if not os.path.isdir(folder_path):
    # Stray files in the dataset root are not classes, and listing them would raise.
    continue

  # Create the class folder in every split up front. Doing it once per class
  # avoids a makedirs call per file, and guarantees that all three splits
  # contain the same set of class folders even when a small class contributes
  # no files to one of them.
  train_dir, val_dir, test_dir = (
      os.path.join(split_root, folder_name)
      for split_root in (train_ds_path, val_ds_path, test_ds_path)
  )
  for split_dir in (train_dir, val_dir, test_dir):
    os.makedirs(split_dir, exist_ok = True)

  files = os.listdir(folder_path)
  random.shuffle(files)

  # Index boundaries: [0, end_train) -> train, [end_train, end_val) -> val, rest -> test.
  end_train = int(len(files) * ratios[0])
  end_val = end_train + int(len(files) * ratios[1])

  for i, filename in enumerate(files):
    if i < end_train:
      dest_dir = train_dir
    elif i < end_val:
      dest_dir = val_dir
    else:
      dest_dir = test_dir

    src_path = os.path.join(folder_path, filename)
    dest_path = os.path.join(dest_dir, filename)
    shutil.copyfile(src_path, dest_path)



In [None]:
# Number of entries copied into the training split for the icons class.
len(os.listdir(f"{train_ds_path}/main_icons"))

In [None]:
from PIL import Image

# Report the pixel size of one image from the training split of the icons class.
# The split is shuffled randomly, so use whichever file is present instead of
# relying on one specific filename having landed in the train folder.
sample_dir = f"{train_ds_path}/main_icons"
sample_names = sorted(os.listdir(sample_dir))
if sample_names:
  image_path = os.path.join(sample_dir, sample_names[0])
  # The context manager releases the file handle once the size has been read.
  with Image.open(image_path) as image:
    width, height = image.size
  print(f"Image Scale (Width x Height): {width} x {height} pixels")
else:
  print(f"No images found in {sample_dir}")


In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras import losses

In [None]:
# Batch size and input resolution shared by all three splits.
batch_size = 32
target_size = (224,224)

# Each split gets its own generator; all of them only rescale pixel values by 1/255.
train_datagen = ImageDataGenerator(rescale = 1./255)
val_datagen = ImageDataGenerator(rescale = 1./255)
test_datagen = ImageDataGenerator(rescale = 1./255)

# Flow settings common to every split; "sparse" yields integer class labels.
flow_kwargs = dict(
    batch_size = batch_size,
    target_size = target_size,
    class_mode = "sparse",
)

train_generator = train_datagen.flow_from_directory(train_ds_path, **flow_kwargs)
val_generator = val_datagen.flow_from_directory(val_ds_path, **flow_kwargs)
test_generator = test_datagen.flow_from_directory(test_ds_path, **flow_kwargs)

In [None]:
# Alias for the test-split generator (same object, not a copy).
test_ds = test_generator

In [None]:
# Length of the test generator (for a Keras directory iterator this is the number of batches).
len(test_ds)

In [None]:
import glob
def list_all_files(directory_path):
  """Return every path found beneath `directory_path`, at any depth.

  The result comes straight from a recursive glob, so sub-directories are
  included alongside regular files.
  """
  recursive_pattern = f"{directory_path}/**/*"
  return glob.glob(recursive_pattern, recursive = True)

# Collect everything under the training split, then keep regular files only
# (the recursive listing also returns the class directories themselves).
all_files = list_all_files(train_ds_path)
train_images = list(filter(os.path.isfile, all_files))

print(len(train_images))

In [None]:
# Small CNN classifier: two convolution + max-pooling stages, then a dense
# head ending in one softmax unit per class found by train_generator.
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=target_size + (3,)))
model.add(MaxPooling2D())
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D())
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dense(train_generator.num_classes, activation='softmax'))


In [None]:
# Adam optimizer; kept in a module-level name because the checkpoint code refers to it.
optimizer = tf.keras.optimizers.Adam(learning_rate = .001)
model.compile(
    # Sparse loss matches the integer labels produced by class_mode="sparse".
    loss = "SparseCategoricalCrossentropy",
    # `metrics` must be a list (or tuple/dict); a bare string is rejected by newer Keras.
    metrics = ["accuracy"],
    optimizer = optimizer
)


In [None]:
import math
from tensorflow.keras.callbacks import ModelCheckpoint

# Directory (relative to the working directory) that receives the training checkpoints.
checkpoint_dir = 'ui_detection_model'
os.makedirs(checkpoint_dir, exist_ok=True)
# Filename template; the {epoch} and {batch} fields are filled in with str.format
# by the checkpoint callback each time it saves.
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch:04d}_batch_{batch:05d}")

class CustomCheckpoint(tf.keras.callbacks.Callback):
  """Keras callback that saves a `tf.train.Checkpoint` every `save_freq` training batches.

  Attributes are documented inline in `__init__`.
  """

  def __init__(self, save_freq, checkpoint_prefix):
    """Store the save interval and the filename template (with {epoch}/{batch} fields)."""
    super().__init__()
    self.checkpoint_prefix = checkpoint_prefix
    self.save_freq = save_freq
    # 1-based number of the epoch currently running (0 until the first epoch starts).
    self.epoch = 0
    # Batches seen since the callback was created; never reset between epochs.
    self.batch = 0

  def on_epoch_begin(self, epoch, logs=None):
    """Record the current epoch as a 1-based number."""
    self.epoch = epoch + 1

  def on_train_batch_end(self, batch, logs = None):
    """Count the finished batch and save a checkpoint on every `save_freq`-th one."""
    self.batch += 1
    if self.batch % self.save_freq != 0:
      return

    target_prefix = self.checkpoint_prefix.format(epoch=self.epoch, batch=self.batch)
    snapshot = tf.train.Checkpoint(model=self.model, optimizer=self.model.optimizer)
    snapshot.save(file_prefix=target_prefix)


# Number of batches needed to cover all training images once, rounded up.
# NOTE(review): not referenced anywhere else in the visible cells.
n_batch = math.ceil(len(train_images) / batch_size)

def load_latest_checkpoint(model, optmizer, checkpoint_dir):
  """Restore the newest checkpoint in `checkpoint_dir`, if there is one.

  Args:
    model: Model whose state is restored.
    optmizer: Optimizer whose state is restored. (The misspelled name is
      kept so existing keyword callers keep working.)
    checkpoint_dir: Directory searched for checkpoints.

  Returns:
    An `(epoch, batch)` tuple parsed from the checkpoint filename
    (`ckpt_<epoch>_batch_<batch>-<save counter>`), or `(1, 0)` when no
    checkpoint exists.
  """
  latest_checkpoint = tf.train.latest_checkpoint(checkpoint_dir)
  if not latest_checkpoint:
    print("no checkpoint found, starting from scratch")
    return 1, 0

  print(f"loading checkpoint: {latest_checkpoint}....")
  # Restore into the optimizer that was passed in, not a same-named global.
  checkpoint = tf.train.Checkpoint(model=model, optimizer=optmizer)
  checkpoint.restore(latest_checkpoint).expect_partial()

  # basename works for nested or absolute checkpoint directories as well.
  parts = os.path.basename(latest_checkpoint).split('_')
  print(parts)
  epoch = int(parts[1])
  # The last field looks like "00005-1": drop the "-<save counter>" suffix,
  # however many digits the counter has.
  batch = int(parts[3].split('-')[0])
  print(f"loaded checkpoint at epoch {epoch} and batch {batch}")
  return epoch, batch

# Resume from the newest checkpoint when one exists; otherwise this yields epoch 1, batch 0.
intial_epoch, initial_batch = load_latest_checkpoint(model, optimizer, checkpoint_dir)

# Epoch number to train up to.
epoch = 10

# Save a checkpoint every 5 training batches.
custom_cp_callback = CustomCheckpoint(checkpoint_prefix = checkpoint_prefix, save_freq = 5)

history = model.fit(
    train_generator,
    validation_data = val_generator,
    epochs = epoch,
    # The restored epoch number is 1-based, while fit() expects a 0-based index.
    initial_epoch = intial_epoch - 1,
    callbacks = [custom_cp_callback],
)


In [None]:

def delete_checkpoint_files(checkpoint_pattern="/content/ui_detection_model/ckpt_0010*"):
    """Delete every file whose path matches a glob pattern.

    Args:
        checkpoint_pattern: Glob pattern of the files to remove. The default
            matches files in /content/ui_detection_model whose names start
            with "ckpt_0010".

    Files that cannot be removed are reported and skipped; the function
    never raises for a failed deletion.
    """
    # Delete each matching file
    for file in glob.glob(checkpoint_pattern):
        try:
            os.remove(file)
            print(f"Deleted file: {file}")
        except OSError as e:
            print(f"Error deleting file {file}: {e}")

# Example usage: remove the checkpoint files matched by the function's built-in pattern.
delete_checkpoint_files()

In [None]:
def save_model_locally(model, save_path):
    """Save `model` to `save_path` and print whether it worked.

    Any exception raised by `model.save` is caught and reported on stdout
    rather than propagated; the function always returns None.
    """
    try:
        model.save(save_path)
        outcome = f"Model saved successfully at: {save_path}"
    except Exception as err:
        outcome = f"Error saving model: {str(err)}"
    print(outcome)

In [None]:
# Save the trained model under a path relative to the current working directory.
save_model_locally(model, "no_tf_ui_classifier")

In [None]:
# Evaluate on the test split; the result unpacks into the loss and the accuracy metric set in compile().
loss,acc= model.evaluate(test_generator)

In [None]:
# Accuracy measured on the test split.
print(acc)