In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img, array_to_img

In [None]:
# Folder locations: the cropped source screenshots and the destination that
# receives their augmented copies.
input_dir = "drive/MyDrive/screenshots10cropped"
output_dir = "drive/MyDrive/screenshots10cropped_augmented"

# Each folder carries its own label file under the same file name.
label_file, output_label_file = (
    os.path.join(folder, "labels.txt") for folder in (input_dir, output_dir)
)

In [None]:
# Make sure the output folder exists (no error if it is already there).
os.makedirs(output_dir, exist_ok=True)

# Load the source label data. The file has no header row, so column names are
# supplied here. The separator is a regular expression (a comma followed by
# optional whitespace); it is written as a raw string so that `\s` reaches the
# regex engine instead of being treated as an (invalid) string escape.
# Regex separators require the slower Python parsing engine.
df = pd.read_csv(
    label_file,
    header=None,
    names=["filename", "emoji", "unicode"],
    sep=r",\s*",
    engine="python",
)

In [None]:
# Random transformations applied to every generated sample. The settings are
# gathered in one mapping so they can be reviewed or tweaked in a single place
# before being handed to the generator.
augmentation_options = {
    "rotation_range": 10,
    "width_shift_range": 0.2,
    "height_shift_range": 0.2,
    "zoom_range": 0.2,
    "shear_range": 10,
    "brightness_range": [0.8, 1.2],
    # Pixels exposed by a shift/rotation are filled from the nearest edge.
    "fill_mode": 'nearest',
}
datagen = ImageDataGenerator(**augmentation_options)

# Start from the labels already recorded in the output folder (if any) so that
# new entries are appended to them rather than replacing them.
_label_columns = ["filename", "emoji", "unicode"]
if os.path.exists(output_label_file):
    # Raw string: `\s` must reach the regex engine, not be parsed as an
    # (invalid) string escape. Regex separators require engine="python".
    existing_labels = pd.read_csv(
        output_label_file,
        header=None,
        names=_label_columns,
        sep=r",\s*",
        engine="python",
    )
else:
    # No previous run: begin with an empty table that has the same columns.
    existing_labels = pd.DataFrame(columns=_label_columns)

In [None]:
print("Starting additional augmentation...")

# Tunables for this run.
NUM_EXTRA_AUGMENTATIONS = 5   # new augmented samples written per source image
TARGET_SIZE = (64, 64)        # every source image is resized to this on load
LABEL_COLUMNS = ["filename", "emoji", "unicode"]

# List the output folder once instead of once per source image; the set is
# kept in sync below as new files are written, so the per-image count of
# already-present augmentations matches what a fresh directory listing would give.
existing_outputs = set(os.listdir(output_dir))

# Label rows for the files written in this run. They are collected in a plain
# list and turned into a DataFrame once at the end, rather than growing a
# DataFrame row by row (which copies the whole table on every append).
new_label_rows = []

try:
    for row in df.itertuples(index=False):
        base_filename = row.filename
        emoji = row.emoji
        unicode_val = row.unicode
        img_path = os.path.join(input_dir, base_filename)

        try:
            img = load_img(img_path, target_size=TARGET_SIZE)
            x = img_to_array(img)
            x = x.reshape((1,) + x.shape)  # add a leading batch axis for flow()

            prefix = os.path.splitext(base_filename)[0]
            marker = prefix + "_aug"
            # Number of augmented files already present for this image; used as
            # the starting offset so new file names continue the numbering.
            count = sum(1 for name in existing_outputs if name.startswith(marker))

            # datagen.flow() yields batches indefinitely, so stop explicitly
            # after the requested number of samples.
            generated = 0
            for batch in datagen.flow(x, batch_size=1):
                aug_filename = f"{prefix}_aug_extra{generated + count}.png"
                aug_path = os.path.join(output_dir, aug_filename)
                # NOTE(review): array_to_img rescales pixel values by default —
                # confirm this is intended alongside brightness augmentation.
                array_to_img(batch[0]).save(aug_path)

                # Record the file only after it has been saved successfully.
                existing_outputs.add(aug_filename)
                new_label_rows.append([aug_filename, emoji, unicode_val])

                generated += 1
                if generated >= NUM_EXTRA_AUGMENTATIONS:
                    break

            print(f"Augmented {base_filename} -> {generated} new samples")

        except Exception as e:
            # Best effort: report the failing image and continue with the rest.
            print(f"Failed to process {base_filename}: {e}")
finally:
    # Always persist the labels, even if the run is interrupted part-way, so
    # that images already written to disk are not left without a label entry.
    if new_label_rows:
        existing_labels = pd.concat(
            [existing_labels, pd.DataFrame(new_label_rows, columns=LABEL_COLUMNS)],
            ignore_index=True,
        )
    # Save combined label file
    existing_labels.to_csv(output_label_file, index=False, header=False)

print("✅ Additional augmentation complete!")

Starting additional augmentation...
Augmented screenshot_1.png -> 5 new samples
Augmented screenshot_2.png -> 5 new samples
Augmented screenshot_3.png -> 5 new samples
Augmented screenshot_4.png -> 5 new samples
Augmented screenshot_5.png -> 5 new samples
Augmented screenshot_6.png -> 5 new samples
Augmented screenshot_7.png -> 5 new samples
Augmented screenshot_8.png -> 5 new samples
Augmented screenshot_9.png -> 5 new samples
Augmented screenshot_10.png -> 5 new samples
Augmented screenshot_11.png -> 5 new samples
Augmented screenshot_12.png -> 5 new samples
Augmented screenshot_13.png -> 5 new samples
Augmented screenshot_14.png -> 5 new samples
Augmented screenshot_15.png -> 5 new samples
Augmented screenshot_16.png -> 5 new samples
Augmented screenshot_17.png -> 5 new samples
Augmented screenshot_18.png -> 5 new samples
Augmented screenshot_19.png -> 5 new samples
Augmented screenshot_20.png -> 5 new samples
Augmented screenshot_21.png -> 5 new samples
Augmented screenshot_22.png 