# Pipeline for Creating Training Images

Code for 2x2 tile splitting with bounding-box adjustment (45% of the images are tile-split, the remaining 55% are kept as original frames).

NOTE: Augmentation is being done in Roboflow!
For Tiles the Augmentation can be found here: https://app.roboflow.com/footballai-xndiy/1-2-tileaugmentation/1

For Original Frames the Augmentation can be found here: https://app.roboflow.com/footballai-xndiy/2-2-originalaugmentation/1



# Selection of 45% Images for Tile Splitting


In [6]:
import os
import random
import shutil

# 🗂️ Define paths
input_images_dir = "raw_training_images/images"
input_labels_dir = "raw_training_images/labels"
output_tiles_images_dir = "train_tiles/images"
output_tiles_labels_dir = "train_tiles/labels"
output_non_tiles_images_dir = "non_tiles/images"
output_non_tiles_labels_dir = "non_tiles/labels"

# 🔹 Create the output folders
for out_dir in (
    output_tiles_images_dir,
    output_tiles_labels_dir,
    output_non_tiles_images_dir,
    output_non_tiles_labels_dir,
):
    os.makedirs(out_dir, exist_ok=True)

# 📌 List all images and randomly pick 45% of them for tile splitting.
# NOTE(review): the shuffle is unseeded and the output folders are not cleared,
# so re-running this cell can leave the same image in both train_tiles and
# non_tiles — confirm whether a fixed seed / clean output folders are wanted.
all_images = [f for f in os.listdir(input_images_dir) if f.endswith((".jpg", ".png"))]
random.shuffle(all_images)

split_2x2 = set(all_images[:int(len(all_images) * 0.45)])  # 45% of the images go to tile splitting

print(f"📌 Gesamtanzahl Bilder: {len(all_images)}")
print(f"🔹 {len(split_2x2)} Bilder für Tile-Splitting ausgewählt.")
print(f"🔹 {len(all_images) - len(split_2x2)} Bilder verbleiben in non_tiles.")

# 📌 Copy images (and their label files, when present)
tiles_count = 0
non_tiles_count = 0

for img in all_images:
    # Swap only the real extension: a chained str.replace would also rewrite
    # ".jpg"/".png" occurring in the middle of a file name and then miss the label.
    label_file = os.path.splitext(img)[0] + ".txt"

    if img in split_2x2:
        # → goes to the tiles set
        dst_images_dir, dst_labels_dir = output_tiles_images_dir, output_tiles_labels_dir
    else:
        # → goes to the non_tiles set
        dst_images_dir, dst_labels_dir = output_non_tiles_images_dir, output_non_tiles_labels_dir

    shutil.copy(os.path.join(input_images_dir, img), os.path.join(dst_images_dir, img))

    src_label = os.path.join(input_labels_dir, label_file)
    if os.path.exists(src_label):
        shutil.copy(src_label, os.path.join(dst_labels_dir, label_file))

    if img in split_2x2:
        tiles_count += 1
    else:
        non_tiles_count += 1

# ✅ Final summary
print("\n✅ Kopiervorgang abgeschlossen.")
print(f"📁 Tiles-Bilder:     {tiles_count}")
print(f"📁 Non-Tiles-Bilder: {non_tiles_count}")


📌 Gesamtanzahl Bilder: 1248
🔹 561 Bilder für Tile-Splitting ausgewählt.
🔹 687 Bilder verbleiben in non_tiles.

✅ Kopiervorgang abgeschlossen.
📁 Tiles-Bilder:     561
📁 Non-Tiles-Bilder: 687


# ~~~~~~~~~~~~~~~~~~~~~~~

# Tile Splitting
Tile splitting is run on the images in the train_tiles folder.
Only images that have labels are split into tiles, and only tiles that contain an object are kept and then resized.
For the non-tile images, only a resize to 640 px is done.

The results are written to a folder named roboflow_images. Its content is also available in the Roboflow projects linked above, where the augmentation is carried out.

In [7]:
import os
import cv2

# 📁 Source folders: full-size images and their labels
input_tiles_images_dir = "train_tiles/images"
input_tiles_labels_dir = "train_tiles/labels"

# 📁 Destination folders for the generated tiles (created if missing)
output_tiles_images_dir = "roboflow_images/tiles_resized/images"
output_tiles_labels_dir = "roboflow_images/tiles_resized/labels"
for _tiles_out_dir in (output_tiles_images_dir, output_tiles_labels_dir):
    os.makedirs(_tiles_out_dir, exist_ok=True)

def split_image_and_adjust_labels(image_path, label_path, output_img_dir, output_lbl_dir):
    """Cut four tiles out of an image and save those that contain a class-0 box.

    Tiles are anchored at the top-left corner of each image quadrant and are at
    most 640 px wide/high; near the right/bottom image border the slice is
    shorter. Label lines are read as ``class x_center y_center width height``
    with coordinates normalised to the full image. Every box that overlaps a
    tile is clipped to the tile and re-normalised to the tile's real size.
    A tile is written (as ``<stem>_tile<i>.jpg`` plus ``<stem>_tile<i>.txt``)
    only if at least one of its boxes has class 0 (the code treats class 0 as
    the ball).

    Args:
        image_path: Path of the source image.
        label_path: Path of the matching label file; may be missing.
        output_img_dir: Existing folder receiving the tile images.
        output_lbl_dir: Existing folder receiving the tile label files.

    Returns:
        ``(0, 0)`` if the image has no labels or cannot be read, otherwise
        ``(1, n)`` where ``n`` is the number of tiles written.

    Raises:
        ValueError: If a non-empty label line does not hold exactly five numbers.
    """
    tile_size = 640

    # Read labels first: unlabeled images are skipped without decoding them.
    label_data = []
    if os.path.exists(label_path):
        with open(label_path, 'r') as f:
            # Blank lines would otherwise break the 5-value unpacking below.
            label_data = [fields for fields in (line.split() for line in f) if fields]

    if not label_data:
        return 0, 0  # no object → skip

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print(f"⚠️ {image_path} konnte nicht gelesen werden. Übersprungen.")
        return 0, 0
    h, w = img.shape[:2]

    base_name = os.path.splitext(os.path.basename(image_path))[0]

    # Convert every box to absolute pixel edges once (independent of the tile).
    boxes = []
    for label in label_data:
        cls, x, y, bw, bh = map(float, label)
        x_abs, y_abs = x * w, y * h
        bw_abs, bh_abs = bw * w, bh * h
        boxes.append((
            int(cls),
            x_abs - bw_abs / 2, x_abs + bw_abs / 2,
            y_abs - bh_abs / 2, y_abs + bh_abs / 2,
        ))

    tile_coords = [(0, 0), (0, w // 2), (h // 2, 0), (h // 2, w // 2)]
    saved_tiles = 0

    for i, (ty, tx) in enumerate(tile_coords):
        tile = img[ty:ty + tile_size, tx:tx + tile_size]
        # The slice is truncated at the image border, so use the real tile size
        # for clipping and normalisation; otherwise the labels would not match
        # the saved tile whenever it is smaller than tile_size.
        tile_h, tile_w = tile.shape[:2]
        x_end, y_end = tx + tile_w, ty + tile_h

        new_labels = []
        contains_ball = False

        for cls, x_min, x_max, y_min, y_max in boxes:
            overlaps = x_max > tx and x_min < x_end and y_max > ty and y_min < y_end
            if not overlaps:
                continue

            # Clip the box to the tile.
            cx_min, cx_max = max(x_min, tx), min(x_max, x_end)
            cy_min, cy_max = max(y_min, ty), min(y_max, y_end)

            new_bw = (cx_max - cx_min) / tile_w
            new_bh = (cy_max - cy_min) / tile_h
            if new_bw > 0 and new_bh > 0:
                new_x = ((cx_min + cx_max) / 2 - tx) / tile_w
                new_y = ((cy_min + cy_max) / 2 - ty) / tile_h
                new_labels.append(f"{cls} {new_x:.6f} {new_y:.6f} {new_bw:.6f} {new_bh:.6f}\n")
                if cls == 0:
                    contains_ball = True

        if contains_ball:
            tile_stem = f"{base_name}_tile{i}"
            cv2.imwrite(os.path.join(output_img_dir, tile_stem + ".jpg"), tile)
            with open(os.path.join(output_lbl_dir, tile_stem + ".txt"), 'w') as f:
                f.writelines(new_labels)
            saved_tiles += 1

    return 1, saved_tiles  # 1 processed image, n generated tiles

# 🔁 Run the tile splitting over every image in the tiles input folder
tile_images = [f for f in os.listdir(input_tiles_images_dir) if f.endswith(('.jpg', '.png'))]
total_processed = 0
total_generated_tiles = 0
total_skipped = 0

for img_file in tile_images:
    img_path = os.path.join(input_tiles_images_dir, img_file)
    # Swap only the real extension; a chained str.replace would also rewrite
    # ".jpg"/".png" inside the file name and point at a non-existent label.
    label_path = os.path.join(input_tiles_labels_dir, os.path.splitext(img_file)[0] + '.txt')

    processed, generated = split_image_and_adjust_labels(img_path, label_path, output_tiles_images_dir, output_tiles_labels_dir)
    if processed == 0:
        total_skipped += 1
    total_processed += processed
    total_generated_tiles += generated

# 📊 Statistics
print("\n✅ Tile-Splitting abgeschlossen!")
print(f"🔍 Ursprünglich verarbeitete Bilder: {len(tile_images)}")
print(f"🚫 Bilder ohne Objekte (verworfen): {total_skipped}")
print(f"🧩 Erwartete Tiles (max): {len(tile_images) * 4}")
print(f"📦 Tatsächlich erzeugte Tiles (mit Ball): {total_generated_tiles}")



✅ Tile-Splitting abgeschlossen!
🔍 Ursprünglich verarbeitete Bilder: 561
🚫 Bilder ohne Objekte (verworfen): 0
🧩 Erwartete Tiles (max): 2244
📦 Tatsächlich erzeugte Tiles (mit Ball): 474


In [8]:
import os
import cv2

# Source folders: images (and labels) that were not selected for tile splitting
input_images_dir = "non_tiles/images"
input_labels_dir = "non_tiles/labels"

# Destination folders for the resized originals (created if missing)
output_resized_images_dir = "roboflow_images/original_resized/images"
output_resized_labels_dir = "roboflow_images/original_resized/labels"
for _resized_out_dir in (output_resized_images_dir, output_resized_labels_dir):
    os.makedirs(_resized_out_dir, exist_ok=True)

def resize_image_and_adjust_labels(image_path, label_path, output_img_dir, output_lbl_dir):
    """Centre-crop an image to a square, resize it to 640x640 and remap its labels.

    Label lines are read as ``class x_center y_center width height`` with
    coordinates normalised to the full image. A box is kept when its centre
    lies inside the square crop; kept boxes are clipped to the crop so the
    written coordinates stay within ``[0, 1]``. The image is saved under its
    original file name and the labels under the same stem with ``.txt``.

    Args:
        image_path: Path of the source image.
        label_path: Path of the matching label file.
        output_img_dir: Existing folder receiving the resized image.
        output_lbl_dir: Existing folder receiving the adjusted label file.

    Returns:
        ``1`` if image and label file were written, ``0`` if the label file is
        missing or empty or the image cannot be read.

    Raises:
        ValueError: If a non-empty label line does not hold exactly five numbers.
    """
    target_size = 640

    # Check the labels first: images without labels are skipped undecoded.
    if not os.path.exists(label_path):
        return 0
    with open(label_path, 'r') as f:
        # Blank lines would otherwise break the 5-value unpacking below.
        labels = [fields for fields in (line.split() for line in f) if fields]
    if not labels:
        return 0

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print(f"⚠️ {image_path} konnte nicht gelesen werden. Übersprungen.")
        return 0
    h, w = img.shape[:2]

    # Largest centred square: trims the longer side symmetrically.
    side = min(w, h)
    crop_x = (w - side) // 2
    crop_y = (h - side) // 2

    cropped_img = img[crop_y:crop_y + side, crop_x:crop_x + side]
    resized_img = cv2.resize(cropped_img, (target_size, target_size))
    base_name = os.path.basename(image_path)

    new_labels = []
    for label in labels:
        cls, x, y, bw, bh = map(float, label)

        # Box centre in crop pixel coordinates.
        cx = x * w - crop_x
        cy = y * h - crop_y
        if not (0 <= cx <= side and 0 <= cy <= side):
            continue  # centre was cropped away

        # Clip the box edges to the crop; the resize scales both axes by the
        # same factor as the normalisation, so dividing by `side` is enough.
        half_w, half_h = bw * w / 2, bh * h / 2
        x_min, x_max = max(cx - half_w, 0.0), min(cx + half_w, float(side))
        y_min, y_max = max(cy - half_h, 0.0), min(cy + half_h, float(side))

        new_bw = (x_max - x_min) / side
        new_bh = (y_max - y_min) / side
        if new_bw > 0 and new_bh > 0:
            new_x = (x_min + x_max) / 2 / side
            new_y = (y_min + y_max) / 2 / side
            new_labels.append(f"{int(cls)} {new_x:.6f} {new_y:.6f} {new_bw:.6f} {new_bh:.6f}\n")

    cv2.imwrite(os.path.join(output_img_dir, base_name), resized_img)
    # Swap only the real extension (str.replace would also hit ".jpg" mid-name).
    label_name = os.path.splitext(base_name)[0] + '.txt'
    with open(os.path.join(output_lbl_dir, label_name), 'w') as f:
        f.writelines(new_labels)
    return 1

# 🔁 Resize every image in the non-tiles input folder
images = [f for f in os.listdir(input_images_dir) if f.endswith(('.jpg', '.png'))]
saved = 0

for img_file in images:
    img_path = os.path.join(input_images_dir, img_file)
    # Swap only the real extension; a chained str.replace would also rewrite
    # ".jpg"/".png" inside the file name and point at a non-existent label.
    label_path = os.path.join(input_labels_dir, os.path.splitext(img_file)[0] + '.txt')
    saved += resize_image_and_adjust_labels(img_path, label_path, output_resized_images_dir, output_resized_labels_dir)

print("\n✅ Resize abgeschlossen!")
print(f"🖼️ Ursprünglich: {len(images)} Bilder")
print(f"🎯 Gespeichert (mit gültigen Labels): {saved}")
print("📐 Bilder wurden korrekt gecropped & rescaled.")



✅ Resize abgeschlossen!
🖼️ Ursprünglich: 687 Bilder
🎯 Gespeichert (mit gültigen Labels): 687
📐 Bilder wurden korrekt gecropped & rescaled.


# Resize der valid und test images
Validation- und Test-Images werden ebenfalls auf 640 x 640 gecropped, um Konsistenz zu gewährleisten.

In [9]:
import os
import cv2
import numpy as np

# 🗂️ Validation set: source and destination folders
input_val_images_dir = "../valid_original/images"
input_val_labels_dir = "../valid_original/labels"
output_val_images_dir = "../valid/images"
output_val_labels_dir = "../valid/labels"

# 🗂️ Test set: source and destination folders
input_test_images_dir = "../test_original/images"
input_test_labels_dir = "../test_original/labels"
output_test_images_dir = "../test/images"
output_test_labels_dir = "../test/labels"

# 🔹 Make sure every destination folder exists
for _split_out_dir in (
    output_val_images_dir,
    output_val_labels_dir,
    output_test_images_dir,
    output_test_labels_dir,
):
    os.makedirs(_split_out_dir, exist_ok=True)

# 📌 Centre crop with bounding-box adjustment
def crop_image_and_adjust_labels(image_path, label_path, output_img_dir, output_lbl_dir):
    """Cut the central 640x640 region out of an image and remap its labels.

    Label lines are read as ``class x_center y_center width height`` with
    coordinates normalised to the full image. A box is kept when its centre
    lies inside the crop; kept boxes are clipped to the crop so the written
    coordinates stay within ``[0, 1]``. Nothing is written when the label file
    is missing or empty, when the image cannot be read, or when the image is
    smaller than 640 px in either dimension (a warning is printed for the
    latter two). The image is saved under its original file name and the
    labels under the same stem with ``.txt``.

    Args:
        image_path: Path of the source image.
        label_path: Path of the matching label file.
        output_img_dir: Existing folder receiving the cropped image.
        output_lbl_dir: Existing folder receiving the adjusted label file.

    Returns:
        None.

    Raises:
        ValueError: If a non-empty label line does not hold exactly five numbers.
    """
    target_size = 640  # final edge length in pixels

    # Check the labels first: images without labels are discarded undecoded.
    if not os.path.exists(label_path):
        return
    with open(label_path, 'r') as f:
        # Blank lines would otherwise break the 5-value unpacking below.
        labels = [fields for fields in (line.split() for line in f) if fields]
    if not labels:
        return  # no objects → image is discarded

    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print(f"⚠️ {image_path} konnte nicht gelesen werden. Übersprungen.")
        return
    h, w = img.shape[:2]

    # A 640x640 crop needs at least 640 px in both dimensions.
    if w < target_size or h < target_size:
        print(f"⚠️ {image_path} ist kleiner als 640x640. Übersprungen.")
        return

    # Top-left corner of the centred crop window.
    crop_x = (w - target_size) // 2
    crop_y = (h - target_size) // 2
    cropped_img = img[crop_y:crop_y + target_size, crop_x:crop_x + target_size]

    base_name = os.path.basename(image_path)

    new_labels = []
    for label in labels:
        cls, x, y, bw, bh = map(float, label)

        # Box centre in crop pixel coordinates.
        cx = x * w - crop_x
        cy = y * h - crop_y
        if not (0 <= cx <= target_size and 0 <= cy <= target_size):
            continue  # centre was cropped away

        # Clip the box edges to the crop so no coordinate leaves [0, 1];
        # without clipping, boxes near the border would reach outside the image.
        half_w, half_h = bw * w / 2, bh * h / 2
        x_min, x_max = max(cx - half_w, 0.0), min(cx + half_w, float(target_size))
        y_min, y_max = max(cy - half_h, 0.0), min(cy + half_h, float(target_size))

        new_bw = (x_max - x_min) / target_size
        new_bh = (y_max - y_min) / target_size
        if new_bw > 0 and new_bh > 0:
            new_x = (x_min + x_max) / 2 / target_size
            new_y = (y_min + y_max) / 2 / target_size
            new_labels.append(f"{int(cls)} {new_x:.6f} {new_y:.6f} {new_bw:.6f} {new_bh:.6f}\n")

    # Save image and labels.
    cv2.imwrite(os.path.join(output_img_dir, base_name), cropped_img)
    # Swap only the real extension (str.replace would also hit ".jpg" mid-name).
    label_name = os.path.splitext(base_name)[0] + '.txt'
    with open(os.path.join(output_lbl_dir, label_name), 'w') as f:
        f.writelines(new_labels)

# 🔄 Crop all validation images
val_images = [f for f in os.listdir(input_val_images_dir) if f.endswith(('.jpg', '.png'))]
for img_file in val_images:
    img_path = os.path.join(input_val_images_dir, img_file)
    # Swap only the real extension; a chained str.replace would also rewrite
    # ".jpg"/".png" inside the file name and point at a non-existent label.
    label_path = os.path.join(input_val_labels_dir, os.path.splitext(img_file)[0] + '.txt')
    crop_image_and_adjust_labels(img_path, label_path, output_val_images_dir, output_val_labels_dir)

print("✅ Validation-Bilder wurden korrekt gecropped!")

# 🔄 Crop all test images
test_images = [f for f in os.listdir(input_test_images_dir) if f.endswith(('.jpg', '.png'))]
for img_file in test_images:
    img_path = os.path.join(input_test_images_dir, img_file)
    label_path = os.path.join(input_test_labels_dir, os.path.splitext(img_file)[0] + '.txt')
    crop_image_and_adjust_labels(img_path, label_path, output_test_images_dir, output_test_labels_dir)

print("✅ Test-Bilder wurden korrekt gecropped!")

# Report overall success only after both sets were actually processed
# (previously this was printed before any image had been touched).
print("✅ Cropping ohne Padding erfolgreich!")


✅ Cropping ohne Padding erfolgreich!
✅ Validation-Bilder wurden korrekt gecropped!
✅ Test-Bilder wurden korrekt gecropped!
