In [15]:
# Importy do całego projektu
import os
from IPython.display import display, clear_output
from ultralytics import YOLO
from pathlib import Path
import cv2
import numpy as np
import random
from tqdm import tqdm
import shutil

In [2]:
import torch
torch.cuda.is_available()

True

In [9]:
import kagglehub
dataset_path = kagglehub.dataset_download("jessicali9530/lfw-dataset")
#dataset_path = kagglehub.dataset_download("chiragsaipanuganti/morph")

In [10]:
# Pobranie ścieżek do obrazów
image_paths = []
for root, dirs, files in os.walk(dataset_path):
    for file in files:
        if file.lower().endswith(('.jpg')):
            image_paths.append(os.path.join(root, file))

In [11]:
def populate_card_labels_v3(image_path, height=400, width=400, im_width=250, im_height=250):
	card = np.ones((height, width, 3), dtype=np.uint8) * 255
	img = cv2.imread(image_path)
	img = cv2.resize(img, (im_width, im_height))
	img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

	diagonal_size = int(np.sqrt(im_width**2 + im_height**2)) + 10
	rot_canvas = np.ones((diagonal_size, diagonal_size, 3), dtype=np.uint8) * 255
	start_x = (diagonal_size - im_width) // 2
	start_y = (diagonal_size - im_height) // 2
	rot_canvas[start_y:start_y + im_height, start_x:start_x + im_width] = img

	rot_angle = random.uniform(-179, 180)
	center = (diagonal_size // 2, diagonal_size // 2)
	rot_matrix = cv2.getRotationMatrix2D(center, rot_angle, 1.0)
	rot_img = cv2.warpAffine(rot_canvas, rot_matrix, (diagonal_size, diagonal_size), borderValue=(255, 255, 255))

	card_x = (width - diagonal_size) // 2
	card_y = (height - diagonal_size) // 2
	card[card_y:card_y+diagonal_size, card_x:card_x+diagonal_size] = rot_img

	corners = np.array([
		[start_x, start_y],
		[start_x + im_width, start_y],
		[start_x + im_width, start_y + im_height],
		[start_x, start_y + im_height]
	], dtype=np.float32)

	ones = np.ones((corners.shape[0], 1))
	corners_homo = np.hstack([corners, ones])
	rotated_corners = (rot_matrix @ corners_homo.T).T
	rotated_corners += np.array([card_x, card_y])

	x_min, y_min = rotated_corners.min(axis=0)
	x_max, y_max = rotated_corners.max(axis=0)

	bbox_cx = (x_min + x_max) / 2 / width
	bbox_cy = (y_min + y_max) / 2 / height
	bbox_w = (x_max - x_min) / width
	bbox_h = (y_max - y_min) / height

	norm_corners = []
	for x, y in rotated_corners:
		norm_corners.extend([x / width, y / height, 2])

	label = [0, bbox_cx, bbox_cy, bbox_w, bbox_h] + norm_corners

	return card, label

def get_cards_labels(image_paths, num, im_width, im_height, output_path, starting_idx=0):
	output_path = Path(output_path)
	images_path = "images" / output_path
	labels_path = "labels" / output_path
	images_path.mkdir(parents=True, exist_ok=True)
	labels_path.mkdir(parents=True, exist_ok=True)
	cards = []
    
	for i in range(starting_idx, starting_idx+num):
		card, label = populate_card_labels_v3(
			image_paths[i],
			im_width=im_width,
			im_height=im_height
		)

		filename = f"card_{i+1}"
		image_file = images_path / f"{filename}.jpg"
		label_file = labels_path / f"{filename}.txt"
        
		card_bgr = cv2.cvtColor(card, cv2.COLOR_RGB2BGR)
		cv2.imwrite(str(image_file), card_bgr)
        
		label_line = " ".join(f"{v:.6f}" if isinstance(v, float) else str(v) for v in label)

		with open(label_file, "w") as f:
			f.write(label_line)

		cards.append({"filename": f"{filename}.jpg", "label": label})
    
	return cards

In [None]:
shutil.rmtree("images", ignore_errors=True)
shutil.rmtree("labels", ignore_errors=True)

random.shuffle(image_paths)

i = 0
max = len(image_paths)

train_p = int(max * 0.7)
valid_p = int(max * 0.2)
test_p = max - train_p - valid_p

res = [250, 200, 150, 175, 275]
res_n = int(train_p / len(res))
res_idx = 0

iter = len(res)
progress = None
progress = tqdm(total=3*len(res), desc="Generating cards", unit="card")
for j in res:
	cards = get_cards_labels(image_paths, num=res_n, im_width=j, im_height=j, output_path='train', starting_idx=res_idx)
	progress.update(1)
	res_idx += res_n
res_n = int(valid_p / len(res))
for j in res:
	cards = get_cards_labels(image_paths, num=res_n, im_width=j, im_height=j, output_path='valid', starting_idx=res_idx)
	progress.update(1)
	res_idx += res_n
res_n = int(test_p / len(res))
for j in res:
	cards = get_cards_labels(image_paths, num=res_n, im_width=j, im_height=j, output_path='test', starting_idx=res_idx)
	progress.update(1)
	res_idx += res_n

if progress:
	progress.close()

Generating cards: 100%|██████████| 15/15 [04:16<00:00, 17.12s/card]
Generating cards: 100%|██████████| 15/15 [00:15<00:00,  2.32card/s]

In [None]:
model = YOLO("runs/pose/image_rotation_model_main/weights/best.pt")

In [None]:
model.train(
    data='image_rotation.yaml',
    epochs=10,
    imgsz=416,
    batch=32,
    degrees=5.0,
    mosaic=0.0,
    auto_augment='none',
    erasing=0.0,
    cache='ram',
    name='image_rotation_model_train',
    exist_ok=True,
)

New https://pypi.org/project/ultralytics/8.3.151 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.144 🚀 Python-3.11.9 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4070 Ti SUPER, 16376MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=none, batch=32, bgr=0.0, box=7.5, cache=ram, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=image_rotation.yaml, degrees=5.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.0, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=416, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=runs/pose/image_rotation_model6/weights/best.pt, momentum=0.937, mosaic=0.0, multi_scale=False, n

[34m[1mtrain: [0mScanning /repos/image-corner-rotation/labels/train_v3.cache... 2500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2500/2500 [00:00<?, ?it/s]




[34m[1mtrain: [0mCaching images (1.0GB RAM):  82%|████████▏ | 2050/2500 [00:01<00:00, 1727.04it/s]


KeyboardInterrupt: 

In [None]:
model = YOLO('runs/pose/image_rotation_model3/weights/best.pt')

results = model.predict(
    source='images/test_v3',
    save=True,
    conf=0.25,
    imgsz=320
)