In [1]:
# Imports for the whole project
import os
from IPython.display import display, clear_output
from ultralytics import YOLO
from pathlib import Path
import cv2
import numpy as np
import random
from tqdm import tqdm
import shutil

In [2]:
import torch
# Sanity check: ask PyTorch whether a CUDA device is available (the recorded output is True).
# NOTE(review): this import lives outside the import cell at the top of the notebook.
torch.cuda.is_available()

True

In [3]:
import kagglehub
# Fetch the two source datasets through kagglehub. The returned values are used
# by the next cell as root directories that are walked for .jpg files.
# NOTE(review): presumably these calls need network access / Kaggle credentials
# on first use - confirm.
dataset_path1 = kagglehub.dataset_download("jessicali9530/lfw-dataset")
dataset_path2 = kagglehub.dataset_download("chiragsaipanuganti/morph")
# A third (weather) dataset download is currently disabled:
#dataset_path3 = kagglehub.dataset_download("jehanbhathena/weather-dataset")

In [4]:
# Collect the path of every .jpg file found anywhere under the downloaded datasets
datasets = [dataset_path1, dataset_path2]
image_paths = []
for dataset_path in datasets:
    for root, dirs, files in os.walk(dataset_path):
        # Extension match is case-insensitive; only ".jpg" is accepted.
        image_paths.extend(
            os.path.join(root, file)
            for file in files
            if file.lower().endswith('.jpg')
        )

In [None]:
def populate_card_labels_v3(image_path, im_width=250):
	"""Build one synthetic "card" and its YOLO-pose style label.

	The image at ``image_path`` is resized to ``im_width`` pixels wide (aspect
	ratio kept), pasted in the middle of a white square canvas, rotated by a
	random angle about the canvas centre and finally framed with a white
	margin. The label describes where the four corners of the original image
	ended up on that card.

	Args:
		image_path: Path handed to ``cv2.imread``.
		im_width: Width in pixels the source image is resized to before rotation.

	Returns:
		``(card, label)`` where ``card`` is an RGB ``uint8`` array and ``label``
		is the flat list
		``[0, cx, cy, w, h, x1, y1, 2, x2, y2, 2, x3, y3, 2, x4, y4, 2]``.
		The box is the axis-aligned bounding box of the rotated corners and the
		keypoints are the corners in the order top-left, top-right,
		bottom-right, bottom-left of the un-rotated image. All coordinates are
		normalised by the card size, so they lie in [0, 1].
		``(None, None)`` is returned when the image cannot be read.
	"""
	margin = 5  # white border (pixels) added on every side of the rotated canvas

	img = cv2.imread(image_path)
	if img is None:
		return None, None
	height, width = img.shape[:2]
	# Keep the aspect ratio; an extremely wide image must not collapse to a
	# zero-pixel height, which cv2.resize would reject.
	im_height = max(1, int(height * im_width / width))
	img = cv2.resize(img, (im_width, im_height))
	img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

	# A square canvas at least as large as the image diagonal, so the image
	# stays fully inside it for every rotation angle.
	diagonal_size = int(np.sqrt(im_width**2 + im_height**2)) + 10
	rot_canvas = np.full((diagonal_size, diagonal_size, 3), 255, dtype=np.uint8)
	start_x = (diagonal_size - im_width) // 2
	start_y = (diagonal_size - im_height) // 2
	rot_canvas[start_y:start_y + im_height, start_x:start_x + im_width] = img

	rot_angle = random.uniform(-179, 180)
	center = (diagonal_size // 2, diagonal_size // 2)
	rot_matrix = cv2.getRotationMatrix2D(center, rot_angle, 1.0)
	rot_img = cv2.warpAffine(rot_canvas, rot_matrix, (diagonal_size, diagonal_size), borderValue=(255, 255, 255))

	height, width = rot_img.shape[:2]
	card_height = height + 2 * margin
	card_width = width + 2 * margin
	card = np.full((card_height, card_width, 3), 255, dtype=np.uint8)
	card[margin:margin + height, margin:margin + width] = rot_img

	# Corners of the pasted image on the un-rotated canvas: TL, TR, BR, BL.
	corners = np.array([
		[start_x, start_y],
		[start_x + im_width, start_y],
		[start_x + im_width, start_y + im_height],
		[start_x, start_y + im_height]
	], dtype=np.float32)

	# Apply the same 2x3 affine matrix to the corners (homogeneous coordinates),
	# then shift by the margin to move from canvas to card coordinates.
	ones = np.ones((corners.shape[0], 1))
	corners_homo = np.hstack([corners, ones])
	rotated_corners = (rot_matrix @ corners_homo.T).T + margin

	x_min, y_min = rotated_corners.min(axis=0)
	x_max, y_max = rotated_corners.max(axis=0)

	# Normalise by the full card size. The previous `value / width+5` parsed as
	# `(value / width) + 5`, which pushed every coordinate outside [0, 1].
	bbox_cx = (x_min + x_max) / 2 / card_width
	bbox_cy = (y_min + y_max) / 2 / card_height
	bbox_w = (x_max - x_min) / card_width
	bbox_h = (y_max - y_min) / card_height

	norm_corners = []
	for x, y in rotated_corners:
		# Third value is the per-keypoint flag written after x and y; always 2.
		norm_corners.extend([x / card_width, y / card_height, 2])

	label = [0, bbox_cx, bbox_cy, bbox_w, bbox_h] + norm_corners

	return card, label

def get_cards_labels(image_paths, num, im_width, output_path, starting_idx=0):
	"""Generate cards for a window of ``image_paths`` and write them to disk.

	For each index ``i`` in ``[starting_idx, starting_idx + num)`` a card is
	built with ``populate_card_labels_v3`` and stored as
	``images/<output_path>/card_<i+1>.jpg`` with its label line in
	``labels/<output_path>/card_<i+1>.txt`` (both relative to the current
	working directory).

	Args:
		image_paths: Sequence of source image paths.
		num: Number of consecutive entries to process.
		im_width: Resize width forwarded to ``populate_card_labels_v3``.
		output_path: Sub-directory name used under ``images/`` and ``labels/``.
		starting_idx: Index of the first entry to process.

	Returns:
		A list of ``{"filename": ..., "label": ...}`` dicts, one per card that
		was written successfully. Images that could not be read or written are
		skipped and counted in the printed summary. A window reaching past
		either end of ``image_paths`` is clamped instead of raising
		``IndexError`` or wrapping around to negative indices.
	"""
	output_path = Path(output_path)
	images_path = "images" / output_path
	labels_path = "labels" / output_path
	images_path.mkdir(parents=True, exist_ok=True)
	labels_path.mkdir(parents=True, exist_ok=True)
	cards = []
	wrong = 0

	# Clamp the requested window to the indices that actually exist.
	first_idx = max(starting_idx, 0)
	last_idx = min(starting_idx + num, len(image_paths))

	for i in range(first_idx, last_idx):
		card, label = populate_card_labels_v3(
			image_paths[i],
			im_width=im_width
		)
		if card is None or label is None:
			wrong += 1
			continue

		filename = f"card_{i+1}"
		image_file = images_path / f"{filename}.jpg"
		label_file = labels_path / f"{filename}.txt"

		card_bgr = cv2.cvtColor(card, cv2.COLOR_RGB2BGR)
		# cv2.imwrite signals failure through its return value; skip the label
		# as well so no label file is left without a matching image.
		if not cv2.imwrite(str(image_file), card_bgr):
			wrong += 1
			continue

		# Floats get six decimals; the integer class id and flags stay as-is.
		label_line = " ".join(f"{v:.6f}" if isinstance(v, float) else str(v) for v in label)

		with open(label_file, "w") as f:
			f.write(label_line)

		cards.append({"filename": f"{filename}.jpg", "label": label})

	if wrong > 0:
		print(f"Skipped {wrong} images due to errors.")
	return cards

In [None]:
# Start from a clean slate: drop previously generated images and labels.
shutil.rmtree("images", ignore_errors=True)
shutil.rmtree("labels", ignore_errors=True)

# NOTE(review): no seed is set, so the shuffle (and therefore the split) differs
# between runs; the shuffle also mutates image_paths in place.
random.shuffle(image_paths)

# 70 % / 20 % / remainder split of the shuffled paths.
# (Named total_images rather than `max` so the builtin is not shadowed for the
# rest of the notebook.)
total_images = len(image_paths)

train_p = int(total_images * 0.7)
valid_p = int(total_images * 0.2)
test_p = total_images - train_p - valid_p

# Each split is divided evenly across these resize widths.
res = [250, 300, 150, 400, 500]
splits = [('train', train_p), ('valid', valid_p), ('test', test_p)]

# Running offset into image_paths. It advances by the size of the chunk that was
# just consumed, so consecutive chunks - including those on either side of a
# split boundary - never overlap. Previously the offset was advanced by the
# *next* split's chunk size, which made valid reuse images from the last train
# chunk and test reuse images from valid.
res_idx = 0

with tqdm(total=len(splits) * len(res), desc="Generating cards", unit="card") as progress:
	for split_name, split_size in splits:
		# Images left over by the integer division are not used.
		res_n = split_size // len(res)
		for j in res:
			cards = get_cards_labels(image_paths, num=res_n, im_width=j, output_path=split_name, starting_idx=res_idx)
			res_idx += res_n
			progress.update(1)

Generating cards: 100%|██████████| 15/15 [01:19<00:00,  5.30s/card]


In [46]:
# Load weights saved by an earlier "image_rotation_model_train" run.
# NOTE(review): this cell's execution count (46) is higher than that of the cells
# that follow, and the next cell reassigns `model`, so on a top-to-bottom run
# this load has no lasting effect.
model = YOLO("runs/pose/image_rotation_model_train/weights/best.pt")

In [7]:
# Create the model that the training cell below fine-tunes, starting from the
# "yolo11x-pose.pt" checkpoint (presumably pretrained pose weights resolved by
# Ultralytics - confirm).
model = YOLO("yolo11x-pose.pt")

In [8]:
# Train the pose model on the dataset described by image_rotation.yaml (file not
# shown in this notebook). The recorded log reports 10 epochs in about 2.4 hours
# and results saved under runs/pose/image_rotation_model_anew.
model.train(
    data='image_rotation.yaml',
    epochs=10,
    imgsz=416,
    batch=32,
    degrees=5.0,  # rotation augmentation setting; presumably a small +/- range in degrees - confirm in Ultralytics docs
    mosaic=0.0,  # mosaic augmentation disabled
    auto_augment='none',
    erasing=0.0,
    cache='ram',  # recorded log: "Caching images (6.8GB RAM)" for the validation set
    name='image_rotation_model_anew',
    exist_ok=True,  # presumably lets a run directory with the same name be reused/overwritten - confirm
)

New https://pypi.org/project/ultralytics/8.3.151 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.144 🚀 Python-3.11.9 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4070 Ti SUPER, 16376MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=none, batch=32, bgr=0.0, box=7.5, cache=ram, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=image_rotation.yaml, degrees=5.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.0, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=416, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolo11x-pose.pt, momentum=0.937, mosaic=0.0, multi_scale=False, name=image_rotation_model_anew, nb

[34m[1mtrain: [0mScanning /repos/image-corner-rotation/labels/train... 49074 images, 0 backgrounds, 0 corrupt: 100%|██████████| 49074/49074 [00:47<00:00, 1041.80it/s]


[34m[1mtrain: [0mNew cache created: /repos/image-corner-rotation/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 723.1±322.7 MB/s, size: 16.0 KB)


[34m[1mval: [0mScanning /repos/image-corner-rotation/labels/valid... 14019 images, 0 backgrounds, 0 corrupt: 100%|██████████| 14019/14019 [00:13<00:00, 1038.17it/s]


[34m[1mval: [0mNew cache created: /repos/image-corner-rotation/labels/valid.cache


[34m[1mval: [0mCaching images (6.8GB RAM): 100%|██████████| 14019/14019 [00:07<00:00, 1858.10it/s]


Plotting labels to runs/pose/image_rotation_model_anew/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 173 weight(decay=0.0), 183 weight(decay=0.0005), 182 bias(decay=0.0)
Image sizes 416 train, 416 val
Using 8 dataloader workers
Logging results to [1mruns/pose/image_rotation_model_anew[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       1/10      12.8G     0.4747      1.255     0.1708     0.2732     0.9842         18        416: 100%|██████████| 1534/1534 [09:46<00:00,  2.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [03:07<00:00,  1.17it/s]

                   all      14019      14019      0.999          1      0.995      0.868      0.903      0.904      0.871      0.782






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       2/10      13.4G     0.4175     0.8035    0.08868     0.2313     0.9289         18        416: 100%|██████████| 1534/1534 [09:38<00:00,  2.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [03:43<00:00,  1.02s/it]

                   all      14019      14019      0.999      0.999      0.995      0.887      0.944      0.944       0.92      0.884






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       3/10      13.6G     0.3689     0.6019    0.06378     0.2049     0.9068         18        416: 100%|██████████| 1534/1534 [09:14<00:00,  2.77it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [05:12<00:00,  1.42s/it]

                   all      14019      14019          1          1      0.995      0.927      0.949      0.949       0.93      0.904






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       4/10      13.6G     0.3003      0.519     0.0532     0.1767     0.8821         18        416: 100%|██████████| 1534/1534 [09:16<00:00,  2.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [05:31<00:00,  1.51s/it]

                   all      14019      14019          1          1      0.995      0.945       0.95       0.95      0.942      0.924






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       5/10      13.6G     0.2571     0.4251    0.04222     0.1522     0.8664         18        416: 100%|██████████| 1534/1534 [09:09<00:00,  2.79it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [05:02<00:00,  1.38s/it]

                   all      14019      14019          1          1      0.995      0.993      0.962      0.962       0.95      0.932






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       6/10      13.6G     0.2335     0.3745    0.03472     0.1392     0.8593         18        416: 100%|██████████| 1534/1534 [09:18<00:00,  2.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [05:02<00:00,  1.37s/it]

                   all      14019      14019          1          1      0.995      0.992      0.961      0.961      0.952      0.939






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       7/10      13.6G     0.2118     0.3424    0.03065     0.1264     0.8528         18        416: 100%|██████████| 1534/1534 [09:15<00:00,  2.76it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [05:06<00:00,  1.39s/it]

                   all      14019      14019          1          1      0.995      0.994      0.964      0.964      0.952       0.94






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       8/10      13.6G     0.1974     0.3198     0.0265     0.1172     0.8505         18        416: 100%|██████████| 1534/1534 [09:07<00:00,  2.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [05:20<00:00,  1.46s/it]

                   all      14019      14019          1          1      0.995      0.995      0.967      0.967      0.959      0.946






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       9/10      13.6G      0.181     0.2845    0.02255     0.1062     0.8461         18        416: 100%|██████████| 1534/1534 [09:11<00:00,  2.78it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [04:58<00:00,  1.36s/it]

                   all      14019      14019          1          1      0.995      0.995      0.971      0.971      0.964      0.955






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


      10/10      13.6G     0.1693     0.2624    0.02035    0.09796      0.843         18        416: 100%|██████████| 1534/1534 [09:17<00:00,  2.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [05:04<00:00,  1.39s/it]

                   all      14019      14019          1          1      0.995      0.995      0.973      0.973      0.968      0.961






10 epochs completed in 2.364 hours.
Optimizer stripped from runs/pose/image_rotation_model_anew/weights/last.pt, 118.2MB
Optimizer stripped from runs/pose/image_rotation_model_anew/weights/best.pt, 118.2MB

Validating runs/pose/image_rotation_model_anew/weights/best.pt...
Ultralytics 8.3.144 🚀 Python-3.11.9 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4070 Ti SUPER, 16376MiB)
YOLO11x-pose summary (fused): 199 layers, 58,739,959 parameters, 0 gradients, 202.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 220/220 [01:38<00:00,  2.23it/s]


                   all      14019      14019          1          1      0.995      0.995      0.973      0.973      0.968       0.96
Speed: 0.0ms preprocess, 3.0ms inference, 0.0ms loss, 0.9ms postprocess per image
Results saved to [1mruns/pose/image_rotation_model_anew[0m


ultralytics.utils.metrics.PoseMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7e26a8d02e50>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)', 'Precision-Recall(P)', 'F1-Confidence(P)', 'Precision-Confidence(P)', 'Recall-Confidence(P)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    

In [35]:
# NOTE(review): these weights come from the "image_rotation_model_train" run,
# whereas the training cell in this notebook writes to "image_rotation_model_anew"
# - confirm which run is meant to be evaluated.
model = YOLO('runs/pose/image_rotation_model_train/weights/best.pt')

# Run inference on the generated test cards (images/test is where the generation
# cell writes the 'test' split) and save the outputs (save=True).
# NOTE(review): the recorded output warns that results accumulate in RAM unless
# stream=True is passed; with stream=True `results` would become a generator.
results = model.predict(
    source='images/test',
    save=True,
    conf=0.25,
    imgsz=416
)


inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

image 1/1320 /repos/image-corner-rotation/images/test/card_11906.jpg: 416x416 1 rotated_photo, 14.8ms
image 2/1320 /repos/image-corner-rotation/images/test/card_11907.jpg: 416x416 1 rotated_photo, 31.7ms
image 3/1320 /repos/image-corner-rotation/images/test/card_11908.jpg: 416x416 1 rotated_photo, 21.0ms
image 4/1320 /repos/image-corner-rotation/images/test/card_11909.jpg: 416x416 1 rotated_photo, 15.2ms
image 5/1320 /repos/image-corner-rotation/images/test/card_1