In [15]:
# Importy do całego projektu
import os
from IPython.display import display, clear_output
from ultralytics import YOLO
from pathlib import Path
import cv2
import numpy as np
import random
from tqdm import tqdm
import shutil

In [2]:
import torch
# Sanity check: show whether PyTorch can see a CUDA device in this kernel
# (the value is displayed as the cell's output).
torch.cuda.is_available()

True

In [None]:
import kagglehub

# Download the three Kaggle datasets that supply the source photos. Each call's
# return value is later walked with os.walk, so it is kept as a dataset root path.
dataset_path1, dataset_path2, dataset_path3 = [
    kagglehub.dataset_download(handle)
    for handle in (
        "jessicali9530/lfw-dataset",
        "chiragsaipanuganti/morph",
        "jehanbhathena/weather-dataset",
    )
]

In [None]:
# Collect the paths of all .jpg files (case-insensitive) found under each dataset root.
datasets = [dataset_path1, dataset_path2, dataset_path3]
image_paths = [
    os.path.join(root, file)
    for dataset_path in datasets
    for root, _dirs, files in os.walk(dataset_path)
    for file in files
    if file.lower().endswith('.jpg')
]

In [44]:
def populate_card_labels_v3(image_path, height=400, width=400, im_width=250, im_height=250):
	"""Render one randomly rotated photo on a white card and build its pose label.

	The image is resized to ``im_width`` x ``im_height``, pasted in the middle of a
	white square canvas whose side is the photo diagonal plus 10 px, rotated about
	the canvas centre by an angle drawn with ``random.uniform(-179, 180)`` and then
	copied onto the centre of a white ``height`` x ``width`` card.

	Args:
		image_path: Path of the source image, passed to ``cv2.imread``.
		height: Card height in pixels.
		width: Card width in pixels.
		im_width: Width the photo is resized to before rotation.
		im_height: Height the photo is resized to before rotation.

	Returns:
		``(card, label)``. ``card`` is an RGB ``uint8`` array of shape
		``(height, width, 3)``. ``label`` is the list
		``[0, cx, cy, w, h, x1, y1, 2, x2, y2, 2, x3, y3, 2, x4, y4, 2]``:
		class id 0, the axis-aligned box enclosing the rotated corners, then the
		four photo corners (top-left, top-right, bottom-right, bottom-left of the
		unrotated photo), each followed by the constant 2 (the "labelled and
		visible" flag in the Ultralytics pose label convention). x values are
		divided by ``width`` and y values by ``height``.
		``(None, None)`` is returned when the image cannot be read.

	Raises:
		ValueError: If the rotation canvas is larger than the card, i.e. the
			rotated photo could not be pasted without being cut off.
	"""
	# The square canvas must hold the photo at any angle, hence the diagonal (+10 px margin).
	diagonal_size = int(np.sqrt(im_width**2 + im_height**2)) + 10
	if diagonal_size > width or diagonal_size > height:
		# Without this check the paste below fails with an opaque NumPy broadcast error.
		raise ValueError(
			f"Rotation canvas of {diagonal_size}px does not fit on a {width}x{height} card; "
			"reduce im_width/im_height or enlarge the card."
		)

	card = np.ones((height, width, 3), dtype=np.uint8) * 255
	img = cv2.imread(image_path)
	if img is None:
		return None, None
	img = cv2.resize(img, (im_width, im_height))
	img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

	# Centre the photo on the white rotation canvas.
	rot_canvas = np.ones((diagonal_size, diagonal_size, 3), dtype=np.uint8) * 255
	start_x = (diagonal_size - im_width) // 2
	start_y = (diagonal_size - im_height) // 2
	rot_canvas[start_y:start_y + im_height, start_x:start_x + im_width] = img

	rot_angle = random.uniform(-179, 180)
	center = (diagonal_size // 2, diagonal_size // 2)
	rot_matrix = cv2.getRotationMatrix2D(center, rot_angle, 1.0)
	rot_img = cv2.warpAffine(rot_canvas, rot_matrix, (diagonal_size, diagonal_size), borderValue=(255, 255, 255))

	# Paste the rotated canvas in the middle of the card.
	card_x = (width - diagonal_size) // 2
	card_y = (height - diagonal_size) // 2
	card[card_y:card_y+diagonal_size, card_x:card_x+diagonal_size] = rot_img

	# Photo corners on the canvas before rotation: TL, TR, BR, BL.
	corners = np.array([
		[start_x, start_y],
		[start_x + im_width, start_y],
		[start_x + im_width, start_y + im_height],
		[start_x, start_y + im_height]
	], dtype=np.float32)

	# Apply the same 2x3 affine matrix to the corners (homogeneous coordinates),
	# then shift from canvas coordinates to card coordinates.
	ones = np.ones((corners.shape[0], 1))
	corners_homo = np.hstack([corners, ones])
	rotated_corners = (rot_matrix @ corners_homo.T).T
	rotated_corners += np.array([card_x, card_y])

	x_min, y_min = rotated_corners.min(axis=0)
	x_max, y_max = rotated_corners.max(axis=0)

	# Axis-aligned bounding box, normalised by the card size.
	bbox_cx = (x_min + x_max) / 2 / width
	bbox_cy = (y_min + y_max) / 2 / height
	bbox_w = (x_max - x_min) / width
	bbox_h = (y_max - y_min) / height

	norm_corners = []
	for x, y in rotated_corners:
		norm_corners.extend([x / width, y / height, 2])

	label = [0, bbox_cx, bbox_cy, bbox_w, bbox_h] + norm_corners

	return card, label

def get_cards_labels(image_paths, num, im_width, im_height, output_path, starting_idx=0):
	"""Generate cards for a window of ``image_paths`` and save them with their labels.

	For every index ``i`` in ``[starting_idx, starting_idx + num)`` a card is built
	with ``populate_card_labels_v3`` (default card size) and written to
	``images/<output_path>/card_<i+1>.jpg``; its label is written to
	``labels/<output_path>/card_<i+1>.txt`` as a single space-separated line
	(floats with six decimals, everything else via ``str``).

	Args:
		image_paths: Sequence of source image paths.
		num: Number of consecutive paths to process.
		im_width: Width the photo is resized to on the card.
		im_height: Height the photo is resized to on the card.
		output_path: Sub-folder created under both ``images`` and ``labels``
			(e.g. ``'train'``).
		starting_idx: Index of the first path to process.

	Returns:
		A list with one ``{"filename": ..., "label": ...}`` dict per card that was
		actually written. Unreadable sources and failed image writes are skipped
		and counted in the printed summary. If the window runs past the end of
		``image_paths`` only the available paths are processed.
	"""
	output_path = Path(output_path)
	images_path = "images" / output_path
	labels_path = "labels" / output_path
	images_path.mkdir(parents=True, exist_ok=True)
	labels_path.mkdir(parents=True, exist_ok=True)
	cards = []
	wrong = 0

	# Clamp the window: running past the end used to raise IndexError after part
	# of the split had already been written to disk.
	requested_stop = starting_idx + num
	stop_idx = min(requested_stop, len(image_paths))
	if num > 0 and stop_idx < requested_stop:
		print(
			f"Requested images up to index {requested_stop}, but only "
			f"{len(image_paths)} paths are available; stopping early."
		)

	for i in range(starting_idx, stop_idx):
		card, label = populate_card_labels_v3(
			image_paths[i],
			im_width=im_width,
			im_height=im_height
		)
		if card is None or label is None:
			wrong += 1
			continue

		filename = f"card_{i+1}"
		image_file = images_path / f"{filename}.jpg"
		label_file = labels_path / f"{filename}.txt"

		# Cards are built in RGB; OpenCV writes BGR.
		card_bgr = cv2.cvtColor(card, cv2.COLOR_RGB2BGR)
		# cv2.imwrite reports failure through its return value; skip the label
		# too so no label file is left without a matching image.
		if not cv2.imwrite(str(image_file), card_bgr):
			wrong += 1
			continue

		label_line = " ".join(f"{v:.6f}" if isinstance(v, float) else str(v) for v in label)

		with open(label_file, "w") as f:
			f.write(label_line)

		cards.append({"filename": f"{filename}.jpg", "label": label})

	if wrong > 0:
		print(f"Skipped {wrong} images due to errors.")
	return cards

In [45]:
# Start from empty output folders so cards from a previous run never mix with new ones.
shutil.rmtree("images", ignore_errors=True)
shutil.rmtree("labels", ignore_errors=True)

random.shuffle(image_paths)

n_images = len(image_paths)

# 70 % / 20 % / remainder split of the shuffled source images.
train_p = int(n_images * 0.7)
valid_p = int(n_images * 0.2)
test_p = n_images - train_p - valid_p

# Side lengths (px) the photos are resized to; every split is divided evenly between them.
res = [250, 200, 150, 175, 275]
splits = [("train", train_p), ("valid", valid_p), ("test", test_p)]

# `next_idx` always points at the first source image that has not been handed out
# yet. Advancing it by exactly the chunk that was just consumed keeps the three
# splits disjoint; the chunk size changes between splits, so the offset must not
# be derived from the next split's chunk size.
next_idx = 0
with tqdm(total=len(splits) * len(res), desc="Generating cards", unit="card") as progress:
	for split_name, split_size in splits:
		res_n = split_size // len(res)
		for side in res:
			cards = get_cards_labels(image_paths, num=res_n, im_width=side, im_height=side, output_path=split_name, starting_idx=next_idx)
			next_idx += res_n
			progress.update(1)

Generating cards:   0%|          | 0/15 [03:45<?, ?card/s]
Generating cards:  20%|██        | 3/15 [00:06<00:26,  2.24s/card]

Skipped 1 images due to errors.


Generating cards: 100%|██████████| 15/15 [00:16<00:00,  1.09s/card]


In [46]:
# Load the best.pt weights found under the `image_rotation_model_train` run folder
# and bind them to `model`, which the cells below operate on.
model = YOLO("runs/pose/image_rotation_model_train/weights/best.pt")

In [None]:
# Alternative starting point: load "yolo11x-pose.pt" (presumably the stock pretrained
# pose weights — confirm). Running this cell rebinds `model`, replacing whatever an
# earlier cell loaded, so run only the loader cell you actually want to train from.
model = YOLO("yolo11x-pose.pt")

In [47]:
# Train the current `model` on the dataset described by image_rotation.yaml.
# mosaic, auto_augment and erasing are switched off explicitly; `degrees` is the
# only augmentation strength set here.
# NOTE(review): `fliplr` is not passed, and the captured training log lists
# fliplr=0.5. A horizontal flip mirrors the photo on the card; confirm that
# image_rotation.yaml defines a suitable `flip_idx` for the four corner keypoints,
# or pass fliplr=0.0.
model.train(
    data='image_rotation.yaml',
    epochs=10,
    imgsz=416,
    batch=32,
    degrees=5.0,
    mosaic=0.0,
    auto_augment='none',
    erasing=0.0,
    cache='ram',  # the log shows the images being cached in RAM before training
    name='image_rotation_model_train2',  # run name; presumably the folder under runs/pose — confirm
    exist_ok=True,
)

New https://pypi.org/project/ultralytics/8.3.151 available 😃 Update with 'pip install -U ultralytics'
Ultralytics 8.3.144 🚀 Python-3.11.9 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4070 Ti SUPER, 16376MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=none, batch=32, bgr=0.0, box=7.5, cache=ram, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=image_rotation.yaml, degrees=5.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=10, erasing=0.0, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=416, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=runs/pose/image_rotation_model_train/weights/best.pt, momentum=0.937, mosaic=0.0, multi_scale=Fals

[34m[1mtrain: [0mScanning /repos/image-corner-rotation/labels/train... 4799 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4799/4799 [00:04<00:00, 979.26it/s] 


[34m[1mtrain: [0mNew cache created: /repos/image-corner-rotation/labels/train.cache


[34m[1mtrain: [0mCaching images (2.3GB RAM): 100%|██████████| 4799/4799 [00:03<00:00, 1589.68it/s]

[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))





[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 171.1±159.9 MB/s, size: 30.8 KB)


[34m[1mval: [0mScanning /repos/image-corner-rotation/labels/valid... 1370 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1370/1370 [00:02<00:00, 682.42it/s]

[34m[1mval: [0mNew cache created: /repos/image-corner-rotation/labels/valid.cache



[34m[1mval: [0mCaching images (0.7GB RAM): 100%|██████████| 1370/1370 [00:01<00:00, 744.19it/s]


Plotting labels to runs/pose/image_rotation_model_train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 173 weight(decay=0.0), 183 weight(decay=0.0005), 182 bias(decay=0.0)
Image sizes 416 train, 416 val
Using 8 dataloader workers
Logging results to [1mruns/pose/image_rotation_model_train[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       1/10      15.2G     0.2349      4.421     0.1687     0.1432     0.8739         31        416: 100%|██████████| 150/150 [02:16<00:00,  1.10it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:53<00:00,  2.41s/it]

                   all       1370       1370      0.674      0.599      0.666      0.344      0.367      0.418      0.307      0.149






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       2/10      13.3G     0.2577      3.377     0.1142     0.1645     0.8761         31        416: 100%|██████████| 150/150 [01:03<00:00,  2.37it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:20<00:00,  1.09it/s]

                   all       1370       1370      0.997          1      0.995      0.982      0.602      0.604      0.467        0.3






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       3/10      13.7G     0.2728      3.188      0.103      0.173     0.8815         31        416: 100%|██████████| 150/150 [00:57<00:00,  2.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:44<00:00,  2.02s/it]

                   all       1370       1370      0.997      0.999      0.995      0.942      0.631      0.634      0.497      0.331






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       4/10      13.5G     0.2697      3.057    0.09688     0.1687     0.8835         31        416: 100%|██████████| 150/150 [01:01<00:00,  2.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:45<00:00,  2.06s/it]

                   all       1370       1370      0.999          1      0.995      0.985      0.628      0.628      0.506      0.348






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       5/10      13.6G     0.2561       2.99     0.1011     0.1619      0.876         31        416: 100%|██████████| 150/150 [01:01<00:00,  2.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:30<00:00,  1.41s/it]

                   all       1370       1370          1          1      0.995      0.944      0.635      0.635      0.503       0.33






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       6/10      13.5G     0.2409      2.784     0.0972     0.1537     0.8701         31        416: 100%|██████████| 150/150 [01:03<00:00,  2.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:39<00:00,  1.79s/it]

                   all       1370       1370          1          1      0.995      0.992      0.652      0.652      0.538      0.376






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       7/10      13.6G     0.2295      2.666    0.09128     0.1432     0.8685         31        416: 100%|██████████| 150/150 [01:04<00:00,  2.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:36<00:00,  1.66s/it]

                   all       1370       1370      0.999      0.999      0.995      0.979      0.637      0.635      0.511      0.344






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       8/10      13.6G     0.2137      2.557    0.08936     0.1338     0.8642         31        416: 100%|██████████| 150/150 [01:01<00:00,  2.44it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:41<00:00,  1.88s/it]

                   all       1370       1370          1          1      0.995      0.993      0.674      0.674       0.57       0.43






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


       9/10      13.6G     0.2092      2.474    0.07608     0.1282     0.8596         31        416: 100%|██████████| 150/150 [00:59<00:00,  2.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:27<00:00,  1.26s/it]

                   all       1370       1370          1      0.999      0.995      0.992      0.671      0.672      0.565      0.425






      Epoch    GPU_mem   box_loss  pose_loss  kobj_loss   cls_loss   dfl_loss  Instances       Size


      10/10      13.5G     0.2022      2.399    0.07975      0.123     0.8583         31        416: 100%|██████████| 150/150 [00:56<00:00,  2.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:35<00:00,  1.62s/it]

                   all       1370       1370      0.999      0.999      0.995      0.993        0.7      0.701      0.604      0.489






10 epochs completed in 0.306 hours.
Optimizer stripped from runs/pose/image_rotation_model_train/weights/last.pt, 118.2MB
Optimizer stripped from runs/pose/image_rotation_model_train/weights/best.pt, 118.2MB

Validating runs/pose/image_rotation_model_train/weights/best.pt...
Ultralytics 8.3.144 🚀 Python-3.11.9 torch-2.6.0+cu124 CUDA:0 (NVIDIA GeForce RTX 4070 Ti SUPER, 16376MiB)
YOLO11x-pose summary (fused): 199 layers, 58,739,959 parameters, 0 gradients, 202.7 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95)     Pose(P          R      mAP50  mAP50-95): 100%|██████████| 22/22 [00:14<00:00,  1.49it/s]


                   all       1370       1370      0.999      0.999      0.995      0.993        0.7      0.701      0.604      0.489
Speed: 0.2ms preprocess, 5.9ms inference, 0.0ms loss, 1.2ms postprocess per image
Results saved to [1mruns/pose/image_rotation_model_train[0m


ultralytics.utils.metrics.PoseMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7b4989d4bcd0>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)', 'Precision-Recall(P)', 'F1-Confidence(P)', 'Precision-Confidence(P)', 'Recall-Confidence(P)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    

In [35]:
# Reload the best checkpoint from the `image_rotation_model_train` run folder.
# NOTE(review): the training cell in this notebook uses
# name='image_rotation_model_train2'; confirm this is the checkpoint you intend
# to evaluate.
model = YOLO('runs/pose/image_rotation_model_train/weights/best.pt')

# Run inference on everything under images/test at the training image size,
# keeping detections with confidence >= 0.25 (presumably — see the predict docs).
# NOTE(review): without stream=True the results for all images are accumulated in
# memory (Ultralytics prints a warning about this for this call).
results = model.predict(
    source='images/test',
    save=True,
    conf=0.25,
    imgsz=416
)


inference results will accumulate in RAM unless `stream=True` is passed, causing potential out-of-memory
errors for large sources or long-running streams and videos. See https://docs.ultralytics.com/modes/predict/ for help.

Example:
    results = model(source=..., stream=True)  # generator of Results objects
    for r in results:
        boxes = r.boxes  # Boxes object for bbox outputs
        masks = r.masks  # Masks object for segment masks outputs
        probs = r.probs  # Class probabilities for classification outputs

image 1/1320 /repos/image-corner-rotation/images/test/card_11906.jpg: 416x416 1 rotated_photo, 14.8ms
image 2/1320 /repos/image-corner-rotation/images/test/card_11907.jpg: 416x416 1 rotated_photo, 31.7ms
image 3/1320 /repos/image-corner-rotation/images/test/card_11908.jpg: 416x416 1 rotated_photo, 21.0ms
image 4/1320 /repos/image-corner-rotation/images/test/card_11909.jpg: 416x416 1 rotated_photo, 15.2ms
image 5/1320 /repos/image-corner-rotation/images/test/card_1