In [None]:
%pip install opencv-python numpy pandas xmltodict tqdm

In [None]:
import os
import cv2
import numpy as np
import xml.etree.ElementTree as ET
from tqdm.auto import tqdm  # Automatycznie wykrywa środowisko

In [None]:

def create_dirs():
    """Create the output folder tree for the fused dataset.

    Makes `images/` and `labels/` folders, each with a `train` and a `test`
    subfolder, under `datasets/llvip_fused_yuv`. Existing folders are left
    untouched.
    """
    for kind in ("images", "labels"):
        for split in ("train", "test"):
            os.makedirs(f"datasets/llvip_fused_yuv/{kind}/{split}", exist_ok=True)

In [None]:
def fuse_images_yuv(rgb_path, ir_path, output_path):
    """Fuse a visible-light image with an infrared image in YUV colour space.

    The visible image is converted to YUV, its luma (Y) channel is replaced by
    the min-max normalised infrared image, and the result is converted back to
    BGR and written to ``output_path``.

    Args:
        rgb_path: Path to the visible-light image (read by OpenCV as BGR).
        ir_path: Path to the infrared image (read as a single channel).
        output_path: Destination path of the fused image.

    Raises:
        OSError: If either input image cannot be read or the output cannot be
            written.
    """
    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface later as an opaque cv2.error.
    rgb_img = cv2.imread(rgb_path)
    if rgb_img is None:
        raise OSError(f"Visible image is missing or unreadable: {rgb_path}")

    ir_img = cv2.imread(ir_path, cv2.IMREAD_GRAYSCALE)  # IR as a single channel
    if ir_img is None:
        raise OSError(f"Infrared image is missing or unreadable: {ir_path}")

    # BGR -> YUV; only the chroma channels of the visible image are kept.
    yuv = cv2.cvtColor(rgb_img, cv2.COLOR_BGR2YUV)
    y, u, v = cv2.split(yuv)

    # Match the IR image to the luma plane size (cv2.resize takes (width, height))
    # and stretch it to the full 0-255 range.
    ir_resized = cv2.resize(ir_img, (y.shape[1], y.shape[0]))
    ir_normalized = cv2.normalize(ir_resized, None, 0, 255, cv2.NORM_MINMAX).astype(np.uint8)

    # IR becomes the new luma; convert back to BGR, the layout cv2.imwrite expects.
    fused_yuv = cv2.merge([ir_normalized, u, v])
    fused_bgr = cv2.cvtColor(fused_yuv, cv2.COLOR_YUV2BGR)

    # cv2.imwrite returns False on failure instead of raising.
    if not cv2.imwrite(output_path, fused_bgr):
        raise OSError(f"Could not write fused image: {output_path}")

def convert_voc_to_yolo(xml_path, output_txt_path):
    """Convert a Pascal VOC XML annotation into a YOLO label file.

    Each ``<object>`` becomes one line ``class x_center y_center width height``
    with coordinates normalised by the image size read from ``<size>``.
    Every object is written with class index 0 (a single "person" class is
    assumed, as in the original code).

    Boxes are clamped to the image bounds so all values stay within [0, 1];
    boxes with no area after clamping are skipped.

    Args:
        xml_path: Path to the VOC XML annotation.
        output_txt_path: Path of the YOLO ``.txt`` file to write.

    Raises:
        ValueError: If the annotation declares a non-positive image size.
    """
    root = ET.parse(xml_path).getroot()

    # Image size used for normalisation
    size = root.find('size')
    width = float(size.find('width').text)
    height = float(size.find('height').text)
    if width <= 0 or height <= 0:
        raise ValueError(f"Invalid image size {width}x{height} in {xml_path}")

    class_idx = 0  # all objects are assumed to be "person"

    yolo_lines = []
    for obj in root.findall('object'):
        bbox = obj.find('bndbox')
        # float() accepts both "12" and "12.0"; int() would raise on the latter.
        xmin = float(bbox.find('xmin').text)
        ymin = float(bbox.find('ymin').text)
        xmax = float(bbox.find('xmax').text)
        ymax = float(bbox.find('ymax').text)

        # Clamp to the image so normalised values cannot leave [0, 1].
        xmin = min(max(xmin, 0.0), width)
        xmax = min(max(xmax, 0.0), width)
        ymin = min(max(ymin, 0.0), height)
        ymax = min(max(ymax, 0.0), height)

        # Skip degenerate (zero-area or inverted) boxes.
        if xmax <= xmin or ymax <= ymin:
            continue

        # VOC corner format -> YOLO normalised centre/size format
        x_center = (xmin + xmax) / 2 / width
        y_center = (ymin + ymax) / 2 / height
        w = (xmax - xmin) / width
        h = (ymax - ymin) / height

        yolo_lines.append(f"{class_idx} {x_center:.6f} {y_center:.6f} {w:.6f} {h:.6f}")

    with open(output_txt_path, 'w', encoding='utf-8') as f:
        f.write("\n".join(yolo_lines))

In [None]:

def process_dataset():
    """Build the fused dataset for the ``train`` and ``test`` splits.

    For every file in ``LLVIP/visible/<split>`` the infrared image with the
    same base name (extension ignored) is looked up in
    ``LLVIP/infrared/<split>``. The pair is fused with ``fuse_images_yuv`` and
    written under ``datasets/llvip_fused_yuv/images/<split>``. If a matching
    VOC annotation exists in ``Annotations/`` it is converted to a YOLO label
    file under ``datasets/llvip_fused_yuv/labels/<split>``.

    Visible images without an infrared counterpart are reported and skipped.
    """
    create_dirs()

    for split in ["train", "test"]:
        rgb_dir = os.path.join("LLVIP/visible", split)
        ir_dir = os.path.join("LLVIP/infrared", split)

        # Index the IR folder once per split instead of re-listing it for every
        # visible image. setdefault keeps the first file seen for a base name,
        # matching the previous "take the first candidate" rule.
        ir_by_base_name = {}
        for ir_name in os.listdir(ir_dir):
            ir_by_base_name.setdefault(os.path.splitext(ir_name)[0], ir_name)

        # Process every image in the visible folder
        for img_name in tqdm(os.listdir(rgb_dir), desc=f"Przetwarzanie {split}"):
            rgb_path = os.path.join(rgb_dir, img_name)
            base_name = os.path.splitext(img_name)[0]

            ir_filename = ir_by_base_name.get(base_name)
            if ir_filename is None:
                print(f"Brak IR dla {img_name} – pomijam.")
                continue
            ir_path = os.path.join(ir_dir, ir_filename)

            # Output paths
            fused_img_path = os.path.join("datasets/llvip_fused_yuv/images", split, img_name)
            txt_path = os.path.join("datasets/llvip_fused_yuv/labels", split, base_name + ".txt")

            # Image fusion
            fuse_images_yuv(rgb_path, ir_path, fused_img_path)

            # Annotation conversion; images without an XML file get no label file.
            # NOTE(review): annotations are looked up in ./Annotations, not under
            # LLVIP/ like the images -- confirm this matches the dataset layout.
            xml_path = os.path.join("Annotations", base_name + ".xml")
            if os.path.exists(xml_path):
                convert_voc_to_yolo(xml_path, txt_path)

In [None]:
import torch
# Report the CUDA setup. The device name is only queried when CUDA is usable:
# torch.cuda.get_device_name(0) raises on a machine without a CUDA device, which
# would make this diagnostic cell fail exactly when it is needed most.
cuda_available = torch.cuda.is_available()
print(f"CUDA dostępne: {cuda_available}")
print(f"Wersja CUDA: {torch.version.cuda}")
if cuda_available:
    print(f"Nazwa GPU: {torch.cuda.get_device_name(0)}")
else:
    print("Nazwa GPU: brak (CUDA niedostępne)")

In [None]:
import subprocess
import os
import sys
# Absolute path to the dataset description file of the fused dataset.
data_yaml = os.path.abspath("./datasets/llvip_fused_yuv/data.yaml")


In [None]:
import subprocess
import os
import sys
# Absolute path to the dataset description passed to train.py
data_yaml = os.path.abspath("./datasets/llvip_fused_yuv/data.yaml")
# Expose only the GPU with index 0 to the child process
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Training options as (flag, value) pairs, flattened into the argument list below
train_options = [
    ("--img", "640"),
    ("--batch", "16"),
    ("--epochs", "25"),
    ("--data", data_yaml),
    ("--weights", "yolov5s.pt"),
    ("--device", "0"),
    ("--name", "fused_yuv_exp"),
    ("--workers", "4"),  # lower this on machines with fewer than 4 cores
]
command = [sys.executable, "yolov5/train.py"] + [
    token for pair in train_options for token in pair
]

# Run to completion with the output captured; it is only shown after the process exits.
try:
    result = subprocess.run(
        command,
        check=True,
        capture_output=True,
        text=True,
        encoding='utf-8',
        errors='replace',
    )
    print("Output:")
    print(result.stdout)
except subprocess.CalledProcessError as failure:
    # Non-zero exit code: show both captured streams
    print(f"Error (code {failure.returncode}):")
    print(failure.stderr)
    print(failure.stdout)
except Exception as failure:
    print(f"Critical error: {str(failure)}")

In [None]:
import sys
import subprocess
from IPython.display import clear_output

import os

# Resolve the dataset description here so this cell does not depend on a
# `data_yaml` variable left in the kernel by an earlier cell.
data_yaml = os.path.abspath("./datasets/llvip_fused_yuv/data.yaml")

command = [
    sys.executable,
    "-u",  # unbuffered child output, otherwise piped logs arrive in large delayed chunks
    "yolov5/train.py",
    "--img", "640",
    "--batch", "16",
    "--epochs", "50",
    "--data", data_yaml,
    "--weights", "yolov5s.pt",
    "--device", "0",
    "--name", "fused_yuv_exp",
    "--workers", "4"
]

# The context manager closes the pipe and waits for the child on exit, so the
# process is always reaped and `returncode` is set afterwards.
with subprocess.Popen(
    command,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,  # merge stderr into the same stream
    text=True,
    bufsize=1,
    encoding='utf-8',  # force UTF-8 decoding
    errors='replace'   # replace undecodable bytes instead of failing
) as process:
    # Show the logs live; iteration ends when the child closes its output.
    for line in process.stdout:
        print(line.strip())

# A failed run must not pass silently.
if process.returncode != 0:
    print(f"Error (code {process.returncode})")

In [None]:
import sys
import subprocess
import os
import csv
import re
from datetime import datetime
from IPython.display import clear_output

# Path configuration
log_dir = "training_logs"
os.makedirs(log_dir, exist_ok=True)
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Output files (one pair per run, distinguished by the timestamp)
log_file = os.path.join(log_dir, f"train_log_{timestamp}.txt")
csv_file = os.path.join(log_dir, f"metrics_{timestamp}.csv")

# CSV header
csv_headers = ["epoch", "gpu_mem", "box_loss", "obj_loss", "cls_loss", "labels", "mAP@0.5", "mAP@0.5:0.95"]

# Regular expressions
progress_pattern = re.compile(r"(\d+/\d+).*?(\d+\.\d+)(?=it/s)")  # detects progress-bar lines
metrics_pattern = re.compile(
    r"\s*(\d+)\s+(\d+\.?\d*)G\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+)\s+(\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)"
)


def parse_metrics_line(line):
    """Extract one CSV row from a training log line.

    Returns a dict keyed by ``csv_headers`` when ``metrics_pattern`` matches,
    otherwise ``None``.

    The pattern has nine groups: epoch, GPU memory, three losses, two integer
    columns and two trailing floats. The first integer column is stored as
    ``labels``, the second (group 7) is not exported, and the two trailing
    floats (groups 8 and 9) are the mAP values.

    NOTE(review): the pattern itself is unchanged; confirm it matches the
    column layout printed by the installed yolov5/train.py.
    """
    match = metrics_pattern.search(line)
    if match is None:
        return None
    return {
        "epoch": int(match.group(1)),
        "gpu_mem": float(match.group(2)),  # the trailing "G" is outside the group
        "box_loss": float(match.group(3)),
        "obj_loss": float(match.group(4)),
        "cls_loss": float(match.group(5)),
        "labels": int(match.group(6)),
        "mAP@0.5": float(match.group(8)),
        "mAP@0.5:0.95": float(match.group(9)),
    }


command = [
    sys.executable, "-u", "yolov5/train.py",
    "--img", "640",
    "--batch", "16",
    "--epochs", "25",
    "--data", os.path.abspath("./datasets/llvip_fused_yuv/data.yaml"),
    "--weights", "yolov5s.pt",
    "--device", "0",
    "--name", "fused_yuv_exp",
    "--workers", "4"
]

last_progress = ""  # most recent progress-bar line

with open(log_file, "w", encoding="utf-8") as log_f, \
     open(csv_file, "w", newline="", encoding="utf-8") as csv_f:

    writer = csv.DictWriter(csv_f, fieldnames=csv_headers)
    writer.writeheader()

    # The context manager closes the pipe and waits for the child on exit.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        encoding="utf-8",
        errors="replace",
        text=True
    ) as process:
        for line in process.stdout:
            # Always keep the raw line in the log file
            log_f.write(line)
            log_f.flush()

            # Display handling
            if progress_pattern.search(line):
                # Progress lines replace the previous cell output
                clear_output(wait=True)
                print(line.strip(), end="\r")
                last_progress = line.strip()
            else:
                # Everything else is printed on its own line
                print(line.strip())

            # Metric parsing (one regex search per line)
            try:
                metrics = parse_metrics_line(line)
                if metrics is not None:
                    writer.writerow(metrics)
                    csv_f.flush()
            except (ValueError, csv.Error) as e:
                print(f"Błąd parsowania: {str(e)}")

if process.returncode != 0:
    # Do not report a crashed run as a finished training
    print(f"\nTrening zakończony błędem (kod {process.returncode}). Ostatni postęp: {last_progress}")
else:
    print(f"\nTrening zakończony! Ostatni postęp: {last_progress}")
print(f"Pełny log: {log_file}")
print(f"Metryki CSV: {csv_file}")

In [None]:
import subprocess

# Plain blocking training run: the child inherits this process's output streams,
# and the resulting CompletedProcess is the value of the cell.
command = [sys.executable, "yolov5/train.py"]
command += ["--img", "640"]
command += ["--batch", "16"]
command += ["--epochs", "50"]
command += ["--data", "datasets/llvip_fused_yuv/data.yaml"]
command += ["--weights", "yolov5s.pt"]
command += ["--device", "0"]

subprocess.run(command)

In [2]:
import sys
import subprocess
import re
from IPython.display import clear_output
from time import time

def format_metrics(metrics):
    """Render the current training metrics as a one-line status string.

    Expects the keys ``epoch``, ``epochs``, ``box_loss``, ``mAP@0.5``,
    ``gpu_mem`` and ``time``; a missing key raises ``KeyError``.
    """
    epoch_part = f"Epoka: {metrics['epoch']:>2}/{metrics['epochs']} | "
    loss_part = f"Loss: {metrics['box_loss']:.3f} | "
    map_part = f"mAP@0.5: {metrics['mAP@0.5']:.3f} | "
    gpu_part = f"GPU: {metrics['gpu_mem']:.1f}GB | "
    time_part = f"Czas: {metrics['time']:>5}"
    return "".join([epoch_part, loss_part, map_part, gpu_part, time_part])

def train_yolo(epochs=50):
    """Run YOLOv5 training as a subprocess and show a condensed live status.

    Args:
        epochs: Number of training epochs. The same value is passed to
            ``train.py`` and shown as the total in the status line (previously
            the command trained for 50 epochs while the display said 30).

    The child's combined stdout/stderr is parsed line by line: metric lines
    update the current metrics, progress-bar lines refresh the status at most
    every 0.5 s, and a few important messages are printed verbatim.
    """
    command = [
        sys.executable,
        "-u",  # unbuffered child output so piped logs arrive promptly
        "yolov5/train.py",
        "--img", "640",
        "--batch", "16",
        "--epochs", str(epochs),
        "--data", "datasets/llvip_fused_yuv/data.yaml",
        "--weights", "yolov5s.pt",
        "--device", "0",
        "--name", "fused_yuv_exp",
        "--exist-ok"
    ]

    # Regex patterns for parsing the training log
    # NOTE(review): confirm both patterns against the output format of the
    # installed yolov5/train.py.
    epoch_pattern = re.compile(r"(\d+)/(\d+).*?(\d+\.\d+)it/s.*?(\d+:\d+<.*?) ")
    metric_pattern = re.compile(
        r"(\d+)\s+(\d+\.\d+)G\s+(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)\s+\d+\s+\d+\s+(\d+\.\d+)\s+(\d+\.\d+)"
    )

    current_metrics = {"epochs": epochs}
    last_update = time()

    # The context manager closes the pipe and waits for the child on exit, so
    # the process is always reaped and `returncode` is set afterwards.
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        bufsize=1,
        text=True,
        encoding="utf-8",
        errors="replace"
    ) as process:
        for line in process.stdout:
            # Parse metrics
            if metric_match := metric_pattern.search(line):
                current_metrics.update({
                    "epoch": int(metric_match.group(1)),
                    "gpu_mem": float(metric_match.group(2)),
                    "box_loss": float(metric_match.group(3)),
                    "obj_loss": float(metric_match.group(4)),
                    "cls_loss": float(metric_match.group(5)),
                    "mAP@0.5": float(metric_match.group(6)),
                    "mAP@0.5:0.95": float(metric_match.group(7)),
                })

            # Refresh the status at most every 0.5 seconds
            if (time() - last_update) > 0.5 and (progress_match := epoch_pattern.search(line)):
                current_metrics["time"] = progress_match.group(4)
                # format_metrics needs the parsed metrics; a progress line seen
                # before the first metrics line would otherwise raise KeyError.
                if "epoch" in current_metrics:
                    clear_output(wait=True)
                    print(format_metrics(current_metrics))
                last_update = time()

            # Show important messages verbatim
            if any(x in line for x in ["Saved model", "Results saved to", "Best mAP"]):
                clear_output(wait=True)
                print(line.strip())

    if process.returncode != 0:
        # Do not report a crashed run as a finished training
        print(f"\nTrening przerwany (kod wyjścia {process.returncode}).")
    else:
        print("\nTrening zakończony!")

# Run the training
train_yolo()

Results saved to [1myolov5\runs\train\fused_yuv_exp[0m

Trening zakończony!
