In [None]:
def nn_train_multi(model, num_epochs, batch_size, samples_per_epoch, model_file_name, images,
                   optimizer, criterion, checkpoint_dir="checkpoints"):
    """Train ``model`` on mini-batches drawn from ``HomographyPairDataset``.

    Training resumes from the newest ``checkpoint_epoch_*.pth`` file found in
    ``checkpoint_dir`` (ordering is delegated to ``extract_epoch``, which is
    defined elsewhere in this file). The regression target passed to
    ``criterion`` is the *negated* offset tensor produced by the dataset.

    Args:
        model: Network to optimise; it is called as ``model(batch_pairs)``.
        num_epochs: Index of the last epoch (exclusive upper bound of the loop).
        batch_size: Batch size handed to the ``DataLoader``.
        samples_per_epoch: Forwarded to ``HomographyPairDataset``.
        model_file_name: Path where the final ``state_dict`` is written.
        images: Forwarded to ``HomographyPairDataset``.
        optimizer: Optimiser whose state is checkpointed alongside the model.
        criterion: Loss callable ``criterion(preds, targets)``.
        checkpoint_dir: Directory for checkpoints and the TensorBoard ``runs``
            sub-directory; created if missing.

    Side effects:
        Writes checkpoints every 100 epochs (and at the last epoch), keeps only
        the four newest, logs scalars to TensorBoard, saves the final weights
        and empties the CUDA cache when CUDA is available.
    """
    os.makedirs(checkpoint_dir, exist_ok=True)
    start_epoch = 0

    def list_checkpoints():
        # Only files written by this function count as checkpoints. Any other
        # ``*.pth`` in the directory (e.g. exported final weights) must neither
        # be resumed from nor deleted by the pruning step.
        names = [f for f in os.listdir(checkpoint_dir)
                 if f.startswith("checkpoint_epoch_") and f.endswith(".pth")]
        return sorted(sorted(names), key=extract_epoch)

    # Resume from the newest checkpoint, if any.
    checkpoints = list_checkpoints()
    if checkpoints:
        latest_ckpt = os.path.join(checkpoint_dir, checkpoints[-1])
        checkpoint = torch.load(latest_ckpt, map_location=device)
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        # The stored value is "epochs completed", i.e. the next epoch index.
        start_epoch = checkpoint["epoch"]
        print(f"‚úÖ Resuming from checkpoint: {latest_ckpt} (epoch {start_epoch})")
    else:
        print("üöÄ Starting training from scratch.")

    writer = SummaryWriter(log_dir=os.path.join(checkpoint_dir, "runs"))

    dataset = HomographyPairDataset(images, samples_per_epoch)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=False,
                            num_workers=0, pin_memory=True)
    # Guard the per-epoch averages against an empty loader.
    num_batches = max(len(dataloader), 1)

    try:
        for epoch in range(start_epoch, num_epochs):
            model.train()
            epoch_loss = 0.0
            epoch_mae = 0.0

            # Progress bar over the batches of this epoch.
            progress_bar = tqdm(dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}", ncols=120, leave=False)

            for batch_pairs, batch_offsets in progress_bar:
                batch_pairs = batch_pairs.to(device)
                # The network is trained to predict the negated offsets.
                targets = -batch_offsets.to(device)

                # Forward
                preds = model(batch_pairs)
                loss = criterion(preds, targets)

                # Backward
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # Metrics
                batch_loss = loss.item()
                epoch_loss += batch_loss

                with torch.no_grad():
                    mae = torch.mean(torch.abs(preds - targets)).item()
                    epoch_mae += mae

                progress_bar.set_postfix(loss=f"{batch_loss:.6f}", mae=f"{mae:.4f}")

            # Average metrics for the epoch.
            avg_loss = epoch_loss / num_batches
            avg_mae = epoch_mae / num_batches
            # NOTE: this is an RMSE only when ``criterion`` is a mean squared error.
            avg_rmse = np.sqrt(avg_loss)

            writer.add_scalar("Loss/RMSE", avg_rmse, epoch)
            writer.add_scalar("Error/MAE", avg_mae, epoch)

            # Periodic checkpoint, plus one at the very last epoch.
            if (epoch + 1) % 100 == 0 or (epoch + 1) == num_epochs:
                checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_epoch_{epoch + 1}.pth")
                torch.save({
                    "epoch": epoch + 1,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                }, checkpoint_path)

                # Keep only the four newest checkpoints.
                for stale in list_checkpoints()[:-4]:
                    os.remove(os.path.join(checkpoint_dir, stale))
    finally:
        # Flush and release the event file even if training is interrupted.
        writer.close()

    # Save final model
    torch.save(model.state_dict(), model_file_name)
    print(f"‚úÖ Final model saved: {model_file_name}")

    if torch.cuda.is_available():
        torch.cuda.empty_cache()

In [None]:
def nn_train_single(model, num_epochs, model_file_name, img, optimizer, criterion, checkpoint_dir="checkpoints"):
    """Train ``model`` one freshly generated pair per step, with checkpoint resume.

    Each "epoch" is a single optimisation step on one sample returned by
    ``generate_pair`` (window 64, margin 16, displacement range ±16). The
    regression target passed to ``criterion`` is the *negated* offset vector.
    Training resumes from the newest ``checkpoint_epoch_*.pth`` file in
    ``checkpoint_dir`` (ordering is delegated to ``extract_epoch``, defined
    elsewhere in this file).

    Args:
        model: Network to optimise; it is called as ``model(pair)``.
        num_epochs: Exclusive upper bound of the step counter.
        model_file_name: Path where the final ``state_dict`` is written.
        img: A single image, or a list of images from which one is drawn at
            random for every step.
        optimizer: Optimiser whose state is checkpointed alongside the model.
        criterion: Loss callable ``criterion(preds, targets)``.
        checkpoint_dir: Directory for checkpoints and the TensorBoard ``runs``
            sub-directory; created if missing.

    Side effects:
        Writes a checkpoint every 1000 steps (and at the last step), keeps only
        the four newest, logs scalars to TensorBoard, saves the final weights
        and empties the CUDA cache when CUDA is available.
    """
    os.makedirs(checkpoint_dir, exist_ok=True)
    start_epoch = 0

    def list_checkpoints():
        # Only files written by this function count as checkpoints. Any other
        # ``*.pth`` in the directory (e.g. exported final weights) must neither
        # be resumed from nor deleted by the pruning step.
        names = [f for f in os.listdir(checkpoint_dir)
                 if f.startswith("checkpoint_epoch_") and f.endswith(".pth")]
        return sorted(sorted(names), key=extract_epoch)

    # Resume from the newest checkpoint, if any.
    checkpoints = list_checkpoints()
    if checkpoints:
        latest_ckpt = os.path.join(checkpoint_dir, checkpoints[-1])
        checkpoint = torch.load(latest_ckpt, map_location=device)
        model.load_state_dict(checkpoint["model_state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
        # The stored value is "steps completed", i.e. the next step index.
        start_epoch = checkpoint["epoch"]
        print(f"‚úÖ Resuming from checkpoint: {latest_ckpt} (epoch {start_epoch})")
    else:
        print("üöÄ Starting training from scratch.")

    # TensorBoard logger
    writer = SummaryWriter(log_dir=os.path.join(checkpoint_dir, "runs"))

    # One progress-bar tick per training step.
    progress_bar = tqdm(range(start_epoch, num_epochs), desc="Training", ncols=100)

    try:
        for epoch in progress_bar:
            model.train()

            # A new random pair is generated for every step.
            pair, offsets, *_ = generate_pair(
                img=random.choice(img) if isinstance(img, list) else img,
                window_size=64,
                margin=16,
                disp_range=(-16, 16)
            )

            # HWC numpy pair -> 1xCxHxW float tensor; offsets -> 1x8 float tensor.
            pair = torch.from_numpy(pair).permute(2, 0, 1).unsqueeze(0).to(device).float()
            offsets = torch.from_numpy(offsets.flatten()).unsqueeze(0).to(device).float()
            # The network is trained to predict the negated offsets.
            targets = -offsets

            # Forward
            preds = model(pair)
            loss = criterion(preds, targets)

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Logging only: no graph needs to be recorded for these values.
            with torch.no_grad():
                # NOTE: this is an RMSE only when ``criterion`` is a mean squared error.
                rmse = torch.sqrt(loss + 1e-8)
                mae = torch.mean(torch.abs(preds - targets))
            writer.add_scalar("Loss/RMSE", rmse.item(), epoch)
            writer.add_scalar("Error/MAE", mae.item(), epoch)

            progress_bar.set_description(f"Epoch {epoch + 1}/{num_epochs}")
            progress_bar.set_postfix(loss=f"{loss.item():.6f}")

            # Periodic checkpoint, plus one at the very last step.
            if (epoch + 1) % 1000 == 0 or (epoch + 1) == num_epochs:
                checkpoint_path = os.path.join(checkpoint_dir, f"checkpoint_epoch_{epoch + 1}.pth")
                torch.save({
                    "epoch": epoch + 1,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                }, checkpoint_path)

                # Keep only the four newest checkpoints.
                for stale in list_checkpoints()[:-4]:
                    os.remove(os.path.join(checkpoint_dir, stale))
    finally:
        # Release the event file and the progress bar even if training is interrupted.
        writer.close()
        progress_bar.close()

    # Save final model
    torch.save(model.state_dict(), model_file_name)
    print(f"‚úÖ Final model saved: {model_file_name}")

    # Clear GPU cache
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        print(f"üßπ GPU memory cleared. Current allocated: {torch.cuda.memory_allocated(device) / 1e9:.2f} GB")


def check_gpu_memory():
    """Print allocated, reserved, total and free CUDA memory (in GB) for ``device``.

    "Free" is reported as total memory minus the memory reserved by PyTorch.
    When CUDA is unavailable a single notice is printed instead.
    """
    if not torch.cuda.is_available():
        print("CUDA not available")
        return

    gigabyte = 1e9
    allocated = torch.cuda.memory_allocated(device) / gigabyte
    reserved = torch.cuda.memory_reserved(device) / gigabyte
    total = torch.cuda.get_device_properties(device).total_memory / gigabyte

    print(f"GPU Memory Status:")
    print(f"  Allocated: {allocated:.2f} GB")
    print(f"  Reserved:  {reserved:.2f} GB")
    print(f"  Total:     {total:.2f} GB")
    print(f"  Free:      {total - reserved:.2f} GB")


def clear_gpu_memory():
    """Empty the CUDA caching allocator, synchronise, then print a memory report.

    When CUDA is unavailable a single notice is printed instead.
    """
    if not torch.cuda.is_available():
        print("CUDA not available")
        return

    torch.cuda.empty_cache()
    torch.cuda.synchronize()
    print("‚úÖ GPU cache cleared")
    check_gpu_memory()

In [None]:
# # TRAIN REGRESSOR
#
# num_epochs = 30000
# batch_size = 32
# learning_rate = 1e-4
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# print(f"Using device: {device}")
#
# model = HomographyRegressor(dropout_rate=0.1).to(device)
# criterion = nn.MSELoss()
# optimizer = optim.Adam(model.parameters(), lr=learning_rate)
#
# # image_names = [
# #     "000000002299.jpg",
# #     # "000000000285.jpg",
# #     # "000000000632.jpg",
# # ]
# # images = get_images_from_names(image_names, PREPROCESSED_DIR)
# images = get_random_images(image_dir=PREPROCESSED_DIR)
# print(f"üì∑ Loaded {len(images)} image(s) for training")
#
# # nn_train_single(
# #     model=model,
# #     num_epochs=num_epochs,
# #     model_file_name=f"h_regressor_ep{num_epochs}_I{len(images)}.pth",
# #     img=images[0] if len(images) == 1 else images,
# #     optimizer=optimizer,
# #     criterion=criterion,
# #     checkpoint_dir="checkpoints_homography_regressor_oneImage"
# # )
#
# nn_train_multi(
#     model=model,
#     num_epochs=num_epochs,
#     batch_size=batch_size,
#     samples_per_epoch=64,
#     model_file_name=f"h_regressor_multi.pth",
#     images=images,
#     optimizer=optimizer,
#     criterion=criterion,
#     checkpoint_dir="checkpoints_homography_regressor_multi"
# )

In [None]:
# -------------------------
# EVALUATION MODULE
# -------------------------


# --- 1) Test-set generation (~100 images × 10 samples) ---
def generate_test_set(images, n_images=100, samples_per_image=10,
                      window_size=64, margin=16, disp_range=(-16, 16), seed=42):
    """Build a reproducible list of evaluation samples with ``generate_pair``.

    Both ``random`` and ``np.random`` are re-seeded with ``seed`` (a global
    side effect). The first ``min(n_images, len(images))`` images are used, and
    ``samples_per_image`` samples are generated from each.

    Args:
        images: Sequence of source images (grayscale numpy arrays - TODO confirm).
        n_images: Maximum number of source images taken from the front of ``images``.
        samples_per_image: Number of samples generated per source image.
        window_size, margin, disp_range: Forwarded to ``generate_pair``.
        seed: Seed applied to ``random`` and ``np.random``.

    Returns:
        A list of dicts with the keys ``image``, ``pair``, ``offsets``
        (reshaped to 4x2), ``src_corners``, ``dst_corners``
        (``src_corners + offsets``), ``x``, ``y`` (integer top-left corner taken
        from ``src_corners[0]``), ``orig_patch`` and ``warped_patch`` (uint8
        crops of the source image and of the warped image at that position).
    """
    random.seed(seed)
    np.random.seed(seed)

    n_selected = min(n_images, len(images))
    selected = images[:n_selected]

    samples = []
    for img in selected:
        for _ in range(samples_per_image):
            pair, offsets, src_corners, warped_full = generate_pair(
                img, window_size=window_size, margin=margin, disp_range=disp_range)
            dst_corners = src_corners + offsets

            # The first source corner is the top-left of the patch.
            x = int(src_corners[0, 0])
            y = int(src_corners[0, 1])
            rows = slice(y, y + window_size)
            cols = slice(x, x + window_size)

            samples.append(dict(
                image=img,
                pair=pair,
                offsets=offsets.reshape(4, 2),
                src_corners=src_corners,
                dst_corners=dst_corners,
                x=x,
                y=y,
                orig_patch=img[rows, cols].astype(np.uint8),
                warped_patch=warped_full[rows, cols].astype(np.uint8),
            ))

    print(f"‚û°Ô∏è Generiranih {len(samples)} testnih primerov iz {len(selected)} slik.")
    return samples


# --- 2) RMSE utility ---
def corner_rmse(pred_offsets, gt_offsets):
    """Return the root-mean-square error between two offset arrays.

    The element-wise difference is cast to float32 and flattened, so every
    coordinate contributes equally. A 1e-12 term is added under the square
    root, so identical inputs give roughly 1e-6 rather than exactly 0.
    """
    residual = (pred_offsets - gt_offsets).astype(np.float32).ravel()
    mean_sq = np.mean(residual ** 2)
    return float(np.sqrt(mean_sq + 1e-12))


# --- 3) Eval: neural model (regressor or classifier) ---
def eval_model_on_testset(model, test_samples, device,
                          model_type='regressor',  # 'regressor' or 'classifier'
                          disp_range=(-16, 16), negate_pred=False,
                          soft_decode=False, batch_size=32):
    """Run ``model`` over ``test_samples`` in batches and score each sample.

    Args:
        model: Network put into eval mode and called on a (B, 2, H, W) float tensor.
        test_samples: List of sample dicts (see ``generate_test_set``).
        device: Device the input batches are moved to.
        model_type: ``'classifier'`` decodes the output with
            ``classes_to_offsets``; any other value treats the output as
            offsets directly.
        disp_range: Forwarded to ``classes_to_offsets`` (classifier only).
        negate_pred: Regressor only - flip the sign of the predictions, for
            models trained on negated offsets.
        soft_decode: Forwarded as ``soft=`` to ``classes_to_offsets``.
        batch_size: Number of samples per forward pass.

    Returns:
        ``{'rmses': [...], 'per_sample': [...]}`` where each per-sample entry
        holds the predicted and ground-truth offsets (4x2), the corners, the
        patches, the source image, its ``x``/``y`` position and the RMSE.
    """
    model.eval()
    results = {'rmses': [], 'per_sample': []}

    with torch.no_grad():
        for start in range(0, len(test_samples), batch_size):
            batch = test_samples[start:start + batch_size]

            # Stack HWC pairs into (B, H, W, 2), then move channels first.
            stacked = np.stack([s['pair'] for s in batch], axis=0)
            inputs = torch.from_numpy(stacked).permute(0, 3, 1, 2).float().to(device)
            preds = model(inputs)

            if model_type == 'classifier':
                pred_offsets = classes_to_offsets(preds, disp_range, soft=soft_decode).cpu().numpy()
            else:
                pred_offsets = preds.cpu().numpy()
                if negate_pred:
                    pred_offsets = -pred_offsets

            # (B, 8) -> (B, 4, 2): one (x, y) offset per corner.
            pred_offsets = pred_offsets.reshape(pred_offsets.shape[0], 4, 2)

            for j, s in enumerate(batch):
                prediction = pred_offsets[j]
                rmse = corner_rmse(prediction, s['offsets'])
                results['rmses'].append(rmse)

                record = {
                    'pred_offsets': prediction,
                    'gt_offsets': s['offsets'],
                    'src_corners': s['src_corners'],
                    'dst_corners_gt': s['dst_corners'],
                }
                # Fields copied through unchanged from the sample.
                for key in ('orig_patch', 'warped_patch', 'image', 'x', 'y'):
                    record[key] = s[key]
                record['rmse'] = rmse
                results['per_sample'].append(record)

    return results


# --- 4) Classical OpenCV approach (SIFT + FLANN matching + findHomography) ---
def estimate_homography_classical(orig_patch, warped_patch, min_matches=4):
    """Estimate the homography between two grayscale patches with SIFT.

    Pipeline: SIFT keypoints/descriptors -> FLANN KD-tree 2-NN matching ->
    Lowe ratio test (0.75) -> ``cv2.findHomography`` with RANSAC (5.0 px).
    Failures at any stage are reported through the return value, not raised.

    Args:
        orig_patch: 2-D source patch.
        warped_patch: 2-D target patch.
        min_matches: Minimum number of ratio-test survivors required.

    Returns:
        ``(success, H, num_matches, message)`` - ``H`` is ``None`` on failure
        and ``num_matches`` counts the matches that passed the ratio test.

    Raises:
        AssertionError: If either patch is not 2-D.
    """
    assert orig_patch.ndim == 2 and warped_patch.ndim == 2

    # SIFT may be missing from some OpenCV builds; report that as a failure.
    try:
        detector = cv2.SIFT_create()
    except Exception as e:
        return False, None, 0, f"SIFT not available: {e}"

    kp1, des1 = detector.detectAndCompute(orig_patch, None)
    kp2, des2 = detector.detectAndCompute(warped_patch, None)

    # 2-NN matching needs at least two descriptors on each side.
    if des1 is None or des2 is None or len(kp1) < 2 or len(kp2) < 2:
        return False, None, 0, f"Not enough keypoints ({len(kp1) if kp1 else 0}, {len(kp2) if kp2 else 0})"

    # FLANN with algorithm=1 (KD-tree), suited to float SIFT descriptors.
    flann = cv2.FlannBasedMatcher(dict(algorithm=1, trees=5), dict(checks=50))
    try:
        candidates = flann.knnMatch(des1, des2, k=2)
    except Exception as e:
        return False, None, 0, f"FLANN matching failed: {e}"

    # Lowe's ratio test: keep a match only if it clearly beats the runner-up.
    good = [
        pair[0]
        for pair in candidates
        if len(pair) == 2 and pair[0].distance < 0.75 * pair[1].distance
    ]
    if len(good) < min_matches:
        return False, None, len(good), f"Too few good matches ({len(good)})"

    # Matched keypoint locations in the (N, 1, 2) layout findHomography expects.
    pts_from = np.float32([kp1[m.queryIdx].pt for m in good]).reshape(-1, 1, 2)
    pts_to = np.float32([kp2[m.trainIdx].pt for m in good]).reshape(-1, 1, 2)

    try:
        H, mask = cv2.findHomography(pts_from, pts_to, cv2.RANSAC, 5.0)
    except Exception as e:
        return False, None, len(good), f"findHomography failed: {e}"

    if H is None:
        return False, None, len(good), "findHomography returned None"

    if mask is not None:
        inliers = int(mask.sum())
    else:
        inliers = 0
    return True, H, len(good), f"SIFT: success, inliers={inliers}"


def eval_classical_on_testset(test_samples, fallback_identity=True,
                              use_256_for_classical=False):
    """Evaluate the classical (SIFT + RANSAC) estimator on ``test_samples``.

    For each sample a homography is estimated between ``orig_patch`` and
    ``warped_patch``. Because it is estimated from the cropped patches, it
    lives in patch-local pixel coordinates: the source corners are therefore
    shifted by the patch origin (``x``, ``y``) before the homography is
    applied, and shifted back afterwards.

    Args:
        test_samples: List of sample dicts (see ``generate_test_set``).
        fallback_identity: Only selects the ``comment`` recorded for failed
            samples (``'fallback_identity'`` vs ``'failed_no_fallback'``); in
            both cases the prediction is zero offsets and the failure is
            counted.
        use_256_for_classical: Upsample both patches to 256x256 before
            detection; the estimated homography is rescaled back to original
            patch pixels and the RMSE is additionally divided by 4.

    Returns:
        Dict with ``rmses``, ``per_sample`` (predicted/ground-truth offsets,
        corners, rmse, comment), ``num_failures`` and ``num_total``.
    """
    results = {'rmses': [], 'per_sample': [], 'num_failures': 0, 'num_total': len(test_samples)}
    for s in test_samples:
        orig = s['orig_patch']
        warped = s['warped_patch']

        if use_256_for_classical:
            target = 256
            scale_factor = target / orig.shape[0]
            orig_up = cv2.resize(orig, (target, target), interpolation=cv2.INTER_LINEAR)
            warped_up = cv2.resize(warped, (target, target), interpolation=cv2.INTER_LINEAR)
            ok, H, nm, _ = estimate_homography_classical(orig_up, warped_up)
            if ok:
                # H acts on upsampled coordinates; conjugating with the scale
                # matrix S expresses it in original patch pixels.
                S = np.array([[1 / scale_factor, 0, 0], [0, 1 / scale_factor, 0], [0, 0, 1]])
                H_used = S @ H @ np.linalg.inv(S)
            else:
                H_used = None
        else:
            ok, H, nm, _ = estimate_homography_classical(orig, warped)
            H_used = H if ok else None

        src_corners = s['src_corners']
        if H_used is None:
            # No homography: predict "no motion" (dst corners == src corners).
            pred_dst = src_corners
            results['num_failures'] += 1
            comment = 'fallback_identity' if fallback_identity else 'failed_no_fallback'
        else:
            # Move the corners into the patch frame in which H was estimated,
            # apply H in homogeneous coordinates, then move back to image frame.
            origin = np.array([s['x'], s['y']], dtype=np.float64)
            pts = src_corners.reshape(-1, 2) - origin
            ones = np.ones((pts.shape[0], 1))
            hom_pts = np.concatenate([pts, ones], axis=1).T  # 3xN
            mapped = (H_used @ hom_pts).T  # Nx3
            mapped = mapped[:, :2] / mapped[:, 2:3] + origin
            pred_dst = mapped.astype(np.float32)
            comment = f"ok_matches={nm}"

        pred_offsets = pred_dst - src_corners
        rmse = corner_rmse(pred_offsets, s['offsets'])
        # NOTE(review): H was already rescaled to original pixels above, so this
        # extra division makes the reported error 4x smaller than the true pixel
        # error. Kept because the original documents it as requested - confirm.
        if use_256_for_classical:
            rmse = float(rmse / 4.0)

        results['rmses'].append(rmse)
        results['per_sample'].append({
            'pred_offsets': pred_offsets,
            'gt_offsets': s['offsets'],
            'src_corners': src_corners,
            'dst_corners_gt': s['dst_corners'],
            'rmse': rmse,
            'comment': comment
        })
    return results


# --- 5) Plotting and statistics ---
def summarize_and_plot(
        results_dict,
        labels,
        outdir='eval_results',
        bins=40,
        save_plots=True,
        ymax=200
):
    """Print per-method RMSE statistics and draw a comparative boxplot.

    Args:
        results_dict: List of result dicts, each with an ``'rmses'`` list and
            optionally ``'num_failures'``.
        labels: One label per entry of ``results_dict``.
        outdir: Output directory; always created, even when nothing is saved.
        bins: Currently unused by this function.
        save_plots: When true, the boxplot is written to
            ``<outdir>/rmse_boxplot.png``.
        ymax: Upper y-limit of the boxplot (``None`` = automatic).

    Returns:
        Dict mapping each label to ``mean``, ``median``, ``std``, ``n`` and
        ``failures``.
    """
    os.makedirs(outdir, exist_ok=True)

    rmse_arrays = []
    for res in results_dict:
        rmse_arrays.append(np.array(res['rmses'], dtype=np.float32))

    # ---- Summary table ----
    summary = {}
    for lab, arr, r in zip(labels, rmse_arrays, results_dict):
        mean = float(np.mean(arr))
        med = float(np.median(arr))
        std = float(np.std(arr))
        num = len(arr)
        num_fail = r.get('num_failures', 0)
        summary[lab] = dict(mean=mean, median=med, std=std, n=num, failures=num_fail)
        print(f"--- {lab} --- n={num}, failures={num_fail}\n  mean={mean:.3f}, median={med:.3f}, std={std:.3f}")

    # ---- Boxplot ----
    plt.figure(figsize=(8, 5))
    plt.boxplot(rmse_arrays, tick_labels=labels, showfliers=True)
    if ymax is not None:
        plt.ylim(0, ymax)
    plt.ylabel("RMSE (px)")
    plt.title("RMSE boxplot")

    if save_plots:
        p = os.path.join(outdir, "rmse_boxplot.png")
        plt.savefig(p, dpi=150)
        print("‚û°Ô∏è Shrani:", p)
    plt.show()

    return summary


# -------------------------
# EXAMPLE usage (after training)
# -------------------------
# Assumptions:
# - trained weights for the model (regressor or classifier) exist on disk
# - a list of grayscale images is available (e.g. from get_random_images(...))
# - for a classifier, pass model_type='classifier' to eval_model_on_testset
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = HomographyRegressor().to(device)
# Alternative: load the weights from a training checkpoint instead:
# state = torch.load("checkpoints_homography_regressor_oneImage/checkpoint_epoch_50000.pth",
#                    map_location=device)["model_state_dict"]
# map_location lets weights saved on a GPU load on a CPU-only machine.
state = torch.load("checkpoints_homography_regressor_oneImage/h_regressor_ep50000_I1.pth",
                   map_location=device)
model.load_state_dict(state)

# Build the test set.
test_samples = generate_test_set(
    images=get_random_images(
        num_images=100,
        image_dir=PREPROCESSED_DIR
    ),
    n_images=100,
    samples_per_image=10,
    window_size=64,
    margin=16,
    disp_range=(-16, 16)
)

# Evaluate the neural model (classifier or regressor).
model_type = 'regressor'  # or 'classifier'
# The training functions in this file regress the NEGATED offsets
# (criterion(preds, -offsets)), so predictions must be negated before they are
# compared with the ground-truth offsets. Set to False only for weights that
# were trained on the un-negated offsets.
neg = True
model_results = eval_model_on_testset(
    model,
    test_samples,
    device,
    model_type=model_type,
    negate_pred=neg,
    soft_decode=True
)

# Evaluate the classical approach.
classical_results = eval_classical_on_testset(
    test_samples,
    fallback_identity=True,
    use_256_for_classical=True
)

# Compare both and plot.
summary = summarize_and_plot(
    [model_results, classical_results],
    labels=[model.__class__.__name__, 'Classical_OpenCV'],
    outdir='eval_results',
    save_plots=False,
)
