In [None]:
import cv2
import numpy as np
import scipy.ndimage.filters as filters
import math
import matplotlib.pyplot as plt
import os
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import svm
import random
from skimage.transform import rotate
import tqdm
from sklearn.model_selection import train_test_split

11.1

In [None]:
def calculate_gradients(image_rgb):
    """Compute per-pixel gradient magnitude and unsigned orientation.

    Each channel is differentiated horizontally and vertically with the 1-D
    kernel ``[-1, 0, 1]`` (pixels outside the image are treated as 0). For
    every pixel the channel with the largest gradient magnitude is selected,
    and its magnitude and orientation are returned.

    Args:
        image_rgb: Array indexed as ``(row, column, channel)``.

    Returns:
        Tuple ``(magnitudes, orientations)`` of float64 arrays with the same
        height and width as the input. Orientations are in degrees, folded
        into the half-open range [0, 180).
    """
    kernel = np.array([-1, 0, 1])
    channel_magnitudes = []
    channel_orientations = []
    for i in range(image_rgb.shape[2]):
        # Widen before differentiating so unsigned pixel types cannot wrap.
        channel = image_rgb[:, :, i].astype(np.int32)
        dx = filters.convolve1d(channel, kernel, axis=1, output=np.float64, mode='constant', cval=0.0)
        dy = filters.convolve1d(channel, kernel, axis=0, output=np.float64, mode='constant', cval=0.0)
        channel_magnitudes.append(np.sqrt(dx**2 + dy**2))
        channel_orientations.append(np.rad2deg(np.arctan2(dy, dx)))

    magnitudes_rgb = np.stack(channel_magnitudes, axis=-1)
    orientations_rgb = np.stack(channel_orientations, axis=-1)

    # Index of the dominant channel per pixel, kept as a trailing axis so it
    # can be used directly with take_along_axis.
    strongest = np.expand_dims(np.argmax(magnitudes_rgb, axis=-1), axis=-1)

    # Squeeze ONLY the channel axis: a bare squeeze() would also drop a
    # height or width of 1 and return an array that is no longer 2-D.
    final_magnitudes = np.take_along_axis(magnitudes_rgb, strongest, axis=-1).squeeze(axis=-1)
    final_orientations = np.take_along_axis(orientations_rgb, strongest, axis=-1).squeeze(axis=-1)

    # Fold signed angles into the unsigned range [0, 180).
    final_orientations[final_orientations < 0] += 180
    final_orientations[final_orientations == 180] = 0
    return final_magnitudes, final_orientations

In [None]:
def create_cell_histograms(magnitudes, orientations, cell_size=(8, 8), num_bins=9):
    """Build one orientation histogram per non-overlapping cell.

    Every pixel votes with its gradient magnitude. The vote is split linearly
    between the bin that contains the orientation and the following bin
    (wrapping from the last bin back to the first).

    Args:
        magnitudes: 2-D array of gradient magnitudes.
        orientations: Array of orientations in degrees, indexed like
            ``magnitudes``; bins span a total of 180 degrees.
        cell_size: ``(height, width)`` of a cell in pixels.
        num_bins: Number of orientation bins.

    Returns:
        float64 array of shape ``(cells_y, cells_x, num_bins)``. Pixels that
        do not fill a whole cell at the bottom/right edge are ignored.
    """
    cell_h = cell_size[0]
    cell_w = cell_size[1]
    img_height, img_width = magnitudes.shape
    rows_of_cells = img_height // cell_h
    cols_of_cells = img_width // cell_w

    histograms = np.zeros((rows_of_cells, cols_of_cells, num_bins), dtype=np.float64)
    bin_width = 180.0 / num_bins

    for y_cell, x_cell in np.ndindex(rows_of_cells, cols_of_cells):
        top = y_cell * cell_h
        left = x_cell * cell_w
        mag_patch = magnitudes[top:top + cell_h, left:left + cell_w]
        ori_patch = orientations[top:top + cell_h, left:left + cell_w]
        histogram = histograms[y_cell, x_cell]  # view into the output array

        for r, c in np.ndindex(*mag_patch.shape):
            weight = mag_patch[r, c]
            angle = ori_patch[r, c]
            lower = int(angle // bin_width)
            # Fraction of the way from the lower bin to the next one.
            share_next = (angle % bin_width) / bin_width
            histogram[lower % num_bins] += weight * (1 - share_next)
            histogram[(lower + 1) % num_bins] += weight * share_next

    return histograms

In [None]:
def normalize_blocks(cell_histograms, block_size=(2, 2), block_stride=(1, 1), epsilon=1e-5):
    """L2-normalize overlapping blocks of cell histograms.

    Args:
        cell_histograms: Array of shape ``(cells_y, cells_x, num_bins)``.
        block_size: ``(rows, cols)`` of a block, measured in cells.
        block_stride: ``(rows, cols)`` step between blocks, measured in cells.
        epsilon: Regularizer added (squared) under the square root of the norm.

    Returns:
        Array with one flattened, normalized block per row. When the cell
        grid is smaller than one block the result is an empty array.
    """
    cells_y, cells_x, num_bins = cell_histograms.shape
    block_h, block_w = block_size[0], block_size[1]

    # Last valid top-left position is ``cells - block``. The upper bound must
    # not depend on the stride, otherwise strides > 1 yield a truncated block
    # hanging over the edge of the grid.
    y_starts = range(0, cells_y - block_h + 1, block_stride[0])
    x_starts = range(0, cells_x - block_w + 1, block_stride[1])

    normalized_block_vectors = []
    for y_block in y_starts:
        for x_block in x_starts:
            block_vector = cell_histograms[y_block:y_block + block_h,
                                           x_block:x_block + block_w, :].flatten()
            norm = np.sqrt(np.sum(block_vector**2) + epsilon**2)
            # Guard against division by a vanishing norm (possible only when
            # epsilon itself is tiny or zero).
            if norm > 1e-6:
                normalized_vector = block_vector / norm
            else:
                normalized_vector = block_vector
            normalized_block_vectors.append(normalized_vector)
    return np.array(normalized_block_vectors)

In [None]:
def compute_hog(image_path_or_matrix,
                cell_size=(8, 8), num_bins=9,
                block_size_cells=(2, 2), block_stride_cells=(1, 1),
                norm_epsilon=1e-5):
    """Compute the HOG descriptor of an image.

    Args:
        image_path_or_matrix: Either a path (``str``) read with
            ``cv2.imread`` or an already loaded image array.
        cell_size: ``(height, width)`` of a cell in pixels.
        num_bins: Number of orientation bins per cell.
        block_size_cells: Block size in cells.
        block_stride_cells: Block stride in cells.
        norm_epsilon: Regularizer used during block normalization.

    Returns:
        1-D array with all normalized block vectors concatenated, or an
        empty float64 array when no block fits into the image.

    Raises:
        ValueError: If a path is given and the image cannot be read.
    """
    image_rgb = image_path_or_matrix
    if isinstance(image_path_or_matrix, str):
        image_rgb = cv2.imread(image_path_or_matrix)
        if image_rgb is None:
            raise ValueError(f"Nie można wczytać obrazu: {image_path_or_matrix}")

    # Single-channel input is expanded to three channels so the gradient
    # code can always iterate over a channel axis.
    is_single_channel = len(image_rgb.shape) == 2 or image_rgb.shape[2] == 1
    if is_single_channel:
        image_rgb = cv2.cvtColor(image_rgb, cv2.COLOR_GRAY2BGR)

    magnitudes, orientations = calculate_gradients(image_rgb)
    cell_histograms_arr = create_cell_histograms(magnitudes, orientations, cell_size, num_bins)
    normalized_blocks_list = normalize_blocks(
        cell_histograms_arr, block_size_cells, block_stride_cells, norm_epsilon
    )

    if len(normalized_blocks_list) == 0:
        return np.array([], dtype=np.float64)
    return np.concatenate(normalized_blocks_list)

In [None]:
def HOGpicture(cell_histograms, cell_size=8):
    """Render cell histograms as an image of oriented line glyphs.

    A horizontal two-pixel-thick line is rotated once per orientation bin.
    Each cell is drawn as the sum of these rotated lines weighted by the
    cell's histogram (scaled by the global histogram maximum) and clipped
    to at most 1.0.

    Args:
        cell_histograms: Array of shape ``(cells_y, cells_x, num_bins)``.
        cell_size: Side length, in pixels, of the square drawn per cell.

    Returns:
        float64 array of shape ``(cells_y * cell_size, cells_x * cell_size)``.
    """
    num_cells_y, num_cells_x, num_bins = cell_histograms.shape

    # Template: horizontal bar through the middle of an otherwise empty cell.
    line_template = np.zeros((cell_size, cell_size), dtype=np.float64)
    middle = cell_size // 2
    line_template[middle - 1 : middle + 1, :] = 1.0

    # One rotated copy of the template per orientation bin.
    angle_step = 180.0 / num_bins
    rotated_lines = np.zeros((cell_size, cell_size, num_bins), dtype=np.float64)
    for k in range(num_bins):
        angle = k * angle_step
        rotated_lines[:, :, k] = rotate(line_template, -angle, resize=False, center=None,
                                        order=0, mode='constant', cval=0, clip=True,
                                        preserve_range=True)

    # Scale weights by the global maximum unless it is (numerically) zero.
    max_hist_val = np.max(cell_histograms)
    if max_hist_val > 1e-6:
        weights = cell_histograms / max_hist_val
    else:
        weights = cell_histograms

    canvas = np.zeros((num_cells_y * cell_size, num_cells_x * cell_size), dtype=np.float64)
    for r_cell in range(num_cells_y):
        rows = slice(r_cell * cell_size, (r_cell + 1) * cell_size)
        for c_cell in range(num_cells_x):
            cols = slice(c_cell * cell_size, (c_cell + 1) * cell_size)
            glyph = np.zeros((cell_size, cell_size), dtype=np.float64)
            for k_bin in range(num_bins):
                glyph += rotated_lines[:, :, k_bin] * weights[r_cell, c_cell, k_bin]
            glyph[glyph > 1.0] = 1.0
            canvas[rows, cols] = glyph
    return canvas

In [None]:
# Demonstration: HOG descriptor and its visualization for one positive sample.
example_img_path = 'pedestrians/pos/per00060.ppm'

example_image_bgr = cv2.imread(example_img_path)
# cv2.imread signals failure by returning None; fail here with a clear
# message instead of an opaque error inside cvtColor.
if example_image_bgr is None:
    raise FileNotFoundError(f"Nie można wczytać obrazu: {example_img_path}")
example_image_rgb = cv2.cvtColor(example_image_bgr, cv2.COLOR_BGR2RGB)

plt.imshow(example_image_rgb)
plt.title("Oryginalny obraz (per00060.ppm)")
plt.show()

hog_features_example = compute_hog(example_image_rgb,
                                   cell_size=(8, 8), num_bins=9,
                                   block_size_cells=(2, 2), block_stride_cells=(1, 1))

print(f"Długość deskryptora HOG: {len(hog_features_example)}")
# 3780 = 7 x 15 blocks x (2 x 2 cells x 9 bins), i.e. the descriptor length
# for a 64 x 128 pixel image with the parameters used above.
if len(hog_features_example) == 3780:
    print("Pierwsze 10 wartości HOG:", hog_features_example[:10])
else:
    print("Nie udało się obliczyć HOG lub długość jest niepoprawna.")

print("\nGenerowanie HOGPicture...")
# The visualization needs the un-normalized cell histograms, so the first
# two HOG stages are run again explicitly.
mags_ex, oris_ex = calculate_gradients(example_image_rgb)
cell_hists_ex = create_cell_histograms(mags_ex, oris_ex, cell_size=(8,8), num_bins=9)
hog_viz_image = HOGpicture(cell_hists_ex, cell_size=8)

plt.figure(figsize=(8,10))
plt.imshow(hog_viz_image, cmap='gray')
plt.title(f"Wizualizacja HOG dla {os.path.basename(example_img_path)}")
plt.axis('off')
plt.show()

11.2

In [None]:
def load_image_paths(folder_path, num_samples):
    """Return the first ``num_samples`` image paths found in a folder.

    Only regular files whose names end with ``.ppm`` or ``.png``
    (case-sensitive) are considered. Paths are returned in sorted order.

    Args:
        folder_path: Directory to scan (not recursive).
        num_samples: Upper bound used to slice the sorted list.

    Returns:
        List of paths, each joined with ``folder_path``.
    """
    matching_paths = []
    for entry in os.listdir(folder_path):
        full_path = os.path.join(folder_path, entry)
        if not os.path.isfile(full_path):
            continue
        if entry.endswith(".ppm") or entry.endswith(".png"):
            matching_paths.append(full_path)
    matching_paths.sort()
    return matching_paths[:num_samples]

def prepare_training_data(positive_folder, negative_folder, num_samples_each_class, hog_params, target_size=(64,128)):
    """Build HOG feature vectors and labels for positive and negative images.

    Up to ``num_samples_each_class`` images are taken from each folder,
    resized to ``target_size`` and described with ``compute_hog``. Images
    that cannot be read, or that yield an empty descriptor, are skipped.

    Args:
        positive_folder: Folder with positive samples (label 1).
        negative_folder: Folder with negative samples (label 0).
        num_samples_each_class: Maximum number of images used per folder.
        hog_params: Keyword arguments forwarded to ``compute_hog``.
        target_size: Size passed to ``cv2.resize``.

    Returns:
        Tuple ``(features, labels)``: a float32 array of descriptors and an
        int32 array of labels, positives first.
    """
    pos_image_paths = load_image_paths(positive_folder, num_samples_each_class)
    neg_image_paths = load_image_paths(negative_folder, num_samples_each_class)

    # (paths, label, announcement, progress-bar title) for each class, in
    # the order in which they are processed.
    class_groups = (
        (pos_image_paths, 1,
         f"Przetwarzanie {len(pos_image_paths)} obrazów pozytywnych...", "Pozytywne"),
        (neg_image_paths, 0,
         f"Przetwarzanie {len(neg_image_paths)} obrazów negatywnych...", "Negatywne"),
    )

    hog_features_list = []
    labels_list = []
    for paths, label, announcement, bar_title in class_groups:
        print(announcement)
        for img_path in tqdm.tqdm(paths, desc=bar_title):
            img = cv2.imread(img_path)
            if img is None:
                continue
            features = compute_hog(cv2.resize(img, target_size), **hog_params)
            if features.size > 0:
                hog_features_list.append(features)
                labels_list.append(label)

    features_array = np.array(hog_features_list, dtype=np.float32)
    labels_array = np.array(labels_list, dtype=np.int32)
    return features_array, labels_array

# Confirmation shown when this cell is executed, i.e. once both helper
# functions above have been defined in the kernel.
print("Funkcje 'load_image_paths' i 'prepare_training_data' zdefiniowane.")

In [None]:
# --- Configuration of the training run ---
POSITIVE_SAMPLES_FOLDER = "pedestrians/pos/"
NEGATIVE_SAMPLES_FOLDER = "pedestrians/neg/"
NUM_SAMPLES_EACH_CLASS = 1000
TEST_SPLIT_FRACTION = 0.1  # share of samples held out for evaluation

# Make sure the sample folders exist and warn when they contain nothing.
for folder in [POSITIVE_SAMPLES_FOLDER, NEGATIVE_SAMPLES_FOLDER]:
    if not os.path.exists(folder):
        os.makedirs(folder)
        print(f"Utworzono folder {folder}. Proszę dodać obrazy.")
    if not os.listdir(folder) and NUM_SAMPLES_EACH_CLASS > 0:
        print(f"Ostrzeżenie: Folder {folder} jest pusty. Trening SVM może się nie udać.")


hog_parameters_train = {
    'cell_size': (8, 8), 'num_bins': 9,
    'block_size_cells': (2, 2), 'block_stride_cells': (1, 1),
    'norm_epsilon': 1e-5
}

train_hog_features, train_labels = prepare_training_data(
    POSITIVE_SAMPLES_FOLDER, NEGATIVE_SAMPLES_FOLDER,
    NUM_SAMPLES_EACH_CLASS, hog_parameters_train
)

# Stays None when training is not possible; downstream code checks for this.
svm_classifier = None
if train_hog_features.size > 0 and train_labels.size > 0:
    print(f"\nPrzygotowano dane: {train_hog_features.shape[0]} próbek.")

    # A stratified split followed by SVM training needs: both classes
    # present with at least two samples each, and train and test parts
    # large enough to hold one sample of every class. Checking only
    # "more than one sample" lets single-class or tiny data sets through
    # and the cell then fails with an exception instead of the message below.
    n_samples = train_hog_features.shape[0]
    n_test_samples = math.ceil(TEST_SPLIT_FRACTION * n_samples)
    class_counts = np.bincount(train_labels, minlength=2)
    can_split_and_train = (
        class_counts.min() >= 2
        and n_test_samples >= 2
        and n_samples - n_test_samples >= 2
    )

    if can_split_and_train:
        X_train, X_test, y_train, y_test = train_test_split(
            train_hog_features, train_labels,
            test_size=TEST_SPLIT_FRACTION, random_state=42, stratify=train_labels
        )
        print(f"Rozmiar zbioru treningowego: {X_train.shape[0]}, testowego: {X_test.shape[0]}")

        print("\nTrening klasyfikatora SVM...")
        # NOTE(review): probability=True is kept from the original; nothing
        # in this cell uses probability estimates - confirm whether later
        # cells need them.
        svm_classifier = svm.SVC(kernel='linear', C=1.0, probability=True)
        svm_classifier.fit(X_train, y_train)
        print("Trening zakończony.")

        print("\nOcena klasyfikatora na zbiorze testowym (z podziału)...")
        predicted_labels_test = svm_classifier.predict(X_test)
        # Fix the label order so the matrix is always 2x2 (row/column 0 =
        # negatives, 1 = positives) even if a class is absent from y_test.
        cm_test = confusion_matrix(y_test, predicted_labels_test, labels=[0, 1])
        acc_test = accuracy_score(y_test, predicted_labels_test)
        print("Macierz pomyłek (zbiór testowy):")
        print(cm_test)
        print(f"Dokładność na zbiorze testowym: {acc_test * 100:.2f}%")
    else:
        print("Za mało danych do podziału na zbiór treningowy/testowy i treningu SVM.")
else:
    print("Nie udało się przygotować wystarczających danych treningowych dla SVM.")

11.3

In [None]:
# Detection window as (width, height) in pixels - the same order in which
# the detection functions below unpack their window-size argument.
WINDOW_SIZE = (64, 128)
# Detection reuses the very same dict object as training (this is an alias,
# not a copy), so descriptors computed at detection time match the ones the
# classifier was trained on.
HOG_PARAMS_DETECTION = hog_parameters_train

def sliding_window_detection_matrix_input(image_matrix, window_size, step_size, svm_model, hog_params_dict):
    """Classify every sliding-window position of an image.

    Args:
        image_matrix: Image array; the first two axes are rows and columns.
        window_size: ``(width, height)`` of the window in pixels.
        step_size: Step between consecutive windows, in pixels, both axes.
        svm_model: Classifier providing ``decision_function`` and ``predict``.
        hog_params_dict: Keyword arguments forwarded to ``compute_hog``.

    Returns:
        List of ``(x, y, width, height, confidence)`` tuples for the windows
        predicted as class 1, where ``confidence`` is the decision-function
        value of that window.
    """
    img_h, img_w = image_matrix.shape[:2]
    win_w, win_h = window_size
    hits = []

    row_starts = range(0, img_h - win_h + 1, step_size)
    col_starts = range(0, img_w - win_w + 1, step_size)

    for y in tqdm.tqdm(row_starts, desc="Przesuwanie okna", leave=False):
        for x in col_starts:
            window = image_matrix[y:y + win_h, x:x + win_w]
            # Defensive: skip anything that is not a full-size window.
            if window.shape[0] != win_h or window.shape[1] != win_w:
                continue

            descriptor = compute_hog(window, **hog_params_dict)
            if descriptor.size == 0:
                continue

            sample = descriptor.reshape(1, -1)
            confidence = svm_model.decision_function(sample)[0]
            if svm_model.predict(sample)[0] == 1:
                hits.append((x, y, win_w, win_h, confidence))
    return hits

def non_max_suppression_cv(original_detections_list, overlap_thresh):
    """Filter overlapping detections with ``cv2.dnn.NMSBoxes``.

    Args:
        original_detections_list: Sequence of
            ``(x, y, width, height, score)`` detections.
        overlap_thresh: Value passed as ``nms_threshold``.

    Returns:
        List with the entries of ``original_detections_list`` selected by
        ``NMSBoxes`` (a score threshold of 0.0 is used); empty when the
        input is empty or nothing is selected.
    """
    if not original_detections_list:
        return []

    boxes_xywh = []
    scores = []
    for detection in original_detections_list:
        boxes_xywh.append([int(detection[0]), int(detection[1]),
                           int(detection[2]), int(detection[3])])
    for detection in original_detections_list:
        scores.append(float(detection[4]))

    indices = cv2.dnn.NMSBoxes(boxes_xywh, scores,
                               score_threshold=0.0,
                               nms_threshold=overlap_thresh)

    # The result is handled both as an ndarray and as a (possibly empty)
    # tuple whose first element holds the indices.
    if isinstance(indices, np.ndarray) and indices.size > 0:
        kept = indices.flatten()
    elif isinstance(indices, tuple) and len(indices) > 0 and len(indices[0]) > 0:
        kept = indices[0]
    else:
        kept = ()

    return [original_detections_list[i] for i in kept]


def detect_objects_and_show(image_path, svm_clf,
                            window_sz=(64,128), stride_val=16,
                            hog_prms=HOG_PARAMS_DETECTION,
                            nms_thresh=0.1,
                            n_scales=5,
                            pyramid_scale_factor=1.2
                           ):
    """Run multi-scale sliding-window detection on one image and plot it.

    The image is repeatedly shrunk by ``pyramid_scale_factor``. Every level
    is scanned with a fixed-size window, detections are mapped back to the
    coordinates of the original image, merged with non-maximum suppression
    and drawn as green rectangles.

    Args:
        image_path: Path of the image to analyse.
        svm_clf: Trained classifier, or ``None`` to skip detection.
        window_sz: ``(width, height)`` of the detection window in pixels.
        stride_val: Sliding-window step in pixels.
        hog_prms: Keyword arguments forwarded to ``compute_hog``.
        nms_thresh: Overlap threshold for non-maximum suppression.
        n_scales: Maximum number of pyramid levels (including the original).
        pyramid_scale_factor: Shrink factor between consecutive levels.

    Returns:
        None. Results are printed and shown with matplotlib.
    """
    if svm_clf is None:
        print("Klasyfikator SVM nie jest wytrenowany. Pomijanie detekcji.")
        return

    img_orig_bgr = cv2.imread(image_path)
    if img_orig_bgr is None:
        print(f"Nie można wczytać obrazu testowego: {image_path}")
        return

    img_orig_rgb = cv2.cvtColor(img_orig_bgr, cv2.COLOR_BGR2RGB)
    initial_h, initial_w = img_orig_rgb.shape[:2]

    all_detections_multiscale = []
    current_img_for_pyramid = img_orig_rgb.copy()

    for scale_idx in tqdm.tqdm(range(n_scales), desc=f"Skale dla {os.path.basename(image_path)}", leave=False):
        if scale_idx > 0:
            new_w = int(current_img_for_pyramid.shape[1] / pyramid_scale_factor)
            new_h = int(current_img_for_pyramid.shape[0] / pyramid_scale_factor)
            # Stop once the window no longer fits into the shrunken image.
            if new_w < window_sz[0] or new_h < window_sz[1]:
                break
            current_img_for_pyramid = cv2.resize(current_img_for_pyramid, (new_w, new_h))

        # Level sizes are truncated to whole pixels, so the real shrink
        # differs from pyramid_scale_factor ** scale_idx. Mapping back with
        # the actual size ratio (per axis) avoids drift of the boxes.
        scale_x = initial_w / current_img_for_pyramid.shape[1]
        scale_y = initial_h / current_img_for_pyramid.shape[0]

        detections_at_this_scale = sliding_window_detection_matrix_input(
            current_img_for_pyramid, window_sz, stride_val, svm_clf, hog_prms
        )

        for (x, y, w, h, conf) in detections_at_this_scale:
            all_detections_multiscale.append(
                (int(x * scale_x), int(y * scale_y), int(w * scale_x), int(h * scale_y), conf)
            )

    print(f"Liczba detekcji przed NMS dla {os.path.basename(image_path)}: {len(all_detections_multiscale)}")

    final_detections = non_max_suppression_cv(all_detections_multiscale, nms_thresh)

    print(f"Liczba detekcji po NMS dla {os.path.basename(image_path)}: {len(final_detections)}")

    # Draw on a copy so the loaded image itself stays untouched.
    img_to_show_detections = img_orig_rgb.copy()
    for (x, y, w, h, _) in final_detections:
        cv2.rectangle(img_to_show_detections, (x, y), (x + w, y + h), (0, 255, 0), 2)

    plt.figure(figsize=(10, 8))
    plt.imshow(img_to_show_detections)
    plt.title(f"Detekcje dla {os.path.basename(image_path)}")
    plt.axis('off')
    plt.show()


# Run the detector on the four test images of the exercise.
test_image_dir = "Test images-pedestrians/"
test_images_pdf = [
    os.path.join(test_image_dir, file_name)
    for file_name in ("testImage1.png", "testImage2.png", "testImage3.png", "testImage4.png")
]

if not os.path.exists(test_image_dir):
    os.makedirs(test_image_dir)
    print(f"Utworzono folder {test_image_dir}. Proszę dodać obrazy testowe.")

for img_path_test in test_images_pdf:
    test_image_missing = not os.path.exists(img_path_test)
    if test_image_missing:
        # Placeholder so the pipeline can still be exercised: a black
        # 400 x 300 image with one filled, window-sized (64 x 128) rectangle.
        print(f"Plik testowy {img_path_test} nie istnieje. Tworzę dummy obraz.")
        dummy_test = np.zeros((300, 400, 3), dtype=np.uint8)
        rect_top_left = (100, 50)
        rect_bottom_right = (100 + 64, 50 + 128)
        cv2.rectangle(dummy_test, rect_top_left, rect_bottom_right, (200, 100, 50), -1)
        cv2.imwrite(img_path_test, dummy_test)

    print(f"\n--- Detekcja dla: {os.path.basename(img_path_test)} ---")
    detect_objects_and_show(img_path_test, svm_classifier, stride_val=16, n_scales=5)