In [None]:
import os
import cv2
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from joblib import dump, load
import zipfile


In [None]:
# Location of the uploaded dataset archive and the folder it is unpacked into.
zip_file_path = '/content/Judol Detection v2.v9i.yolov11.zip'
destination_path = '/content/dataset'

# Create the target folder up front; exist_ok makes re-running this cell harmless.
os.makedirs(destination_path, exist_ok=True)

# Unpack the whole archive; the context manager closes the file afterwards.
with zipfile.ZipFile(zip_file_path) as archive:
    archive.extractall(path=destination_path)

print(f"Files extracted to {destination_path}")


Files extracted to /content/dataset


In [None]:
# Dataset split folders (YOLO layout: <split>/images and <split>/labels).
# NOTE(review): these are relative paths while the archive is extracted to
# /content/dataset, so this presumably assumes the notebook runs from /content.
DATASET_ROOT = "dataset"
train_images_path = f"{DATASET_ROOT}/train/images"
train_labels_path = f"{DATASET_ROOT}/train/labels"
val_images_path = f"{DATASET_ROOT}/valid/images"
val_labels_path = f"{DATASET_ROOT}/valid/labels"
test_images_path = f"{DATASET_ROOT}/test/images"
test_labels_path = f"{DATASET_ROOT}/test/labels"

# Where annotated predictions are written.
output_images_path = "predicted_images"
output_video_path = "predicted_video.mp4"

# Class id (as stored in the label files) -> human-readable name used in captions.
class_names = dict(enumerate([
    "BK8",
    "Gate of Olympus",
    "Starlight Princess",
    "Princess",
    "Zeus",
]))

# Sliding-window geometry: (width, height) of each window and the stride in pixels.
# The window size matches the HOG winSize used in compute_hog.
WINDOW_SIZE = (128, 128)
STEP_SIZE = 64

In [None]:
def compute_hog(image):
    """Return the HOG descriptor of ``image`` as a flat 1-D array.

    The descriptor uses a 128x128 detection window, 16x16 blocks moved in
    8-pixel strides, 8x8 cells and 9 orientation bins.

    Args:
        image: the patch to describe. Callers in this notebook pass a
            grayscale patch already resized to 128x128.

    Returns:
        The flattened output of ``cv2.HOGDescriptor.compute``.
    """
    descriptor_params = dict(
        _winSize=(128, 128),
        _blockSize=(16, 16),
        _blockStride=(8, 8),
        _cellSize=(8, 8),
        _nbins=9,
    )
    descriptor = cv2.HOGDescriptor(**descriptor_params)
    feature_vector = descriptor.compute(image)
    return feature_vector.flatten()


def compute_iou(box1, box2):
    """Return the intersection-over-union of two axis-aligned boxes.

    Args:
        box1: sequence whose first four items are ``(x1, y1, x2, y2)``.
        box2: sequence in the same layout as ``box1``.

    Returns:
        Intersection area divided by union area. Returns ``0.0`` when the
        union has no area (e.g. both boxes are degenerate) instead of
        dividing by zero.
    """
    # Overlap extent along each axis; clamped at 0 when the boxes are disjoint.
    inter_width = max(0, min(box1[2], box2[2]) - max(box1[0], box2[0]))
    inter_height = max(0, min(box1[3], box2[3]) - max(box1[1], box2[1]))
    intersection_area = inter_width * inter_height

    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])

    union_area = box1_area + box2_area - intersection_area

    # Zero-area boxes would otherwise raise ZeroDivisionError (or give NaN
    # with NumPy scalars); treat them as non-overlapping.
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area

def image_pyramid(image, scale=1.5, min_size=(30, 30)):
    """Yield ``image`` followed by successively smaller resized copies.

    Each level divides the previous level's width and height by ``scale``
    (truncated to int). Iteration stops before producing a level whose width
    is below ``min_size[0]`` or whose height is below ``min_size[1]``.

    Args:
        image: array whose ``shape[0]`` is the height and ``shape[1]`` the width.
        scale: shrink factor between consecutive levels; must be greater than 1.
        min_size: ``(min_width, min_height)`` of the smallest level to yield.

    Yields:
        The original image first, then each downscaled level.

    Raises:
        ValueError: if ``scale`` is not greater than 1. Such a value never
            shrinks the image, so the generator would otherwise never end.
            The error surfaces on the first iteration because this is a
            generator.
    """
    if scale <= 1:
        raise ValueError(f"scale must be greater than 1, got {scale}")

    yield image
    while True:
        width = int(image.shape[1] / scale)
        height = int(image.shape[0] / scale)

        if width < min_size[0] or height < min_size[1]:
            break

        image = cv2.resize(image, (width, height))
        yield image

In [None]:
def prepare_dataset(images_path, labels_path):
    """Build HOG feature vectors and class labels from a YOLO-format split.

    For every ``.jpg`` in ``images_path`` the label file with the same stem
    in ``labels_path`` is read. Each label line
    (``class x_center y_center width height``, coordinates normalised to the
    image size) is converted to a pixel box, cropped from the grayscale
    image, resized to ``WINDOW_SIZE`` and described with ``compute_hog``.

    Images that cannot be decoded and images without a label file are
    reported and skipped instead of aborting the whole run.

    Args:
        images_path: directory containing the ``.jpg`` images.
        labels_path: directory containing the matching ``.txt`` label files.

    Returns:
        ``(features, labels)`` as NumPy arrays: one HOG vector and one class
        id per usable annotation. Both are empty when nothing was found.
    """
    features = []
    labels = []

    for image_file in os.listdir(images_path):
        if not image_file.endswith(".jpg"):
            continue

        image_path = os.path.join(images_path, image_file)
        # splitext swaps only the final extension; str.replace would also
        # rewrite any earlier ".jpg" occurring inside the file name.
        label_path = os.path.join(labels_path, os.path.splitext(image_file)[0] + ".txt")

        image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising.
            print(f"Failed to load {image_path}")
            continue
        if not os.path.isfile(label_path):
            print(f"Skipping {image_file}: no label file at {label_path}")
            continue

        h, w = image.shape

        with open(label_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if not parts:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue

                class_id = int(parts[0])
                x_center, y_center, width, height = map(float, parts[1:])

                # Clamp to the image: a negative start index would wrap
                # around in NumPy slicing and crop the wrong region.
                x1 = max(0, int((x_center - width / 2) * w))
                y1 = max(0, int((y_center - height / 2) * h))
                x2 = min(w, int((x_center + width / 2) * w))
                y2 = min(h, int((y_center + height / 2) * h))

                roi = image[y1:y2, x1:x2]
                if roi.size > 0:
                    roi_resized = cv2.resize(roi, WINDOW_SIZE)
                    features.append(compute_hog(roi_resized))
                    labels.append(class_id)

    return np.array(features), np.array(labels)


def sliding_window(image, step_size, window_size):
    """Yield every full-size window of ``image`` on a regular grid.

    Windows are visited row by row (``y`` outer, ``x`` inner), starting at
    the top-left corner and advancing ``step_size`` pixels at a time. Only
    windows that fit entirely inside the image are produced, including the
    one that ends exactly on the right or bottom edge.

    Args:
        image: array whose ``shape[0]`` is the height and ``shape[1]`` the width.
        step_size: stride in pixels along both axes.
        window_size: ``(width, height)`` of each window.

    Yields:
        ``(x, y, window)`` where ``(x, y)`` is the window's top-left corner
        and ``window`` is the corresponding slice of ``image``.
    """
    win_w, win_h = window_size[0], window_size[1]
    img_h, img_w = image.shape[0], image.shape[1]

    # "+ 1" makes the last valid offset (img - win) inclusive; without it an
    # image exactly the size of the window would yield nothing at all.
    for y in range(0, img_h - win_h + 1, step_size):
        for x in range(0, img_w - win_w + 1, step_size):
            yield (x, y, image[y:y + win_h, x:x + win_w])

In [None]:
def non_max_suppression(detections, iou_threshold=0.3):
    """Greedily keep the highest-confidence detections that do not overlap.

    Detections are ranked by their confidence (item 5) in descending order.
    A detection is kept only if its IoU with every detection already kept is
    below ``iou_threshold``. Class ids are not considered, so suppression is
    applied across classes.

    Args:
        detections: iterable of ``(x1, y1, x2, y2, class_id, confidence)``.
        iou_threshold: overlap at or above which the weaker box is dropped.

    Returns:
        The surviving detections, ordered from most to least confident.
    """
    ranked = sorted(detections, key=lambda det: det[5], reverse=True)

    kept = []
    for candidate in ranked:
        candidate_box = candidate[:4]
        overlaps_kept = not all(
            compute_iou(winner[:4], candidate_box) < iou_threshold
            for winner in kept
        )
        if overlaps_kept:
            continue
        kept.append(candidate)

    return kept



def visualize_detections(image, detections, ground_truth_boxes=None):
    """Draw predicted boxes (and optional ground truth) onto ``image``.

    The drawing is done in place on ``image``; the same object is returned
    for convenience.

    Args:
        image: image to draw on (modified in place).
        detections: iterable of ``(x1, y1, x2, y2, class_id, confidence)``;
            each is drawn with colour ``(0, 255, 0)`` and captioned with
            ``"<class name>: <confidence>"`` just above its top-left corner.
        ground_truth_boxes: optional list of ``(x1, y1, x2, y2)`` boxes drawn
            with colour ``(0, 0, 255)``.

    Returns:
        The annotated ``image``.
    """
    prediction_color = (0, 255, 0)
    truth_color = (0, 0, 255)
    thickness = 2

    for detection in detections:
        (x1, y1, x2, y2, class_id, confidence) = detection
        cv2.rectangle(image, (x1, y1), (x2, y2), prediction_color, thickness)

        caption = f"{class_names[class_id]}: {confidence:.2f}"
        caption_origin = (x1, y1 - 10)
        cv2.putText(image, caption, caption_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, prediction_color, thickness)

    # A missing or empty ground-truth list simply draws nothing.
    for truth_box in ground_truth_boxes or ():
        (x1, y1, x2, y2) = truth_box
        cv2.rectangle(image, (x1, y1), (x2, y2), truth_color, thickness)

    return image



def detect_objects(image, model, confidence_threshold=0.9):
    """Detect objects with a multi-scale sliding-window HOG classifier.

    The image is converted to grayscale and scanned at every level of
    ``image_pyramid``. Each ``WINDOW_SIZE`` window is described with
    ``compute_hog`` and scored by ``model``. Windows whose best class
    probability exceeds ``confidence_threshold`` become detections, which are
    mapped back to original-image coordinates and filtered with
    ``non_max_suppression``.

    Args:
        image: BGR colour image (it is passed through ``cv2.COLOR_BGR2GRAY``).
        model: fitted classifier exposing ``predict_proba`` and ``classes_``.
        confidence_threshold: minimum probability (exclusive) for a window
            to be reported.

    Returns:
        List of ``(x1, y1, x2, y2, class_id, confidence)`` tuples in
        original-image pixel coordinates.
    """
    image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # NumPy shapes are (rows, cols) whereas WINDOW_SIZE is (width, height).
    expected_shape = (WINDOW_SIZE[1], WINDOW_SIZE[0])
    detections = []

    for resized_image in image_pyramid(image_gray, scale=1.5):
        # Width and height are truncated independently while building the
        # pyramid, so each axis needs its own factor to map back.
        scale_x = image_gray.shape[1] / resized_image.shape[1]
        scale_y = image_gray.shape[0] / resized_image.shape[0]

        origins = []
        descriptors = []
        for (x, y, window) in sliding_window(resized_image, STEP_SIZE, WINDOW_SIZE):
            if window.shape[:2] != expected_shape:
                continue
            origins.append((x, y))
            descriptors.append(compute_hog(window))

        if not descriptors:
            continue

        # One batched call per pyramid level instead of two calls per window.
        probabilities = np.asarray(model.predict_proba(descriptors))
        best_columns = probabilities.argmax(axis=1)

        for (x, y), column, window_probs in zip(origins, best_columns, probabilities):
            confidence = window_probs[column]
            if confidence > confidence_threshold:
                x1 = int(x * scale_x)
                y1 = int(y * scale_y)
                x2 = int((x + WINDOW_SIZE[0]) * scale_x)
                y2 = int((y + WINDOW_SIZE[1]) * scale_y)

                # predict_proba columns follow model.classes_, so the label
                # must be looked up there rather than used as an index. Taking
                # the arg-max also keeps label and confidence consistent,
                # which separate predict()/predict_proba() calls do not
                # guarantee for SVC.
                class_id = model.classes_[column]
                detections.append((x1, y1, x2, y2, class_id, confidence))

    detections = non_max_suppression(detections)
    return detections

In [None]:
# Build HOG features for the training and validation splits.
X_train, y_train = prepare_dataset(train_images_path, train_labels_path)
print(f"Training data: {len(X_train)} samples")

X_val, y_val = prepare_dataset(val_images_path, val_labels_path)
print(f"Validation data: {len(X_val)} samples")

print("Training SVM...")
# probability=True enables predict_proba (needed by detect_objects). Its
# calibration step shuffles the data internally, so the seed is fixed to make
# the reported probabilities reproducible between runs.
svm = SVC(kernel='rbf', probability=True, random_state=42)
svm.fit(X_train, y_train)
print("SVM training completed.")

# The validation split was loaded but never used; report its accuracy so it
# can guide tuning without touching the test split.
if len(X_val):
    val_predictions = svm.predict(X_val)
    val_accuracy = accuracy_score(y_val, val_predictions)
    print(f"Validation accuracy: {val_accuracy:.2f}")

# Final hold-out evaluation on the cropped test annotations.
X_test, y_test = prepare_dataset(test_images_path, test_labels_path)
test_predictions = svm.predict(X_test)
test_accuracy = accuracy_score(y_test, test_predictions)
print(f"Test accuracy: {test_accuracy:.2f}")

In [None]:
def process_video(input_video_path, output_video_path, model):
    """Run the detector on every frame of a video and save the annotated result.

    The output video uses the ``mp4v`` codec and inherits the frame size and
    frame rate of the input, so frames are written at their native resolution.

    Args:
        input_video_path: path of the video to read.
        output_video_path: path of the annotated video to write.
        model: classifier passed through to ``detect_objects``.

    Returns:
        None. If the input cannot be opened a message is printed and nothing
        is written.
    """
    cap = cv2.VideoCapture(input_video_path)
    if not cap.isOpened():
        print(f"Failed to open {input_video_path}")
        cap.release()
        return

    # The writer's frame size must match the frames handed to write();
    # a hard-coded size would mismatch any input of a different resolution.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Fall back to 30 fps when the source reports 0 (unknown).
    fps = cap.get(cv2.CAP_PROP_FPS) or 30

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            detections = detect_objects(frame, model)
            result_frame = visualize_detections(frame, detections)

            out.write(result_frame)
    finally:
        # Release both handles even if detection raises midway.
        cap.release()
        out.release()

def visualize_and_save_gradients(image_path, output_path):
    """Save a three-panel figure of an image's Sobel gradients.

    The figure shows the grayscale image, the gradient magnitude (scaled to
    0-255) and the gradient orientation in degrees (0-360). It is written to
    ``<output_path>/<image stem>_gradients.png``.

    Relies on ``matplotlib.pyplot`` being available as ``plt`` when called.

    Args:
        image_path: path of the image to analyse.
        output_path: directory for the figure; created if it does not exist.

    Returns:
        None. Prints a message and returns early when the image cannot be
        loaded.
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        print(f"Failed to load {image_path}")
        return

    # Horizontal and vertical derivatives with a 3x3 Sobel kernel.
    grad_x = cv2.Sobel(image, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(image, cv2.CV_64F, 0, 1, ksize=3)

    magnitude = np.sqrt(grad_x ** 2 + grad_y ** 2)
    angle = np.arctan2(grad_y, grad_x) * (180 / np.pi) % 360

    # A perfectly uniform image has zero gradient everywhere; avoid 0/0.
    peak = magnitude.max()
    if peak > 0:
        magnitude_norm = ((magnitude / peak) * 255).astype(np.uint8)
    else:
        magnitude_norm = np.zeros(magnitude.shape, dtype=np.uint8)

    filename = os.path.basename(image_path)
    filename_no_ext = os.path.splitext(filename)[0]

    panels = [
        ("Original Image", image, "gray"),
        ("Gradient Magnitude", magnitude_norm, "gray"),
        ("Gradient Orientation", angle, "hsv"),
    ]
    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    for ax, (title, data, cmap) in zip(axes, panels):
        ax.set_title(title)
        ax.imshow(data, cmap=cmap)
        ax.axis("off")

    # savefig does not create missing directories.
    if output_path:
        os.makedirs(output_path, exist_ok=True)
    output_file = os.path.join(output_path, f"{filename_no_ext}_gradients.png")
    fig.savefig(output_file)
    # Close explicitly so figures do not accumulate when called in a loop.
    plt.close(fig)

    print(f"Saved gradient visualization for {filename} to {output_file}")

In [None]:
# Run the detector on every test image and save it with predictions and
# ground-truth boxes drawn on top.
os.makedirs(output_images_path, exist_ok=True)
for image_file in os.listdir(test_images_path):
    if not image_file.endswith(".jpg"):
        continue

    image_path = os.path.join(test_images_path, image_file)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None for unreadable files; skip rather than
        # crash inside detect_objects.
        print(f"Failed to load {image_path}")
        continue

    detections = detect_objects(image, svm)

    # splitext swaps only the final extension, unlike str.replace which
    # would rewrite every ".jpg" occurrence in the file name.
    label_path = os.path.join(test_labels_path, os.path.splitext(image_file)[0] + ".txt")
    ground_truth_boxes = []
    if os.path.isfile(label_path):
        img_h, img_w = image.shape[:2]
        with open(label_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if not parts:
                    # Tolerate blank lines (e.g. a trailing newline).
                    continue
                # YOLO rows are "class x_center y_center width height",
                # normalised to the image size.
                x_center, y_center, width, height = map(float, parts[1:])
                x1 = int((x_center - width / 2) * img_w)
                y1 = int((y_center - height / 2) * img_h)
                x2 = int((x_center + width / 2) * img_w)
                y2 = int((y_center + height / 2) * img_h)
                ground_truth_boxes.append((x1, y1, x2, y2))

    result_image = visualize_detections(image, detections, ground_truth_boxes)

    output_path = os.path.join(output_images_path, image_file)
    cv2.imwrite(output_path, result_image)
    print(f"Processed {image_file}")


# Uncomment the line below to process a video
# process_video("input_video.mp4", "output_video.mp4", svm)

import matplotlib.pyplot as plt


# Source images and the folder that receives one gradient figure per image.
test_images_path = "dataset/test/images"
gradient_output_path = "gradient_visualization"

os.makedirs(gradient_output_path, exist_ok=True)

# Only .jpg files are visualised; anything else in the folder is ignored.
jpg_files = [name for name in os.listdir(test_images_path) if name.endswith(".jpg")]
for image_file in jpg_files:
    image_path = os.path.join(test_images_path, image_file)
    visualize_and_save_gradients(image_path, gradient_output_path)

print(f"All gradient visualizations saved in {gradient_output_path}")




Preparing training data...
Training data: 1971 samples
Preparing validation data...
Validation data: 177 samples
Training SVM...
SVM training completed.
Test accuracy: 0.97
Processed bk8_jpg.rf.7ffab985073d109b16e18625b5988f0c.jpg
Processed maxresdefault-18-_jpg.rf.2439a6fa40b31753e4621fa98ddb54b4.jpg
Processed hq720-32-_jpg.rf.f5ffaf4faf04da11804763247e877862.jpg
Processed 467155472_122105823158610608_4654678954588885548_n_jpg.rf.8ccdd073b18be3c6d4626c293f4dc834.jpg
Processed hq720-69-_jpg.rf.11c96a2213a964692960e2e506c33967.jpg
Processed Screenshot-2024-12-14-192520_png.rf.95cdaaae4492e824b38c8a4e5a908022.jpg
Processed 33199069_2105952949681663_4193847788573818880_n_jpg.rf.983b10d9de92dc8952811d50008812f2.jpg
Processed GB7JdnwWgAAq2rb_jpg.rf.db2f7dfc7e6230faf8d3907e2cddef69.jpg
Processed 0a3c7711d581c552971470fc30975ba7_jpg.rf.9c29b60d0598fc5273847f5bda4a7082.jpg
Processed hq720-21-_jpg.rf.f13c0fa45ebb25d64da56ab16240b065.jpg
Processed hq720-40-_jpg.rf.f329237ab8c191c5f5e6fc30c4111cc

In [None]:
# Run the gradient visualisation on a single image outside the dataset;
# "cat.jpg" is resolved relative to the current working directory, and the
# figure is written into gradient_output_path.
visualize_and_save_gradients("cat.jpg", gradient_output_path)

Saved gradient visualization for cat.jpg to gradient_visualization/cat_gradients.png
