In [2]:
import cv2
import cupy as cp
import numpy as np
import requests
from PIL import Image, ImageDraw
import os

In [3]:
# Check for CUDA availability
if cp.is_available():
    print("CuPy is using CUDA.")
    print("Number of available GPUs:", cp.cuda.runtime.getDeviceCount())
else:
    print("CuPy is not using CUDA.")

CuPy is using CUDA.
Number of available GPUs: 1


In [4]:
BASE_URL = "http://sdp.sdpgroup62025.workers.dev"

PREDICT_URL = BASE_URL + "/predict"
RESULTS_URL = BASE_URL + "/results"

KEYBOARD_THRESHOLD = 0.50

In [5]:
def predict_image(path):

    # Get the image data from the path
    with open(path, "rb") as image_file:
        image_data = image_file.read()

    # The headers for the request
    headers = {
        "Content-Type": "image/jpeg",
    }

    # Make a POST request to the server
    response = requests.post(PREDICT_URL, headers=headers, data=image_data)

    # Check if the request was successful
    if response.status_code != 200:
        raise Exception("The request failed with status code: " + str(response.status_code))

    # Return the response
    return response.json()

In [6]:
def extract_keyboard_and_detect_edges(path, predictions):
    image = cv2.imread(path)
    if image is None:
        raise FileNotFoundError(f"Unable to load image at path: {path}")

    results = predictions['predictions']['result']
    # print("Results", results)

    keyboards_data = {}

    for result in results:

        if result['label'] == 'keyboard' and result['score'] >= KEYBOARD_THRESHOLD:

            keyboard_bounding_box = result['box']

            # print("Keyboard bounding box", keyboard_bounding_box)

            if keyboard_bounding_box is None:
                raise ValueError("No keyboard bounding box found.")

            image_height, image_width = image.shape[:2]

            # print("Image height", image_height)
            # print("Image width", image_width)

            xmin = keyboard_bounding_box['xmin']
            ymin = keyboard_bounding_box['ymin']
            xmax = keyboard_bounding_box['xmax']
            ymax = keyboard_bounding_box['ymax']

            if xmin < 0 or xmax >= image_width or ymin < 0 or ymax >= image_height or xmax <= xmin or ymax <= ymin:
                # Region is not fully contained within image boundaries, skip it
                continue

            keyboard_region = image[ymin : ymax, xmin : xmax]

            edges = cv2.Canny(keyboard_region, 100, 200)
            if edges is None or edges.size == 0:
                raise ValueError("Edge detection failed or returned empty results.")

            # Ensure edges is a 2D image
            if edges.ndim == 3:
                edges = edges[:, :, 0]

            if edges.dtype != np.uint8:
                edges = edges.astype(np.uint8)

            # Ensure edges is not empty
            if edges.size == 0:
                raise ValueError("No edges to process.")

            # Find contours of the edges
            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            all_points = []

            for contour in contours:
                contour = np.array(contour, dtype=int)

                # Ensure contour has at least 2 points before drawing
                if len(contour) >= 2:
                    dx = xmin  # Horizontal translation to move the contour to inside the bounding box
                    dy = ymin  # Vertical translation to move the contour to inside the bounding box

                    # Translate the contour points by (dx, dy)
                    translated_contour = contour + [dx, dy]

                    # Convert the translated contour to a list of points
                    points = [tuple(p) for p in translated_contour[:, 0]]
                    all_points.append(points)

            keyboards_data[len(keyboards_data) + 1] = {
                "keyboard_bounding_box": keyboard_bounding_box,
                "edges": edges,
                "all_points": all_points
            }

    if len(keyboards_data) == 0:
        print(f"No Keyboards detected at {path}")
        
    if len(keyboards_data) != 0:
        print(keyboards_data[1]["keyboard_bounding_box"])

    return keyboards_data

In [7]:
def draw_keyboard_bounding_box_on_image(image_path, keyboards_data, output_path):
    # Load the original image and convert to a NumPy array
    image = Image.open(image_path)
    image_array = np.array(image)

    # Move image array to GPU using CuPy
    image_gpu = cp.asarray(image_array)

    # Convert to BGR format for OpenCV (PIL uses RGB, OpenCV uses BGR)
    image_gpu = image_gpu[:, :, ::-1]

    # Create a blank white image on GPU
    height, width, _ = image_gpu.shape
    blank_image_gpu = cp.ones((height, width, 3), dtype=cp.uint8) * 255

    # Convert blank image to NumPy array before using OpenCV drawing functions
    blank_image_cpu = cp.asnumpy(blank_image_gpu)

    # Draw bounding boxes and polygons on the CPU array using OpenCV
    for keyboard_id, keyboard_data in keyboards_data.items():
        keyboard_bounding_box = keyboard_data['keyboard_bounding_box']
        all_points = keyboard_data['all_points']

        # Get bounding box coordinates
        x_min = keyboard_bounding_box['xmin']
        y_min = keyboard_bounding_box['ymin']
        x_max = keyboard_bounding_box['xmax']
        y_max = keyboard_bounding_box['ymax']

        # Draw the bounding box on the image
        cv2.rectangle(blank_image_cpu, (x_min, y_min), (x_max, y_max), (255, 0, 0), 3)

        # Draw each contour (polygon) on the image
        for points in all_points:
            points_array = np.array(points, dtype=np.int32).reshape((-1, 1, 2))
            cv2.polylines(blank_image_cpu, [points_array], isClosed=True, color=(0, 0, 255), thickness=3)

    # Move the final result back to GPU for any further processing
    blank_image_gpu = cp.asarray(blank_image_cpu)

    # Convert the final image back to RGB format and save
    result_image_rgb = cp.asnumpy(blank_image_gpu)[:, :, ::-1]  # Convert BGR back to RGB
    output_image = Image.fromarray(result_image_rgb)
    output_image.save(output_path)

    print(f"Saved keyboard bounding box image to {output_path}")

In [8]:
input_folder_path = "/home/tan90/SDP/Images"

In [9]:
file_list = os.listdir(input_folder_path)
print(file_list)

['Input_018.jpg', 'Input_072.jpg', 'Input_033.jpg', 'Input_084.jpg', 'Input_032.jpg', 'Input_028.jpg', 'Input_046.jpg', 'Input_042.jpg', 'Input_076.jpg', 'Input_031.jpg', 'Input_047.jpg', 'Input_048.jpg', 'Input_079.jpg', 'Input_081.jpg', 'Input_005.jpg', 'Input_010.jpg', 'Input_011.jpg', 'Input_034.jpg', 'Input_070.jpg', 'Input_050.jpg', 'Input_060.jpg', 'Input_049.jpg', 'Input_020.jpg', 'Input_074.jpg', 'Input_015.jpg', 'Input_055.jpg', 'Input_003.jpg', 'Input_063.jpg', 'Input_069.jpg', 'Input_008.jpg', 'Input_083.jpg', 'Input_073.jpg', 'Input_062.jpg', 'Input_014.jpg', 'Input_082.jpg', 'Input_004.jpg', 'Input_075.jpg', 'Input_019.jpg', 'Input_002.jpg', 'Input_057.jpg', 'Input_030.jpg', 'Input_038.jpg', 'Input_021.jpg', 'Input_077.jpg', 'Input_024.jpg', 'Input_064.jpg', 'Input_051.jpg', 'Input_080.jpg', 'Input_013.jpg', 'Input_058.jpg', 'Input_059.jpg', 'Input_044.jpg', 'Input_043.jpg', 'Input_039.jpg', 'Input_016.jpg', 'Input_052.jpg', 'Input_061.jpg', 'Input_023.jpg', 'Input_066.jp

In [10]:
output_folder_path = '/home/tan90/SDP/Output'

In [11]:
# Create output folder if it doesn't exist
os.makedirs(output_folder_path, exist_ok=True)

In [None]:
def folder_classify(file_list, input_folder_path, output_folder_path):
  for filename in file_list:
    if filename.endswith(".jpg"):  # Process only Image files
      input_file_path = os.path.join(input_folder_path, filename)
      output_file_path = os.path.join(output_folder_path, filename)

      predictions = predict_image(input_file_path)
      keyboards_data = extract_keyboard_and_detect_edges(input_file_path, predictions)
      draw_keyboard_bounding_box_on_image(input_file_path, keyboards_data, output_file_path)

In [None]:
def file_classify(filename, input_folder_path, output_folder_path):
    if filename.endswith(".jpg"):  # Process only Image files
      input_file_path = os.path.join(input_folder_path, filename)
      output_file_path = os.path.join(output_folder_path, filename)

      predictions = predict_image(input_file_path)
      keyboards_data = extract_keyboard_and_detect_edges(input_file_path, predictions)
      draw_keyboard_bounding_box_on_image(input_file_path, keyboards_data, output_file_path)

In [None]:
file_classify("Input_002.jpg", input_folder_path, output_folder_path)

{'xmin': 720, 'ymin': 1715, 'xmax': 2625, 'ymax': 2225}
Saved keyboard bounding box image to /home/tan90/SDP/Images/Input_001.jpg
