In [None]:
import cv2
import numpy as np

In [None]:
# Pick the image-display helper for the current environment: a regular OpenCV
# window everywhere except Google Colab, where the cv2_imshow patch is used.
if "google.colab" not in str(get_ipython()):

    def imshow(img):
        """Show `img` in a window named "ImageWindow" and block until a key press."""
        cv2.imshow("ImageWindow", img)
        cv2.waitKey()

        cv2.destroyAllWindows()

else:
    from google.colab.patches import cv2_imshow

    imshow = cv2_imshow


In [None]:
# Load the two sample photos and shrink them to 20% of their original size.
# cv2.imread returns None (it does not raise) when a file cannot be read, so
# check explicitly instead of failing later with an obscure cv2.resize error.
img1 = cv2.imread('resources/img1.jpg')
img2 = cv2.imread('resources/img4.jpg')
if img1 is None or img2 is None:
    raise FileNotFoundError(
        "Could not read 'resources/img1.jpg' and/or 'resources/img4.jpg'"
    )
img1 = cv2.resize(img1, None, fx=0.2, fy=0.2)
img2 = cv2.resize(img2, None, fx=0.2, fy=0.2)

In [None]:
# Grayscale version of the (already down-scaled) first photo.
img1_gray = cv2.cvtColor(img1, cv2.COLOR_BGR2GRAY)

## Helper functions

In [None]:
def perspective_transform(image, corners):
    """Warp the quadrilateral given by `corners` onto an upright rectangle.

    `corners` must contain four (x, y) points, unpacked here as
    (bottom_l, bottom_r, top_r, top_l). They are mapped, in that order, onto
    (0, 0), (w - 1, 0), (w - 1, h - 1) and (0, h - 1) of the output.

    The output width is the longer of the bottom_r-bottom_l and top_r-top_l
    edges, the height the longer of the top_r-bottom_r and top_l-bottom_l
    edges (both truncated to int). Returns the warped image of size (w, h).
    """

    def _edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    bottom_l, bottom_r, top_r, top_l = corners

    width = max(int(_edge_length(bottom_r, bottom_l)), int(_edge_length(top_r, top_l)))
    height = max(int(_edge_length(top_r, bottom_r)), int(_edge_length(top_l, bottom_l)))

    target_rect = np.array(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
        dtype="float32",
    )
    source_quad = np.array(corners, dtype="float32")

    warp_matrix = cv2.getPerspectiveTransform(source_quad, target_rect)
    return cv2.warpPerspective(image, warp_matrix, (width, height))

def order_corner_points(corners):
    """Reorder four contour-style corners into (top_l, top_r, bottom_r, bottom_l).

    Each entry of `corners` is read as entry[0][0] (x) and entry[0][1] (y),
    and the input is taken to be in (top_r, top_l, bottom_l, bottom_r) order.
    Returns a 4-tuple of (x, y) tuples.
    """
    points = [(entry[0][0], entry[0][1]) for entry in corners]
    # Swap within each pair: indices (0, 1, 2, 3) -> (1, 0, 3, 2).
    return (points[1], points[0], points[3], points[2])

def find_extreme_points(contour):
    """Return the four vertices of the minimum-area rectangle around `contour`.

    The vertices come from cv2.boxPoints, truncated to integers, and are
    returned as four (x, y) tuples labelled
    (bottom_left, bottom_right, top_right, top_left) = box[0], box[1], box[2], box[3].

    NOTE(review): the labels assume a fixed vertex order from cv2.boxPoints;
    confirm that this holds for rotated rectangles.
    """
    rect = cv2.minAreaRect(contour)
    # np.int0 was removed in NumPy 2.0; np.intp is the integer type it aliased.
    box = cv2.boxPoints(rect).astype(np.intp)

    bottom_left = tuple(box[0])
    bottom_right = tuple(box[1])
    top_right = tuple(box[2])
    top_left = tuple(box[3])

    return bottom_left, bottom_right, top_right, top_left

def detect_corners(image):
    """Estimate the four corners of the dominant object in a BGR image.

    Pipeline: grayscale -> 5x5 box blur -> Sobel gradient magnitude ->
    binary threshold at 30 -> external contours -> minimum-area rectangle of
    the contour with the largest area (via `find_extreme_points`).

    Returns an array of the four corner points, or an empty array of shape
    (0, 2) when no contour is found, so callers can test `len(result) > 0`.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred_image = cv2.blur(gray, (5, 5))

    sobel_x = cv2.Sobel(blurred_image, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(blurred_image, cv2.CV_64F, 0, 1, ksize=3)
    magnitude = np.sqrt(sobel_x**2 + sobel_y**2)
    # Saturate before converting: a plain float -> uint8 cast does not clamp,
    # so magnitudes above 255 (the strongest edges) would end up as arbitrary
    # low values and could fall below the threshold.
    magnitude = np.clip(magnitude, 0, 255).astype(np.uint8)

    _, thresh = cv2.threshold(magnitude, 30, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # max() raises ValueError on an empty sequence; report "nothing found".
    if len(contours) == 0:
        return np.empty((0, 2), dtype=np.intp)

    largest_contour = max(contours, key=cv2.contourArea)
    # An earlier experiment used cv2.approxPolyDP (epsilon = 2% of the arc
    # length) and kept the result only when it had exactly four vertices; the
    # minimum-area rectangle is used instead.
    return np.array(find_extreme_points(largest_contour))

## Detecting objects on a photo

In [None]:
def detect_objects(image, min_area=4_000):
    """Find large edge-bounded regions in a BGR image and crop them out.

    Pipeline: grayscale -> Sobel gradient magnitude -> binary threshold at
    twice the mean magnitude -> external contours -> keep contours whose area
    exceeds `min_area` -> axis-aligned bounding boxes.

    Args:
        image: BGR image array.
        min_area: minimum contour area (in pixels) for a region to be kept.

    Returns:
        (detected_objects, detected_cords): the cropped sub-images (slices of
        `image`) and the matching (x, y, w, h) bounding boxes, in the same order.
    """
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    sobel_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    sobel_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)

    # Saturate before converting: a plain float -> uint8 cast does not clamp,
    # so magnitudes above 255 (the strongest edges) would be corrupted.
    sobel_combined = np.clip(np.sqrt(sobel_x**2 + sobel_y**2), 0, 255).astype(np.uint8)

    _, binary_image = cv2.threshold(
        sobel_combined, np.mean(sobel_combined) * 2, 255, cv2.THRESH_BINARY
    )
    contours, _ = cv2.findContours(binary_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    detected_objects = []
    detected_cords = []
    for contour in contours:
        if cv2.contourArea(contour) <= min_area:
            continue  # too small to be a card / deck
        x, y, w, h = cv2.boundingRect(contour)
        detected_cords.append((x, y, w, h))
        detected_objects.append(image[y:y + h, x:x + w])

    return detected_objects, detected_cords

In [None]:
# Crop candidate objects out of the second photo; `detected_cords` holds the
# matching (x, y, w, h) bounding boxes.
detected_objects, detected_cords = detect_objects(img2)

In [None]:
# Replace each detected crop by its perspective-rectified version whenever
# corners could be found for it.
for i, obj in enumerate(detected_objects):
    corners = detect_corners(obj)
    # The closing parenthesis was misplaced (`len(corners > 0)`), which took
    # the length of a boolean mask instead of checking that corners exist.
    if len(corners) > 0:
        transformed_image = perspective_transform(obj, corners)
        detected_objects[i] = transformed_image

### The idea is to create templates for matching different objects. So far:
 - Capitol
 - Any non-reversed card (the template for a card is just a random card)

In [None]:
# corners = detect_corners(template4)
# print(len(corners))
# transformed = perspective_transform(template4, corners)
# imshow(transformed)

In [None]:
# Class labels, indexed by template position: draw_detections labels an object
# with classes[k] where k is the index of its best-matching template.
# NOTE(review): there are 7 labels here but the `templates` list has only 6
# entries, so the labels from index 4 onwards may not line up with the
# templates at those positions - confirm the intended mapping.
classes = ['Capitol', 'Unit', 'Support', 'Deck', 'Opponent_capitol', 'Opponent_deck', 'Opponent_support']

In [None]:
# Load the template images. The position of a template in `templates` is the
# class index used when labelling detections.
template1 = cv2.imread('resources/template3.png')
template2 = cv2.imread('resources/template4.png')
template3 = cv2.imread('resources/template6.png')
template4 = cv2.imread('resources/deck_template.png')
template5 = cv2.imread('resources/reverse_deck.png')
template6 = cv2.imread('resources/template1.png')
# NOTE(review): Opp_capitol is loaded but not included in `templates`.
Opp_capitol = cv2.imread('resources/template2.png')
templates = [template1, template2, template3, template4, template5, template6]

# cv2.imread returns None (it does not raise) for unreadable files; fail here
# rather than later inside cv2.cvtColor during template matching.
if Opp_capitol is None or any(template is None for template in templates):
    raise FileNotFoundError("One or more template images could not be read from 'resources/'")
# for template in templates:
#   imshow(template)

### Used templates:
![Capitol template](resources/template3.png)
![Unit template](resources/template4.png)
![Support template](resources/template6.png)
![Deck template](resources/deck_template.png)

## Matching templates

Each template is scaled to the size of a detected object, and every template is matched against every object to determine which objects match it. Instead of relying on a hard-coded value, we would like to explore classifying each object by its most similar template, provided the score passes a small threshold that excludes noise.

In [None]:
def match_templates(templates, detected_objects):
    """Score every detected object against every template.

    Each template is converted to grayscale, resized to the exact size of the
    object and compared with cv2.TM_CCOEFF_NORMED; the maximum of the match
    result is recorded as the score.

    Args:
        templates: list of BGR template images.
        detected_objects: list of BGR object crops.

    Returns:
        A list with one inner list per detected object; inner list `i` holds
        one score per template, in template order.
    """
    determined_classes = [[] for _ in range(len(detected_objects))]
    if len(templates) == 0 or len(detected_objects) == 0:
        return determined_classes

    # Grayscale conversions do not depend on the pairing, so do each one once
    # instead of once per (template, object) pair.
    objects_gray = [cv2.cvtColor(obj, cv2.COLOR_BGR2GRAY) for obj in detected_objects]

    for template in templates:
        template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY)
        for scores, obj_gray in zip(determined_classes, objects_gray):
            height, width = obj_gray.shape[:2]
            # cv2.resize expects the target size as (width, height).
            template_resized = cv2.resize(template_gray, (width, height))
            result = cv2.matchTemplate(obj_gray, template_resized, cv2.TM_CCOEFF_NORMED)
            _, max_val, _, _ = cv2.minMaxLoc(result)
            scores.append(max_val)

    return determined_classes

In [None]:
# Display every detected object crop, one after another.
for obj in detected_objects:
  imshow(obj)

In [None]:
# One list of per-template match scores for each detected object.
determined_classes = match_templates(templates, detected_objects)

In [None]:
def draw_detections(image, detected_cords, determined_classes, classes, threshold=0.2):
    """Draw a labelled bounding box for every detection on a copy of `image`.

    Args:
        image: BGR image; it is not modified.
        detected_cords: list of (x, y, width, height) bounding boxes.
        determined_classes: per-detection lists of template match scores,
            indexed like `detected_cords`.
        classes: class names, indexed by template position.
        threshold: the best score must exceed this value for the detection to
            be labelled with a class; otherwise it is 'Not classified'.

    Returns:
        The annotated copy of `image`.
    """
    # One colour per template index; the last entry is also used for
    # detections that are not classified.
    colors = [(0, 255, 0), (255, 0, 0), (0, 0, 255), (255, 255, 0), (0, 255, 255), (120, 120, 120)]

    original_image = image.copy()
    for i, (x, y, width, height) in enumerate(detected_cords):
        scores = determined_classes[i]

        label = 'Not classified'
        color = colors[-1]
        # np.argmax raises on an empty sequence; such detections stay unclassified.
        if len(scores) > 0:
            class_num = int(np.argmax(scores))
            if scores[class_num] > threshold:
                label = classes[class_num]
                # Wrap around so more templates than colours cannot raise IndexError.
                color = colors[class_num % len(colors)]

        # Draw the rectangle
        cv2.rectangle(original_image, (x, y), (x + width, y + height), color, 2)
        # Keep the label inside the image for boxes that touch the top edge.
        cv2.putText(original_image, label, (x, max(y - 10, 15)),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

    return original_image

In [None]:
# Show the second photo with the labelled bounding boxes drawn on it.
imshow(draw_detections(img2, detected_cords, determined_classes, classes))

In [None]:
# Debug check: print the label stored at index 4 of `classes`.
print(classes[4])

In [None]:
video_path = 'resources/vid3.mp4'  # Replace with the path to your video file
cap = cv2.VideoCapture(video_path)

# Raise instead of calling exit(): in a notebook exit() tears down the kernel
# session rather than just stopping this cell.
if not cap.isOpened():
    raise IOError(f"Error: Couldn't open video: {video_path}")

# Get video information
fps = cap.get(cv2.CAP_PROP_FPS)
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

# Define the codec and create a video writer object
fourcc = cv2.VideoWriter_fourcc(*'XVID')  # You can change the codec as needed
output_video_path = 'outputs/video.avi'  # Replace with the desired output video file path
out = cv2.VideoWriter(output_video_path, fourcc, fps, (frame_width, frame_height))

frame_number = 0
process_every_n_frames = 20  # Run detection only on every 20th frame
deck_detected = False
try:
    # A writer that failed to open drops every frame silently, so check it up front.
    if not out.isOpened():
        raise IOError(f"Error: Couldn't open output video for writing: {output_video_path}")

    while True:
        ret, frame = cap.read()
        if not ret:
            break

        # Detection is expensive, so refresh it only every N frames; frames in
        # between reuse the most recent boxes and scores.
        if frame_number % process_every_n_frames == 0:
            detected_objects, detected_cords = detect_objects(frame)
            determined_classes = match_templates(templates, detected_objects)
            # The deck is template index 3; it counts as present when it is the
            # best match of some object with a score above 0.2.
            deck_detected = any(
                np.argmax(scores) == 3 and max(scores) > 0.2
                for scores in determined_classes
            )

        if not deck_detected:
            cv2.putText(frame, "Card Drawn!", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2)

        # Draw rectangles and labels on the frame
        frame = draw_detections(frame, detected_cords, determined_classes, classes)

        # Write the frame to the output video
        out.write(frame)

        frame_number += 1
finally:
    # Release the capture and writer even if processing fails part-way, so
    # the output file is finalised and the input is not left locked.
    cap.release()
    out.release()

# No OpenCV window is opened by this cell, so the waitKey / destroyAllWindows
# calls were no-ops and have been dropped.
print(frame_number)


## Problems:
 - there are 6 factions, each with cards in a different color
 - images will probably need to be preprocessed to compensate for lighting

We thought about 2 ways of dealing with the first problem:
1. Based on the results below, it could be possible to create generic (gray) templates that match every faction and, if possible, exclude the art from the template so that it is not compared

2. Create 6 sets of templates, one for each faction. This is pretty self-explanatory: 6 is not a big number, so it should not greatly increase the computational cost, and this approach might yield better results

In [None]:
# detect_objects returns (crops, bounding_boxes). Unpack it so that
# `detected_objects` is the list of crops that match_templates expects, not
# the whole tuple.
detected_objects, detected_cords = detect_objects(img2)

In [None]:
# Display the raw per-object score lists (one score per template).
match_templates(templates, detected_objects)

## Conclusions

We believe we could make this method work pretty well on photos. However, we are not sure how it generalizes to video: running the computations on every frame is probably quite intensive, so we could try detecting objects at the start of a video and then classify only the new objects that were moved.

We would greatly appreciate feedback and any form of guidance towards a reasonable approach.

In [None]:
# Experiment: rectify the card in `template2` to a fixed 200x300 image using
# the minimum-area rectangle around its largest edge contour.
gray = cv2.cvtColor(template2, cv2.COLOR_BGR2GRAY)

kernel_size = (7, 7)

blurred_image = cv2.blur(gray, kernel_size)

sobel_x = cv2.Sobel(blurred_image, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(blurred_image, cv2.CV_64F, 0, 1, ksize=3)

sobel_combined = np.sqrt(sobel_x**2 + sobel_y**2)

# Saturate before converting: a plain float -> uint8 cast does not clamp, so
# magnitudes above 255 would be corrupted.
sobel_combined = np.clip(sobel_combined, 0, 255).astype(np.uint8)

imshow(sobel_combined)

contours, _ = cv2.findContours(sobel_combined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

largest_contour = max(contours, key=cv2.contourArea)

# Simplify the contour (tolerance: 2% of its perimeter) before fitting the box.
epsilon = 0.02 * cv2.arcLength(largest_contour, True)
approx = cv2.approxPolyDP(largest_contour, epsilon, True)

rect = cv2.minAreaRect(approx)
box = cv2.boxPoints(rect)
# np.int0 was removed in NumPy 2.0; np.intp is the integer type it aliased.
box = box.astype(np.intp)

width, height = 200, 300
dst_pts = np.array([[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]], dtype="float32")
matrix = cv2.getPerspectiveTransform(box.astype("float32"), dst_pts)

rectified_card = cv2.warpPerspective(template2, matrix, (width, height))

imshow(template2)
imshow(rectified_card)

In [None]:
def order_corner_points(corners):
    """Return the four corners as (top_l, top_r, bottom_r, bottom_l).

    The input is read contour-style: each entry holds its point at index 0,
    i.e. (entry[0][0], entry[0][1]), and the entries are taken to be ordered
        0 - top-right, 1 - top-left, 2 - bottom-left, 3 - bottom-right.
    """
    flat = []
    for entry in corners:
        point = entry[0]
        flat.append((point[0], point[1]))

    top_r, top_l, bottom_l, bottom_r = flat[0], flat[1], flat[2], flat[3]
    return (top_l, top_r, bottom_r, bottom_l)

In [None]:
def perspective_transform(image, corners):
    """Warp the quadrilateral given by contour-style `corners` to a top-down view.

    Unlike a variant that unpacks `corners` directly, this one first passes
    them through `order_corner_points`, so `corners` must be indexable as
    corner[0][0] / corner[0][1]. The ordered points
    (top_l, top_r, bottom_r, bottom_l) are mapped onto
    (0, 0), (w - 1, 0), (w - 1, h - 1), (0, h - 1).

    Returns the warped image of size (w, h), where w and h are the longer of
    the two opposite edge lengths, truncated to int.
    """
    ordered = order_corner_points(corners)
    top_l, top_r, bottom_r, bottom_l = ordered

    def _span(p, q):
        # Straight-line distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    # Output width: the longer of the bottom and top edges.
    bottom_edge = int(_span(bottom_r, bottom_l))
    top_edge = int(_span(top_r, top_l))
    width = max(bottom_edge, top_edge)

    # Output height: the longer of the right and left edges.
    right_edge = int(_span(top_r, bottom_r))
    left_edge = int(_span(top_l, bottom_l))
    height = max(right_edge, left_edge)

    # Destination rectangle, listed in the same order as the ordered corners.
    destination = np.array(
        [[0, 0], [width - 1, 0], [width - 1, height - 1], [0, height - 1]],
        dtype="float32",
    )
    source = np.array(ordered, dtype="float32")

    homography = cv2.getPerspectiveTransform(source, destination)
    return cv2.warpPerspective(image, homography, (width, height))

In [None]:
# Experiment: find the largest edge contour in `template2` and outline it.
image = template2.copy()

# Convert the image to grayscale
gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

kernel_size = (7, 7)

blurred_image = cv2.blur(gray, kernel_size)

sobel_x = cv2.Sobel(blurred_image, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(blurred_image, cv2.CV_64F, 0, 1, ksize=3)

sobel_combined = np.sqrt(sobel_x**2 + sobel_y**2)

# Saturate before converting: a plain float -> uint8 cast does not clamp, so
# magnitudes above 255 would be corrupted.
sobel_combined = np.clip(sobel_combined, 0, 255).astype(np.uint8)

# Apply thresholding to obtain a binary image
_, thresh = cv2.threshold(sobel_combined, 50, 255, cv2.THRESH_BINARY)

# Find contours in the binary image
contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

largest_contour = max(contours, key=cv2.contourArea)
# cv2.drawContours() was called without arguments, which raises TypeError.
# Draw the largest contour in green, 2 px thick, on the working copy.
cv2.drawContours(image, [largest_contour], -1, (0, 255, 0), 2)
imshow(sobel_combined)
imshow(image)