# AI boys: chess detection project

**Members [ROLES]:**
- Kridbhume Chammanard [Image Processing]
- Ting-Yi Lin [Model Development]
- Thana Wanavit [Image Processing]
- Norapath Arjanurak [Model Development]
- Pattaradanai Lakkananithiphan [Pre- & Post-processing]

### Import the necessary libraries

In [82]:
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import mediapipe as mp
from collections import Counter
import chess
import chess.pgn

import os
import random

from PIL import Image
from ultralytics import YOLO
from dotenv import dotenv_values
from ultralytics.utils.ops import scale_image

import json

### Constant Settings

In [83]:
# Initialise the MediaPipe Hands solution once at module level; the shared
# `hands` object is the one hands_detected() calls `process` on.
mp_hands = mp.solutions.hands
hands = mp_hands.Hands(min_detection_confidence=0.2, min_tracking_confidence=0.5)
# File letters, looked up by the cell index along the warped image's x axis:
# index 0 -> "h", index 7 -> "a".
X_INDEX = ["h", "g", "f", "e", "d", "c", "b", "a"]
# Rank digits, looked up by the cell index along the warped image's y axis:
# index 0 -> "1", index 7 -> "8".
Y_INDEX = [str(rank) for rank in range(1, 9)]

# Number of consecutive identical sampled states required before a state
# is accepted by summarize_states (noise filter).
TOLERANCE = 2
# Pixels trimmed from the top and bottom of the centred square crop.
CROP = 50
# Frame sampling stride: only frames whose index is a multiple of RATE are analysed.
RATE = 20
# Minimum detection confidence kept by ChessPredicter.format_output.
CONF = 0.7

# Colour / class names as they appear in the detector's labels.
BLACK = "black"
WHITE = "white"
KING = "king"

# Piece class -> UCI promotion suffix (a pawn has no suffix).
PROMOTE_KEY = dict(knight="n", king="k", bishop="b", queen="q", rook="r", pawn="")
# Piece class -> piece symbol letter (lower case; callers adjust the case per colour).
SYM_KEY = dict(knight="n", king="k", bishop="b", queen="q", rook="r", pawn="p")

# Location of the trained detector weights.
MODEL_PATH = "chess_model/runs/detect/chess_data_model_fixed_data_bach8_epoch82/weights/best.pt"
# Directory containing the input videos ("" selects main()'s placeholder branch).
DATA_PATH = ""
# CSV file written by main().
OUTPUT_PATH = "submission.csv"

### Model Inference

In [84]:
class ChessPredicter:
    """Wrapper around the YOLO detector that yields per-frame piece boxes."""

    def __init__(self, model_path=MODEL_PATH):
        # Load the weights once; the same model object serves every frame.
        self.model = YOLO(model_path)

    def predict_image(self, img):
        """Run the model on ``img`` and return its first result as a JSON string."""
        first_result = self.model(img)[0]
        return first_result.to_json()

    def format_output(self, result):
        """Convert the JSON from ``predict_image`` into detection tuples.

        Each kept detection becomes ``(name, x, y, w, h)`` where ``x``/``y``
        are the box's ``x1``/``y1`` and ``w``/``h`` are the box extents.
        Detections with a confidence below ``CONF`` are discarded.
        """
        kept = []
        for detection in json.loads(result):
            box = detection['box']
            row = (
                detection['name'],
                box['x1'],
                box['y1'],
                box['x2'] - box['x1'],
                box['y2'] - box['y1'],
            )
            if detection['confidence'] >= CONF:
                kept.append(row)
        return kept

    def get_state(self, img):
        """Detect pieces in ``img`` and return the filtered detection tuples."""
        return self.format_output(self.predict_image(img))

### Image preprocessing

In [85]:
class ChessboardProcessor:
    """Find the chessboard in a frame and warp it to a fixed-size square view.

    Typical use is ``ChessboardProcessor(frame).rotate_and_warp(points)``,
    which returns the warped board image together with the given points
    mapped into the warped image's coordinates.
    """

    WIDTH = 640   # Fixed width of the warped output (grid cells are 80x80 px)
    HEIGHT = 640  # Fixed height of the warped output

    # Define max and min contour areas for inner-square detection
    MAX_CONTOUR_AREA = 40000
    MIN_CONTOUR_AREA = 7000

    # NOTE(review): rotate_and_warp stores the last good warp through `self`,
    # which creates an instance attribute and leaves this class-level value
    # at None. A caller that builds a new processor for every frame therefore
    # never reaches the "reuse last warp" fallback. Left unchanged on purpose:
    # making it persist would change what callers receive on bad frames.
    last_warped_image = None

    def __init__(self, image):
        """Store the BGR frame that the other methods operate on."""
        self.image = image
        self.warped_image = None
        # Not written anywhere in this class; kept so existing readers of the
        # attribute keep working.
        self.homo_matrix = None
        # Perspective matrix; set by warp_image().
        self.transformation_matrix = None

    # Utility function to display images
    def display_image(self, img, title="Image"):
        """Show a BGR image with matplotlib under the given title."""
        plt.figure(figsize=(6, 6))
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis('off')
        plt.title(title)
        plt.show()

    def reorder(self, pts):
        """Order four points as top-left, top-right, bottom-left, bottom-right.

        This is the same order as the destination points used by
        ``warp_image``. Returns a float32 array of shape (4, 1, 2).
        """
        pts = pts.reshape((4, 2))
        new_pts = np.zeros((4, 1, 2), dtype="float32")
        sum_pts = pts.sum(1)              # x + y
        diff_pts = np.diff(pts, axis=1)   # y - x

        new_pts[0] = pts[np.argmin(sum_pts)]   # Top-left: smallest x + y
        new_pts[3] = pts[np.argmax(sum_pts)]   # Bottom-right: largest x + y

        new_pts[1] = pts[np.argmin(diff_pts)]  # Top-right: smallest y - x
        new_pts[2] = pts[np.argmax(diff_pts)]  # Bottom-left: largest y - x

        return new_pts

    def find_largest_contour(self, max_board):
        """Return the ordered corners of the largest 4-sided contour, or None.

        ``max_board`` is a single-channel canvas on which the inner squares
        have been drawn; the largest quadrilateral is assumed to be the board.
        """
        # Dilate the image to connect inner square lines
        kernel = np.ones((5, 5), np.uint8)
        dilated_max_board = cv2.dilate(max_board, kernel, iterations=1)

        # Find the contours of dilated image
        outer_contours, _ = cv2.findContours(dilated_max_board, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        largest_pts = None

        # Find contour polygon of largest area (assume to be chessboard)
        max_area = 0
        for contour in outer_contours:
            area = cv2.contourArea(contour)
            if area > 5000:  # Ignore small contours
                peri = cv2.arcLength(contour, True)
                approx = cv2.approxPolyDP(contour, 0.02 * peri, True)
                if area > max_area and len(approx) == 4:
                    largest_pts = approx
                    max_area = area

        return self.reorder(largest_pts) if largest_pts is not None else None

    def find_corners(self):
        """Detect the board's outer corners in ``self.image``.

        Pipeline: grayscale -> histogram equalisation -> Otsu threshold ->
        Canny -> dilation -> probabilistic Hough lines -> contours of the
        line image filtered to square-like quadrilaterals -> largest
        quadrilateral enclosing them.

        Returns the corners as produced by ``reorder`` or None when no
        suitable quadrilateral is found.
        """
        # Grayscale and Equalize (counter brightness issues)
        gray_image = cv2.cvtColor(self.image, cv2.COLOR_BGR2GRAY)
        equalized_image = cv2.equalizeHist(gray_image)

        # Otsu's thresholding
        _, otsu_binary = cv2.threshold(equalized_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

        # Canny edges
        canny = cv2.Canny(otsu_binary, 0, 255)

        # Dilation (to connect gaps in edges)
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (7, 7))
        img_dilation = cv2.dilate(canny, kernel, iterations=1)

        # Hough Lines detection
        lines = cv2.HoughLinesP(img_dilation, 1, np.pi / 180, threshold=500, minLineLength=150, maxLineGap=100)

        black_image = np.zeros_like(img_dilation)  # Canvas for drawing the detected lines
        max_board = np.zeros_like(img_dilation)    # Canvas for the accepted inner squares

        # Draw resulting lines
        if lines is not None:
            for line in lines:
                x1, y1, x2, y2 = line[0]
                cv2.line(black_image, (x1, y1), (x2, y2), (255, 255, 255), 2)

        # Dilation to make lines thicker and more visible
        kernel = np.ones((3, 3), np.uint8)
        black_image = cv2.dilate(black_image, kernel, iterations=1)  # Black image with lines representing grids

        # Find contours in the dilated image
        contours, _ = cv2.findContours(black_image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)

        # Loop through contours to find inner squares
        for contour in contours:
            epsilon = 0.02 * cv2.arcLength(contour, True)
            approx = cv2.approxPolyDP(contour, epsilon, True)  # Approximate contours representing polygons

            if len(approx) != 4:  # Only approx 4 sides
                continue

            (x, y, w, h) = cv2.boundingRect(approx)
            area = cv2.contourArea(contour)
            if not (self.MIN_CONTOUR_AREA < area < self.MAX_CONTOUR_AREA):
                continue

            # Decrease chances of finding rectangles instead
            aspect_ratio = w / float(h)
            if not (0.2 <= aspect_ratio <= 1.3):
                continue

            pt1, pt2, pt3, pt4 = [tuple(pt[0]) for pt in approx]

            # Copy the quadrilateral into the separate canvas used to find the
            # outer square.
            # NOTE(review): pt1-pt3 and pt2-pt4 join non-consecutive vertices
            # of `approx`; kept exactly as in the original drawing.
            cv2.line(max_board, pt1, pt2, (255, 255, 255), 7)
            cv2.line(max_board, pt1, pt3, (255, 255, 255), 7)
            cv2.line(max_board, pt2, pt4, (255, 255, 255), 7)
            cv2.line(max_board, pt3, pt4, (255, 255, 255), 7)

        corners = self.find_largest_contour(max_board)
        return corners

    # Applies perspective transformation to get a bird's-eye view
    def warp_image(self, corners):
        """Warp ``self.image`` so ``corners`` map onto a WIDTH x HEIGHT square.

        ``corners`` must be in the order produced by ``reorder``. The
        perspective matrix is stored in ``self.transformation_matrix`` and
        the warped image in ``self.warped_image``; the warped image is
        returned.
        """
        original_pts = np.float32(corners)
        new_pts = np.float32([[0, 0], [self.WIDTH, 0], [0, self.HEIGHT], [self.WIDTH, self.HEIGHT]])
        self.transformation_matrix = cv2.getPerspectiveTransform(original_pts, new_pts)
        self.warped_image = cv2.warpPerspective(self.image, self.transformation_matrix, (self.WIDTH, self.HEIGHT))
        return self.warped_image

    # Draws points on the given image
    def draw_points(self, img, pts, color=(0, 0, 255), size=10):
        """Draw a filled circle on ``img`` (in place) at every point in ``pts``."""
        for pt in pts:
            pt = tuple(map(int, pt))  # Convert to (x, y) tuple
            cv2.circle(img, pt, size, color, -1)

    # Generates random points within the image dimensions
    def generate_random_points(self, num_points=4):
        """Return ``num_points`` random ``("piece i", x, y)`` tuples inside the image."""
        height, width, _ = self.image.shape

        # Generate random points
        random_points = [(random.randint(0, width - 1), random.randint(0, height - 1)) for _ in range(num_points)]
        random_points = np.float32(random_points)

        # Create a list of tuples with (piece, x_coord, y_coord)
        piece_list = []
        for i, (x_coord, y_coord) in enumerate(random_points, start=1):
            piece_list.append((f"piece {i}", x_coord, y_coord))

        return piece_list

    # Main function to process the image
    def rotate_and_warp(self, detection_cg):
        """Warp the frame to the board and map the given points along with it.

        Parameters:
            detection_cg: iterable of ``(piece, x, y)`` in frame coordinates.

        Returns:
            ``(warped_image, [(piece, x_new, y_new), ...])`` when the board
            is found; ``(last_warped_image, [])`` when it is not found but a
            previous warp is stored on this object; ``(None, None)``
            otherwise.
        """
        # Find the chessboard corners in the frame
        corners = self.find_corners()

        if corners is None:
            # If no corners are detected, use the last valid warped image
            if self.last_warped_image is not None:
                return self.last_warped_image, []  # Last warped image and empty points
            return None, None

        # Warp the image based on the detected chessboard corners
        warped_image = self.warp_image(corners)

        # Freeze the iteration order once: the input may be a set, and it is
        # walked both to build the point array and to pair the results back.
        detections = list(detection_cg)

        transformed_points_list = []
        # Skip the transform entirely when nothing was detected, so that no
        # empty array is handed to cv2.perspectiveTransform.
        if detections:
            original_points = np.array(
                [[x, y] for _, x, y in detections], dtype=np.float32
            ).reshape(-1, 1, 2)

            # Apply perspective transformation to the original points
            transformed_points = cv2.perspectiveTransform(original_points, self.transformation_matrix)

            for (piece, _, _), point in zip(detections, transformed_points):
                x_new, y_new = point[0]
                transformed_points_list.append((piece, x_new, y_new))

        # Store the last valid warped image
        self.last_warped_image = warped_image

        return warped_image, transformed_points_list


W0000 00:00:1733844236.247571   61078 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


### Program loop

In [86]:
# legacy

# key for PGN
KEY = {"knight":"N","king":"K","bishop":"B","queen":"Q","rook":"R","pawn":""}


def _starting_color(first_appeared):
    """Return "white"/"black" when all pieces that appeared in the first move
    share that colour (one piece, or two for castling); otherwise None."""
    if len(first_appeared) not in (1, 2):
        return None
    colors = {piece[0] for piece in first_appeared}
    if colors == {"white"}:
        return "white"
    if colors == {"black"}:
        return "black"
    return None


# a function to convert differences directly to a simplified version of PGN
def simple_pgn_from_differences(differences):
    """Convert state differences into a simplified PGN movetext string.

    Parameters:
        differences (list): ``(disappeared, appeared)`` pairs, one per move.
            Each element of those collections is a piece tuple
            ``(color, piece_class, file, rank)``.

    Returns:
        str: numbered moves followed by ``*``, e.g. ``"1. Nf3 e5 *"``.
        When the first move is black's, ``..`` stands in for white's first
        move. An empty ``differences`` yields ``"*"``.

    Differences that cannot be interpreted as a move are skipped (they do
    not appear in the output).
    """
    pgn_moves = []

    # for each move
    for disappeared, appeared in differences:

        # the number of pieces that disappeared and appeared
        l_d = len(disappeared)
        l_a = len(appeared)

        print("appeared:", appeared)
        print("disappeared:", disappeared)
        print("l_a:",l_a)
        print("l_d:", l_d)

        move = None  # stays None when the difference is not a recognisable move

        try:
            if l_d == 1 and l_a == 1: # movement or promotion

                old = list(disappeared)[0]
                new = list(appeared)[0]

                if old[:2] == new[:2]: # if the class and color is the same -> movement
                    print(1.1)
                    move = f"{KEY[new[1]]}{new[2]}{new[3]}"
                else: # promotion
                    print(1.2)
                    move = f"{KEY[old[1]]}{new[2]}{new[3]}={KEY[new[1]]}"

            elif l_d == 2 and l_a == 1: # capturing

                new = list(appeared)[0]
                old = [piece for piece in disappeared if piece[:2] == new[:2]][0] # the capturer

                if new[1] == "pawn":
                    # pawn captures are prefixed with the capturer's file
                    print(2.1)
                    move = f"{KEY[new[1]]}{old[2]}x{new[2]}{new[3]}"
                else:
                    print(2.2)
                    move = f"{KEY[new[1]]}x{new[2]}{new[3]}"

            elif l_d == 2 and l_a == 2: # castling: two pieces leave, two arrive

                # find king's position
                new_king = [piece for piece in appeared if piece[1] == KING][0]

                if new_king[2] == "g": # king side
                    print(3.1)
                    move = "O-O"
                elif new_king[2] == "c": # queen side
                    print(3.2)
                    move = "O-O-O"

        except (IndexError, KeyError, TypeError):
            # No matching capturer / king, unknown piece class or a malformed
            # piece tuple: treat the difference as unreadable.
            move = None

        if move is not None:
            pgn_moves.append(move)

    # find the color of the first move (None when there is nothing to inspect)
    turn = _starting_color(differences[0][1]) if differences else None

    # add ".." in case black starts
    if turn == "black":
        pgn_moves = [".."] + pgn_moves

    # combine the moves into pairs and add star of incompletion
    l_white = pgn_moves[0::2]
    l_black = pgn_moves[1::2]
    tokens = [
        f"{number}. {white} {black}"
        for number, (white, black) in enumerate(zip(l_white, l_black), start=1)
    ]
    if len(l_white) > len(l_black):
        # trailing half-move without a reply
        tokens.append(f"{len(l_white)}. {l_white[-1]}")
    tokens.append("*")
    pgn = " ".join(tokens)

    print(pgn_moves)

    return pgn

W0000 00:00:1733844236.258034   61090 inference_feedback_manager.cc:114] Feedback manager requires a model with a single signature inference. Disabling support for feedback tensors.


In [101]:
# wrap the video reader
def frame_generator(video_path):
    """Yield the frames of the video at ``video_path`` one at a time.

    Prints an error and yields nothing when the file cannot be opened.
    The capture is released when the video ends, when the consumer stops
    iterating early (generator closed), or when an exception propagates.
    """
    # Open the video file
    cap = cv2.VideoCapture(video_path)
    try:
        if not cap.isOpened():
            print("Error: Could not open video file.")
            return

        while True:
            ret, frame = cap.read()
            if not ret:
                break  # End of video
            yield frame  # Yield frame to the caller
    finally:
        # Runs on normal exhaustion, early close and errors alike.
        cap.release()

# Function to check if hands are detected in a frame
def hands_detected(frame):
    """Return True when the shared MediaPipe ``hands`` detector reports at
    least one hand in the BGR ``frame``, otherwise False."""
    # MediaPipe works on RGB input, so swap the channel order first.
    detection = hands.process(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

    # multi_hand_landmarks stays None when no hand was found.
    if detection.multi_hand_landmarks is None:
        return False
    return True

# summarize state list into easy to use format
def summarize_states(lst, tolerance):
    """Reduce a per-frame state list to the differences between stable states.

    Parameters:
        lst (list): board states in frame order; each state is a set of
            piece tuples.
        tolerance (int): number of consecutive identical states required
            before a state counts as stable.

    Returns:
        list: ``(disappeared, appeared)`` set pairs, one for each transition
        between consecutive stable states.

    A stable state that is interrupted by a short (below-tolerance) burst of
    noise and then resumes is recorded only once, so noise does not create
    an empty ``(set(), set())`` difference.
    """
    # Step 1: Clean the state list of noises
    stable_states = []
    previous = None
    run_length = 0     # how many times the current state has been seen in a row
    recorded = False   # whether the current run has already been handled

    for state in lst:
        # a new run starts whenever the state changes
        if previous is None or state != previous:
            run_length = 0
            recorded = False

        run_length += 1

        # the run has lasted long enough to be trusted
        if run_length >= tolerance and not recorded:
            recorded = True
            # skip a state that merely resumes after noise
            if not stable_states or stable_states[-1] != state:
                stable_states.append(state)

        previous = state

    # Step 2: Identify differences between consecutive states
    return [
        (old_state - new_state, new_state - old_state)
        for old_state, new_state in zip(stable_states, stable_states[1:])
    ]

def uci_from_differences(differences):

    """
    Convert the output of summarize_states into UCI format.

    Each piece in a difference is a tuple (color, piece_class, file, rank).
    One entry is produced per difference, so the result always has the same
    length as the input; differences that cannot be interpreted yield
    "MOVE_ERROR" (or "CASTLING_ERROR" for a two-piece move whose king does
    not land on the g or c file).

    Parameters:
        differences (list): List of differences as ({disappeared}, {appeared}).

    Returns:
        list: A list of UCI moves
    """

    uci_moves = []

    for disappeared, appeared in differences:

        l_d = len(disappeared)
        l_a = len(appeared)

        print("appeared:", appeared)
        print("disappeared:", disappeared)
        print("l_a:",l_a)
        print("l_d:", l_d)

        try:

            if l_d == 1 and l_a == 1: # movement or promotion

                print("triggered case 1")

                old = list(disappeared)[0]
                new = list(appeared)[0]

                msg = f"{old[2]}{old[3]}{new[2]}{new[3]}"
                if old[:2] != new[:2]: # colour/class changed -> promotion suffix
                    msg += PROMOTE_KEY[new[1]]

            elif l_d == 2 and l_a == 1: # capturing

                print("triggered case 2")

                new = list(appeared)[0]
                old = [piece for piece in disappeared if piece[:2] == new[:2]][0] # the capturer

                msg = f"{old[2]}{old[3]}{new[2]}{new[3]}"

            elif l_d == 2 and l_a == 2: # castling

                print("triggered case 3")

                new_king = [piece for piece in appeared if piece[1] == KING][0]

                # the king's home rank depends on its colour
                rank = "8" if new_king[0] == BLACK else "1"

                if new_king[2] == "g": # king side
                    msg = f"e{rank}g{rank}"
                elif new_king[2] == "c": # queen side
                    msg = f"e{rank}c{rank}"
                else:
                    msg = "CASTLING_ERROR"

            else:
                msg = "MOVE_ERROR"

        except (IndexError, KeyError, TypeError):
            # No matching capturer / king, unknown piece class or a malformed
            # piece tuple.
            msg = "MOVE_ERROR"

        uci_moves.append(msg)

    return uci_moves

def generate_pgn(moves, ori):
    """Replay UCI moves on a board and return the movetext plus board history.

    Parameters:
        moves (list): UCI move strings (e.g. ``"e2e4"``); entries that do
            not parse or are not legal on the current board are reported
            with a message and skipped.
        ori: ``None`` to start from the standard position, otherwise an
            iterable of ``(color, piece_class, file, rank)`` tuples giving
            the starting position.

    Returns:
        tuple: ``(pgn, board_states)`` where ``pgn`` is the movetext on a
        single line and ``board_states`` holds an independent copy of the
        board before the first move and after every accepted move.
    """
    # start from the standard position unless a custom one is supplied
    board = chess.Board()

    if ori is not None:
        # NOTE(review): clear() presumably also drops castling rights, so
        # castling moves may be rejected from a custom position - confirm.
        board.clear()

        # place each piece
        for color, piece, alpha, num in ori:

            symbol = SYM_KEY[piece]

            # python-chess piece symbols: lower case = black, upper case = white
            if color == BLACK:
                symbol = symbol.lower()
            elif color == WHITE:
                symbol = symbol.upper()

            square = chess.parse_square(alpha + str(num))
            board.set_piece_at(square, chess.Piece.from_symbol(symbol))

        # set the correct start turn: the side to move owns the piece on the
        # source square (first two characters) of the first usable move
        print(moves)
        for move in moves:
            try:
                source = chess.parse_square(move[:2])
            except ValueError:
                continue  # e.g. "MOVE_ERROR" has no valid source square
            mover = board.color_at(source)
            if mover is not None:
                board.turn = mover
                break

    # create game
    game = chess.pgn.Game()
    game.headers["FEN"] = board.fen()
    node = game

    # iterate through UCI and keep track of board state; copies are stored
    # because `board` itself keeps being mutated
    board_states = [board.copy()]
    for move in moves:
        try:
            parsed = board.parse_uci(move)
        except ValueError:
            print(f"Move {move} is invalid")
            continue
        node = node.add_variation(parsed)
        board.push(parsed)
        board_states.append(board.copy())

    # export only the movetext, unwrapped, so long games are not cut off at
    # a line break
    exporter = chess.pgn.StringExporter(headers=False, variations=False, comments=False, columns=None)
    pgn = game.accept(exporter)
    return pgn, board_states

# convert a chess video to pgn format
def video2pgn(video_path):
    """Convert one chess video into simplified PGN text.

    Every RATE-th frame is analysed unless a hand is detected in it: pieces
    are detected, the frame is cropped and warped to the board, and each
    piece is assigned to a board cell. The resulting per-frame states are
    de-noised with ``summarize_states`` and turned into PGN with
    ``simple_pgn_from_differences``.

    Parameters:
        video_path: path of the video file.

    Returns:
        tuple: ``(pgn, board_states)``; ``board_states`` is always ``None``.
    """
    predictor = ChessPredicter()
    state_list = []

    for i, frame in enumerate(frame_generator(video_path)):

        # only sample every RATE-th frame
        if i % RATE != 0:
            continue

        # Switch this one if RGB is better
        # frame = frame[:,:,::-1]

        # skip frames in which a hand is visible
        if hands_detected(frame):
            continue

        detection = predictor.get_state(frame)
        frame_h, frame_w, _ = frame.shape
        delta = (frame_h - frame_w) // 2

        # reference point of each piece: bottom-centre of its box, shifted
        # into the coordinates of the cropped frame below
        detection_cg = {(piece_class, x + w // 2, y + h - delta - CROP)
                        for piece_class, x, y, w, h in detection}
        print(detection_cg)

        # crop the image to a centred band, trimmed by CROP at top and bottom
        frame = frame[delta + CROP:delta + frame_w - CROP, :, :]

        # get the transformed image and the coordinate of the transformed CG
        chessboard = ChessboardProcessor(frame)
        img, piece_cg = chessboard.rotate_and_warp(detection_cg)

        # if the frame is bad skip it
        if img is None or piece_cg is None:
            continue

        # image shape is (rows, columns, ...): x runs along the width and
        # y along the height
        img_h, img_w = img.shape[:2]

        # if any piece falls outside the warped board -> skip the frame;
        # both upper bounds are exclusive so that no point wraps around to
        # cell 0 through the modulo below
        if any(x <= 0 or x >= img_w or y <= 0 or y >= img_h for _, x, y in piece_cg):
            continue

        # get the image size to divide into cells
        x_cell_size = img_w // 8
        y_cell_size = img_h // 8

        plt.imshow(img)
        plt.show()

        # reformat the piece_cg set to indicate row and column instead
        detection_cell = {(piece_class.split("-")[0],  # color
                           piece_class.split("-")[1],  # class
                           X_INDEX[int(x // x_cell_size) % 8],  # column letter
                           Y_INDEX[int(y // y_cell_size) % 8])  # row digit
                          for piece_class, x, y in piece_cg}

        print(detection_cell)

        state_list.append(detection_cell)

    print("state_list: ",state_list)
    print("state_list_count: ", len(state_list))

    differences = summarize_states(lst=state_list, tolerance=TOLERANCE)
    pgn = simple_pgn_from_differences(differences)
    board_states = None

    return pgn, board_states

# main program to take a directory of videos to a csv file
def main(path=DATA_PATH, output=OUTPUT_PATH):
    """Convert every video in ``path`` to PGN and write the results as CSV.

    Parameters:
        path: directory containing the videos. An empty string selects a
            placeholder branch that processes no video and uses two fixed
            file names with their reversed names as output.
        output: path of the CSV file to write.

    Returns:
        pandas.DataFrame: one row per video with columns ``row_id`` (the
        file name) and ``output`` (the PGN text).
    """
    rows = {}

    if path == "":
        # placeholder data, no video is read
        for index, video in enumerate(["Hello.mp4","Hi.mp4"]):
            rows[index] = (video, video[::-1])
    else:
        # sorted for a reproducible row order; os.listdir returns bare
        # names, so the directory has to be joined back on before opening
        for index, video in enumerate(sorted(os.listdir(path))):
            pgn, _ = video2pgn(os.path.join(path, video))
            rows[index] = (video, pgn)

    df = pd.DataFrame.from_dict(rows, orient='index', columns=["row_id", "output"])
    df.to_csv(output, index=False, encoding='utf-8')

    return df


In [91]:
# Run the pipeline with its default arguments (DATA_PATH, OUTPUT_PATH).
main()

Unnamed: 0,row_id,output
0,Hello.mp4,4pm.olleH
1,Hi.mp4,4pm.iH
