In [2]:
import json

# Annotations

In this part, we need to understand how the images are labelled in order to be able to preprocess them.

In [3]:
# Load the annotation file. The context manager closes the handle as soon as
# the JSON has been parsed, and the explicit encoding avoids depending on the
# platform's default text encoding.
with open("raw_data/chessred2k/annotations.json", encoding="utf-8") as annot_file:
    annot = json.load(annot_file)

In [4]:
annot.keys()

dict_keys(['info', 'images', 'annotations', 'categories', 'splits'])

In [5]:
# Python type stored under each top-level key, in key order.
[type(value) for value in annot.values()]

[dict, list, dict, list, dict]

## Info values

In [6]:
annot['info'].keys()

dict_keys(['description', 'version', 'year', 'date_created', 'author'])

In [7]:
# Python type stored under each key of the 'info' section, in key order.
[type(value) for value in annot['info'].values()]

[str, str, int, str, str]

In [8]:
# Show the content of every field of the 'info' section, one per line.
info_section = annot['info']
for info_value in info_section.values():
    print(info_value)

Chess Recognition Dataset (ChessReD)
1.0
2023
26/08/2023
Athanasios Masouris


## Images values

In [9]:
# Every image entry that belongs to the first game (game_id 0).
first_game = [entry for entry in annot['images'] if entry['game_id'] == 0]
len(first_game)

103

In [10]:
first_game[0]

{'file_name': 'G000_IMG000.jpg',
 'path': 'images/0/G000_IMG000.jpg',
 'camera': 'Huawei P40 pro',
 'height': 3072,
 'width': 3072,
 'game_id': 0,
 'move_id': 0,
 'id': 0}

In [11]:
first_game[102]

{'file_name': 'G000_IMG102.jpg',
 'path': 'images/0/G000_IMG102.jpg',
 'camera': 'Huawei P40 pro',
 'height': 3072,
 'width': 3072,
 'game_id': 0,
 'move_id': 102,
 'id': 102}

In [12]:
annot['images'][103]

{'file_name': 'G001_IMG000.jpg',
 'path': 'images/1/G001_IMG000.jpg',
 'camera': 'Huawei P40 pro',
 'height': 3072,
 'width': 3072,
 'game_id': 1,
 'move_id': 0,
 'id': 103}

Here is our understanding of the keys of an image entry:
- 'file_name' is the name of the image file.
- 'path' gives the location of the image. It will be useful for defining preprocessing functions that take images as arguments.
- 'height' and 'width' are the dimensions of the image.
- 'game_id' is the number of the game the image belongs to. Not that useful at first glance.
- 'move_id' is the index of the move within the game. It won't be very useful either.
- 'id' is the position of the image in the whole dataset. To be considered.

## Annotations value

In [14]:
annot['annotations'].keys()

dict_keys(['pieces', 'corners'])

In [15]:
# Python type stored under each key of the 'annotations' section, in key order.
[type(value) for value in annot['annotations'].values()]

[list, list]

In [16]:
# Number of entries stored under each key of the 'annotations' section.
[len(entries) for entries in annot['annotations'].values()]

[223804, 2078]

In [17]:
annot["annotations"]['pieces'][0]

{'image_id': 0,
 'category_id': 7,
 'chessboard_position': 'a8',
 'id': 0,
 'bbox': [510.34, 963.65, 155.75, 186.14]}

In [18]:
annot["annotations"]['pieces'][223803]

{'image_id': 10799,
 'category_id': 7,
 'chessboard_position': 'd1',
 'id': 223803,
 'bbox': [581.57, 1188.01, 132.52, 209.77]}

In [19]:
annot["annotations"]['pieces'][1]

{'image_id': 0,
 'category_id': 8,
 'chessboard_position': 'b8',
 'id': 1,
 'bbox': [692.17, 877.29, 154.48, 211.46]}

In [20]:
annot["annotations"]['corners'][0]

{'image_id': 0,
 'corners': {'bottom_right': [2610.3, 1560.9],
  'top_right': [1772.23, 638.59],
  'top_left': [488.7, 1078.7],
  'bottom_left': [1063.3, 2304.1]},
 'id': 223804}

In [21]:
annot["annotations"]['corners'][1]

{'image_id': 1,
 'corners': {'top_right': [892.9, 931.7],
  'bottom_right': [2376.27, 994.8],
  'bottom_left': [2392.7, 2142.33],
  'top_left': [429.86, 1982.92]},
 'id': 223805}

## Categories value

In [22]:
annot['categories']

[{'id': 0, 'name': 'white-pawn'},
 {'id': 1, 'name': 'white-rook'},
 {'id': 2, 'name': 'white-knight'},
 {'id': 3, 'name': 'white-bishop'},
 {'id': 4, 'name': 'white-queen'},
 {'id': 5, 'name': 'white-king'},
 {'id': 6, 'name': 'black-pawn'},
 {'id': 7, 'name': 'black-rook'},
 {'id': 8, 'name': 'black-knight'},
 {'id': 9, 'name': 'black-bishop'},
 {'id': 10, 'name': 'black-queen'},
 {'id': 11, 'name': 'black-king'},
 {'id': 12, 'name': 'empty'}]

## Splits value

In [23]:
annot['splits'].keys()

dict_keys(['train', 'val', 'test', 'chessred2k'])

In [24]:
annot['splits']['chessred2k']['train'].keys()

dict_keys(['image_ids', 'n_samples'])

## Creating board Y-value

### Building utils functions and making some tests

In [25]:
len(annot['images'])

10800

In [26]:
# Keep every piece annotation attached to the image whose id is 0.
# Iterating over the list directly avoids hard-coding its length, so the cell
# keeps working if the annotation file changes size.
pieces_game_zero = [
    piece for piece in annot["annotations"]['pieces'] if piece['image_id'] == 0
]

In [27]:
len(pieces_game_zero)

32

In [28]:
import numpy as np

In [29]:
def array_board(pieces_game:list):
    """Build an 8x8 board array of category ids from piece annotations.

    Parameters
    ----------
    pieces_game : list
        Piece annotation dicts. Each one must provide 'chessboard_position'
        (file letter followed by rank digit, e.g. 'a8') and 'category_id'.

    Returns
    -------
    numpy.ndarray
        Float array of shape (8, 8). Row 0 holds rank 8 and row 7 holds rank 1;
        column 0 holds file 'a' and column 7 holds file 'h'. Squares without an
        annotation keep the value 12.
    """
    file_to_col = dict(zip('abcdefgh', range(8)))
    board = np.full((8, 8), 12.0)
    for annotation in pieces_game:
        square = annotation['chessboard_position']
        # Rank 8 is stored in the first row, so the rank digit is flipped.
        rank_row = 8 - int(square[1])
        file_col = file_to_col[square[0]]
        board[rank_row, file_col] = annotation['category_id']
    return board

In [30]:
game_board_zero = array_board(pieces_game_zero)

In [31]:
# Map each piece category id (0-11) to its FEN letter. The letters follow the
# category order: ids 0-5 get the uppercase letters, ids 6-11 the lowercase ones.
fen_str = "PRNBQKprnbqk"
id_range = list(range(12))
dict_fen = {category_id: fen_str[category_id] for category_id in id_range}
dict_fen

{0: 'P',
 1: 'R',
 2: 'N',
 3: 'B',
 4: 'Q',
 5: 'K',
 6: 'p',
 7: 'r',
 8: 'n',
 9: 'b',
 10: 'q',
 11: 'k'}

In [38]:
# Category 12 is the 'empty' square. It is mapped to the integer 1 (one empty
# square) instead of a letter, to make the FEN conversion easier to handle.
# NOTE(review): unlike the other entries this value is an int, not a str.
dict_fen[12] = 1

In [57]:
def row_converter(board_row:np.ndarray):
    """Encode one board row as the matching FEN rank string.

    Parameters
    ----------
    board_row : np.ndarray
        Category ids of one row, read from left to right. The value 12 marks an
        empty square; any other value is looked up in the module-level
        `dict_fen` mapping.

    Returns
    -------
    str
        Piece letters in order, with each run of consecutive empty squares
        replaced by the length of the run.
    """
    tokens = []
    empty_run = 0
    for cell in board_row:
        category = int(cell)
        if category == 12:
            empty_run += 1
            continue
        # A piece ends the current run of empty squares: flush its length first.
        if empty_run:
            tokens.append(str(empty_run))
            empty_run = 0
        tokens.append(dict_fen[category])
    # Flush a run of empty squares that reaches the end of the row.
    if empty_run:
        tokens.append(str(empty_run))
    return ''.join(tokens)

In [58]:
row_converter(game_board_zero[7,:])

'RNBQKBNR'

In [59]:
def board_to_fen(board:np.ndarray):
    """Encode a board array as the piece-placement part of a FEN string.

    Parameters
    ----------
    board : np.ndarray
        Two-dimensional array of category ids; rows 0 to 7 are converted in
        that order with `row_converter`.

    Returns
    -------
    str
        The eight encoded rows joined with '/'.
    """
    return "/".join(row_converter(board[rank, :]) for rank in range(8))

In [60]:
board_to_fen(game_board_zero)

'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR'

### Sealing the deal

In [37]:
nb_images = len(annot['images'])

In [47]:
# Gather the pieces of each image and turn them into a board array.
# Pieces are grouped by their 'image_id', so the result does not depend on the
# pieces being sorted, on a hard-coded number of annotations, or on every image
# having at least one piece (an image without pieces gets an empty board).
pieces_by_image = {}
for piece in annot["annotations"]['pieces']:
    pieces_by_image.setdefault(piece['image_id'], []).append(piece)

# One board per image, in the same order as annot['images'].
pieces_game_list = [
    array_board(pieces_by_image.get(image['id'], []))
    for image in annot['images']
]

In [48]:
# Building a copy of the 'images' annotation value.
# Each entry is copied into a new dict so that adding keys to `output_info`
# later does not modify the raw annotations held in `annot`.
output_info = [dict(image) for image in annot['images']]

In [13]:
annot['images'][10799]

{'file_name': 'G099_IMG099.jpg',
 'path': 'images/99/G099_IMG099.jpg',
 'camera': 'Samsung Galaxy S8',
 'height': 3024,
 'width': 3024,
 'game_id': 99,
 'move_id': 99,
 'id': 10799}

In [50]:
pieces_game_list[10799]

array([[12., 12., 12., 12., 12., 12., 12., 12.],
       [12., 12., 12.,  0., 12., 12., 12.,  6.],
       [12.,  6., 12., 12., 12., 12., 12., 12.],
       [ 6., 12., 12.,  2., 12., 12., 12.,  5.],
       [12., 12.,  0., 12., 12., 12., 12.,  0.],
       [11.,  0., 12., 12., 12., 12., 12., 12.],
       [ 0., 12., 12., 12.,  1., 12., 12., 12.],
       [12., 12., 12.,  7., 12., 12., 12., 12.]])

In [61]:
# Attach to every image entry the FEN string of the board at the same position.
for position in range(nb_images):
    fen = board_to_fen(pieces_game_list[position])
    output_info[position]['fen'] = fen

In [62]:
import pandas as pd

In [65]:
# Turn the list of per-image dicts into a dict of columns (one list per key).
reverse_dict = {
    column: [output_info[idx][column] for idx in range(nb_images)]
    for column in ('file_name', 'fen')
}

In [66]:
# One row per image, with the file name and its FEN string as columns.
pd_output = pd.DataFrame(reverse_dict)
pd_output.head()

Unnamed: 0,file_name,fen
0,G000_IMG000.jpg,rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR
1,G000_IMG001.jpg,rnbqkbnr/pppppppp/8/8/1P6/8/P1PPPPPP/RNBQKBNR
2,G000_IMG002.jpg,rnbqkbnr/ppp1pppp/8/3p4/1P6/8/P1PPPPPP/RNBQKBNR
3,G000_IMG003.jpg,rnbqkbnr/ppp1pppp/8/3p4/1P6/8/PBPPPPPP/RN1QKBNR
4,G000_IMG004.jpg,rn1qkbnr/ppp1pppp/8/3p1b2/1P6/8/PBPPPPPP/RN1QKBNR


We have created a DataFrame that associates each image of the raw dataset with its FEN string. We can now work on the image dataset.