In [None]:
        # This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively walk the read-only Kaggle input tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for full_path in (os.path.join(dirname, filename) for filename in filenames):
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Generate a requirements.txt file for the Kaggle notebook environment

# One pip requirement specifier per line; the file content ends with a trailing newline.
requirements = "\n".join([
    "paddlepaddle-gpu>=2.4.2",
    "paddleocr>=2.6.0",
    "tensorflow>=2.10.0",
    "numpy>=1.21.0",
    "pillow>=8.0.0",
]) + "\n"

# The file goes to the Kaggle working directory so the install cell can read it back.
requirements_file_path = "/kaggle/working/requirements.txt"

with open(requirements_file_path, "w") as file:
    file.write(requirements)

print(f"Requirements file saved at {requirements_file_path}")


In [None]:
# Use the %pip magic explicitly: it installs into the running kernel's environment and does not
# depend on IPython's automagic setting being enabled (a bare `pip ...` line is not valid Python).
%pip install -r /kaggle/working/requirements.txt


test extracting text box coordinates

Try box OCR, image classification, and OCR

In [17]:
#(not for danny-chess-2) use this code to generate labels and annotated images. In order for labels to be valid, they will have to be reviewed by human and mistakes corrected only in labels column
import os
import cv2
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw, ImageFont

# Define input and output paths
input_folder = '/kaggle/input/chess-dataset-notation/data'
output_folder = '/kaggle/working/labels'
annotated_folder = '/kaggle/working/annotated_images'
printed_folder = os.path.join(output_folder, 'printed')
handwritten_folder = os.path.join(output_folder, 'handwritten')

# Make sure every output directory exists before any crop or preview is written
for required_dir in (printed_folder, handwritten_folder, annotated_folder):
    os.makedirs(required_dir, exist_ok=True)

# Detection / recognition settings handed to the English PaddleOCR pipeline
paddle_ocr_options = {
    'use_angle_cls': True,
    'lang': 'en',
    'det_db_box_thresh': 0.4,
    'det_db_unclip_ratio': 1.1,
    'drop_score': 0.3,
    'rec_image_shape': "3, 48, 300",
    'det_limit_side_len': 2400,
}
ocr = PaddleOCR(**paddle_ocr_options)

# Fixed colors for each column
# Outline color per score-sheet column, keyed by 1-based column number.
# Columns 1 and 4 are treated as printed, columns 2, 3, 5 and 6 as handwritten.
COLUMN_COLORS = {
    column: color
    for column, color in enumerate(
        ("blue", "red", "yellow", "green", "orange", "purple"), start=1
    )
}


def classify_column(x_center, image_width):
    """Map a box's horizontal center to a (label, color) pair for its sheet column.

    The sheet is modelled as six bands derived from fractions of ``image_width``:
    a narrow printed band at the left edge, two handwritten bands, a narrow
    printed band starting at the horizontal middle, and two more handwritten
    bands. Each band is half-open: ``left <= x_center < right``.

    Args:
        x_center: Horizontal center of the detected box, in pixels.
        image_width: Width of the image the box was detected in, in pixels.

    Returns:
        ``("printed" | "handwritten", color_name)`` for the first band that
        contains ``x_center``, or ``(None, None)`` when it falls in no band
        (including the gaps before the middle band and at the right edge).
    """
    first_column_end = image_width / 15
    fourth_column_start = image_width / 2
    fourth_column_end = fourth_column_start + image_width / 20
    column_width = (image_width - first_column_end - (fourth_column_end - fourth_column_start) - (image_width / 20)) / 4

    # (left edge, right edge, label, column number) in the order they are tested
    bands = (
        (0, first_column_end, "printed", 1),
        (first_column_end, first_column_end + column_width, "handwritten", 2),
        (first_column_end + column_width, first_column_end + 2 * column_width, "handwritten", 3),
        (fourth_column_start, fourth_column_end, "printed", 4),
        (fourth_column_end, fourth_column_end + column_width, "handwritten", 5),
        (fourth_column_end + column_width, fourth_column_end + 2 * column_width, "handwritten", 6),
    )
    for left, right, kind, column in bands:
        if left <= x_center < right:
            return kind, COLUMN_COLORS[column]
    return None, None

# Process each PNG file in the input folder
# For every PNG in input_folder: preprocess, run OCR, save one crop per detected box into the
# printed/handwritten label folders, and save an annotated preview of the original image.
for filename in os.listdir(input_folder):
    if not filename.endswith('.png'):
        continue

    input_image_path = os.path.join(input_folder, filename)
    preprocessed_image_path = f'/kaggle/working/preprocessed_{filename}'

    # Step 1: Pre-process the image (grayscale -> bilateral denoise -> CLAHE contrast boost)
    image = cv2.imread(input_image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        print(f"Skipping {filename} - image could not be read.")
        continue
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    filtered = cv2.bilateralFilter(gray, 9, 75, 75)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_enhanced = clahe.apply(filtered)
    cv2.imwrite(preprocessed_image_path, contrast_enhanced)

    # Step 2: Perform OCR on the preprocessed copy
    ocr_result = ocr.ocr(preprocessed_image_path, cls=True)
    # A page without any detected text comes back as None; normalise it to an empty list
    detections = (ocr_result[0] if ocr_result else None) or []

    # Step 3: Annotate the original image (RGB so named colors can be drawn)
    annotated_image = Image.open(input_image_path).convert("RGB")
    draw = ImageDraw.Draw(annotated_image)
    font = ImageFont.load_default()

    # Boxes whose vertical center lies within 25 px of the top or bottom edge are ignored
    top_margin = 25
    bottom_margin = annotated_image.height - 25

    # Step 4: Process OCR results
    image_width = annotated_image.width
    box_number = 1

    for line in detections:
        bbox = line[0]
        x_min = int(min(point[0] for point in bbox))
        y_min = int(min(point[1] for point in bbox))
        x_max = int(max(point[0] for point in bbox))
        y_max = int(max(point[1] for point in bbox))
        x_center = (x_min + x_max) / 2
        y_center = (y_min + y_max) / 2

        # Apply top and bottom margin filtering
        if y_center < top_margin or y_center > bottom_margin:
            continue

        # Boxes that fall in no known column are neither cropped nor drawn
        label, color = classify_column(x_center, image_width)
        if not label:
            continue

        # Clamp to the image origin: a negative start index would be read by NumPy as
        # "count from the end" and yield a wrong or empty crop
        crop_left, crop_top = max(x_min, 0), max(y_min, 0)
        cropped = image[crop_top:y_max, crop_left:x_max]
        if cropped.size == 0:
            # Degenerate box; cv2.imwrite cannot encode an empty array
            continue

        # Save the cropped region under its class folder
        output_subfolder = printed_folder if label == "printed" else handwritten_folder
        output_path = os.path.join(output_subfolder, f"{filename}_box{box_number}.png")
        cv2.imwrite(output_path, cropped)

        # Outline the box and number it in the column's color
        int_bbox = [(int(point[0]), int(point[1])) for point in bbox]
        draw.polygon(int_bbox, outline=color)
        draw.text((x_min, y_min), f"{box_number}", fill=color, font=font)
        box_number += 1

    # Draw top and bottom margin lines on the annotated image
    draw.line([(0, top_margin), (image_width, top_margin)], fill="green", width=2)
    draw.line([(0, bottom_margin), (image_width, bottom_margin)], fill="green", width=2)

    # Save the annotated image
    annotated_image_path = os.path.join(annotated_folder, f"annotated_{filename}")
    annotated_image.save(annotated_image_path)

print(f"Labeled data saved in: {output_folder}")
print(f"Annotated images saved in: {annotated_folder}")


[2024/11/24 17:49:59] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=2400, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.4, det_db_unclip_ratio=1.1, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec_

FileNotFoundError: [Errno 2] No such file or directory: '/kaggle/input/chess-dataset-notation/data/testing'

get coordinates of the cells

In [None]:
#line annotations. outdated
from PIL import Image
import cv2
import numpy as np
import os

# Load the uploaded image
# Detect near-horizontal and near-vertical line segments in one score-sheet photo, compute
# where they cross, draw numbered rectangles between crossing points, and save the result.
image_path = '/kaggle/input/danny-chess-2/2024-11-23 15-35.jpeg'
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
if image is None:
    # cv2.imread returns None rather than raising when the file is missing or unreadable
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Apply edge detection to identify lines
edges = cv2.Canny(image, 50, 150, apertureSize=3)

# Use the probabilistic Hough transform to detect line segments
lines = cv2.HoughLinesP(edges, 1, np.pi / 180, threshold=100, minLineLength=50, maxLineGap=10)
if lines is None:
    # No segment passed the thresholds; treat it as "no lines" instead of crashing in the loop
    lines = []

# Classify detected segments into horizontal and vertical (within a 5 px tolerance)
horizontal_lines = []
vertical_lines = []
for line in lines:
    for x1, y1, x2, y2 in line:
        # Plain ints keep the tuple comparisons and OpenCV drawing calls free of NumPy scalars
        x1, y1, x2, y2 = int(x1), int(y1), int(x2), int(y2)
        if abs(y2 - y1) < 5:  # Horizontal line
            horizontal_lines.append((x1, y1, x2, y2))
        elif abs(x2 - x1) < 5:  # Vertical line
            vertical_lines.append((x1, y1, x2, y2))

# A crossing is recorded where a vertical segment's x lies within a horizontal segment's
# x-extent and that horizontal segment's y lies within the vertical segment's y-extent
intersections = []
for x1_h, y1_h, x2_h, y2_h in horizontal_lines:
    for x1_v, y1_v, x2_v, y2_v in vertical_lines:
        if min(x1_h, x2_h) <= x1_v <= max(x1_h, x2_h) and min(y1_v, y2_v) <= y1_h <= max(y1_v, y2_v):
            intersections.append((x1_v, y1_h))

# Sort by y, then x, for consistent box numbering
intersections = sorted(intersections, key=lambda point: (point[1], point[0]))
# Set lookup keeps the corner checks below O(1) instead of scanning the list each time
intersection_points = set(intersections)

# The image was loaded single-channel; convert to BGR so the (0, 255, 0) annotations
# actually render green instead of collapsing to intensity 0 (black)
image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)

box_id = 1  # Box numbering starts from 1
for i, (x1, y1) in enumerate(intersections):
    for j, (x2, y2) in enumerate(intersections):
        if i == j:
            continue

        # Only pair points that share a row or a column (and are not the same location)
        same_row = x2 != x1 and y2 == y1
        same_column = y2 != y1 and x2 == x1
        if not (same_row or same_column):
            continue

        # NOTE(review): because the two points already share a row or column, these corner
        # checks always succeed and the rectangle drawn is a degenerate (zero-area) one.
        # Kept as in the original "outdated" cell; confirm the intended pairing rule.
        if (x1, y2) in intersection_points and (x2, y1) in intersection_points:
            top_left = (x1, y1)
            bottom_right = (x2, y2)

            # Draw the green rectangle
            cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 2)

            # Add box number at the center
            center_x = (x1 + x2) // 2
            center_y = (y1 + y2) // 2
            cv2.putText(image, str(box_id), (center_x - 10, center_y + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)

            # One box per starting point: move on to the next i
            box_id += 1
            break

# Define the path to save the annotated image
output_dir = '/kaggle/working/annotated_images'
os.makedirs(output_dir, exist_ok=True)  # Ensure the output directory exists
output_path = os.path.join(output_dir, '2024-11-23 15-35.jpeg')

# Save the annotated image
cv2.imwrite(output_path, image)

print(f"Annotated image saved to {output_path}")


In [None]:
# -f keeps this cleanup quiet and successful when the folder does not exist yet (fresh kernel)
!rm -rf /kaggle/working/labels_yolo

Create labels for the dataset for InceptionV3 classification

In [None]:
#(not for danny-chess-2) labels and annotation files intended for image classification not ocr
import os
import cv2
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw, ImageFont

# Define input and output paths
input_folder = '/kaggle/input/chess-dataset-notation/data'
output_folder = '/kaggle/working/labels'
annotated_folder = '/kaggle/working/annotated_images'
# Crops are sorted into one sub-folder per class under output_folder
printed_folder, handwritten_folder = (
    os.path.join(output_folder, class_name) for class_name in ('printed', 'handwritten')
)

# Create the class folders and the preview folder up front
for target_dir in [printed_folder, handwritten_folder, annotated_folder]:
    os.makedirs(target_dir, exist_ok=True)

# English PaddleOCR pipeline with the angle classifier enabled
ocr = PaddleOCR(**dict(
    use_angle_cls=True,
    lang='en',
    det_db_box_thresh=0.4,
    det_db_unclip_ratio=1.1,
    drop_score=0.3,
    rec_image_shape="3, 48, 300",
    det_limit_side_len=2400,
))

# Predefined palette colors
PREDEFINED_COLORS = [
    (255, 0, 0),    # Red
    (0, 255, 0),    # Green
    (0, 0, 255),    # Blue
    (255, 255, 0),  # Yellow
]


def get_color(box_number):
    """Return the RGB tuple for a box, cycling through PREDEFINED_COLORS by box number."""
    palette_slot = box_number % len(PREDEFINED_COLORS)
    return PREDEFINED_COLORS[palette_slot]

# Helper function to classify the column
def classify_column(x_center, image_width):
    """Classify a box as "printed" or "handwritten" from its horizontal center.

    Two narrow printed bands are assumed: one from the left edge to
    ``image_width / 15`` and one of width ``image_width / 20`` starting at the
    horizontal middle. Each printed band is followed by two handwritten
    columns of equal width. All bands are half-open (``left <= x < right``).

    Args:
        x_center: Horizontal center of the box, in pixels.
        image_width: Width of the source image, in pixels.

    Returns:
        "printed", "handwritten", or None when the center lies in no band.
    """
    first_column_end = image_width / 15
    fourth_column_start = image_width / 2
    fourth_column_end = fourth_column_start + image_width / 20
    column_width = (image_width - first_column_end - (fourth_column_end - fourth_column_start) - (image_width / 20)) / 4

    # The two handwritten columns after each printed band are contiguous,
    # so each pair is checked as a single span
    left_handwritten_end = first_column_end + 2 * column_width
    right_handwritten_end = fourth_column_end + 2 * column_width

    in_printed_band = (
        0 <= x_center < first_column_end
        or fourth_column_start <= x_center < fourth_column_end
    )
    if in_printed_band:
        return "printed"

    in_handwritten_band = (
        first_column_end <= x_center < left_handwritten_end
        or fourth_column_end <= x_center < right_handwritten_end
    )
    if in_handwritten_band:
        return "handwritten"

    return None

# Process each PNG file in the input folder
# For every PNG in input_folder: preprocess, run OCR, save one crop per classified box into the
# printed/handwritten folders, and save a palette-mode preview with numbered box outlines.
for filename in os.listdir(input_folder):
    if not filename.endswith('.png'):
        continue

    input_image_path = os.path.join(input_folder, filename)
    preprocessed_image_path = f'/kaggle/working/preprocessed_{filename}'

    # Step 1: Pre-process the image (grayscale -> bilateral denoise -> CLAHE contrast boost)
    image = cv2.imread(input_image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        print(f"Skipping {filename} - image could not be read.")
        continue
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    filtered = cv2.bilateralFilter(gray, 9, 75, 75)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_enhanced = clahe.apply(filtered)
    cv2.imwrite(preprocessed_image_path, contrast_enhanced)

    # Step 2: Perform OCR on the preprocessed copy
    ocr_result = ocr.ocr(preprocessed_image_path, cls=True)
    # A page without any detected text comes back as None; normalise it to an empty list
    detections = (ocr_result[0] if ocr_result else None) or []

    # Step 3: Annotate the original image
    annotated_image = Image.open(input_image_path).convert("P")  # Keep P mode
    draw = ImageDraw.Draw(annotated_image)
    font = ImageFont.load_default()

    # Step 4: Process OCR results
    image_width = annotated_image.width
    box_number = 1

    for line in detections:
        bbox = line[0]
        x_min = int(min(point[0] for point in bbox))
        y_min = int(min(point[1] for point in bbox))
        x_max = int(max(point[0] for point in bbox))
        y_max = int(max(point[1] for point in bbox))
        x_center = (x_min + x_max) / 2

        # Boxes that fall in no known column are neither cropped nor drawn
        label = classify_column(x_center, image_width)
        if not label:
            continue

        # Clamp to the image origin: a negative start index would be read by NumPy as
        # "count from the end" and yield a wrong or empty crop
        crop_left, crop_top = max(x_min, 0), max(y_min, 0)
        cropped = image[crop_top:y_max, crop_left:x_max]
        if cropped.size == 0:
            # Degenerate box; cv2.imwrite cannot encode an empty array
            continue

        # Save the cropped region under its class folder
        output_subfolder = printed_folder if label == "printed" else handwritten_folder
        output_path = os.path.join(output_subfolder, f"{filename}_box{box_number}.png")
        cv2.imwrite(output_path, cropped)

        # Outline and number the box, cycling colors by box number
        int_bbox = [(int(point[0]), int(point[1])) for point in bbox]
        color = get_color(box_number)
        draw.polygon(int_bbox, outline=color)
        draw.text((x_min, y_min), f"{box_number}", fill=color, font=font)
        box_number += 1

    # Save the annotated image
    annotated_image_path = os.path.join(annotated_folder, f"annotated_{filename}")
    annotated_image.save(annotated_image_path)

print(f"Labeled data saved in: {output_folder}")
print(f"Annotated images saved in: {annotated_folder}")


In [None]:
# Box labeling for YOLO. Experimental.
import os
import cv2
from paddleocr import PaddleOCR

# Define input and output paths
input_folder = '/kaggle/input/chess-dataset-notation/data'
output_folder = '/kaggle/working/labels_yolo'
# Dataset layout: preprocessed images and their label .txt files live in sibling folders
images_folder = os.path.join(output_folder, 'images')
labels_folder = os.path.join(output_folder, 'labels')

for dataset_dir in (images_folder, labels_folder):
    os.makedirs(dataset_dir, exist_ok=True)

# English PaddleOCR pipeline; the angle classifier is not loaded for this cell
yolo_ocr_options = {
    'use_angle_cls': False,
    'lang': 'en',
    'det_db_box_thresh': 0.4,
    'det_db_unclip_ratio': 1.1,
    'drop_score': 0.3,
    'rec_image_shape': "3, 48, 300",
    'det_limit_side_len': 2400,
}
ocr = PaddleOCR(**yolo_ocr_options)

# Class IDs for YOLOv8
# Integer class id written as the first field of every label line
CLASS_IDS = dict(printed=0, handwritten=1)


def classify_column(x_center, image_width):
    """Return "printed" or "handwritten" for a box based on its horizontal center.

    Band edges are fixed fractions of ``image_width``; every band is half-open
    (``lo <= x_center < hi``) and the bands are tested left to right.

    Args:
        x_center: Horizontal center of the box, in pixels.
        image_width: Width of the image, in pixels.

    Returns:
        The label of the first band containing ``x_center``, or None if no
        band contains it.
    """
    first_column_end = image_width / 15
    fourth_column_start = image_width / 2
    fourth_column_end = fourth_column_start + image_width / 20
    column_width = (image_width - first_column_end - (fourth_column_end - fourth_column_start) - (image_width / 20)) / 4

    column_bands = [
        (0, first_column_end, "printed"),
        (first_column_end, first_column_end + column_width, "handwritten"),
        (first_column_end + column_width, first_column_end + 2 * column_width, "handwritten"),
        (fourth_column_start, fourth_column_end, "printed"),
        (fourth_column_end, fourth_column_end + column_width, "handwritten"),
        (fourth_column_end + column_width, fourth_column_end + 2 * column_width, "handwritten"),
    ]
    return next(
        (label for lo, hi, label in column_bands if lo <= x_center < hi),
        None,
    )

# Process each PNG file in the input folder
# For every PNG in input_folder: save a preprocessed copy into images_folder, run OCR on it and
# write one label line per classified box ("class x_center y_center width height", all
# normalised by the image size) into a same-named .txt file in labels_folder.
for filename in os.listdir(input_folder):
    if not filename.endswith('.png'):
        continue

    input_image_path = os.path.join(input_folder, filename)

    # Step 1: Pre-process the image (grayscale -> bilateral denoise -> CLAHE contrast boost)
    image = cv2.imread(input_image_path)
    if image is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        print(f"Skipping {filename} - image could not be read.")
        continue
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    filtered = cv2.bilateralFilter(gray, 9, 75, 75)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
    contrast_enhanced = clahe.apply(filtered)

    # Save the preprocessed image in the images folder
    output_image_path = os.path.join(images_folder, filename)
    cv2.imwrite(output_image_path, contrast_enhanced)

    # Perform OCR on the preprocessed image
    ocr_result = ocr.ocr(output_image_path, cls=True)
    # A page without any detected text comes back as None; normalise it to an empty list
    detections = (ocr_result[0] if ocr_result else None) or []

    # Boxes whose vertical center lies within 25 px of the top or bottom edge are ignored
    image_height, image_width = contrast_enhanced.shape
    top_margin = 25
    bottom_margin = image_height - 25

    # The label file is always created, even when it ends up empty
    label_file_path = os.path.join(labels_folder, f"{os.path.splitext(filename)[0]}.txt")
    with open(label_file_path, 'w') as label_file:
        for line in detections:
            bbox = line[0]
            # Clamp the box to the image so every normalised value stays inside [0, 1]
            x_min = max(min(point[0] for point in bbox), 0)
            y_min = max(min(point[1] for point in bbox), 0)
            x_max = min(max(point[0] for point in bbox), image_width)
            y_max = min(max(point[1] for point in bbox), image_height)
            if x_max <= x_min or y_max <= y_min:
                # Nothing of the box is left inside the image
                continue
            x_center = (x_min + x_max) / 2
            y_center = (y_min + y_max) / 2

            # Skip boxes outside the margins
            if y_center < top_margin or y_center > bottom_margin:
                continue

            # Boxes that fall in no known column get no label line
            label = classify_column(x_center, image_width)
            if not label:
                continue

            # Normalize coordinates
            x_center_norm = x_center / image_width
            y_center_norm = y_center / image_height
            width_norm = (x_max - x_min) / image_width
            height_norm = (y_max - y_min) / image_height

            class_id = CLASS_IDS[label]
            label_file.write(f"{class_id} {x_center_norm:.6f} {y_center_norm:.6f} {width_norm:.6f} {height_norm:.6f}\n")

print(f"Images and labels saved in: {output_folder}")


In [None]:
# Some InceptionV3 testing. Not in use.
# NOTE(review): this cell cannot run top-to-bottom as written:
#   - `tf` is only imported in a later cell of this notebook;
#   - `ocr_handwritten_boxes` is not defined anywhere in the visible part of the notebook;
#   - the model path contains the placeholder folder 'your-dataset-name';
#   - `inception_model` is loaded but not used below.
inception_model = tf.keras.models.load_model('/kaggle/input/your-dataset-name/handwritten_classifier.h5')

# Step 5: Run the Pipeline on an Image
image_path = '/kaggle/input/chess-dataset-notation/data/011_0.png'
# presumably returns an iterable of recognised text strings (it is iterated and printed below) - TODO confirm
handwritten_texts = ocr_handwritten_boxes(image_path)

# Output the handwritten text
print("Handwritten Text Extracted:")
for text in handwritten_texts:
    print(text)


In [None]:
# Read the notation list file and split it into one string per line (line endings removed)
with open('/kaggle/input/valid-moves/san_strings_with_symbols.txt', 'r') as file:
    raw_notation_text = file.read()
chess_notations = raw_notation_text.splitlines()

In [None]:
#some training imports for metrics calculations
import os
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
import pandas as pd

In [None]:
# Load the space-separated "filename label" tag file and attach each image's full path
dataset_path = '/kaggle/input/chess-dataset-notation/data'
labels_file = os.path.join(dataset_path, 'training_tags.txt')
labels_df = (
    pd.read_csv(labels_file, sep=' ', header=None, names=['filename', 'label'])
    # filepath = dataset folder joined with the bare filename from the tag file
    .assign(filepath=lambda frame: frame['filename'].apply(lambda x: os.path.join(dataset_path, x)))
)
print(labels_df)

In [None]:
#bad training labels
from sklearn.model_selection import train_test_split

# Load the training and testing tag files, then carve a stratified validation split
# out of the training labels.
training_labels_file = os.path.join(dataset_path, 'training_tags.txt')
testing_labels_file = os.path.join(dataset_path, 'testing_tags.txt')

# Load training labels (space-separated "filename label" rows, no header)
training_labels_df = pd.read_csv(training_labels_file, sep=' ', header=None, names=['filename', 'label'])
training_labels_df['filepath'] = training_labels_df['filename'].apply(lambda x: os.path.join(dataset_path, x))

# Load testing labels (same format)
testing_labels_df = pd.read_csv(testing_labels_file, sep=' ', header=None, names=['filename', 'label'])
testing_labels_df['filepath'] = testing_labels_df['filename'].apply(lambda x: os.path.join(dataset_path, x))


# Drop classes with a single instance: a stratified split needs at least two rows per class
filtered_labels_df = training_labels_df.groupby('label').filter(lambda x: len(x) > 1)

# Perform the train/validation split (10% validation, stratified by label).
# The first argument was previously broken across two lines ("f" / "iltered_labels_df"),
# which made the whole cell a SyntaxError.
train_df, val_df = train_test_split(
    filtered_labels_df,
    test_size=0.1,
    random_state=42,
    stratify=filtered_labels_df['label']
)


In [None]:
#load chess-dataset-notation training tags
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt


import os

# Base directory for data
dataset_path = '/kaggle/input/chess-dataset-notation/data'

# The training tag file sits directly inside the dataset folder
training_labels_file = os.path.join(dataset_path, 'training_tags.txt')
# Images are in that same folder, so the image root is the dataset folder itself
image_path = dataset_path
    

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel
%pip install transformers datasets


In [None]:
# Clone the YOLOv5 repository
!git clone https://github.com/ultralytics/yolov5
%cd yolov5

# Install requirements
# %pip (rather than !pip) installs into the environment of the running kernel;
# requirements.txt is resolved relative to the yolov5 directory entered by %cd above
%pip install -r requirements.txt
%pip install torch torchvision pillow

In [None]:
!python3 -m pip install paddlepaddle-gpu==2.4.2.post112 -f https://www.paddlepaddle.org.cn/whl/linux/mkl/avx/stable.html

In [None]:
!python -m pip install paddlepaddle==2.4.2 -i https://mirror.baidu.com/pypi/simple

In [None]:
import paddle
paddle.utils.run_check()

In [None]:
!git clone https://github.com/PaddlePaddle/PaddleOCR.git

In [None]:
!ls 

In [None]:
# %pip (rather than !pip) installs into the environment of the running kernel
%pip install tqdm imutils opencv-python matplotlib gdown

In [None]:
#download PPOCRdata.tar
import gdown

# Google Drive source of the archive and the local file name it is saved under
url = "https://drive.google.com/uc?id=19kSj_GswccuXk45yH87jTXLqpLTxaS67"
output = "PPOCRdata.tar"

# Fetch the archive into the current working directory
gdown.download(url=url, output=output)

In [None]:
!ls -lh PaddleOCR

In [None]:
!python3 PaddleOCR/setup.py --help-commands

In [None]:
!tar -xf PPOCRdata.tar && rm -rf PPOCRdata.tar

In [None]:
import cv2 
import os 
#from imutils import perspective
import numpy as np 
import json
from tqdm import tqdm
import matplotlib.pyplot as plt
from imutils import perspective

In [1]:
    # %pip (rather than !pip) installs into the environment of the running kernel
    %pip install paddlepaddle
    %pip install paddleocr


Collecting paddlepaddle
  Downloading paddlepaddle-2.6.2-cp310-cp310-manylinux1_x86_64.whl.metadata (8.6 kB)
Collecting astor (from paddlepaddle)
  Downloading astor-0.8.1-py2.py3-none-any.whl.metadata (4.2 kB)
Downloading paddlepaddle-2.6.2-cp310-cp310-manylinux1_x86_64.whl (126.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m126.0/126.0 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m00:01[0m:00:01[0m
[?25hDownloading astor-0.8.1-py2.py3-none-any.whl (27 kB)
Installing collected packages: astor, paddlepaddle
Successfully installed astor-0.8.1 paddlepaddle-2.6.2
Collecting paddleocr
  Downloading paddleocr-2.9.1-py3-none-any.whl.metadata (8.5 kB)
Collecting lmdb (from paddleocr)
  Downloading lmdb-1.5.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.1 kB)
Collecting rapidfuzz (from paddleocr)
  Downloading rapidfuzz-3.10.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting python-docx (from paddleocr)
  Do

# Extract text using PaddleOCR and save annotated files

In [18]:
#use this code to generate labels and annotation files for danny-chess-2
from paddleocr import PaddleOCR
from PIL import Image, ImageDraw, ImageFont
import os
import csv
import json

# Parameters
INPUT_FOLDER = '/kaggle/input/danny-chess-2/testing'          # Photos to run OCR on
LABELS_FOLDER = '/kaggle/working/paddle_labels'               # One CSV of detections per photo
ANNOTATED_IMAGES_FOLDER = '/kaggle/working/paddle_annotated'  # Photos with numbered boxes drawn on
FONT_SIZE = 20              # Point size of the box-number text
BOX_MARGIN = 30             # Pixels added on each side of a "white"/"black" header box's x-range
ANNOTATION_COLOR = "green"  # Outline and text color for annotations

# Settings for the English PaddleOCR pipeline used by this cell
ocr_options = {
    'use_angle_cls': False,            # Do not load the text-angle classifier
    'lang': 'en',                      # English models
    'det_db_box_thresh': 0.4,          # Box-confidence threshold of the detector
    'det_db_unclip_ratio': 1.5,        # How far detected boxes are expanded around the text
    'drop_score': 0.4,                 # Recognition results scoring below this are dropped
    'rec_image_shape': "3, 48, 320",   # Recognizer input as "channels, height, width"
    'det_limit_side_len': 960,         # Side-length limit applied to the detector input
}
ocr = PaddleOCR(**ocr_options)

# Ensure output folders exist
for out_dir in (LABELS_FOLDER, ANNOTATED_IMAGES_FOLDER):
    os.makedirs(out_dir, exist_ok=True)

# Process all JPEG/JPG files in the input folder
def calculate_x_center(box):
    """Return the x midpoint between corner 0 and corner 2 of an OCR box.

    Args:
        box: Sequence of (x, y) corner points as produced by the OCR call below.
            # assumes corners 0 and 2 are diagonally opposite - TODO confirm
    """
    return (box[0][0] + box[2][0]) / 2


def is_within_any_range(x_center, ranges):
    """Return True when x_center lies inside at least one inclusive (x_min, x_max) range."""
    return any(x_min <= x_center <= x_max for x_min, x_max in ranges)


# Load the annotation font once; fall back to Pillow's built-in font when Arial is unavailable
try:
    font = ImageFont.truetype("arial.ttf", size=FONT_SIZE)
except IOError:
    font = ImageFont.load_default()

# For every JPEG in INPUT_FOLDER: run OCR, draw every detected box on a copy of the photo, and
# write a CSV with the boxes whose x-center lines up with a "white" or "black" header box.
for filename in os.listdir(INPUT_FOLDER):
    # Only process .jpeg or .jpg files
    if not filename.lower().endswith(('.jpeg', '.jpg')):
        continue

    image_path = os.path.join(INPUT_FOLDER, filename)
    csv_path = os.path.join(LABELS_FOLDER, f"{filename}.csv")

    # Skip if the corresponding CSV file already exists (it may hold hand-corrected labels)
    if os.path.exists(csv_path):
        print(f"Skipping {filename} - CSV file already exists.")
        continue

    print(f"Processing {filename}...")

    # Perform OCR
    result = ocr.ocr(image_path, cls=True)
    # Flatten the per-page results into one list; a page with no detected text is None
    # and would otherwise crash the loops below
    detections = [line for res in (result or []) if res for line in res]

    # Load the image for annotation
    image = Image.open(image_path).convert('RGB')
    draw = ImageDraw.Draw(image)

    # Pass 1: collect the (margin-expanded) x-ranges of the "white" and "black" header boxes
    white_ranges, black_ranges = [], []
    for line in detections:
        box = line[0]
        text = line[1][0].lower()
        x_min, x_max = box[0][0] - BOX_MARGIN, box[2][0] + BOX_MARGIN
        if text == "white":
            white_ranges.append((x_min, x_max))
        elif text == "black":
            black_ranges.append((x_min, x_max))

    # Pass 2: annotate every box; keep only the ones under a header for the CSV
    preserved_boxes = []
    box_number = 1
    csv_data = []
    for line in detections:
        box = line[0]
        x_center = calculate_x_center(box)
        confidence = line[1][1] * 100
        text = line[1][0]

        # Ensure coordinates are in the correct order for PIL's rectangle
        x_min = int(min(box[0][0], box[2][0]))
        y_min = int(min(box[0][1], box[2][1]))
        x_max = int(max(box[0][0], box[2][0]))
        y_max = int(max(box[0][1], box[2][1]))

        # Draw the box outline and its number once (the draw calls used to be duplicated)
        draw.rectangle([x_min, y_min, x_max, y_max], outline=ANNOTATION_COLOR, width=2)
        draw.text((x_min, y_min - 10), str(box_number), fill=ANNOTATION_COLOR, font=font)

        # Check ranges
        if is_within_any_range(x_center, white_ranges) or is_within_any_range(x_center, black_ranges):
            preserved_boxes.append({
                "box_number": box_number,
                "x_center": x_center,
                "text": text,
                "confidence": confidence,
                "coordinates": box
            })
            # "Label" starts as a copy of the OCR text
            csv_data.append([box_number, confidence, json.dumps(box), text, text])
            print(f"Box {box_number}, x-center: {x_center}, Text: {text}, Confidence: {confidence:.2f}%")

        # Numbers follow detection order, so CSV rows match the numbers drawn on the image
        box_number += 1

    # Save annotated image
    annotated_img_path = os.path.join(ANNOTATED_IMAGES_FOLDER, filename)
    image.save(annotated_img_path)
    print(f"Annotated image saved at: {annotated_img_path}")

    # Save CSV
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(["Box Number", "Confidence", "Box Coordinates", "Original Text", "Label"])
        csvwriter.writerows(csv_data)

    print(f"Label CSV file saved at: {csv_path}")


[2024/11/24 17:53:53] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='/root/.paddleocr/whl/det/en/en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.4, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='/root/.paddleocr/whl/rec/en/en_PP-OCRv4_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batch_num=6, max_text_length=25, rec_c

zip annotated files for download

In [19]:
# Zip the label CSVs and the annotated output so they can be downloaded.
# `-r` recurses into each folder. Each archive is written inside the folder it
# archives, and re-running the cell updates the existing archive in place
# (see the "updating:" lines in the cell output).
!zip -r /kaggle/working/paddle_labels/paddle_labels.zip /kaggle/working/paddle_labels
!zip -r /kaggle/working/paddle_annotated/paddle_annotated.zip /kaggle/working/paddle_annotated

updating: kaggle/working/paddle_labels/ (stored 0%)
updating: kaggle/working/paddle_labels/2024-11-23 23-04.jpeg.csv (deflated 66%)
updating: kaggle/working/paddle_labels/2024-11-23 23-14.jpeg.csv (deflated 66%)
updating: kaggle/working/paddle_labels/2024-11-23 23-00.jpeg.csv (deflated 66%)
updating: kaggle/working/paddle_labels/2024-11-23 22-58.jpeg.csv (deflated 66%)
updating: kaggle/working/paddle_labels/2024-11-23 23-13.jpeg.csv (deflated 66%)
updating: kaggle/working/paddle_labels/2024-11-23 15-37 1.jpeg.csv (deflated 64%)
updating: kaggle/working/paddle_labels/2024-11-23 15-34.jpeg.csv (deflated 66%)
updating: kaggle/working/paddle_labels/2024-11-23 22-59.jpeg.csv (deflated 66%)
updating: kaggle/working/paddle_labels/034_0.png.csv (deflated 66%)
updating: kaggle/working/paddle_labels/{image_name}.csv (deflated 66%)
updating: kaggle/working/paddle_labels/2024-11-23 15-37.jpeg.csv (deflated 65%)
updating: kaggle/working/paddle_labels/2024-11-23 15-36.jpeg.csv (deflated 66%)
updatin

**Order moves using columns if there are no white and black columns**

In [None]:
#function organize_boxes_by_rows_and_columns
from PIL import Image

def organize_boxes_by_rows_and_columns(res, image):
    """
    Group OCR boxes into text rows and derive column boundaries from the first row.

    Boxes are sorted top-to-bottom by their vertical centre and collected into
    rows: a box joins the current row when its y_center lies between the
    smallest top edge and the largest bottom edge of the boxes already in that
    row. Every finished row is sorted left-to-right by x_center.

    Column boundaries are taken from the right edges of the boxes in the first
    row. The columns are contiguous: each column starts exactly where the
    previous one ends, so no x position between the first column's start and
    the last column's end is left unassigned.

    Args:
        res (list): OCR result; each item is (box_coordinates, (text, confidence))
            where box_coordinates is a 4-point polygon whose first point is the
            top-left corner and whose third point is the bottom-right corner.
        image (Image): Object exposing `.size` as (width, height); only the
            width is used, to close the last column.

    Returns:
        dict: {"rows": list of rows (each a list of box dicts),
               "column_widths": list of (start, end) tuples}.
        Each box dict has the keys box_number, x_center, y_center, text,
        confidence and coordinates ([x_left, y_top, x_right, y_bottom]).
        Note that x_right of the first-row boxes is widened by 10 pixels.
    """
    if not res:
        print("No OCR results to process.")
        return {"rows": [], "column_widths": []}

    # Only the width is needed (it becomes the right edge of the last column).
    image_width = image.size[0]

    # Convert every OCR item into a flat box record with its centre point.
    boxes = []
    for idx, item in enumerate(res):
        # OCR result structure: [(box_coordinates, ("text", confidence))]
        box_coordinates = item[0]
        text, confidence = item[1]

        x_left, y_top = box_coordinates[0]
        x_right, y_bottom = box_coordinates[2]

        boxes.append({
            "box_number": idx + 1,
            "x_center": (x_left + x_right) / 2,
            "y_center": (y_top + y_bottom) / 2,
            "text": text,
            "confidence": confidence,
            "coordinates": [x_left, y_top, x_right, y_bottom]
        })

    # Walk the boxes top-to-bottom so rows can be built incrementally.
    boxes = sorted(boxes, key=lambda b: b['y_center'])

    rows = []
    current_row = []
    for box in boxes:
        if not current_row:
            current_row.append(box)
            continue

        # Vertical extent of the row collected so far.
        y_min = min(b['coordinates'][1] for b in current_row)
        y_max = max(b['coordinates'][3] for b in current_row)

        if y_min <= box['y_center'] <= y_max:
            current_row.append(box)
        else:
            # The box starts a new row: close the current one, sorted left-to-right.
            rows.append(sorted(current_row, key=lambda b: b['x_center']))
            current_row = [box]

    if current_row:  # Close the last row
        rows.append(sorted(current_row, key=lambda b: b['x_center']))

    # Widen the first-row boxes by 10 pixels on the right before deriving columns.
    first_row = rows[0] if rows else []
    for box in first_row:
        box['coordinates'][2] += 10

    # Derive contiguous column boundaries from the first row.
    column_widths = []
    last_idx = len(first_row) - 1
    for idx, box in enumerate(first_row):
        x_max = box['coordinates'][2]
        if idx == 0:
            # First column starts at the left image edge.
            column_widths.append((0, x_max + 10))
        elif idx == last_idx:
            # Last column runs to the right image edge.
            column_widths.append((column_widths[-1][1], image_width))
        else:
            # Intermediate columns start where the previous column ended.
            # (Starting at the previous box's x_max + 10 instead left a 5 px
            # gap in which boxes belonged to no column.)
            column_widths.append((column_widths[-1][1], x_max + 5))

    return {"rows": rows, "column_widths": column_widths}

def exclude_columns(rows, column_widths, excluded_columns):
    """
    Keep only the boxes that do not fall into the excluded columns.

    A box belongs to the first column whose half-open range [start, end)
    contains its x_center. Boxes that match no column at all are dropped.

    Args:
        rows (list): List of rows, where each row is a list of box dicts
            carrying an "x_center" key.
        column_widths (list): Column boundaries as (start, end) tuples.
        excluded_columns (list): 1-based indices of the columns to drop.

    Returns:
        list: Flat list of the kept boxes, in row order.
    """
    # Column indices are 1-based for the caller, 0-based internally.
    skipped = {col - 1 for col in excluded_columns}

    kept = []
    for row in rows:
        for box in row:
            x = box["x_center"]
            # Index of the first column containing x, or None when no column matches.
            col_index = next(
                (i for i, (start, end) in enumerate(column_widths) if start <= x < end),
                None,
            )
            if col_index is not None and col_index not in skipped:
                kept.append(box)

    return kept

In [None]:
# Run organize_boxes_by_rows_and_columns(res, image) and print a short summary.
# `res` and `image` must already exist in the kernel (they are not defined in this cell).
result = organize_boxes_by_rows_and_columns(res, image)
print(len(result["rows"]))
print(f"first row: {result['rows'][0]}")
print(f"first row count: {len(result['rows'][0])}")
print(f"columns {result['column_widths']}")
print(f"columns count: {len(result['column_widths'])}")

# Drop the boxes that fall into columns 1 and 4 (1-based) and keep the rest.
excluded_columns = [1, 4]
preserved_boxes = exclude_columns(
    result["rows"],
    result["column_widths"],
    excluded_columns,
)

# Show the first preserved box
print(preserved_boxes[0])

In [None]:
# Order chess moves for the `preserved_boxes` variable (produced by an earlier cell).


def _box_y_extent(coordinates):
    """Return (y_top, y_bottom) of a box.

    Accepts both layouts used in this notebook: the flat
    [x_left, y_top, x_right, y_bottom] list built by
    organize_boxes_by_rows_and_columns, and a 4-point polygon
    [[x, y], [x, y], [x, y], [x, y]] (top-left first, bottom-right third).
    """
    if hasattr(coordinates[0], "__len__"):
        return coordinates[0][1], coordinates[2][1]
    return coordinates[1], coordinates[3]


def _print_box_rows(title, box_rows):
    """Print a titled, numbered listing of rows of boxes."""
    print(title)
    for i, row in enumerate(box_rows):
        print(f"Row {i + 1}:")
        for box in row:
            y_coords = list(_box_y_extent(box["coordinates"]))
            print(f"  Box {box['box_number']}, x_center: {box['x_center']:.2f}, y_center: {box['y_center']:.2f}, "
                  f"y_coords: {y_coords}, Text: {box['text']}, Confidence: {box['confidence']:.2f}%")


# Step 0: Make sure every box has a y_center (computed only when missing).
for box in preserved_boxes:
    if "y_center" not in box:
        y_top, y_bottom = _box_y_extent(box["coordinates"])
        box["y_center"] = (y_top + y_bottom) / 2

# Step 1: Group boxes into rows by y_center.
# A box joins the first existing row whose vertical extent contains its
# y_center; otherwise it starts a new row. (Previously the row loop sat outside
# the per-box loop, so `rows` stayed empty and nothing was ever grouped.)
rows = []
for box in preserved_boxes:
    for row in rows:
        row_y_min = min(_box_y_extent(b["coordinates"])[0] for b in row)
        row_y_max = max(_box_y_extent(b["coordinates"])[1] for b in row)
        if row_y_min <= box["y_center"] <= row_y_max:
            row.append(box)
            break
    else:
        rows.append([box])  # No row matched: start a new one

# Record the final vertical extent of its row on every box.
for row in rows:
    row_y_min = min(_box_y_extent(b["coordinates"])[0] for b in row)
    row_y_max = max(_box_y_extent(b["coordinates"])[1] for b in row)
    for b in row:
        b["row_y_min"] = row_y_min
        b["row_y_max"] = row_y_max

# Step 2: Sort each row left-to-right by x_center.
for row in rows:
    row.sort(key=lambda b: b["x_center"])

# Step 3: Split rows into two lists.
first_list = []  # First two boxes of every row
second_list = []  # Boxes beyond the second one, per row

for row in rows:
    first_list.append(row[:2])
    # Header cells reading "WHITE"/"BLACK" are not moves, so they are dropped
    # from the overflow.
    filtered_overflow = [box for box in row[2:] if box["text"].lower() not in ["white", "black"]]
    if filtered_overflow:
        second_list.append(filtered_overflow)

# Step 4: Print the results.
_print_box_rows("First List (Rows with up to 2 elements):", first_list)
_print_box_rows("\nSecond List (Overflow Rows):", second_list)


# **Calculate accuracy by comparing the Original Text and Label columns. An accuracy of 100% means the labels have not been corrected yet.**
OCR accuracy before additional model training (paddleocr):
2024-11-23 23-00.jpeg.csv      | Total_Rows: 130  | Matched_Rows: 103  | Total_Accuracy:  79.23 | Chess_Moves: 82   | Chess_Move_Accuracy: 62  | Move_Accuracy:  75.61
2024-11-23 22-59.jpeg.csv      | Total_Rows: 104  | Matched_Rows: 95   | Total_Accuracy:  91.35 | Chess_Moves: 64   | Chess_Move_Accuracy: 60  | Move_Accuracy:  93.75
2024-11-23 15-36.jpeg.csv      | Total_Rows: 119  | Matched_Rows: 105  | Total_Accuracy:  88.24 | Chess_Moves: 74   | Chess_Move_Accuracy: 64  | Move_Accuracy:  86.49
2024-11-23 23-13.jpeg.csv      | Total_Rows: 130  | Matched_Rows: 117  | Total_Accuracy:  90.00 | Chess_Moves: 82   | Chess_Move_Accuracy: 72  | Move_Accuracy:  87.80
2024-11-23 23-04.jpeg.csv      | Total_Rows: 157  | Matched_Rows: 128  | Total_Accuracy:  81.53 | Chess_Moves: 100  | Chess_Move_Accuracy: 79  | Move_Accuracy:  79.00
2024-11-23 15-35.jpeg.csv      | Total_Rows: 97   | Matched_Rows: 89   | Total_Accuracy:  91.75 | Chess_Moves: 60   | Chess_Move_Accuracy: 54  | Move_Accuracy:  90.00
2024-11-23 23-05.jpeg.csv      | Total_Rows: 112  | Matched_Rows: 99   | Total_Accuracy:  88.39 | Chess_Moves: 70   | Chess_Move_Accuracy: 62  | Move_Accuracy:  88.57
2024-11-23 15-36 1.jpeg.csv    | Total_Rows: 166  | Matched_Rows: 135  | Total_Accuracy:  81.33 | Chess_Moves: 106  | Chess_Move_Accuracy: 86  | Move_Accuracy:  81.13
2024-11-23 15-34.jpeg.csv      | Total_Rows: 98   | Matched_Rows: 93   | Total_Accuracy:  94.90 | Chess_Moves: 60   | Chess_Move_Accuracy: 57  | Move_Accuracy:  95.00
2024-11-23 22-58.jpeg.csv      | Total_Rows: 110  | Matched_Rows: 99   | Total_Accuracy:  90.00 | Chess_Moves: 68   | Chess_Move_Accuracy: 60  | Move_Accuracy:  88.24
2024-11-23 23-00 1.jpeg.csv    | Total_Rows: 108  | Matched_Rows: 84   | Total_Accuracy:  77.78 | Chess_Moves: 67   | Chess_Move_Accuracy: 53  | Move_Accuracy:  79.10
2024-11-23 23-14.jpeg.csv      | Total_Rows: 108  | Matched_Rows: 95   | Total_Accuracy:  87.96 | Chess_Moves: 67   | Chess_Move_Accuracy: 59  | Move_Accuracy:  88.06
2024-11-23 15-37 1.jpeg.csv    | Total_Rows: 159  | Matched_Rows: 114  | Total_Accuracy:  71.70 | Chess_Moves: 101  | Chess_Move_Accuracy: 72  | Move_Accuracy:  71.29
2024-11-23 22-59 1.jpeg.csv    | Total_Rows: 107  | Matched_Rows: 92   | Total_Accuracy:  85.98 | Chess_Moves: 66   | Chess_Move_Accuracy: 58  | Move_Accuracy:  87.88
2024-11-23 22-57.jpeg.csv      | Total_Rows: 160  | Matched_Rows: 132  | Total_Accuracy:  82.50 | Chess_Moves: 102  | Chess_Move_Accuracy: 80  | Move_Accuracy:  78.43

Accuracy results saved to: /kaggle/working/accuracy/accuracy_revision_4.csv
2024-11-24 10-10 1.jpeg.csv    | Total_Rows: 108  | Matched_Rows: 37   | Total_Accuracy:  34.26 | Chess_Moves: 108  | Chess_Move_Accuracy: 37  | Move_Accuracy:  34.26
2024-11-24 10-11 1.jpeg.csv    | Total_Rows: 73   | Matched_Rows: 24   | Total_Accuracy:  32.88 | Chess_Moves: 50   | Chess_Move_Accuracy: 21  | Move_Accuracy:  42.00
2024-11-24 10-12 1.jpeg.csv    | Total_Rows: 48   | Matched_Rows: 18   | Total_Accuracy:  37.50 | Chess_Moves: 37   | Chess_Move_Accuracy: 14  | Move_Accuracy:  37.84
2024-11-24 10-13 2.jpeg.csv    | Total_Rows: 109  | Matched_Rows: 39   | Total_Accuracy:  35.78 | Chess_Moves: 77   | Chess_Move_Accuracy: 31  | Move_Accuracy:  40.26
20241124-1012.jpg.csv          | Total_Rows: 38   | Matched_Rows: 32   | Total_Accuracy:  84.21 | Chess_Moves: 27   | Chess_Move_Accuracy: 21  | Move_Accuracy:  77.78

Accuracy results saved to: /kaggle/working/accuracy/accuracy_revision_5.csv: 27         | Move_Accuracy:  77.78curacy:  80.90

In [39]:
#calculate accuracy. Requires human check for label column in csv files and update of the dataset
import pandas as pd
import os

# Directory path containing the CSV files
directory_path = '/kaggle/input/danny-chess-2/testing'  # Replace with your folder path
output_directory = '/kaggle/working/accuracy'

# Ensure the output directory exists
os.makedirs(output_directory, exist_ok=True)

# Columns every label CSV must provide; all three are read below.
required_columns = ('Box Number', 'Original Text', 'Label')

# Placeholders for combined metrics (this cell does not accumulate into them)
combined_total_rows = 0
combined_matches = 0
combined_preserved_rows = 0
combined_preserved_matches = 0

# Create a list to store file-specific results for CSV output
csv_output = []

# Box numbers kept by the column filter. `preserved_boxes` must have been
# produced by an earlier cell; the list is the same for every file, so it is
# built once instead of once per file.
preserved_box_numbers = [box["box_number"] for box in preserved_boxes]

# Iterate through all CSV files in the directory (sorted, because os.listdir
# returns entries in arbitrary order)
for file_name in sorted(os.listdir(directory_path)):
    if not file_name.endswith('.csv'):  # Process only CSV files
        continue
    file_path = os.path.join(directory_path, file_name)

    try:
        data = pd.read_csv(file_path)
    except Exception as e:
        print(f"Error reading {file_name}: {e}")
        continue

    # Skip files lacking any column used below instead of failing with a KeyError
    missing_columns = [column for column in required_columns if column not in data.columns]
    if missing_columns:
        print(f"Skipping {file_name}: required column(s) not found: {missing_columns}.")
        continue

    # Calculate Total Accuracy for the current file
    data['Match'] = data['Original Text'] == data['Label']
    total_rows = len(data)
    matches = data['Match'].sum()
    total_accuracy = (matches / total_rows) * 100 if total_rows > 0 else 0

    # Restrict to the preserved boxes and repeat the comparison
    preserved_data = data[data['Box Number'].isin(preserved_box_numbers)]
    preserved_matches = (preserved_data['Original Text'] == preserved_data['Label']).sum()
    preserved_rows = len(preserved_data)
    move_accuracy = (preserved_matches / preserved_rows) * 100 if preserved_rows > 0 else 0

    # Print results for the current file in table format
    print(f"{file_name:30} | Total_Rows: {total_rows:<4} | Matched_Rows: {matches:<4} | Total_Accuracy: {total_accuracy:6.2f} | Chess_Moves: {preserved_rows:<4} | Chess_Move_Accuracy: {preserved_matches:<4}| Move_Accuracy: {move_accuracy:6.2f}")

    # Add results to the CSV output list
    csv_output.append({
        "Filename": file_name,
        "Total_Rows": total_rows,
        "Matched_Rows": matches,
        "Total_Accuracy": total_accuracy,
        "Chess_Moves": preserved_rows,
        "Chess_Move_Accuracy": preserved_matches,
        "Move_Accuracy": move_accuracy
    })


# Find the next available revision number for the output file
revision = 1
while os.path.exists(os.path.join(output_directory, f"accuracy_revision_{revision}.csv")):
    revision += 1
output_file_path = os.path.join(output_directory, f"accuracy_revision_{revision}.csv")

# Save results to a CSV file
csv_df = pd.DataFrame(csv_output)
csv_df.to_csv(output_file_path, index=False)
print(f"\nAccuracy results saved to: {output_file_path}")


2024-11-24 10-10 1.jpeg.csv    | Total_Rows: 108  | Matched_Rows: 37   | Total_Accuracy:  34.26 | Chess_Moves: 108  | Chess_Move_Accuracy: 37  | Move_Accuracy:  34.26
2024-11-24 10-11 1.jpeg.csv    | Total_Rows: 73   | Matched_Rows: 24   | Total_Accuracy:  32.88 | Chess_Moves: 50   | Chess_Move_Accuracy: 21  | Move_Accuracy:  42.00
2024-11-24 10-12 1.jpeg.csv    | Total_Rows: 48   | Matched_Rows: 18   | Total_Accuracy:  37.50 | Chess_Moves: 37   | Chess_Move_Accuracy: 14  | Move_Accuracy:  37.84
2024-11-24 10-13 2.jpeg.csv    | Total_Rows: 109  | Matched_Rows: 39   | Total_Accuracy:  35.78 | Chess_Moves: 77   | Chess_Move_Accuracy: 31  | Move_Accuracy:  40.26
20241124-1012.jpg.csv          | Total_Rows: 38   | Matched_Rows: 32   | Total_Accuracy:  84.21 | Chess_Moves: 27   | Chess_Move_Accuracy: 21  | Move_Accuracy:  77.78

Accuracy results saved to: /kaggle/working/accuracy/accuracy_revision_5.csv


Accuracy code as function

In [40]:
import pandas as pd
import os

def calculate_accuracy(directory_path, output_directory, preserved_boxes, epoch):
    """
    Calculate accuracy metrics for OCR label CSVs and print them in table format.

    For every ``*.csv`` file in ``directory_path`` the 'Original Text' column is
    compared with the 'Label' column, once over all rows and once over the rows
    whose 'Box Number' is listed in ``preserved_boxes``. Per-file results are
    written to the next free ``accuracy_revision_<n>.csv`` and the totals over
    all files to ``training_epoch<epoch>.csv``, both inside ``output_directory``.

    Files that cannot be read, or that lack any of the columns
    'Box Number', 'Original Text' or 'Label', are reported and skipped.

    Args:
        directory_path (str): Path to the directory containing CSV files.
        output_directory (str): Path to save accuracy results (created if missing).
        preserved_boxes (list): Box numbers to include in the move accuracy.
        epoch (int): The current training epoch number (used in the file name).

    Returns:
        str: Path to the saved combined accuracy results CSV file.
    """
    # All three columns are read below, so all three are validated up front.
    required_columns = ('Box Number', 'Original Text', 'Label')

    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Running totals over all processed files
    combined_total_rows = 0
    combined_matches = 0
    combined_preserved_rows = 0
    combined_preserved_matches = 0

    # File-specific results for CSV output
    csv_output = []

    # Sorted, because os.listdir returns entries in arbitrary order and the
    # printed table / CSV should be reproducible between runs.
    for file_name in sorted(os.listdir(directory_path)):
        if not file_name.endswith('.csv'):  # Process only CSV files
            continue
        file_path = os.path.join(directory_path, file_name)

        try:
            data = pd.read_csv(file_path)
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
            continue

        # Skip files missing any required column instead of raising KeyError
        missing_columns = [column for column in required_columns if column not in data.columns]
        if missing_columns:
            print(f"Skipping {file_name}: required column(s) not found: {missing_columns}.")
            continue

        # Total accuracy for the current file
        data['Match'] = data['Original Text'] == data['Label']
        total_rows = len(data)
        matches = data['Match'].sum()
        total_accuracy = (matches / total_rows) * 100 if total_rows > 0 else 0

        # Accuracy restricted to the preserved boxes
        preserved_data = data[data['Box Number'].isin(preserved_boxes)]
        preserved_matches = (preserved_data['Original Text'] == preserved_data['Label']).sum()
        preserved_rows = len(preserved_data)
        move_accuracy = (preserved_matches / preserved_rows) * 100 if preserved_rows > 0 else 0

        # Print results for the current file in table format
        print(f"{file_name:28} | Total_Rows: {total_rows:<4} | Matched_Rows: {matches:<4} | Total_Accuracy: {total_accuracy:6.2f} | Chess_Moves: {preserved_rows:<4} | Chess_Move_Accuracy: {preserved_matches:<4}| Move_Accuracy: {move_accuracy:6.2f}")

        # Add results to the CSV output list
        csv_output.append({
            "Filename": file_name,
            "Total_Rows": total_rows,
            "Matches_Total_Accuracy": matches,
            "Total_Accuracy": total_accuracy,
            "Chess_Moves": preserved_rows,
            "Matches_Move_Accuracy": preserved_matches,
            "Move_Accuracy": move_accuracy
        })

        # Update combined metrics
        combined_total_rows += total_rows
        combined_matches += matches
        combined_preserved_rows += preserved_rows
        combined_preserved_matches += preserved_matches

    # Calculate combined metrics (0 when nothing was processed)
    combined_total_accuracy = (combined_matches / combined_total_rows) * 100 if combined_total_rows > 0 else 0
    combined_move_accuracy = (combined_preserved_matches / combined_preserved_rows) * 100 if combined_preserved_rows > 0 else 0

    # Print combined metrics in the specified structure
    print("\nCombined Metrics:")
    print(f"{'Combined Total Rows':5} | Total_Rows: {combined_total_rows:<5} | Matches: {combined_matches:<5} | Total_Accuracy: {combined_total_accuracy:6.2f} | Chess_Moves: {combined_preserved_rows:<5} | Move_Accuracy: {combined_move_accuracy:6.2f}")

    # Save combined metrics to a separate CSV file for the epoch
    combined_output_file = os.path.join(output_directory, f"training_epoch{epoch}.csv")
    combined_metrics = {
        "Metric": ["Total Rows", "Total Matched Rows", "Total Accuracy", "Chess Moves", "Chess Move Matches", "Move Accuracy"],
        "Value": [combined_total_rows, combined_matches, combined_total_accuracy, combined_preserved_rows, combined_preserved_matches, combined_move_accuracy]
    }
    combined_df = pd.DataFrame(combined_metrics)
    combined_df.to_csv(combined_output_file, index=False)
    print(f"\nCombined metrics saved to: {combined_output_file}")

    # Save per-file results under the next unused revision number
    revision = 1
    while os.path.exists(os.path.join(output_directory, f"accuracy_revision_{revision}.csv")):
        revision += 1
    output_file_path = os.path.join(output_directory, f"accuracy_revision_{revision}.csv")
    csv_df = pd.DataFrame(csv_output)
    csv_df.to_csv(output_file_path, index=False)
    print(f"\nFile-specific accuracy results saved to: {output_file_path}")

    return combined_output_file


# ***Paddleocr recognition training***

In [41]:
import os
import pandas as pd
import paddle

from paddleocr import PaddleOCRModel

# Input locations: images and their label CSVs share one dataset folder,
# with the held-out files in its "testing" subfolder.
image_dir = "/kaggle/input/danny-chess-2"
label_dir = image_dir
testing_dir = f"{image_dir}/testing"

# Output locations for training artefacts and accuracy reports.
output_dir = "/kaggle/working/paddleocr_training_output"
accuracy_dir = "/kaggle/working/accuracy"

# Create the output folders up front (no error if they already exist).
for _required_dir in (output_dir, accuracy_dir):
    os.makedirs(_required_dir, exist_ok=True)

# Whether the installed Paddle build was compiled with CUDA support.
use_gpu = paddle.device.is_compiled_with_cuda()
print(f"GPU Detected: {use_gpu}")

# Prepare Dataset
def load_csv_dataset(image_dir, label_dir):
    """
    Build a flat list of training samples from per-image label CSV files.

    Every ``<image file name>.csv`` in ``label_dir`` is paired with the image of
    the same name (minus the ``.csv`` suffix) in ``image_dir``; CSVs without a
    matching image are ignored. One sample is produced per CSV row.

    Two CSV layouts are supported:
      * the layout written by the labelling cells of this notebook, with a
        'Box Coordinates' column (a 4-point polygon stored as text) and a
        'Label' column;
      * the axis-aligned layout with ``x_min``, ``y_min``, ``x_max``, ``y_max``
        and ``label`` columns.

    Args:
        image_dir (str): Directory containing the images.
        label_dir (str): Directory containing the label CSV files.

    Returns:
        list: Dicts with the keys "image" (image path), "coordinates"
        (four [x, y] points, starting top-left, clockwise) and "label".

    Raises:
        KeyError: If a CSV matches neither layout.
    """
    import ast  # local import so the function does not rely on other cells' imports

    csv_suffix = ".csv"
    dataset = []
    # Sorted, because os.listdir returns entries in arbitrary order.
    for csv_file in sorted(os.listdir(label_dir)):
        if not csv_file.endswith(csv_suffix):
            continue

        # Strip only the trailing ".csv" (str.replace would remove every occurrence).
        image_name = csv_file[:-len(csv_suffix)]
        image_path = os.path.join(image_dir, image_name)
        if not os.path.exists(image_path):
            continue

        labels = pd.read_csv(os.path.join(label_dir, csv_file))
        has_polygons = "Box Coordinates" in labels.columns
        label_column = "Label" if "Label" in labels.columns else "label"

        for _, row in labels.iterrows():
            if has_polygons:
                # Stored as text such as "[[x, y], [x, y], [x, y], [x, y]]".
                coordinates = ast.literal_eval(row["Box Coordinates"])
            else:
                coordinates = [
                    [row["x_min"], row["y_min"]],
                    [row["x_max"], row["y_min"]],
                    [row["x_max"], row["y_max"]],
                    [row["x_min"], row["y_max"]]
                ]
            dataset.append({
                "image": image_path,
                "coordinates": coordinates,
                "label": row[label_column]
            })
    return dataset

# Build the list of training samples from the label CSVs.
training_dataset = load_csv_dataset(image_dir=image_dir, label_dir=label_dir)

# Configure the OCR model with local detection / recognition model folders.
# NOTE(review): the `from paddleocr import PaddleOCRModel` import of this cell
# fails with the ImportError shown in the cell output, so this constructor is
# never reached as written.
ocr_model = PaddleOCRModel(
    use_gpu=use_gpu,  # Dynamically set GPU usage
    det_model_dir="/kaggle/working/en_PP-OCRv3_det_infer",  # Replace with your PaddleOCR detection model path
    rec_model_dir="/kaggle/working/en_PP-OCRv3_rec_infer",  # Replace with your PaddleOCR model path or use default
)

# Accuracy Calculation Function
def calculate_accuracy(directory_path, output_directory, preserved_boxes=None):
    """
    Compare 'Original Text' with 'Label' for every CSV in a directory and save the results.

    NOTE: this definition has the same name as the four-argument
    ``calculate_accuracy`` defined in an earlier cell; running this cell
    replaces that one in the kernel.

    Files that cannot be read, or that lack any of the columns
    'Box Number', 'Original Text' or 'Label', are reported and skipped.

    Args:
        directory_path (str): Directory containing the label CSV files.
        output_directory (str): Directory for the result file (created if missing).
        preserved_boxes (iterable, optional): Box numbers counted for the move
            accuracy. Defaults to box numbers 1 through 9.

    Returns:
        str: Path of the written ``accuracy_revision_<n>.csv`` file, where
        ``<n>`` is the first revision number not yet used in ``output_directory``.
    """
    required_columns = ('Box Number', 'Original Text', 'Label')
    if preserved_boxes is None:
        preserved_boxes = range(1, 10)  # Example for preserved box numbers
    preserved_boxes = list(preserved_boxes)

    os.makedirs(output_directory, exist_ok=True)
    csv_output = []

    # Sorted, because os.listdir returns entries in arbitrary order.
    for file_name in sorted(os.listdir(directory_path)):
        if not file_name.endswith('.csv'):
            continue
        file_path = os.path.join(directory_path, file_name)
        try:
            data = pd.read_csv(file_path)
        except Exception as e:
            print(f"Error reading {file_name}: {e}")
            continue

        # All three columns are read below; skip the file rather than raise KeyError.
        missing_columns = [column for column in required_columns if column not in data.columns]
        if missing_columns:
            print(f"Skipping {file_name}: required column(s) not found: {missing_columns}.")
            continue

        # Accuracy over all rows
        data['Match'] = data['Original Text'] == data['Label']
        total_rows = len(data)
        matches = data['Match'].sum()
        total_accuracy = (matches / total_rows) * 100 if total_rows > 0 else 0

        # Accuracy over the preserved boxes only
        preserved_data = data[data['Box Number'].isin(preserved_boxes)]
        preserved_matches = (preserved_data['Original Text'] == preserved_data['Label']).sum()
        preserved_rows = len(preserved_data)
        move_accuracy = (preserved_matches / preserved_rows) * 100 if preserved_rows > 0 else 0

        csv_output.append({
            "Filename": file_name,
            "Total_Rows": total_rows,
            "Matches_Total_Accuracy": matches,
            "Total_Accuracy": total_accuracy,
            "Chess_Moves": preserved_rows,
            "Matches_Move_Accuracy": preserved_matches,
            "Move_Accuracy": move_accuracy
        })

    # Write to the next unused revision number so earlier results are kept.
    revision = 1
    while os.path.exists(os.path.join(output_directory, f"accuracy_revision_{revision}.csv")):
        revision += 1
    output_file_path = os.path.join(output_directory, f"accuracy_revision_{revision}.csv")
    pd.DataFrame(csv_output).to_csv(output_file_path, index=False)
    print(f"Accuracy results saved to: {output_file_path}")
    return output_file_path

# Training Function with Accuracy Calculation
def train_model(training_dataset, testing_dir, ocr_model, epochs=10, batch_size=16):
    """
    Run a mock training loop and write an accuracy report after every epoch.

    For each epoch, every sample is passed to ``ocr_model.train_step`` and the
    returned loss / predicted label are accumulated into an average loss and a
    training accuracy, which are printed. Afterwards ``calculate_accuracy`` is
    run on ``testing_dir``, writing into the module-level ``accuracy_dir``.

    Args:
        training_dataset (iterable): Samples as dicts with the keys "image",
            "coordinates" and "label".
        testing_dir (str): Directory with the label CSVs used for the
            per-epoch accuracy report.
        ocr_model: Object exposing ``train_step(image, coordinates, label)``
            returning ``(loss, predicted_label)``.
        epochs (int): Number of passes over the dataset.
        batch_size (int): Currently unused; samples are processed one at a time.

    Returns:
        None
    """
    for epoch in range(epochs):
        total_loss = 0
        correct_predictions = 0
        total_samples = 0

        for data in training_dataset:
            label = data["label"]

            # Perform training step here
            # (In PaddleOCR, this is handled internally. Mock-up below.)
            loss, predicted_label = ocr_model.train_step(data["image"], data["coordinates"], label)
            total_loss += loss
            if predicted_label == label:
                correct_predictions += 1
            total_samples += 1

        # Metrics calculation; report 0 instead of dividing by zero when the
        # dataset produced no samples in this epoch.
        accuracy = correct_predictions / total_samples if total_samples > 0 else 0
        avg_loss = total_loss / total_samples if total_samples > 0 else 0
        print(f"Epoch {epoch+1}/{epochs}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}")

        # Call calculate_accuracy for testing dataset
        accuracy_file = calculate_accuracy(testing_dir, accuracy_dir)
        print(f"Validation accuracy saved at: {accuracy_file}")

# Start the training run: 10 epochs over the prepared dataset, with an
# accuracy report on the testing folder after each epoch.
train_model(
    training_dataset=training_dataset,
    testing_dir=testing_dir,
    ocr_model=ocr_model,
    epochs=10,
)


ImportError: cannot import name 'PaddleOCRModel' from 'paddleocr' (/opt/conda/lib/python3.10/site-packages/paddleocr/__init__.py)

In [50]:
import os
import pandas as pd
import glob
import ast

def normalize_name(name):
    """Return a matching key for a file name: spaces removed, lower-cased, last extension dropped."""
    compact = name.replace(" ", "").lower()
    stem, _extension = os.path.splitext(compact)
    return stem

def generate_detection_annotations(image_dir, output_txt_path):
    """
    Generate a PaddleOCR detection label file for the images in a directory.

    Every ``*.csv`` file in ``image_dir`` is matched to an image
    (.jpeg/.jpg/.png) in the same directory by normalized name. For each match
    one line is written to ``output_txt_path``::

        <image path><TAB><JSON list of {"transcription": ..., "points": ...}>

    The annotation list is JSON-encoded (double-quoted strings) and the image
    path is written without surrounding quotes, so the line can be split on
    the tab and the second part parsed with ``json.loads``.

    Rows whose 'Label' is empty get the transcription "###". CSV files without
    a matching image, or that fail while being processed, are reported and
    skipped.

    Args:
        image_dir (str): Directory containing images and their associated CSV files.
        output_txt_path (str): Path to save the generated annotations in train.txt or test.txt format.
    """
    import json  # local import: json is not among this cell's imports

    csv_suffix = ".csv"
    annotation_lines = []

    # Map normalized image name -> real file name for lookup by CSV name.
    image_files = {
        normalize_name(file): file
        for file in os.listdir(image_dir)
        if file.lower().endswith(('.jpeg', '.jpg', '.png'))
    }

    # Sorted so the output file is the same on every run.
    csv_files = sorted(glob.glob(os.path.join(image_dir, "*.csv")))

    for csv_file in csv_files:
        # Strip only the trailing ".csv", then normalize for matching.
        csv_name = os.path.basename(csv_file)
        base_name = normalize_name(csv_name[:-len(csv_suffix)])

        image_file = image_files.get(base_name)
        if not image_file:
            print(f"Image not found for CSV: {csv_file}, skipping.")
            continue

        image_path = os.path.join(image_dir, image_file)

        try:
            # Read everything as text so labels are written exactly as stored
            # (e.g. a move number "12" stays "12", not 12.0).
            data = pd.read_csv(csv_file, dtype=str)
            annotations = []
            for _, row in data.iterrows():
                box_coordinates = ast.literal_eval(row["Box Coordinates"])
                label = row["Label"]
                annotations.append({
                    "transcription": "###" if pd.isna(label) else label,
                    "points": box_coordinates
                })
            annotation_lines.append(f"{image_path}\t{json.dumps(annotations, ensure_ascii=False)}")
        except Exception as e:
            print(f"Error processing {csv_file}: {e}")

    # Save all annotations to the output file
    with open(output_txt_path, "w", encoding="utf-8") as f:
        f.write("\n".join(annotation_lines))
    print(f"Annotations saved to: {output_txt_path}")

# Dataset folders and the annotation file generated from each of them.
train_image_dir = "/kaggle/input/danny-chess-2"
test_image_dir = "/kaggle/input/danny-chess-2/testing"
train_output_path = "/kaggle/working/train.txt"
test_output_path = "/kaggle/working/test.txt"

# Generate the training annotations first, then the testing annotations.
for _image_dir, _output_path in (
    (train_image_dir, train_output_path),
    (test_image_dir, test_output_path),
):
    generate_detection_annotations(_image_dir, _output_path)


Annotations saved to: /kaggle/working/train.txt
Annotations saved to: /kaggle/working/test.txt


Convert the data in the dataset to ICDAR2015 format for PaddleOCR training...

In [53]:
import os
import pandas as pd
import ast

# Source dataset folders and the root of the converted ICDAR2015-style output.
source_folder_training = '/kaggle/input/danny-chess-2'
source_folder_testing = '/kaggle/input/danny-chess-2/testing'
output_folder = '/kaggle/working/danny-chess-2/icdar2015'

# Output sub-folders: images and ground-truth files for training and testing.
output_training_images = os.path.join(output_folder, 'ch4_training_images')
output_training_gt = os.path.join(output_folder, 'ch4_training_localization_transcription_gt')
output_test_images = os.path.join(output_folder, 'ch4_test_images')
output_test_gt = os.path.join(output_folder, 'ch4_test_gt')

# Create the whole output structure (no error if it already exists).
for _folder in (
    output_training_images,
    output_training_gt,
    output_test_images,
    output_test_gt,
):
    os.makedirs(_folder, exist_ok=True)

def process_dataset(source_folder, output_images_folder, output_gt_folder, exclude_dirs=None):
    """
    Convert per-image CSV annotations into an ICDAR2015-style folder layout.

    Every ``<image file name>.csv`` found under ``source_folder`` (searched
    recursively) is paired with the image of the same name next to it. The image is
    copied into ``output_images_folder`` and a ``gt_<image stem>.txt`` file is written
    to ``output_gt_folder`` with one line per CSV row:
    ``x1,y1,x2,y2,x3,y3,x4,y4,"<text>"``.

    Args:
        source_folder (str): Root folder containing images and their CSV files.
        output_images_folder (str): Existing folder that receives the image copies.
        output_gt_folder (str): Existing folder that receives the ground-truth files.
        exclude_dirs (iterable of str, optional): Directories that must not be
            descended into (e.g. a held-out split nested inside ``source_folder``).
            Defaults to None, which walks everything.

    The CSV must contain the columns ``Box Coordinates`` (a Python-literal list of
    points) and ``Original Text``; missing text is written as ``###``.
    """
    # Local import so the cell does not depend on an import elsewhere in the notebook.
    import shutil

    excluded = {os.path.abspath(path) for path in (exclude_dirs or [])}

    for root, dirs, files in os.walk(source_folder):
        if excluded:
            # Prune in place: os.walk only descends into what is left in `dirs`.
            dirs[:] = [d for d in dirs if os.path.abspath(os.path.join(root, d)) not in excluded]

        for file in files:
            if not file.endswith('.csv'):
                continue
            csv_path = os.path.join(root, file)

            # The CSV is named "<image name incl. extension>.csv", so dropping the
            # last four characters yields the image file name.
            image_name = file[:-4]

            # Skip annotations whose image is missing
            image_path = os.path.join(root, image_name)
            if not os.path.exists(image_path):
                continue

            # Copy the image without going through a shell, so file names containing
            # quotes or spaces work and a failed copy raises instead of passing silently.
            output_image_path = os.path.join(output_images_folder, image_name)
            if not os.path.exists(output_image_path):
                shutil.copy(image_path, output_image_path)

            data = pd.read_csv(csv_path)

            # Ground-truth file is named after the image stem (extension stripped)
            base_name = os.path.splitext(image_name)[0]
            gt_file_path = os.path.join(output_gt_folder, f"gt_{base_name}.txt")
            with open(gt_file_path, 'w') as gt_file:
                for _, row in data.iterrows():
                    # Flatten [[x, y], ...] into integer strings x1, y1, x2, y2, ...
                    box_coordinates = ast.literal_eval(row['Box Coordinates'])
                    flattened_coords = [str(int(coord)) for point in box_coordinates for coord in point]

                    # "###" marks a box without usable text
                    text = row['Original Text'] if pd.notna(row['Original Text']) else "###"
                    gt_file.write(','.join(flattened_coords) + f',"{text}"\n')

# Process training data
# NOTE(review): source_folder_testing is a subdirectory of source_folder_training and
# process_dataset walks its source folder recursively, so this call also copies the
# testing images and ground truth into the training output folders.
process_dataset(source_folder_training, output_training_images, output_training_gt)

# Process test data
process_dataset(source_folder_testing, output_test_images, output_test_gt)

print("ICDAR2015 folder structure created successfully for both training and testing datasets.")


ICDAR2015 folder structure created successfully for both training and testing datasets.


In [57]:
import yaml

# Path to the existing YAML file
yaml_path = "/kaggle/working/det_mv3_db.yml"

# Load the YAML file. safe_load returns None for an empty file, so fall back to an
# empty mapping instead of failing on the membership checks below.
with open(yaml_path, 'r') as file:
    config = yaml.safe_load(file) or {}

# NOTE(review): the training log printed later in this notebook still shows
# Eval.dataset.data_dir as ./Sample-Data/Test/, so these top-level 'dataset' and
# 'output' keys do not appear to be what tools/train.py reads. Presumably the paths
# belong under Train.dataset / Eval.dataset — TODO confirm against the PaddleOCR config docs.

# Dataset locations (created if the section is missing)
config.setdefault('dataset', {}).update({
    'training_images': "/kaggle/working/danny-chess-2/icdar2015/ch4_training_images",
    'training_annotations': "/kaggle/working/danny-chess-2/icdar2015/ch4_training_localization_transcription_gt",
    'test_images': "/kaggle/working/danny-chess-2/icdar2015/ch4_test_images",
    'test_annotations': "/kaggle/working/danny-chess-2/icdar2015/ch4_test_gt",
})

# Output locations (created if the section is missing)
config.setdefault('output', {}).update({
    'logs': "/kaggle/working/output/logs",
    'models': "/kaggle/working/output/models",
})

# Save the modified YAML file
with open(yaml_path, 'w') as file:
    yaml.dump(config, file)

print(f"Updated YAML file saved to {yaml_path}")


Updated YAML file saved to /kaggle/working/det_mv3_db.yml


In [101]:
# Location of the shell script that launches training
train_sh_path = '/kaggle/working/PaddleOCR/train.sh'

# Script body: run detection training first, then recognition training.
# The trailing empty entry gives the file its final newline.
new_content = "\n".join([
    "#!/bin/bash",
    "python3 /kaggle/working/PaddleOCR/tools/train.py -c /kaggle/working/det_mv3_db.yml",
    "python3 /kaggle/working/PaddleOCR/tools/train.py -c /kaggle/working/rec_mv3_none_bilstm_ctc.yml",
    "",
])

# Overwrite train.sh with the new script
with open(train_sh_path, 'w') as file:
    file.write(new_content)

print(f"Updated {train_sh_path}")


Updated /kaggle/working/PaddleOCR/train.sh


In [85]:
with open('/kaggle/working/PaddleOCR/train.sh', 'r') as file:
    print(file.read())
!chmod +x /kaggle/working/PaddleOCR/train.sh
file_path = "/kaggle/working/PaddleOCR/tools/program.py"

# Read the file
with open(file_path, 'r') as file:
    lines = file.readlines()

# Replace the problematic line
lines = [
    line.replace("dist.ParallelEnv().dev_id", "dist.ParallelEnv().device_id")
    if "dist.ParallelEnv().dev_id" in line else line
    for line in lines
]

# Write the updated file
with open(file_path, 'w') as file:
    file.writelines(lines)

print(f"Updated {file_path} to replace 'dev_id' with 'device_id'")

#!/bin/bash
python3 /kaggle/working/PaddleOCR/tools/train.py -c /kaggle/working/det_mv3_db.yml

Updated /kaggle/working/PaddleOCR/tools/program.py to replace 'dev_id' with 'device_id'


In [98]:
#update detection yml file
import yaml

# Path to the YAML file
yaml_path = "/kaggle/working/det_mv3_db.yml"

# Load the YAML file. safe_load returns None for an empty file, so fall back to an
# empty mapping; otherwise the setdefault calls below would fail on None.
with open(yaml_path, 'r') as file:
    config = yaml.safe_load(file) or {}

# Each section is created when missing and only the listed keys are overwritten;
# any other keys already present in the file are left untouched.

# 'Global' section
config.setdefault('Global', {}).update({
    'epoch_num': 100,
    'cal_metric_during_train': True,
    'save_epoch_step': 20,
    'eval_batch_step': [0, 50],
    # Relative path: resolved against the working directory of the training process
    'pretrained_model': './pretrain_models/MobileNetV3_large_x0_5_pretrained',
})

# 'Train' loader settings
config.setdefault('Train', {}).setdefault('loader', {}).update({
    'batch_size_per_card': 2,
    'shuffle': True,
    'drop_last': False,
})

# 'Optimizer' settings: the lr keys are merged, the regularizer is replaced wholesale
optimizer_cfg = config.setdefault('Optimizer', {})
optimizer_cfg.setdefault('lr', {}).update({
    'name': 'Cosine',
    'learning_rate': 0.0005,
    'warmup_epoch': 5,
})
optimizer_cfg['regularizer'] = {'name': 'L2', 'factor': 0}

# 'PostProcess' settings
config.setdefault('PostProcess', {}).update({
    'box_thresh': 0.5,
    'unclip_ratio': 1.7,
})

# 'Architecture' settings for small dataset
architecture_cfg = config.setdefault('Architecture', {})
architecture_cfg.setdefault('Backbone', {}).update({
    'model_name': 'large',
    'name': 'MobileNetV3',
    'scale': 0.5,  # Keep this as 0.5 for now
})
architecture_cfg.setdefault('Neck', {}).update({
    'name': 'DBFPN',
    'out_channels': 128,
})

# Save the modified YAML file
with open(yaml_path, 'w') as file:
    yaml.dump(config, file, default_flow_style=False)

print(f"Updated {yaml_path} with recommended settings for a small dataset.")


Updated /kaggle/working/det_mv3_db.yml with recommended settings for a small dataset.


In [102]:
#!cat  "/kaggle/working/det_mv3_db.yml"
#!find /kaggle/working/ -name "rec_mv3_none_bilstm_ctc.yml"
#!cp /kaggle/working/PaddleOCR/configs/rec/rec_mv3_none_bilstm_ctc.yml /kaggle/working/rec_mv3_none_bilstm_ctc.yml
#!cat /kaggle/working/PaddleOCR/train.sh
#!ls /kaggle/working/PaddleOCR/configs/rec/
!cat /kaggle/working/rec_mv3_none_bilstm_ctc.yml

#!/bin/bash
python3 /kaggle/working/PaddleOCR/tools/train.py -c /kaggle/working/det_mv3_db.yml
python3 /kaggle/working/PaddleOCR/tools/train.py -c /kaggle/working/rec_mv3_none_bilstm_ctc.yml


In [99]:
#modify recognition yml
import yaml

# Path to the YAML file
yaml_path = "/kaggle/working/rec_mv3_none_bilstm_ctc.yml"

# Read the current recognition config
with open(yaml_path, 'r') as file:
    config = yaml.safe_load(file)

# Only the fields listed below are changed; every section is expected to exist already.

# Global settings
config['Global'].update({
    'epoch_num': 100,  # Set epochs to 100
    'pretrained_model': "./pretrain_models/MobileNetV3_large_x0_5_pretrained",
    'character_dict_path': "./ppocr/utils/dict/en_dict.txt",
    'save_model_dir': "./output/rec/mv3_none_bilstm_ctc/",
    'eval_batch_step': [0, 2000],  # Evaluate every 2000 iterations
    'cal_metric_during_train': True,
})

# Train dataset settings
train_cfg = config['Train']
train_cfg['dataset']['data_dir'] = "/kaggle/working/danny-chess-2/icdar2015/ch4_training_images"
train_cfg['loader']['batch_size_per_card'] = 64  # Adjust batch size

# Eval dataset settings
eval_cfg = config['Eval']
eval_cfg['dataset']['data_dir'] = "/kaggle/working/danny-chess-2/icdar2015/ch4_test_images"
eval_cfg['loader']['batch_size_per_card'] = 32  # Adjust batch size for evaluation

# Write the config back in block style
with open(yaml_path, 'w') as file:
    yaml.dump(config, file, default_flow_style=False)

print(f"Updated {yaml_path} with necessary changes.")


Updated /kaggle/working/rec_mv3_none_bilstm_ctc.yml with necessary changes.


In [88]:
!bash /kaggle/working/PaddleOCR/train.sh


[2024/11/24 22:33:17] ppocr INFO: Architecture : 
[2024/11/24 22:33:17] ppocr INFO:     Backbone : 
[2024/11/24 22:33:17] ppocr INFO:         model_name : large
[2024/11/24 22:33:17] ppocr INFO:         name : MobileNetV3
[2024/11/24 22:33:17] ppocr INFO:         scale : 0.5
[2024/11/24 22:33:17] ppocr INFO:     Head : 
[2024/11/24 22:33:17] ppocr INFO:         k : 50
[2024/11/24 22:33:17] ppocr INFO:         name : DBHead
[2024/11/24 22:33:17] ppocr INFO:     Neck : 
[2024/11/24 22:33:17] ppocr INFO:         name : DBFPN
[2024/11/24 22:33:17] ppocr INFO:         out_channels : 256
[2024/11/24 22:33:17] ppocr INFO:     Transform : None
[2024/11/24 22:33:17] ppocr INFO:     algorithm : DB
[2024/11/24 22:33:17] ppocr INFO:     model_type : det
[2024/11/24 22:33:17] ppocr INFO: Eval : 
[2024/11/24 22:33:17] ppocr INFO:     dataset : 
[2024/11/24 22:33:17] ppocr INFO:         data_dir : ./Sample-Data/Test/
[2024/11/24 22:33:17] ppocr INFO:         label_file_list : ['./Sample-Data/Test/tes

In [52]:
!rm -rf /kaggle/working/danny-chess-2/icdar2015


In [None]:
!python3 /kaggle/working/PaddleOCR/tools/train.py \
    -c /kaggle/working/placeholder_rec_train.yml \
    --epoch_num 100 \
    --save_model_dir /kaggle/working/paddleocr_output \
    --device gpu

In [None]:
from paddleocr import PaddleOCR
import torch.nn as nn
from torchmetrics import Accuracy, Precision, Recall, F1Score, AveragePrecision

class ChessSequenceModel(nn.Module):
    """
    Thin nn.Module wrapper that runs PaddleOCR on every image of a batch.

    The module holds no trainable parameters of its own: ``forward`` returns the
    recognized text strings, not tensors.

    Args:
        num_classes (int): Accepted for interface compatibility; not used by this class.
        max_seq_length (int): Stored on the instance; not used by ``forward``.
    """

    def __init__(self, num_classes, max_seq_length=100):
        super(ChessSequenceModel, self).__init__()
        self.max_seq_length = max_seq_length
        # Initialize PaddleOCR
        self.paddleocr = PaddleOCR(use_angle_cls=True, lang="en")

    def forward(self, x):
        """
        Run OCR on each image in the batch.

        Args:
            x: Iterable of CHW float image tensors scaled to [0, 1]
               (assumed from the `* 255` conversion below).

        Returns:
            list of list of str: For every image, the recognized text of each detected
            box in the order PaddleOCR returned them; an empty list when nothing was
            detected.
        """
        batch_results = []
        for image in x:  # Process batch of images
            # Convert PyTorch tensor to NumPy array (HWC, uint8). detach()/cpu() make
            # this work for tensors that track gradients or live on the GPU.
            image_np = (image.detach().cpu().permute(1, 2, 0).numpy() * 255).astype('uint8')

            # Use PaddleOCR for text detection and recognition
            results = self.paddleocr.ocr(image_np, cls=True)

            # The per-image entry can be None (or empty) when no text was found
            detections = results[0] if results else None
            sequence = [res[1][0] for res in detections] if detections else []

            batch_results.append(sequence)

        return batch_results


**let's save the model if we get restarted**

In [None]:
#save the trained model /kaggle/working/paddleocr_training_output/trained_model_weights
# NOTE(review): `ocr_model` is not defined in the nearby cells; this assumes an object
# exposing save_weights() already exists in the kernel — TODO confirm what it is and
# that it provides this method.
ocr_model.save_weights("/kaggle/working/paddleocr_training_output/trained_model_weights")
print("Trained model weights saved!")


reload the trained model

In [None]:
#load saved model from /kaggle/working/paddleocr_training_output/trained_model_weights. Must be saved first
# The paddleocr package exposes the PaddleOCR pipeline class (there is no
# PaddleOCRModel); it accepts the model directories as keyword arguments.
from paddleocr import PaddleOCR

# Paths to the trained weights and the default pre-trained weights
trained_model_dir = "/kaggle/working/paddleocr_training_output/trained_model_weights"
stock_rec_model_dir = "/kaggle/working/en_PP-OCRv3_rec_infer"
stock_det_model_dir = "/kaggle/working/en_PP-OCRv3_det_infer"

# NOTE(review): PaddleOCR loads *inference* models from these directories; training
# checkpoints presumably need to be exported first — TODO confirm the contents of
# trained_model_dir. `paddle` must already be imported in the kernel.

# Reload the trained model
ocr_model_trained = PaddleOCR(
    rec_model_dir=trained_model_dir,
    det_model_dir=trained_model_dir,  # Adjust if separate trained detection weights are saved
    lang="en",  # English character dictionary, matching the other cells in this notebook
    use_gpu=paddle.device.is_compiled_with_cuda()
)

# Initialize the stock PaddleOCR model
ocr_model_stock = PaddleOCR(
    rec_model_dir=stock_rec_model_dir,
    det_model_dir=stock_det_model_dir,
    lang="en",
    use_gpu=paddle.device.is_compiled_with_cuda()
)

print("Trained and stock PaddleOCR models loaded successfully!")


In [None]:
def __getitem__(self, idx):
    """
    Load one sample as an (image tensor, label tensor) pair.

    Args:
        idx (int): Index into ``self.image_labels``, whose entries are
            ``(image_path, moves)`` tuples.

    Returns:
        tuple: ``(image, labels)`` where ``image`` is a CHW float tensor scaled to
        [0, 1] and ``labels`` is whatever ``convert_labels_to_tensor`` produces for
        the moves of this image.
    """
    image_path, moves = self.image_labels[idx]

    # convert("RGB") always yields a 3-channel image, so no grayscale expansion is
    # needed afterwards. The image is kept at its original size (no resize here).
    image = np.array(Image.open(image_path).convert("RGB"))

    # Convert image to tensor: HWC uint8 -> CHW float in [0, 1]
    image = torch.from_numpy(image.transpose((2, 0, 1))).float() / 255.0

    # Labels as tensor for evaluation
    labels = convert_labels_to_tensor(moves, self.move_to_idx)
    return image, labels


In [None]:
# Training-style loop over the dataloader using the PaddleOCR-based model.
# NOTE(review): each batch is unpacked into four values, while the Dataset classes in
# this notebook return (image, labels) pairs — this presumably relies on a custom
# collate_fn that adds lengths and max_len; TODO confirm.
# NOTE(review): optimizer.zero_grad() is never called inside this loop.
for batch_idx, (images, labels, lengths, max_len) in enumerate(dataloader):
    images = images.to(device)
    labels = labels.to(device)
    
    # Forward pass through PaddleOCR-based model
    predictions = model(images)
    
    # Convert predictions to indices; unknown moves map to padding_idx
    pred_indices = [
        [move_to_idx.get(move.lower(), padding_idx) for move in sequence]
        for sequence in predictions
    ]
    
    # Pad predictions to match labels
    pred_indices = pad_sequence(
        [torch.tensor(seq) for seq in pred_indices], batch_first=True, padding_value=padding_idx
    ).to(device)
    
    # Calculate loss
    # NOTE(review): pred_indices holds integer class indices built from Python lists,
    # yet it is reshaped here as if it were per-class scores of width len(move_to_idx);
    # it also carries no gradient history, so backward() is unlikely to work — TODO confirm.
    labels_reshaped = labels.view(-1)
    valid_mask = labels_reshaped != padding_idx
    loss = criterion(pred_indices.view(-1, len(move_to_idx))[valid_mask], labels_reshaped[valid_mask])
    
    loss.backward()
    optimizer.step()


In [None]:
# Initialize the model
model = ChessSequenceModel(num_classes=100, max_seq_length=10)

In [None]:
import os
from PIL import Image
import torch
from torch.utils.data import Dataset
from paddleocr import PaddleOCR

class ChessNotationDataset(Dataset):
    def __init__(self, image_folder, label_file, transform=None):
        """
        Dataset class for loading chessboard images and corresponding moves.

        Args:
            image_folder (str): Path to the folder containing images.
            label_file (str): Path to the label file mapping images to moves.
            transform (callable, optional): Transformation to apply to images.
        """
        self.image_folder = image_folder
        self.transform = transform
        self.image_labels = self._parse_labels(label_file)

    def _parse_labels(self, label_file):
        """
        Parse the label file to create a mapping of images to moves.

        Each non-blank line is whitespace-separated: the first token is the image
        file name, the remaining tokens are its moves. Blank lines are skipped, and
        so are entries whose image does not exist in ``self.image_folder``.

        Args:
            label_file (str): Path to the label file.

        Returns:
            list of tuples: Each tuple contains the image path and the associated moves.
        """
        image_labels = []
        with open(label_file, 'r') as f:
            for line in f:
                parts = line.split()
                if not parts:
                    # Blank or whitespace-only line: nothing to index
                    continue
                image_name, moves = parts[0], parts[1:]
                image_path = os.path.join(self.image_folder, image_name)
                if os.path.exists(image_path):
                    image_labels.append((image_path, moves))
        return image_labels

    def __len__(self):
        """Return the number of (image, moves) pairs found on disk."""
        return len(self.image_labels)

    def __getitem__(self, idx):
        """
        Load one sample.

        Args:
            idx (int): Index into the parsed label list.

        Returns:
            tuple: ``(image, moves)``. ``image`` is the result of ``self.transform``
            when a transform was given, otherwise a CHW float tensor scaled to
            [0, 1]; ``moves`` is the list of move strings for that image.
        """
        image_path, moves = self.image_labels[idx]
        image = Image.open(image_path).convert("RGB")
        
        # Apply transformations if provided
        if self.transform:
            image = self.transform(image)
        else:
            # Default: Convert image to PyTorch tensor (HWC -> CHW)
            image = torch.from_numpy(np.array(image).transpose((2, 0, 1))).float() / 255.0

        return image, moves


In [None]:
from torch.utils.data import DataLoader
from torchvision import transforms

# Define paths
image_folder = "/kaggle/input/chess-dataset-notation/data"
label_file = "/kaggle/input/chess-dataset-notation/labels.txt"  # Replace with actual label file path

# Define dataset and transformations
transform = transforms.Compose([
    transforms.Resize((299, 299)),  # Resize every image to a fixed 299x299
    transforms.ToTensor(),          # Convert image to tensor
])

# NOTE: this call matches the three-argument ChessNotationDataset defined just above;
# the version defined later in this notebook additionally requires move_to_idx.
dataset = ChessNotationDataset(image_folder=image_folder, label_file=label_file, transform=transform)

# Create DataLoader
# NOTE(review): __getitem__ returns the moves as a variable-length list of strings;
# the default collate function may not batch those as intended — TODO confirm.
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)


In [None]:
!ls -la /kaggle/input/chess-dataset-notation/data

Function to split png file into rows

In [None]:
import cv2
import os
import numpy as np

def split_by_horizontal_lines_sequential(image_path, output_folder, row_prefix, line_offset=2, min_row_height=10):
    """
    Split an image into rows based on horizontal lines, excluding lines themselves, with sequential numbering.

    Horizontal lines are found with Canny edges plus a probabilistic Hough transform;
    every gap between two consecutive lines that is tall enough is saved as
    ``<row_prefix>_row_<k>.png``. A ``<row_prefix>_debug.png`` image with the detected
    lines drawn on it is always written as well.

    Args:
        image_path (str): Path to the PNG file.
        output_folder (str): Existing folder to save cropped rows.
        row_prefix (str): Prefix for naming cropped regions.
        line_offset (int): Number of pixels to offset the crop region to exclude lines.
        min_row_height (int): Minimum height in pixels for a row to be considered valid.

    Returns:
        list of str: Filenames (not full paths) of cropped rows; empty when fewer
        than two horizontal lines were detected.
    """
    # Load image
    image = cv2.imread(image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Edge detection to highlight lines
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Detect lines using Hough Transform
    lines = cv2.HoughLinesP(edges, rho=1, theta=np.pi / 180, threshold=100,
                            minLineLength=100, maxLineGap=10)

    # HoughLinesP returns None (not an empty array) when it finds nothing
    if lines is None:
        print("No horizontal lines detected.")
        lines = []

    # Extract horizontal lines (angle ~ 0 degrees)
    horizontal_lines = []
    for line in lines:
        x1, y1, x2, y2 = line[0]
        if abs(y1 - y2) < 5:  # Check if the line is horizontal
            horizontal_lines.append((x1, y1, x2, y2))

    # Sort horizontal lines by their y-coordinates
    horizontal_lines = sorted(horizontal_lines, key=lambda line: line[1])

    # Split the image into rows using each pair of consecutive horizontal lines
    row_files = []
    row_counter = 1  # Sequential counter for rows (skipped rows do not consume a number)
    for upper, lower in zip(horizontal_lines, horizontal_lines[1:]):
        # Exclude lines by adding an offset
        y1 = upper[1] + line_offset  # Start just below the current line
        y2 = lower[1] - line_offset  # End just above the next line

        # Clamp to the image bounds
        y1 = max(0, y1)
        y2 = min(gray.shape[0], y2)
        if y1 >= y2 or (y2 - y1) < min_row_height:  # Check minimum row height
            print(f"Skipping invalid or small row: y1={y1}, y2={y2}, height={y2 - y1}")
            continue

        # Crop the region
        cropped_row = image[y1:y2, :]
        if cropped_row.size == 0:
            print(f"Empty cropped row for y1={y1}, y2={y2}, skipping...")
            continue

        # Save the cropped row with sequential numbering
        row_filename = f"{row_prefix}_row_{row_counter}.png"
        cv2.imwrite(os.path.join(output_folder, row_filename), cropped_row)
        row_files.append(row_filename)
        row_counter += 1  # Increment the row counter

    # Save a debug image with horizontal lines drawn
    debug_image = image.copy()
    for x1, y1, x2, y2 in horizontal_lines:
        cv2.line(debug_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    debug_filename = os.path.join(output_folder, f"{row_prefix}_debug.png")
    cv2.imwrite(debug_filename, debug_image)

    return row_files


Run the horizontal split function on a single file

In [None]:
# Example Usage
image_path = "/kaggle/input/chess-dataset-notation/data/153_0.png"
output_folder = "/kaggle/working/cropped_rows"
os.makedirs(output_folder, exist_ok=True)

row_prefix = "153_0"  # Matches the prefix in training_tags.txt
board_files = split_by_horizontal_lines_sequential(
    image_path=image_path,
    output_folder=output_folder,
    row_prefix=row_prefix
    
#    min_width=250,         # 1.5 inches at 96 DPI
#    min_height=50,         # 0.25 inches at 96 DPI
#    max_region_ratio=0.3,  # Exclude regions larger than 90% of the image
#    padding=5             # Extra padding
)

print("Extracted chessboards:", board_files)

In [None]:
!rm -r /kaggle/working/cropped_rows

function for vertical line split

In [None]:
import cv2
import os
import numpy as np

def detect_and_split_columns_debug(row_image_path, output_folder, column_prefix, line_offset=2, min_column_width=10):
    """
    Find vertical separator lines in a row image and cut the row into column crops.

    Besides the crops, two debug images are written to ``output_folder``:
    ``edges_debug.png`` (Canny edges) and ``lines_debug.png`` (accepted lines drawn
    on the row).

    Args:
        row_image_path (str): Path to the row image.
        output_folder (str): Folder to save debug and cropped columns.
        column_prefix (str): Prefix for naming cropped columns.
        line_offset (int): Number of pixels to offset the crop region to exclude lines.
        min_column_width (int): Minimum width in pixels for a valid column.

    Returns:
        list of str: Full paths of the cropped column images that were written.
    """
    # Read the row and derive its grayscale version
    image = cv2.imread(row_image_path)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Step 1: edge map, saved for inspection
    edges = cv2.Canny(gray, 20, 80, apertureSize=3)
    edge_debug_path = os.path.join(output_folder, "edges_debug.png")
    cv2.imwrite(edge_debug_path, edges)
    print(f"Saved edge detection debug image: {edge_debug_path}")

    # Step 2: probabilistic Hough transform on the edge map
    lines = cv2.HoughLinesP(edges, rho=1, theta=np.pi / 180, threshold=30,
                            minLineLength=50, maxLineGap=5)

    # Keep near-vertical segments and draw each of them on a copy of the row
    debug_image = image.copy()
    vertical_lines = []
    if lines is None:
        print("No vertical lines detected.")
    else:
        for segment in lines:
            x1, y1, x2, y2 = segment[0]
            if abs(x1 - x2) >= 10:
                # Too slanted to count as vertical
                continue
            vertical_lines.append((x1, y1, x2, y2))
            cv2.line(debug_image, (x1, y1), (x2, y2), (0, 255, 0), 2)  # Draw line

    debug_path = os.path.join(output_folder, "lines_debug.png")
    cv2.imwrite(debug_path, debug_image)
    print(f"Saved line detection debug image: {debug_path}")

    # Step 3: order the separators left to right by their first x-coordinate
    vertical_lines.sort(key=lambda segment: segment[0])

    # Step 4: crop between each pair of neighbouring separators
    column_files = []
    for i, (left_line, right_line) in enumerate(zip(vertical_lines, vertical_lines[1:])):
        # Shrink the span by the offset so the separator pixels are left out
        left_x = left_line[0] + line_offset
        right_x = right_line[0] - line_offset

        # Reject inverted or too-narrow spans
        if left_x >= right_x or (right_x - left_x) < min_column_width:
            print(f"Skipping invalid or small column: x1={left_x}, x2={right_x}, width={right_x - left_x}")
            continue

        # The file number follows the separator index, so numbering may have gaps
        column_filename = os.path.join(output_folder, f"{column_prefix}_col_{i+1}.png")
        cv2.imwrite(column_filename, image[:, left_x:right_x])
        column_files.append(column_filename)

    return column_files


In [None]:
# Example Usage
row_image_path = "/kaggle/working/cropped_row_temp/153_0_row_30.png"
output_folder = "/kaggle/working/cropped_cells"
os.makedirs(output_folder, exist_ok=True)

column_prefix = "column"  # Matches the prefix in training_tags.txt
board_files = detect_and_split_columns_debug(
    row_image_path=row_image_path,
    output_folder=output_folder,
    column_prefix=column_prefix
    
#    min_width=250,         # 1.5 inches at 96 DPI
#    min_height=50,         # 0.25 inches at 96 DPI
#    max_region_ratio=0.3,  # Exclude regions larger than 90% of the image
#    padding=5             # Extra padding
)

print("Extracted chessboards:", board_files)

In [None]:
!rm -r /kaggle/working/cropped_cells

In [None]:
output_folder = "/kaggle/working/cropped_row_temp"
os.makedirs(output_folder, exist_ok=True)
!cp /kaggle/working/cropped_rows/153_0_row_30.png $output_folder/153_0_row_30.png

The function below prints the resolution and DPI of an image

In [None]:
from PIL import Image

def print_image_info(image_path):
    """
    Report the pixel dimensions of an image and, when stored in the file, its DPI.

    Args:
        image_path (str): Path to the PNG file.
    """
    with Image.open(image_path) as img:
        # Pixel dimensions
        print(f"Resolution: {img.size[0]} x {img.size[1]} pixels")

        # DPI is optional metadata; a missing or empty entry means it is not recorded
        dpi = img.info.get('dpi')
        if not dpi:
            print("DPI information not available in this image.")
            return
        print(f"DPI: {dpi[0]} x {dpi[1]} (horizontal x vertical)")

# Example Usage
image_path = "/kaggle/input/chess-dataset-notation/data/001_0.png"
print_image_info(image_path)


In [None]:
from paddleocr import PaddleOCR, draw_ocr
import matplotlib.pyplot as plt
from PIL import Image

# Initialize PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')  # Initialize with English language

# Path to the PNG file
image_path = '/kaggle/input/chess-dataset-notation/data/012_0.png'

# Perform OCR on the image
result = ocr.ocr(image_path, cls=True)

# The per-image entry can be None when no text was detected; treat that as "no lines"
# so the loops below do not fail on None.
ocr_lines = (result[0] if result else None) or []

# Display OCR Results
for line in ocr_lines:
    print(f"Detected Text: {line[1][0]}, Confidence: {line[1][1]}")

# Visualize OCR Results on the Image
# Each line is [box, (text, confidence)]
image = Image.open(image_path).convert('RGB')
boxes = [line[0] for line in ocr_lines]
txts = [line[1][0] for line in ocr_lines]
scores = [line[1][1] for line in ocr_lines]

# Draw the OCR results on the image
annotated_image = draw_ocr(image, boxes, txts, scores, font_path='/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf')  # Add path to TTF font if necessary
plt.imshow(annotated_image)
plt.axis('off')
plt.show()


In [None]:
!pip install paddlepaddle
!pip install paddleocr
from paddleocr import PaddleOCR, draw_ocr
import matplotlib.pyplot as plt
from PIL import Image, ImageOps, ImageFont
import numpy as np
import os

# Path to your valid readings list
valid_readings_path = '/kaggle/input/aarrrrr/present.txt'


# Initialize PaddleOCR with the default English model (the valid-readings list above is not passed in yet)
ocr = PaddleOCR(
    use_angle_cls=True,
    lang='en'
)

# Path to the PNG file
image_path = '/kaggle/input/chess-dataset-notation/data/001_0.png'

# Perform OCR on the image
result = ocr.ocr(image_path, cls=True)

# Sort OCR results by reading order
def sort_by_reading_order(results, row_threshold=10):
    """
    Order OCR detections top-to-bottom, then left-to-right within each row.

    Each entry is expected to look like ``[box, (text, confidence)]`` where
    ``box[0]`` is the top-left corner ``[x, y]``. Entries are first sorted by the
    top-left y-coordinate; an entry joins the current row when its y differs from the
    previous entry of that row by less than ``row_threshold`` pixels.

    Args:
        results (list or None): OCR detections for one image. ``None`` or an empty
            list (no text detected) yields an empty result.
        row_threshold (int): Maximum y-distance in pixels for two neighbouring
            detections to count as the same row. Defaults to 10.

    Returns:
        list: The detections in reading order.
    """
    if not results:
        return []

    # Sort by vertical position first (top-to-bottom)
    by_top = sorted(results, key=lambda item: item[0][0][1])

    # Group into rows, sorting each finished row left-to-right
    ordered = []
    current_row = [by_top[0]]
    for item in by_top[1:]:
        if abs(item[0][0][1] - current_row[-1][0][0][1]) < row_threshold:
            current_row.append(item)
        else:
            ordered.extend(sorted(current_row, key=lambda entry: entry[0][0][0]))
            current_row = [item]

    # Flush the last row
    ordered.extend(sorted(current_row, key=lambda entry: entry[0][0][0]))
    return ordered

sorted_results = sort_by_reading_order(result[0])

# Display OCR Results
print("Sorted OCR Results:")
for line in sorted_results:
    print(f"Detected Text: {line[1][0]}, Confidence: {line[1][1]}")

# Visualize OCR Results on the Image
image = Image.open(image_path).convert('RGB')
boxes = [line[0] for line in sorted_results]
txts = [line[1][0] for line in sorted_results]
scores = [line[1][1] for line in sorted_results]

# Use a valid font path
font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"

# Draw the OCR results on the image
annotated_image = draw_ocr(image, boxes)#, txts, scores, font_path=font_path)

# Convert NumPy array to PIL Image
annotated_image_pil = Image.fromarray(np.uint8(annotated_image))

# Save the annotated image
output_path = '/kaggle/working/annotated_image_with_dict.png'
annotated_image_pil.save(output_path)
print(f"Annotated image saved at {output_path}")

# Display the saved image
plt.imshow(annotated_image_pil)
plt.axis('off')
plt.show()


In [None]:
plt.imshow(annotated_image)
output_path = '/kaggle/working/annotated_image.png'  # Adjust path if needed
annotated_image_pil = Image.fromarray(np.uint8(annotated_image))
annotated_image_pil.save(output_path)

In [None]:
!ls  "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"

In [None]:
import os

# Create a directory to store the models
os.makedirs('inference', exist_ok=True)

# Download models
!wget -P inference https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tar
!wget -P inference https://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tar
!wget -P inference https://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tar

# Extract the models
!tar xf inference/ch_PP-OCRv3_det_infer.tar -C inference
!tar xf inference/ch_PP-OCRv3_rec_infer.tar -C inference
!tar xf inference/ch_ppstructure_mobile_v2.0_SLANet_infer.tar -C inference


In [None]:
from paddleocr import PPStructure, draw_structure_result, save_structure_res
from PIL import Image
import cv2
import os
import matplotlib.pyplot as plt
import numpy as np 

# Initialize the table recognition pipeline
table_engine = PPStructure(table=True)

# Path to your image
image_path = '/kaggle/input/chess-dataset-notation/data/012_0.png'

# Run table recognition
results = table_engine(image_path)

# Save and visualize the results
save_folder = '/kaggle/working/'
save_structure_res(results, save_folder, os.path.basename(image_path).split('.')[0])

# Load the source image as RGB and draw the structure result on it
img = Image.open(image_path).convert('RGB')
draw_img = draw_structure_result(img, results, font_path="/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf")

# BGR copy for OpenCV consumers only (e.g. cv2.imwrite); not used for display
cv2_draw_img = cv2.cvtColor(np.array(draw_img), cv2.COLOR_RGB2BGR)

# matplotlib expects RGB channel order, so show the RGB image rather than the BGR
# copy; displaying the BGR array would swap red and blue.
plt.figure(figsize=(12, 12))
plt.imshow(np.array(draw_img))
plt.axis('off')
plt.show()

# Save the annotated image
annotated_image_path = '/kaggle/working/annotated_image.png'
annotated_image_pil = Image.fromarray(np.uint8(draw_img))
annotated_image_pil.save(annotated_image_path)
print(f"Annotated table image saved at: {annotated_image_path}")


In [None]:
!pip install premailer

In [None]:
def load_labels(label_file):
    """
    Read a tag file and group the labels by image prefix.

    Each usable line holds at least two whitespace-separated tokens: an image name
    and a label (further tokens are ignored). The grouping key is the image name's
    first two underscore-separated parts, e.g. ``153_0_row_1.png`` -> ``153_0``.
    Blank lines and lines without a label are skipped.

    Args:
        label_file (str): Path to the tag file.

    Returns:
        dict: Maps each prefix to the list of its labels in file order.
    """
    image_labels = {}
    with open(label_file, 'r') as file:
        for line in file:
            parts = line.split()
            if len(parts) < 2:
                # Blank line or name without a label: nothing to record
                continue
            image_name, label = parts[0], parts[1]
            base_name = "_".join(image_name.split('_')[:2])
            image_labels.setdefault(base_name, []).append(label)
    return image_labels

In [None]:

image_labels_dict = load_labels('/kaggle/input/chess-dataset-notation/data/training_tags.txt')


In [None]:
   def _parse_labels(self, label_file):
        """
        Pair every labelled image prefix with an image file in ``self.image_folder``.

        Args:
            label_file (str): Tag file understood by ``load_labels``.

        Returns:
            list of tuples: ``(image_path, moves)`` for every prefix whose image exists.
        """
        # Imported here so the method is self-contained; the stray module-level import
        # that used to sit in the middle of the body was a syntax error.
        import os

        image_labels = []
        image_labels_dict = load_labels(label_file)
        for base_name, moves in image_labels_dict.items():
            # Prefer the image whose name equals the label prefix (e.g. "153_1" ->
            # "153_1.png") so labels of one page are never attached to another page.
            exact_path = os.path.join(self.image_folder, f"{base_name}.png")
            if os.path.exists(exact_path):
                image_labels.append((exact_path, moves))
                continue

            # Fallback: probe "<id>_0.png", then "<id>_1.png" using only the leading id
            base_name = base_name.split('_')[0]
            image_path_0 = os.path.join(self.image_folder, f"{base_name}_0.png")
            image_path_1 = os.path.join(self.image_folder, f"{base_name}_1.png")
            if os.path.exists(image_path_0):
                image_labels.append((image_path_0, moves))
            elif os.path.exists(image_path_1):
                image_labels.append((image_path_1, moves))
        return image_labels

define dataset class

In [None]:
import os
from torchvision import models
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torch
import torch.nn as nn
from torch.nn.utils.rnn import pad_sequence
import numpy as np
import cv2
from torchmetrics import Accuracy, Precision, Recall, F1Score, AveragePrecision
from sklearn.metrics import average_precision_score
import torch.optim as optim

class ChessNotationDataset(Dataset):
    """Dataset pairing score-sheet images with their sequence of move labels.

    Each item is ``(image, labels)``: ``image`` is a float32 tensor of shape
    ``(3, H, W)`` scaled to ``[0, 1]`` and ``labels`` is the tensor returned by
    ``convert_labels_to_tensor`` for the moves attached to that image.
    """

    def __init__(self, image_folder, label_file, move_to_idx, transform=None,
                 image_size=(299, 299)):
        """
        Args:
            image_folder (str): Directory holding the ``<id>_0.png`` /
                ``<id>_1.png`` images.
            label_file (str): Tag file understood by ``load_labels``.
            move_to_idx (dict): Lower-cased move string -> class index.
            transform (callable, optional): Applied to the image tensor
                (float32, ``(3, H, W)``, values in ``[0, 1]``) before it is
                returned.  ``None`` leaves the tensor untouched.
            image_size (tuple[int, int]): ``(width, height)`` handed to
                ``cv2.resize``.  Defaults to 299x299 (InceptionV3 input size).
        """
        self.image_folder = image_folder
        self.move_to_idx = move_to_idx
        self.transform = transform
        self.image_size = tuple(image_size)
        self.image_labels = self._parse_labels(label_file)

    def _parse_labels(self, label_file):
        """Return ``(image_path, moves)`` pairs for label groups whose image exists.

        Groups for which neither ``<id>_0.png`` nor ``<id>_1.png`` is present in
        ``image_folder`` are left out.
        """
        image_labels = []
        for base_name, moves in load_labels(label_file).items():
            # NOTE(review): the page suffix of the key is dropped here, so a key
            # such as ``153_1`` resolves to ``153_0.png`` whenever that file
            # exists -- confirm this pairing is intended.
            image_id = base_name.split('_')[0]
            candidates = (
                os.path.join(self.image_folder, f"{image_id}_0.png"),
                os.path.join(self.image_folder, f"{image_id}_1.png"),
            )
            for image_path in candidates:
                if os.path.exists(image_path):
                    image_labels.append((image_path, moves))
                    break
        return image_labels

    def __len__(self):
        """Number of ``(image, moves)`` samples."""
        return len(self.image_labels)

    def __getitem__(self, idx):
        """Load, resize and normalise one image and encode its moves.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: ``(image, labels)``.
        """
        image_path, moves = self.image_labels[idx]

        # The context manager releases the file handle once pixels are decoded.
        with Image.open(image_path) as pil_image:
            # convert("RGB") always yields an (H, W, 3) uint8 array, so no extra
            # channel or dtype normalisation is needed afterwards.
            image = np.array(pil_image.convert("RGB"))

        image = cv2.resize(image, self.image_size)

        # HWC uint8 -> CHW float32 in [0, 1]
        image = torch.from_numpy(image.transpose((2, 0, 1))).float() / 255.0

        if self.transform is not None:
            image = self.transform(image)

        labels = convert_labels_to_tensor(moves, self.move_to_idx)
        return image, labels
        

## Helper functions: move vocabulary, label loading, and batch collation

In [None]:
def create_move_to_idx(label_file):
    """Build a vocabulary that maps each distinct move to a class index.

    The file is read one move per line.  Moves are stripped and lower-cased,
    blank lines are ignored, and indices are assigned in order of first
    appearance starting at 0.

    NOTE(review): lower-casing merges moves that differ only by case, e.g. the
    SAN strings ``Bxc4`` (bishop) and ``bxc4`` (b-pawn) share one index --
    confirm this is acceptable for the labels in use.

    Args:
        label_file (str): Path to the move list.

    Returns:
        dict[str, int]: Lower-cased move -> index.
    """
    move_to_idx = {}
    with open(label_file, 'r') as file:
        for line in file:
            move_key = line.strip().lower()
            if not move_key:
                # A blank line is not a move; indexing it would create a bogus class.
                continue
            # len() is the next free index; setdefault keeps the first one seen.
            move_to_idx.setdefault(move_key, len(move_to_idx))
    return move_to_idx

# Map move strings to a 1-D tensor of class indices
def convert_labels_to_tensor(moves, move_to_idx):
    """Look up every move case-insensitively and pack the indices into a tensor.

    Args:
        moves: Iterable of move strings.
        move_to_idx (dict): Lower-cased move -> class index.

    Returns:
        torch.Tensor: ``torch.long`` tensor with one index per move.

    Raises:
        KeyError: If a lower-cased move is missing from ``move_to_idx``.
    """
    indices = []
    for move in moves:
        indices.append(move_to_idx[move.lower()])
    return torch.tensor(indices, dtype=torch.long)

def load_labels(label_file):
    """Group the labels of a tag file by the image prefix they belong to.

    Every non-blank line must hold at least two whitespace-separated fields:
    an image name followed by its label (any further fields are ignored).
    Lines are grouped under the first two ``_``-separated parts of the image
    name, e.g. ``153_0_row_1.png`` is filed under ``153_0``.  Labels keep the
    order in which they appear in the file.

    Args:
        label_file (str): Path to the tag file.

    Returns:
        dict[str, list[str]]: Group key -> labels in file order.

    Raises:
        ValueError: If a non-blank line has an image name but no label.
    """
    image_labels = {}
    with open(label_file, 'r') as file:
        for line_number, line in enumerate(file, start=1):
            parts = line.split()
            if not parts:
                # Blank lines (typically a trailing newline) carry no label.
                continue
            if len(parts) < 2:
                raise ValueError(
                    f"{label_file}:{line_number}: expected '<image_name> <label>', got {line.strip()!r}"
                )
            image_name, label = parts[0], parts[1]
            base_name = "_".join(image_name.split('_')[:2])
            image_labels.setdefault(base_name, []).append(label)
    return image_labels
    
def custom_collate_fn(batch, padding_idx=None):
    """Collate ``(image, labels)`` samples whose label sequences differ in length.

    Args:
        batch: Sequence of ``(image_tensor, label_tensor)`` pairs; the image
            tensors must be stackable (same shape) and the labels 1-D.
        padding_idx (int, optional): Value used to right-pad shorter label
            sequences.  When omitted, the notebook-level ``padding_idx`` is
            used if it has been defined, otherwise ``-1``.

    Returns:
        tuple: ``(images, labels, lengths, max_len)`` where ``images`` is the
        stacked image batch, ``labels`` has shape ``(batch, max_len)``,
        ``lengths`` holds the unpadded length of every sequence and
        ``max_len`` is the longest length as a 0-dim tensor.
    """
    if padding_idx is None:
        # Keep honouring the module-level setting, but do not fail with a
        # NameError when this function is used before that cell has run.
        padding_idx = globals().get("padding_idx", -1)

    images = torch.stack([item[0] for item in batch])

    label_seqs = [item[1] for item in batch]
    lengths = torch.tensor([len(seq) for seq in label_seqs])
    max_len = lengths.max()

    # pad_sequence right-pads every sequence to the longest one in the batch.
    labels = pad_sequence(label_seqs, batch_first=True, padding_value=padding_idx)

    return images, labels, lengths, max_len

In [None]:
# Initialize dataset and model
# NOTE(review): only the dataset is built in this cell; no model is created here.
# padding_idx is read as a module-level global by custom_collate_fn when it
# pads label sequences.
padding_idx = -1
image_folder = "/kaggle/input/chess-dataset-notation/data"
# NOTE(review): an earlier cell loads training_tags.txt from
# /kaggle/input/chess-dataset-notation/data/ instead -- confirm which copy is current.
label_file = "/kaggle/input/training-tags-file/training_tags.txt"
# Vocabulary: one move per line of this file -> class index (see create_move_to_idx).
move_to_idx = create_move_to_idx("/kaggle/input/ripchessgm/san_strings_with_symbols.txt")
# transform=None: images are returned exactly as produced by __getitem__.
dataset = ChessNotationDataset(
    image_folder=image_folder,
    label_file=label_file,
    move_to_idx=move_to_idx,
    transform=None
)

In [None]:
# Report the two sizes separately: number of (image, moves) samples in the
# dataset, and number of distinct moves in the vocabulary.
print("dataset count:", len(dataset))
print("move vocabulary size:", len(move_to_idx))

In [None]:
!pip install paddlepaddle
!pip install paddleocr

import paddle
import paddle.nn as nn
from paddleocr.ppocr.modeling.backbones import MobileNetV3
from paddleocr.ppocr.modeling.necks import SequenceEncoder

class ChessSequenceModel(nn.Layer):
    """MobileNetV3 backbone + LSTM + linear head producing one logit vector per image.

    ``nn`` is ``paddle.nn`` in this cell.  NOTE(review): the import above
    rebinds the name ``nn`` that the dataset cell bound to ``torch.nn`` --
    any later torch code using ``nn`` will pick up paddle instead.

    Args:
        num_classes (int): Size of the output layer.
        max_seq_length (int): Stored on the instance; not used by ``forward``.
        hidden_size (int): LSTM hidden size.
        num_layers (int): Number of stacked LSTM layers.
    """

    def __init__(self, num_classes, max_seq_length=100, hidden_size=512, num_layers=1):
        super().__init__()
        self.max_seq_length = max_seq_length

        # Use PaddleOCR's MobileNetV3 backbone for feature extraction
        self.backbone = MobileNetV3(scale=0.5, model_name='small')
        # NOTE(review): the LSTM input size must equal channels * height of the
        # backbone output; 576 is assumed here -- confirm it for scale=0.5 and
        # for the image height actually fed to the model.
        backbone_output_size = 576

        # Add LSTM layer to handle sequences
        self.lstm = nn.LSTM(
            input_size=backbone_output_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            time_major=False  # Inputs are (batch, time, feature)
        )

        # Final output layer
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        # Pass input through the backbone (feature extractor)
        features = self.backbone(x)

        batch_size, channels, height, width = features.shape
        # Bring the width axis in front of (channels, height) BEFORE flattening:
        # a plain reshape of an NCHW tensor to (batch, width, -1) would mix
        # values from different columns into each time step.
        features = features.transpose([0, 3, 1, 2]).reshape([batch_size, width, -1])

        # Pass features through the LSTM
        lstm_out, _ = self.lstm(features)

        # Classify from the LSTM output at the last time step
        output = self.fc(lstm_out[:, -1, :])

        return output


# !conda create -n clean_env python=3.10 scikit-learn=1.2.2



In [None]:
!conda create -n clean_env python=3.10 scikit-learn=1.2.2

In [None]:
from PIL import Image
import pytesseract
import os

# Specify the path to the image
img_path = '/kaggle/input/aarrrrr/IMG-0886.jpg'

# Open the image using Pillow; the context manager closes the file handle
# once OCR has finished reading the pixels.
with Image.open(img_path) as img:
    # Perform OCR
    text = pytesseract.image_to_string(img)

# Print the extracted text
print("Extracted Text:\n", text)

## Remove horizontal ruling lines and split the page into rows

In [None]:
import cv2
import os
import numpy as np

def split_by_horizontal_lines_sequential(image_path, output_folder, row_prefix, line_offset=2, min_row_height=10):
    """
    Split an image into rows based on horizontal lines, excluding lines themselves, with sequential numbering.

    Near-horizontal segments (|y1 - y2| < 5) found by a probabilistic Hough
    transform are inpainted out of the image.  Every band between two
    consecutive segments (ordered by y) that is at least ``min_row_height``
    pixels tall is written to ``output_folder`` as ``<row_prefix>_row_<n>.png``.
    A debug image ``<row_prefix>_debug.png`` with the segments drawn in green
    is written as well.  When no segment is found, no rows are produced and an
    empty list is returned.

    Args:
        image_path (str): Path to the PNG file.
        output_folder (str): Folder to save cropped rows (created if missing).
        row_prefix (str): Prefix for naming cropped regions.
        line_offset (int): Number of pixels to offset the crop region to exclude lines.
        min_row_height (int): Minimum height in pixels for a row to be considered valid.

    Returns:
        list of str: Filenames of cropped rows.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_path``.
    """
    # Load image; cv2.imread signals failure by returning None instead of raising.
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # cv2.imwrite does not create missing directories, so make sure it exists.
    os.makedirs(output_folder, exist_ok=True)

    # Edge detection to highlight lines
    edges = cv2.Canny(gray, 50, 150, apertureSize=3)

    # Detect lines using Hough Transform
    lines = cv2.HoughLinesP(edges, rho=1, theta=np.pi / 180, threshold=100,
                            minLineLength=100, maxLineGap=10)

    # HoughLinesP yields None (not an empty array) when nothing is detected.
    segments = [] if lines is None else [tuple(int(v) for v in line[0]) for line in lines]

    # Keep horizontal segments (angle ~ 0 degrees), ordered top to bottom
    horizontal_lines = sorted(
        (seg for seg in segments if abs(seg[1] - seg[3]) < 5),
        key=lambda seg: seg[1],
    )

    # Create a mask of the detected lines and inpaint them away
    mask = np.zeros_like(gray)
    for x1, y1, x2, y2 in horizontal_lines:
        cv2.line(mask, (x1, y1), (x2, y2), 255, thickness=2)
    inpainted_image = cv2.inpaint(image, mask, inpaintRadius=3, flags=cv2.INPAINT_TELEA)

    # Split the image into rows using consecutive horizontal lines
    row_files = []
    row_counter = 1  # Sequential counter for saved rows
    image_height = gray.shape[0]
    for upper, lower in zip(horizontal_lines, horizontal_lines[1:]):
        # Exclude the lines themselves and clamp to the image
        y1 = max(0, upper[1] + line_offset)             # just below the current line
        y2 = min(image_height, lower[1] - line_offset)  # just above the next line

        if y1 >= y2 or (y2 - y1) < min_row_height:  # Check minimum row height
            print(f"Skipping invalid or small row: y1={y1}, y2={y2}, height={y2 - y1}")
            continue

        # Save the cropped row with sequential numbering
        row_filename = f"{row_prefix}_row_{row_counter}.png"
        cv2.imwrite(os.path.join(output_folder, row_filename), inpainted_image[y1:y2, :])
        row_files.append(row_filename)
        row_counter += 1

    # Save a debug image with horizontal lines drawn
    debug_image = image.copy()
    for x1, y1, x2, y2 in horizontal_lines:
        cv2.line(debug_image, (x1, y1), (x2, y2), (0, 255, 0), 2)
    debug_filename = os.path.join(output_folder, f"{row_prefix}_debug.png")
    cv2.imwrite(debug_filename, debug_image)

    return row_files


In [None]:
# Example usage: split one page into row crops
image_path = "/kaggle/input/chess-dataset-notation/data/153_0.png"
output_folder = "/kaggle/working/cropped_rows"
os.makedirs(output_folder, exist_ok=True)

row_prefix = "153_0"  # Matches the prefix in training_tags.txt
# The variable name is kept in case other cells refer to it; the list holds
# the file names of the cropped rows.
board_files = split_by_horizontal_lines_sequential(
    image_path=image_path,
    output_folder=output_folder,
    row_prefix=row_prefix,
)

print("Extracted rows:", board_files)

In [None]:
from paddleocr import PaddleOCR, draw_ocr
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import json

# Initialize PaddleOCR
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# Path to the PNG file
# NOTE: this rebinds image_path, which the row-splitting example cell also sets.
image_path = '/kaggle/input/chess-dataset-notation/data/001_0.png'

# Perform OCR on the image
# result[0] is consumed below as the list of detections for this image.
result = ocr.ocr(image_path, cls=True)

# Sort OCR results by reading order
def sort_by_reading_order(results, row_threshold=10):
    """Order OCR detections top-to-bottom, then left-to-right within a row.

    Each detection is indexed as ``item[0][0]`` = first corner point ``(x, y)``
    of its box.  Detections are sorted by that y value; a detection joins the
    current row when its y differs from the previously added detection's y by
    less than ``row_threshold``, and every row is then sorted by x.

    Args:
        results: List of detections, or ``None``/empty when nothing was found.
        row_threshold (int): Maximum y distance for two consecutive detections
            to count as the same row.

    Returns:
        list: The detections in reading order (empty for empty/``None`` input).
    """
    if results is None or len(results) == 0:
        return []

    # Sort by vertical position first (top-to-bottom)
    by_top = sorted(results, key=lambda item: item[0][0][1])

    ordered = []
    current_row = [by_top[0]]
    for item in by_top[1:]:
        if abs(item[0][0][1] - current_row[-1][0][0][1]) < row_threshold:
            current_row.append(item)
        else:
            # Row finished: emit it left-to-right and start a new one
            ordered.extend(sorted(current_row, key=lambda entry: entry[0][0][0]))
            current_row = [item]

    ordered.extend(sorted(current_row, key=lambda entry: entry[0][0][0]))
    return ordered

# result[0] is passed on as the detections of the single image processed above.
# NOTE(review): result[0] may be None or empty when nothing is detected --
# TODO confirm for the installed PaddleOCR version.
sorted_results = sort_by_reading_order(result[0])

# Display OCR Results
# Each entry is read as line[1] = (text, confidence); box numbers start at 1.
print("Sorted OCR Results:")
for idx, line in enumerate(sorted_results, start=1):
    print(f"Box {idx}: Detected Text: {line[1][0]}, Confidence: {line[1][1]}")

# Decide whether a box lies in an X range that should be ignored
def is_in_excluded_x_range(box, column_threshold=50, exclusion_range=(730, 790)):
    """Tell whether ``box`` sits in the first column or inside the excluded X band.

    Args:
        box: Sequence of corner points; only each point's x (index 0) is used.
        column_threshold: Boxes whose left edge is at or below this x are excluded.
        exclusion_range: ``(low, high)`` x band; boxes lying entirely inside it
            are excluded.

    Returns:
        bool: True when the box should be skipped.
    """
    xs = [corner[0] for corner in box]
    left_edge = min(xs)
    right_edge = max(xs)

    in_first_column = left_edge <= column_threshold
    if in_first_column:
        return in_first_column
    return left_edge >= exclusion_range[0] and right_edge <= exclusion_range[1]

# Collect label records and drawing data for every box that passes the X filter
training_labels, filtered_boxes, filtered_texts = [], [], []
for idx, line in enumerate(sorted_results, start=1):
    box, text = line[0], line[1][0]
    if is_in_excluded_x_range(box):
        continue  # box lies in an excluded X-coordinate range
    training_labels.append({"coordinates": box, "text": text, "box_id": idx})
    filtered_boxes.append((box, idx))  # box plus its number, used for drawing
    filtered_texts.append(text)  # text kept for mapping

# Save training labels as a JSON file
# Each record holds the box coordinates, the recognised text and the box number.
output_label_path = '/kaggle/working/training_labels.json'
with open(output_label_path, 'w') as f:
    json.dump(training_labels, f, indent=4)
print(f"Training labels saved at {output_label_path}")

# Draw filtered OCR results on the image with numbered boxes
image = Image.open(image_path).convert('RGB')
draw = ImageDraw.Draw(image)

# Use a valid font path for numbering
# NOTE(review): assumes the DejaVu fonts are installed at this path in the
# runtime image; ImageFont.truetype fails otherwise -- confirm.
font_path = "/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf"
font = ImageFont.truetype(font_path, size=20)

# NOTE(review): the `width` argument of ImageDraw.polygon presumably needs a
# newer Pillow than the `pillow>=8.0.0` floor in requirements.txt -- confirm.
for box, idx in filtered_boxes:
    draw.polygon([tuple(point) for point in box], outline="red", width=2)  # Draw box
    # Draw the box number 20 px above the box's first corner point
    draw.text((box[0][0], box[0][1] - 20), str(idx), fill="blue", font=font)

# Save the annotated image with numbered boxes
output_image_path = '/kaggle/working/annotated_image_filtered.png'
image.save(output_image_path)
print(f"Annotated image saved at {output_image_path}")

# Display the annotated image (the in-memory copy that was just saved)
plt.imshow(image)
plt.axis('off')
plt.show()
