# Sudoku recognition & solver

## Adapt dataset

The Sudoku images and their `.dat` annotation files come from Kaggle ([source](https://www.kaggle.com/datasets/mexwell/sudoku-image-dataset)).

The downloaded dataset has the following directory structure:
```sh
├── mixed
│   └── mixed
├── mixed 2
│   └── mixed 2
├── v1_test
│   └── v1_test
├── v1_training
│   └── v1_training
├── v2_test
│   └── v2_test
└── v2_train
    └── v2_train
```

YOLOv8 expects the dataset to be laid out like this:

([https://www.kaggle.com/datasets/meeratif/yolo-format-data](https://www.kaggle.com/datasets/meeratif/yolo-format-data))

```sh
dataset
├── images
│   ├── train
│   │   ├── sudoku_1.jpg
│   │   └── sudoku_2.jpg
│   └── val
│       ├── sudoku_3.jpg
│       └── sudoku_4.jpg
└── labels
    ├── train
    │   ├── sudoku_1.txt
    │   └── sudoku_2.txt
    └── val
        ├── sudoku_3.txt
        └── sudoku_4.txt
```

We write a script that converts the annotation files into the label format YOLOv8 expects, with one line per object:

```sh
<class_id> <x_center> <y_center> <width> <height>
```

In [1]:
import os

In [2]:
def dat_to_yolo(dat_path, output_txt_path):
    """
    Convert a Sudoku .dat annotation file into a YOLO-format .txt label file.

    Every line of the .dat file that consists only of digits and whitespace
    is read as one row of the grid; all other lines (e.g. the camera and
    resolution header) are ignored. Each non-zero digit produces one output
    line of the form ``<class_id> <x_center> <y_center> <width> <height>``.

    The boxes are laid out on a uniform 9x9 split of the whole image, in
    normalized coordinates: the cell at (row, col) is centered at
    ((col + 0.5) / 9, (row + 0.5) / 9) and is 1/9 wide and 1/9 high.

    Args:
        dat_path (str): Path to the input .dat file.
        output_txt_path (str): Path of the YOLO .txt file to write. It is
            overwritten if it already exists.
    """
    # Coordinates are normalized, so the pixel resolution stored in the
    # header is not needed; each cell is simply 1/9 of the image.
    cell_width = 1 / 9
    cell_height = 1 / 9

    # Collect the grid rows: non-blank lines made only of digits/whitespace.
    grid = []
    with open(dat_path, 'r') as file:
        for line in file:
            if line.strip() and all(char.isdigit() or char.isspace() for char in line):
                grid.append([int(x) for x in line.split()])

    # Build one YOLO annotation line per filled cell.
    annotations = []
    for row_idx, row in enumerate(grid):
        for col_idx, value in enumerate(row):
            if value == 0:  # Empty cell: nothing to annotate.
                continue
            x_center = (col_idx + 0.5) * cell_width
            y_center = (row_idx + 0.5) * cell_height
            class_index = value - 1  # YOLO classes: 0 for "1", 1 for "2", ...
            annotations.append(
                f"{class_index} {x_center:.6f} {y_center:.6f} "
                f"{cell_width:.6f} {cell_height:.6f}\n"
            )

    with open(output_txt_path, 'w') as output_file:
        output_file.writelines(annotations)


# NOTE (Antoine): no need to re-run the next cell; the conversion has already been done, see the dataset/ directory.

In [None]:
import shutil


def _convert_split(source_dir, labels_dir, images_dir):
    """
    Convert one split of the raw dataset into the YOLO directory layout.

    For every entry of ``source_dir``: a ``.dat`` file is converted with
    ``dat_to_yolo`` into a ``.txt`` file of the same base name inside
    ``labels_dir``; a ``.jpg`` file is copied into ``images_dir``. Other
    entries are ignored.

    Args:
        source_dir (str): Directory holding the raw .dat / .jpg files.
        labels_dir (str): Destination directory for the YOLO label files.
        images_dir (str): Destination directory for the copied images.
    """
    # Create the destinations up front so writing/copying cannot fail on a
    # missing directory.
    os.makedirs(labels_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)

    for file_name in sorted(os.listdir(source_dir)):
        source_path = os.path.join(source_dir, file_name)
        if file_name.endswith('.dat'):
            label_name = os.path.splitext(file_name)[0] + '.txt'
            dat_to_yolo(source_path, os.path.join(labels_dir, label_name))
        elif file_name.endswith('.jpg'):
            # shutil.copy avoids building a shell command from file names
            # (safe with spaces/metacharacters, portable, raises on failure).
            shutil.copy(source_path, images_dir)


# Training split -> dataset/{labels,images}/train
train_source = 'archive/v1_training/v1_training/'
_convert_split(train_source, "dataset/labels/train/", "dataset/images/train/")

# Test split -> dataset/{labels,images}/val
test_source = 'archive/v1_test/v1_test/'
_convert_split(test_source, "dataset/labels/val/", "dataset/images/val/")