In [1]:
import cv2
import os
import pandas as pd
import numpy as np
import keras, tensorflow as tf
import matplotlib.pyplot as plt
import shutil

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, LSTM
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from sklearn.model_selection import train_test_split


In [2]:
def extract(img, smaller):
    """Return the outer contours found in a binarised version of ``img``.

    The image is converted from BGR to grayscale and thresholded with Otsu's
    method in inverse-binary mode. The resulting mask is then post-processed
    in one of two ways before contours are searched:

    * ``smaller`` truthy: a morphological *opening* with a 1x1 rectangular
      kernel, one iteration.
      NOTE(review): a 1x1 kernel is expected to leave the mask essentially
      unchanged - confirm this is intended.
    * ``smaller`` falsy: a dilation with an 18x18 rectangular kernel, two
      iterations.

    Args:
        img: BGR image array accepted by ``cv2.cvtColor``.
        smaller: Selects the post-processing described above.

    Returns:
        The contour collection returned by ``cv2.findContours`` using
        ``cv2.RETR_EXTERNAL`` and ``cv2.CHAIN_APPROX_NONE``.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Otsu picks the threshold itself; the inverse mode makes dark pixels white.
    _, binary = cv2.threshold(
        grayscale, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV
    )

    if not smaller:
        # Large kernel, applied twice: grows the white regions of the mask.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))
        mask = cv2.dilate(binary, kernel, iterations=2)
    else:
        # Opening (erosion followed by dilation) with a 1x1 kernel.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))
        mask = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=1)

    # Only the outermost contours are requested; the hierarchy is not needed.
    found, _hierarchy = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
    )
    return found

In [28]:
def boundingbox(img_path):
    """Locate individual glyphs in an image and describe them as YOLO boxes.

    The image is first split into blocks with ``extract(img, False)``; each
    block is then searched again with ``extract(block, True)`` and every
    contour found there is treated as one glyph.

    Args:
        img_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A 4-tuple ``(count, boxes, im, crops)``:

        * ``count`` - number of glyph contours found.
        * ``boxes`` - one ``[center_x, center_y, width, height]`` list per
          glyph, each value divided by the image width/height (YOLO layout),
          ordered by ascending ``center_x``.
        * ``im`` - a copy of the loaded image.
        * ``crops`` - the glyph regions resized to 64x64, in the same order
          as ``boxes``.

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot load ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    img_height, img_width = img.shape[:2]
    im = img.copy()

    # Each entry pairs a box with its crop so both stay aligned when sorted.
    detections = []

    for block_contour in extract(img, False):
        x, y, w, h = cv2.boundingRect(block_contour)
        block = img[y:y + h, x:x + w]

        for glyph_contour in extract(block, True):
            x2, y2, w2, h2 = cv2.boundingRect(glyph_contour)

            # Glyph coordinates are relative to the block; shift them back
            # into full-image coordinates.
            left, top = x + x2, y + y2

            yolo_box = [
                (left + w2 / 2) / img_width,
                (top + h2 / 2) / img_height,
                w2 / img_width,
                h2 / img_height,
            ]
            crop = cv2.resize(img[top:top + h2, left:left + w2], (64, 64))
            detections.append((yolo_box, crop))

    # list.sort is stable, so glyphs sharing a center_x keep detection order.
    detections.sort(key=lambda detection: detection[0][0])

    boxes = [box for box, _ in detections]
    crops = [crop for _, crop in detections]

    return len(detections), boxes, im, crops
    

In [8]:
# Character dataset used for the classifier (per the original note: 62 classes,
# 55 images each).
model_training_csv = os.path.join(os.getcwd(), 'alphabets/english.csv')

# Load the annotation table for the character dataset.
model_training_df = pd.read_csv(model_training_csv)

# Distinct values of the `label` column, in order of first appearance.
# A label's position in this array is used as its integer class id.
class_names = model_training_df.label.unique()

# Integer class ids: 0 .. len(class_names) - 1.
Y = list(range(len(class_names)))

# One-hot encode the class ids. `to_categorical` expects integer class
# indices, so it must receive `Y`; `class_names` holds the raw label values
# (they are compared against characters elsewhere in this notebook).
y_train = keras.utils.to_categorical(Y, len(class_names))


In [26]:

# All dataset folders are resolved against the current working directory.
_project_root = os.getcwd()

# Folder that holds the prepared source images.
source_folder_images = os.path.join(_project_root, "archive/train_v2/train")

# Folder that stores the images of the YOLO dataset.
destination_folder_images = os.path.join(_project_root, "data/images")

# Folder that stores the label files of the YOLO dataset.
destination_folder_labels = os.path.join(_project_root, "data/labels")


In [83]:
train_img_path = os.path.join(os.getcwd(), "archive/train_v2/train")
test_img_path = os.path.join(os.getcwd(), "archive/test_v2/test")

train_csv_path = os.path.join(os.getcwd(), "archive/written_name_train_v2.csv")
test_csv_path = os.path.join(os.getcwd(), "archive/written_name_test_v2.csv")

df = pd.read_csv(train_csv_path)

# The copy/write steps below fail if the output folders do not exist yet.
os.makedirs(destination_folder_images, exist_ok=True)
os.makedirs(destination_folder_labels, exist_ok=True)

# Map each known character to its class id (its position in `class_names`).
class_index = {name: idx for idx, name in enumerate(class_names)}

# Number of samples accepted into the YOLO dataset.
cnt = 0
boxes = []

# Only the first 10000 rows are processed; never run past the end of the CSV.
max_samples = min(10000, len(df))

for i in range(max_samples):
    fileName = df['FILENAME'].iloc[i]
    identity = df['IDENTITY'].iloc[i]

    # Rows without a transcription cannot be labelled.
    if pd.isna(identity):
        continue
    identity = str(identity)

    img_name = os.path.join(train_img_path, fileName)
    if not os.path.isfile(img_name):
        # Listed in the CSV but missing on disk: nothing to segment.
        continue

    num_of_char, boxes, img, crop = boundingbox(img_name)

    # A space leaves no ink, so it yields no contour and must not be counted
    # or consume a box. Hyphens are drawn, so they keep their box slot.
    visible_chars = [ch for ch in identity if ch != ' ']

    # Keep the sample only when segmentation found one glyph per character.
    if num_of_char != len(visible_chars):
        continue

    label_lines = []
    for ch, box in zip(visible_chars, boxes):
        # Hyphens and characters without a class are detected but not labelled.
        if ch == '-' or ch not in class_index:
            continue
        coords = ' '.join(map(str, box))
        label_lines.append(f"{class_index[ch]} {coords}\n")

    # The label file shares the image's name with the extension swapped.
    new_filename = os.path.splitext(fileName)[0] + ".txt"
    label_path = os.path.join(destination_folder_labels, new_filename)

    # Write the label first so a failure cannot leave an unlabelled image
    # in the dataset folder.
    with open(label_path, "w") as label_file:
        label_file.write(''.join(label_lines))

    shutil.copy(img_name, os.path.join(destination_folder_images, fileName))
    cnt += 1


In [53]:
import os
from ultralytics import YOLO

# Locate the YOLO dataset folder and the YAML file that describes it.
yolo_folder = os.path.join(os.getcwd(), "data")
data_yaml = os.path.join(yolo_folder, "coco128.yaml")

# Build the detector from the "yolov8s.pt" weights file.
model = YOLO("yolov8s.pt")

# Train for 5 epochs on the dataset described by `data_yaml`.
results = model.train(epochs=5, data=data_yaml)


New https://pypi.org/project/ultralytics/8.1.25 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.1.24 🚀 Python-3.11.5 torch-2.2.1 CPU (Apple M1)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/Users/boost/Documents/deep_learning/project/data/coco128.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, emb

[34m[1mtrain: [0mScanning /Users/boost/Documents/deep_learning/project/data/labels... 120 images, 1063 backgrounds, 120 corrupt: 100%|██████████| 1183/1183 [00:00<00:00, 5602.17it/s]

[34m[1mtrain: [0mNew cache created: /Users/boost/Documents/deep_learning/project/data/labels.cache



[34m[1mval: [0mScanning /Users/boost/Documents/deep_learning/project/data/labels.cache... 120 images, 1063 backgrounds, 120 corrupt: 100%|██████████| 1183/1183 [00:00<?, ?it/s]






Plotting labels to runs/detect/train/labels.jpg... 
zero-size array to reduction operation maximum which has no identity
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000152, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5         0G          0      282.2          0          0        640:  57%|█████▋    | 38/67 [11:12<08:33, 17.70s/it]


KeyboardInterrupt: 