In [1]:
import cv2
import os
import pandas as pd
import numpy as np
import keras, tensorflow as tf
import matplotlib.pyplot as plt
import shutil

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization, LSTM
from keras.preprocessing.image import ImageDataGenerator

from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from sklearn.model_selection import train_test_split


In [2]:
def extract(img, smaller):
        
    # Convert the image to gray scale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    
    ret, thresh1 = cv2.threshold(gray, 0, 255, cv2.THRESH_OTSU | cv2.THRESH_BINARY_INV)

    if(smaller):
        rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))

        # Applying dilation on the threshold image
        dilation = cv2.morphologyEx(thresh1, cv2.MORPH_OPEN, rect_kernel, iterations = 1)
        # dilation = cv2.dilate(thresh1, rect_kernel, iterations = 1)
    else:
        rect_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (18, 18))

        # Applying dilation on the threshold image
        dilation = cv2.dilate(thresh1, rect_kernel, iterations = 2)

    # Finding contours
    contours, hierarchy = cv2.findContours(dilation, cv2.RETR_EXTERNAL, 
                                                    cv2.CHAIN_APPROX_NONE)

    return contours

In [28]:
def boundingbox(img_path):
    img = cv2.imread(img_path)
    contours = extract(img, False)

    sum = 0
    y_pred = []
    boxes = []
    im = img.copy()
    crops = []

    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        
        # rect = cv2.rectangle(im2, (x, y), (x + w, y + h), (255, 0, 0), 2)
            
        # Cropping the text block
        cropped = img[y:y + h, x:x + w]

        # extract each alphabet from each block
        contours2 = extract(cropped, True)
        
        for i in contours2:
            # count alphabets 
            sum += 1

            x2, y2, w2, h2 = cv2.boundingRect(i)
            
            cropped2 = img[y+y2: y+y2+h2, x+x2: x+x2+w2]
            
            center_x = (x + x2 + w2 / 2) / img.shape[1]
            center_y = (y + y2 + h2 / 2) / img.shape[0]
            normalized_width = w2 / img.shape[1]
            normalized_height = h2 / img.shape[0]

            # Append the YOLO format coordinates to the boxes list
            boxes.append([center_x, center_y, normalized_width, normalized_height])


            # boxes.append([x+x2, y+y2, x+x2 + w2, y+y2 + h2]) 

            cropped2 = cv2.resize(cropped2, (64,64))
            
            crops.append(cropped2)

            cropped2 = cropped2.reshape((1, 64, 64, 3))

    
    
    order = np.argsort([x[0] for x in boxes])
    boxes = [boxes[x] for x in order]
    crops = [crops[x] for x in order]
    
    return sum, boxes, im, crops
    

In [8]:
# each classes(62 classes) has 55 images 
model_training_csv = os.path.join(os.getcwd(), 'alphabets/english.csv')

# make dataframe for training model 
model_training_df = pd.read_csv(model_training_csv)

class_names = model_training_df.label.unique()

Y = [label for label in range(len(class_names))]

# one-hot-encoded the label
y_train = keras.utils.to_categorical(class_names, len(class_names))


In [26]:

# ‡∏£‡∏∞‡∏ö‡∏∏‡πÇ‡∏û‡∏•‡πÄ‡∏î‡∏≠‡∏£‡πå‡∏ó‡∏µ‡πà‡πÄ‡∏Å‡πá‡∏ö‡∏£‡∏π‡∏õ‡∏†‡∏≤‡∏û‡∏ó‡∏µ‡πà‡πÄ‡∏ï‡∏£‡∏µ‡∏¢‡∏°‡πÑ‡∏ß‡πâ
source_folder_images = os.path.join(os.getcwd(), "archive/train_v2/train")

# ‡∏£‡∏∞‡∏ö‡∏∏‡πÇ‡∏û‡∏•‡πÄ‡∏î‡∏≠‡∏£‡πå‡∏ó‡∏µ‡πà‡πÄ‡∏Å‡πá‡∏ö‡∏£‡∏π‡∏õ‡∏†‡∏≤‡∏û‡πÉ‡∏ô dataset YOLO
destination_folder_images = os.path.join(os.getcwd(), "data/images")


destination_folder_labels = os.path.join(os.getcwd(), "data/labels")

# destination_folder_labels = os.path.join(os.getcwd(), "dataset_yolo", "labels")


In [40]:
train_img_path = os.path.join(os.getcwd(), "archive/train_v2/train")
test_img_path = os.path.join(os.getcwd(), "archive/test_v2/test")

train_csv_path = os.path.join(os.getcwd(), "archive/written_name_train_v2.csv")
test_csv_path = os.path.join(os.getcwd(), "archive/written_name_test_v2.csv")



df = pd.read_csv(train_csv_path)
cnt = 0
im = 0
boxes = []

# for i in range (df.shape[0]):
for i in range (100):
    # print(df['FILENAME'][i])
    img_name = os.path.join(train_img_path, df['FILENAME'][i])
    num_of_char, boxes, img, crop = boundingbox(img_name)
    # num_of_char, y_pred, boxes, img, crop = boundingbox(img_name, model)
    
    if num_of_char == len(str(df['IDENTITY'][i])):
        cnt += 1
        fileName = df['FILENAME'][i]
        shutil.copy(os.path.join(source_folder_images, fileName), os.path.join(destination_folder_images, fileName))

        label = ''
        for j in range(len(df['IDENTITY'][i])):
            if df['IDENTITY'][i][j] == ' ':
                continue
            name = np.where(class_names == df['IDENTITY'][i][j])[0][0]
            print(name)
            label += str(name) + ' '
            temp = ' '.join(map(str, boxes[j]))
            label += temp
            label += '\n'
         

        fileName = fileName.split('.')
        new_filename = f"{fileName[0]}.txt"
        label_path = os.path.join(destination_folder_labels, new_filename)

        with open(label_path, "w") as label_file:
            label_file.write(label)

print(cnt)

27
24
22
10
18
23
21
18
21
24
30
22
18
29
17
18
14
30
33
19
10
15
15
14
30
33
11
27
30
16
14
27
18
14
22
10
30
25
10
28
16
30
18
21
21
24
29
16
24
16
30
14
29
13
30
25
27
10
29
21
10
11
10
27
17
19
30
21
18
14
10
28
17
21
14
34
16
10
27
24
29
12


In [22]:
np.where(class_names == '1')[0][0]

1

In [41]:
import os
from ultralytics import YOLO

yolo_folder = os.path.join(os.getcwd(), "data")
model = YOLO("yolov8s.pt")  # load the model

# Specify the path to your training YAML file
data_yaml = os.path.join(yolo_folder, "coco128.yaml")

# Train the model
results = model.train(data=data_yaml, epochs=5)


New https://pypi.org/project/ultralytics/8.1.25 available üòÉ Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.1.24 üöÄ Python-3.11.5 torch-2.2.1 CPU (Apple M1)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/Users/boost/Documents/deep_learning/project/data/coco128.yaml, epochs=5, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=train10, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=Fa

[34m[1mtrain: [0mScanning /Users/boost/Documents/deep_learning/project/data/labels... 12 images, 0 backgrounds, 0 corrupt: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12/12 [00:00<00:00, 2935.99it/s]

[34m[1mtrain: [0mNew cache created: /Users/boost/Documents/deep_learning/project/data/labels.cache



[34m[1mval: [0mScanning /Users/boost/Documents/deep_learning/project/data/labels.cache... 12 images, 0 backgrounds, 0 corrupt: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12/12 [00:00<?, ?it/s]


Plotting labels to runs/detect/train10/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.000152, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ‚úÖ
Image sizes 640 train, 640 val
Using 0 dataloader workers
Logging results to [1mruns/detect/train10[0m
Starting training for 5 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        1/5         0G      2.075      6.867      1.135        141        640: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:07<00:00,  7.52s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  1.86it/s]

                   all         12         81    0.00034       0.05    0.00166    0.00133






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        2/5         0G       2.16      7.113      1.073        176        640: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:07<00:00,  7.03s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  1.84it/s]

                   all         12         81   0.000336       0.05    0.00184    0.00147






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        3/5         0G      2.199      7.618      1.063        137        640: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:06<00:00,  6.96s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  1.83it/s]

                   all         12         81   0.000329       0.05    0.00216    0.00173






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        4/5         0G      2.202      6.705      1.109        205        640: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:06<00:00,  6.91s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  1.40it/s]

                   all         12         81    0.00532     0.0583     0.0054    0.00345






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


        5/5         0G      2.226      6.938      1.078        220        640: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:07<00:00,  7.09s/it]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  1.99it/s]

                   all         12         81    0.00531     0.0583    0.00508    0.00319






5 epochs completed in 0.012 hours.
Optimizer stripped from runs/detect/train10/weights/last.pt, 22.5MB
Optimizer stripped from runs/detect/train10/weights/best.pt, 22.5MB

Validating runs/detect/train10/weights/best.pt...
Ultralytics YOLOv8.1.24 üöÄ Python-3.11.5 torch-2.2.1 CPU (Apple M1)
Model summary (fused): 168 layers, 11149578 parameters, 0 gradients, 28.6 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00,  2.14it/s]


                   all         12         81    0.00532     0.0583     0.0054    0.00345
                     A         12          9          0          0          0          0
                     B         12          2          0          0          0          0
                     D         12          1          0          0          0          0
                     E         12          7          0          0          0          0
                     F         12          2          0          0          0          0
                     G         12          5          0          0          0          0
                     H         12          3          0          0          0          0
                     I         12          7          0          0          0          0
                     J         12          2          0          0          0          0
                     L         12          7          0          0          0          0
                     