In [1]:
!pip install opencv-python pytesseract
!sudo apt-get install -y tesseract-ocr

Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.13
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 49 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 

In [2]:

!sudo apt-get install -y tesseract-ocr
!pip install pytesseract

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [8]:
import os
import cv2
import pytesseract

def process_image(image_path, debug=False):
    """Find a licence-plate-shaped region in an image and run OCR on it.

    Pipeline: grayscale -> 5x5 Gaussian blur -> adaptive threshold ->
    external contours. The first contour whose bounding box has an aspect
    ratio strictly between 2 and 5 and a height above 20 px is taken as the
    plate. The grayscale crop is blurred (3x3), binarised with Otsu's
    threshold and passed to Tesseract.

    Args:
        image_path: Path of the image file to read with ``cv2.imread``.
        debug: When true, intermediate images are written into the
            ``debug`` folder (created if missing).

    Returns:
        A 2-tuple ``(text, plate)``. On success ``text`` is the stripped OCR
        output and ``plate`` is the binarised plate crop. When no candidate
        region is found the tuple is ``("Tablica nie znaleziona", None)``,
        so callers can always unpack two values.

    Raises:
        ValueError: If ``cv2.imread`` could not load ``image_path``.
    """
    debug_folder = "debug"
    if debug:
        os.makedirs(debug_folder, exist_ok=True)

    def dump(file_name, picture):
        # Debug snapshots are written only when the caller asked for them.
        if debug:
            cv2.imwrite(os.path.join(debug_folder, file_name), picture)

    image = cv2.imread(image_path)
    if image is None:
        # imread signals failure by returning None instead of raising;
        # fail here with a clear message rather than inside cvtColor.
        raise ValueError(f"Nie można wczytać obrazu: {image_path}")

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    dump("gray.jpg", gray)

    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    dump("blurred.jpg", blurred)

    thresh = cv2.adaptiveThreshold(
        blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2
    )
    dump("thresh.jpg", thresh)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    license_plate = None
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        # Heuristic: wide, reasonably tall rectangle; the first match wins.
        if 2 < aspect_ratio < 5 and h > 20:
            license_plate = gray[y:y+h, x:x+w]
            dump("license_plate.jpg", image[y:y+h, x:x+w])
            break

    if license_plate is None:
        # Same sentinel text as before, but in the same 2-tuple shape as the
        # success path so that tuple-unpacking callers do not crash.
        return "Tablica nie znaleziona", None

    license_plate = cv2.GaussianBlur(license_plate, (3, 3), 0)
    license_plate = cv2.threshold(
        license_plate, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )[1]
    dump("license_plate_processed.jpg", license_plate)

    # Page segmentation mode 8: treat the crop as a single word.
    config = "--psm 8"
    text = pytesseract.image_to_string(license_plate, config=config)

    return text.strip(), license_plate

def save_characters_images(characters, license_plate_image, output_folder):
    """Cut the plate image into equal-width strips and save one per character.

    The plate is divided into ``len(characters)`` vertical strips of width
    ``image_width // len(characters)``; strip ``i`` is saved for character
    ``i``. Leftover columns on the right-hand side are not saved.

    File names are ``"<index>_<label>.jpg"``. The index keeps repeated
    characters from overwriting each other. Non-alphanumeric characters are
    written as ``u<hex code point>`` so that symbols such as ``/`` never end
    up in a path.

    Args:
        characters: Sequence of recognised characters (strings).
        license_plate_image: Image array; only ``shape[1]`` (width) and
            column slicing are used, so 2-D and 3-D arrays both work.
        output_folder: Destination directory, created if missing.

    Returns:
        None. Nothing is written when ``characters`` is empty or the image is
        narrower than the number of characters.
    """
    os.makedirs(output_folder, exist_ok=True)

    count = len(characters)
    if count == 0:
        # Nothing recognised: avoid dividing by zero below.
        return

    plate_width = license_plate_image.shape[1]
    char_width = plate_width // count
    if char_width == 0:
        # Fewer pixel columns than characters: every strip would be empty.
        return

    for index, char in enumerate(characters):
        left = index * char_width
        char_image = license_plate_image[:, left:left + char_width]
        label = "".join(
            c if c.isalnum() else f"u{ord(c):04x}" for c in str(char)
        )
        target = os.path.join(output_folder, f"{index:02d}_{label}.jpg")
        cv2.imwrite(target, char_image)

def main(dataset_path='.'):
    """Run plate OCR on every ``.jpg`` file in a folder.

    For each image the recognised text (spaces removed) is split into single
    characters. When there is at least one character, per-character crops are
    saved under ``debug/char_images_<file stem>``.

    Args:
        dataset_path: Folder to scan (non-recursively). Defaults to the
            current directory.

    Returns:
        A list with one entry per processed image, in sorted file-name order:
        the list of recognised characters, or ``["Brak tablicy"]`` when no
        plate was found. An empty list is returned when ``dataset_path`` does
        not exist, so the result is always iterable.
    """
    if not os.path.exists(dataset_path):
        print(f"Folder {dataset_path} nie istnieje!")
        return []

    not_found = "Tablica nie znaleziona"
    all_characters = []

    # Sorted so the row order is reproducible across runs and platforms.
    for file_name in sorted(os.listdir(dataset_path)):
        if not file_name.endswith('.jpg'):
            continue

        image_path = os.path.join(dataset_path, file_name)
        print(f"Przetwarzanie: {image_path}")
        result = process_image(image_path, debug=True)

        # process_image may signal "not found" with a bare string instead of
        # a (text, image) pair; normalise both shapes before unpacking.
        if isinstance(result, str):
            text, license_plate_image = result, None
        else:
            text, license_plate_image = result

        if text == not_found:
            all_characters.append(["Brak tablicy"])
            continue

        characters = list(text.replace(" ", ""))
        all_characters.append(characters)

        # With no recognised characters there is nothing to cut the plate into.
        if characters:
            # splitext keeps inner dots, so "a.b.jpg" and "a.c.jpg" get
            # different output folders.
            stem = os.path.splitext(file_name)[0]
            output_folder = os.path.join("debug", f"char_images_{stem}")
            save_characters_images(characters, license_plate_image, output_folder)

    return all_characters

if __name__ == "__main__":
    # Guard against a None result from main() so the loop below never tries
    # to iterate over None.
    characters_table = main() or []
    for row in characters_table:
        print(row)


Przetwarzanie: ./polen25.jpg
Przetwarzanie: ./polen35.jpg
['G', 'W', 'E', '9', 'T', '7', '1', ',']
['B', 'E', 'B', 'K', 'L', '0', '3', '6', '9', '3', ')']
