## Pobranie i rozpakowanie datasetu aut z polskimi tablicami rejestracyjnymi

In [None]:
# Download the dataset archive from Kaggle into the current directory.
!kaggle datasets download -d piotrstefaskiue/poland-vehicle-license-plate-dataset
# Unpack the archive (annotations.xml and the photos/ folder) into /content.
!unzip poland-vehicle-license-plate-dataset -d /content
# The archive is no longer needed once extracted.
!rm poland-vehicle-license-plate-dataset.zip

Dataset URL: https://www.kaggle.com/datasets/piotrstefaskiue/poland-vehicle-license-plate-dataset
License(s): other
Downloading poland-vehicle-license-plate-dataset.zip to /content
100% 472M/474M [00:07<00:00, 76.9MB/s]
100% 474M/474M [00:07<00:00, 66.7MB/s]
Archive:  poland-vehicle-license-plate-dataset.zip
  inflating: /content/annotations.xml  
  inflating: /content/photos/1.jpg   
  inflating: /content/photos/10.jpg  
  inflating: /content/photos/100.jpg  
  inflating: /content/photos/101.jpg  
  inflating: /content/photos/102.jpg  
  inflating: /content/photos/103.jpg  
  inflating: /content/photos/104.jpg  
  inflating: /content/photos/105.jpg  
  inflating: /content/photos/106.jpg  
  inflating: /content/photos/107.jpg  
  inflating: /content/photos/108.jpg  
  inflating: /content/photos/109.jpg  
  inflating: /content/photos/11.jpg  
  inflating: /content/photos/110.jpg  
  inflating: /content/photos/111.jpg  
  inflating: /content/photos/112.jpg  
  inflating: /content/photos/

## Wyciągnięcie pojedynczych znaków ze zbioru tablic rejestracyjnych



In [None]:
import os, cv2
import numpy as np
from skimage.filters import threshold_local

# turns a license plate crop into a binary image of character candidates
def process_image(image_path = None, img = None, debug_folder=None, blur_ksize=(5, 5)):
    """Binarize a license plate image so that dark glyphs become white objects.

    Pipeline: Gaussian blur -> mask of pixels whose blue and red channels
    differ strongly (used to remove the blue euroband) -> adaptive threshold
    of the HSV value channel -> AND of the thresholded image with the mask.

    Args:
        image_path: Path of the plate image to load with ``cv2.imread``.
            Takes precedence over ``img`` when both are given.
        img: Already loaded plate image. Channels 0 and 2 are read as blue
            and red and the image is converted with ``COLOR_BGR2HSV``, so a
            3-channel BGR array is expected.
        debug_folder: If given, every intermediate stage is written there as
            ``<step>_<name>.png``; the folder is created when missing.
        blur_ksize: Kernel size passed to ``cv2.GaussianBlur``.

    Returns:
        Single-channel ``uint8`` image containing only 0 and 255.

    Raises:
        ValueError: If the file cannot be read, neither input is provided,
            or the image has no pixels (e.g. a degenerate crop).
    """
    if debug_folder is not None:
        os.makedirs(debug_folder, exist_ok=True)

    if image_path is not None:
        image = cv2.imread(image_path)
        if image is None:
            raise ValueError("Błąd: Nie można wczytać obrazu")
    elif img is not None:
        image = img
    else:
        raise ValueError("Nie przekazano obrazu lub sciezki")

    # An empty crop would otherwise surface as an OpenCV error that callers
    # catching ValueError do not handle.
    if image.size == 0:
        raise ValueError("Błąd: Pusty obraz")

    save_count = 0

    def save_stage(stage_name, stage_image):
        # Writes one numbered debug image; a no-op when debugging is off.
        nonlocal save_count
        if debug_folder is not None:
            cv2.imwrite(os.path.join(debug_folder, f"{save_count}_{stage_name}.png"), stage_image)
            save_count += 1

    save_stage("org", image)

    # blurring improves the result of the binarization
    image = cv2.GaussianBlur(image, blur_ksize, 0)
    save_stage("blurred", image)

    # blue and red channels are used to locate the euroband; the H component
    # of HSV might work as well, but that has not been tested
    B = image[:,:,0]
    save_stage("blue", B)
    R = image[:,:,2]
    save_stage("red", R)

    # absolute blue-red difference, computed in a wider signed type so the
    # subtraction cannot wrap around; replaces a per-pixel Python loop
    abs_diff = np.abs(B.astype(np.int16) - R.astype(np.int16)).astype(np.uint8)
    save_stage("BR_diff", abs_diff)

    # mask that is 0 where the channels differ strongly and 255 elsewhere
    _, diff_thresh = cv2.threshold(abs_diff, 40, 255, cv2.THRESH_BINARY)
    diff_thresh = cv2.bitwise_not(diff_thresh)
    save_stage("BR_diff_thresh", diff_thresh)

    # the value channel of HSV separates the glyphs from the plate background
    V = cv2.split(cv2.cvtColor(image, cv2.COLOR_BGR2HSV))[2]
    save_stage("value", V)

    # adaptive (local) thresholding of the value channel
    T = threshold_local(V, 61, offset=15, method="gaussian")
    image = (V > T).astype("uint8") * 255
    # invert so that the objects of interest are white
    image = cv2.bitwise_not(image)
    save_stage("thresh", image)

    # apply the mask computed from the blue-red difference
    image = cv2.bitwise_and(image, diff_thresh)
    save_stage("thresh_bitand", image)
    return image

# tells whether a contour's bounding box reaches the edge of the image
def touches_border(contour, lh, lw):
    """Return True when the contour's bounding box is within 1 px of any image edge.

    Args:
        contour: Contour accepted by ``cv2.boundingRect``.
        lh: Image height in pixels.
        lw: Image width in pixels.
    """
    left, top, box_w, box_h = cv2.boundingRect(contour)
    return bool(
        left <= 1
        or top <= 1
        or abs(lw - (left + box_w)) <= 1
        or abs(lh - (top + box_h)) <= 1
    )

# cuts out the segments of a binarized plate that contain characters
def get_characters_images(license_plate_image, debug_folder = None, min_char_height_factor=0.4,
                    max_char_height_factor=0.9, min_char_aspect_ratio=1., max_char_aspect_ratio=10.,
                    min_char_width_factor = 0.015, max_char_width_factor = 0.18):
    """Extract individual character images from a binarized license plate.

    A contour is kept as a character when its bounding box height and width
    fall within the given fractions of the plate size, its height/width ratio
    lies within the aspect-ratio limits, and it does not touch the border.

    Args:
        license_plate_image: Single-channel binary image (white objects).
        debug_folder: If given, the character mask, the masked plate and every
            cut-out character are written there; created when missing.
        min_char_height_factor, max_char_height_factor: Allowed character
            height as a fraction of the plate height.
        min_char_aspect_ratio, max_char_aspect_ratio: Allowed height/width ratio.
        min_char_width_factor, max_char_width_factor: Allowed character width
            as a fraction of the plate width.

    Returns:
        ``(characters, max_gap_index)``: the cut-out characters ordered left
        to right, and the index of the character that follows the widest
        horizontal gap (-1 when no positive gap exists).
    """
    if debug_folder is not None:
        os.makedirs(debug_folder, exist_ok = True)

    plate = license_plate_image
    stage = 0

    # every contour of the plate image is a candidate
    candidates, _ = cv2.findContours(plate, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

    # size limits expressed in pixels
    plate_h, plate_w = license_plate_image.shape[:2]
    height_lo = min_char_height_factor * plate_h
    height_hi = max_char_height_factor * plate_h
    width_lo = min_char_width_factor * plate_w
    width_hi = max_char_width_factor * plate_w

    def looks_like_character(candidate):
        # Applies the height, aspect-ratio, width and border criteria in turn.
        _, _, box_w, box_h = cv2.boundingRect(candidate)
        ratio = box_h / box_w
        if not (height_lo <= box_h <= height_hi):
            return False
        if not (min_char_aspect_ratio <= ratio <= max_char_aspect_ratio):
            return False
        if not (width_lo <= box_w <= width_hi):
            return False
        return not touches_border(candidate, plate_h, plate_w)

    # filled silhouettes of accepted contours; used to wipe everything else
    chars_mask = np.zeros(plate.shape[:], dtype=np.uint8)
    for candidate in candidates:
        if looks_like_character(candidate):
            cv2.drawContours(chars_mask, [candidate], 0, (255,), -1)
    if debug_folder is not None:
        cv2.imwrite(os.path.join(debug_folder, f"{stage}_chars_mask.png"), chars_mask)
        stage += 1

    # plate image restricted to the accepted characters
    chars_image = cv2.bitwise_and(plate, chars_mask)
    if debug_folder is not None:
        cv2.imwrite(os.path.join(debug_folder, f"{stage}_chars_image.png"), chars_image)
        stage += 1

    # outer contours of the remaining objects, ordered left to right so the
    # characters come out in reading order
    outlines, _ = cv2.findContours(chars_image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    outlines = sorted(outlines, key=lambda c: cv2.boundingRect(c)[0])

    characters = []
    # (left x, right x) of every character's bounding box; used to find the
    # widest gap, i.e. the space between the two parts of the plate number
    char_locations = []
    for outline in outlines:
        x, y, w, h = cv2.boundingRect(outline)
        glyph = chars_image[y:y + h, x:x + w]
        characters.append(glyph)
        char_locations.append((x, x+w))
        if debug_folder is not None:
            cv2.imwrite(os.path.join(debug_folder, f"char_{x}.png"), glyph)

    # index of the character that follows the widest gap
    widest_gap = 0
    max_gap_index = -1
    neighbours = zip(char_locations, char_locations[1:])
    for position, (previous_span, next_span) in enumerate(neighbours, start=1):
        gap = next_span[0] - previous_span[1]
        if gap > widest_gap:
            widest_gap = gap
            max_gap_index = position
    return characters, max_gap_index

In [None]:
import os
import xml.etree.ElementTree as ET

# characters handled by the classifier, in label order
valid_characters = '0123456789ABCDEFGHIJKLMNOPRSTUVWXYZ'
# maps every character to its integer class label (its position in the string above)
CHARS = dict(zip(valid_characters, range(len(valid_characters))))

def extract_characters_from_data(debug = False, xml_file = "annotations.xml", images_folder = "photos",
                                 target_folder = "characters", output_txt_file = "labels.txt"):
    """Cut single characters out of every annotated plate and write their labels.

    For each ``<image>`` in the XML file, every ``<box label="plate">`` is
    cropped, binarized with ``process_image`` and split with
    ``get_characters_images``. A plate is used only when the number of
    extracted characters equals the length of its annotated number; its
    characters are then resized with ``reshape_character``, saved to
    ``target_folder`` and listed in ``output_txt_file`` as
    ``<file name>, <class index>`` lines.

    Args:
        debug: When truthy, intermediate images are written under ``debug/``
            and skipped plates plus summary statistics are printed.
        xml_file: Annotation file with plate boxes and plate numbers.
        images_folder: Folder holding the images named in the XML.
        target_folder: Output folder for the character images.
        output_txt_file: Output text file for the labels.
    """
    os.makedirs(target_folder, exist_ok=True)
    # how many plates were seen and how many gave the expected character count
    total_plates = 0
    well_read_plates = 0
    # per character: how many were extracted and how many were annotated
    chars_count = [0 for _ in CHARS]
    total_chars_count = [0 for _ in CHARS]

    # parse first, so a broken XML does not truncate an existing labels file
    root = ET.parse(xml_file).getroot()

    with open(output_txt_file, "w", encoding="utf-8") as output_file:
        for image in root.findall(".//image"):
            image_name = image.get("name")
            image_path = os.path.join(images_folder, image_name)

            if not os.path.exists(image_path):
                continue

            img = cv2.imread(image_path)
            # imread returns None for unreadable files instead of raising
            if img is None:
                print(f"Cannot read image, {image_name}")
                continue

            # file name without its extension, whatever the extension length
            simple_image_name = os.path.splitext(image_name)[0]

            for plate_index, box in enumerate(image.findall(".//box[@label='plate']")):
                # plate corners; clamped at 0 because negative indices would
                # wrap around when slicing
                xtl = max(0, int(float(box.get("xtl"))))
                ytl = max(0, int(float(box.get("ytl"))))
                xbr = max(0, int(float(box.get("xbr"))))
                ybr = max(0, int(float(box.get("ybr"))))

                cropped = img[ytl:ybr, xtl:xbr]

                number_attribute = box.find(".//attribute[@name='plate number']")
                if number_attribute is None or not number_attribute.text:
                    print(f"Missing plate number, {image_name}")
                    continue
                plate_number = number_attribute.text.replace(" ", "")
                # correction of a wrong annotation
                if image_name == "58.jpg":
                    plate_number = "STA8582C"

                unknown_chars = sorted(set(plate_number) - set(CHARS))
                if unknown_chars:
                    print(f"Unsupported characters {unknown_chars} in plate number, {image_name}")
                    continue

                total_plates += 1
                for char in plate_number:
                    total_chars_count[CHARS[char]] += 1

                # the first plate keeps the plain image name; further plates of
                # the same image get a suffix so their files do not collide
                if plate_index == 0:
                    plate_id = simple_image_name
                else:
                    plate_id = f"{simple_image_name}_p{plate_index}"

                plate_debug_folder = f"debug/{plate_id}" if debug else None
                chars_debug_folder = f"debug/{plate_id}/chars" if debug else None

                try:
                    if cropped.size == 0:
                        raise ValueError("Empty plate crop")
                    processed_license_plate = process_image(img = cropped,
                                                            debug_folder = plate_debug_folder)
                except ValueError as e:
                    print(f"{e}, {image_name}")
                    continue

                characters, _ = get_characters_images(processed_license_plate,
                                                      debug_folder = chars_debug_folder)

                if len(characters) != len(plate_number):
                    if debug:
                        print(f"Extracted {len(characters)} characters in {image_name}, should be {len(plate_number)}, skipping")
                    continue
                well_read_plates += 1

                # save the characters together with their class labels
                for i, character in enumerate(characters):
                    character_file = f"{plate_id}_{i}.png"
                    label = CHARS[plate_number[i]]
                    cv2.imwrite(os.path.join(target_folder, character_file), reshape_character(character))
                    output_file.write(f"{character_file}, {label}\n")
                    chars_count[label] += 1

    if debug:
        # guard against an annotation file without any usable plate
        read_percentage = well_read_plates / total_plates * 100 if total_plates else 0
        print(f"Udało się odczytać prawidłową liczbę znaków z {read_percentage:.0f}% rejestracji")
        for char, count, total_count in zip(CHARS, chars_count, total_chars_count):
            print(f"{char}: {count}/{total_count}")

In [None]:
# resizes a character image without distorting it; the default sizes are the
# ones the character classifier in this notebook is fed with
def reshape_character(char, target_resized = 216, target_padded = 256):
    """Scale a character so its longer side is ``target_resized`` and pad it to a square.

    Args:
        char: Character image (array with height and width as first two dims).
        target_resized: Length of the longer side after scaling.
        target_padded: Side length of the zero-padded square output.

    Returns:
        Image of size ``target_padded`` x ``target_padded`` with the scaled
        character centred and a constant border of value 0 around it.
    """
    src_h, src_w = char.shape[:2]
    scale = target_resized / max(src_h, src_w)
    # cubic interpolation when enlarging, area interpolation otherwise
    interpolation = cv2.INTER_CUBIC if scale > 1 else cv2.INTER_AREA
    scaled = cv2.resize(char, (0,0), fx=scale, fy=scale, interpolation=interpolation)

    scaled_h, scaled_w = scaled.shape[:2]
    # split the missing pixels evenly; an odd remainder goes to bottom/right
    pad_top, odd_rows = divmod(target_padded - scaled_h, 2)
    pad_left, odd_cols = divmod(target_padded - scaled_w, 2)
    pad_bottom = pad_top + odd_rows
    pad_right = pad_left + odd_cols
    return cv2.copyMakeBorder(scaled, pad_top, pad_bottom, pad_left, pad_right,
                              cv2.BORDER_CONSTANT, value = 0)

In [None]:
# Run the extraction over the whole dataset; debug=True additionally writes the
# intermediate images under debug/ and prints skipped plates and statistics.
extract_characters_from_data(debug=True)

Extracted 6 characters in 10.jpg, should be 7, skipping
Extracted 8 characters in 110.jpg, should be 7, skipping
Extracted 6 characters in 112.jpg, should be 7, skipping
Extracted 0 characters in 115.jpg, should be 7, skipping
Extracted 8 characters in 118.jpg, should be 7, skipping
Extracted 5 characters in 143(1).jpg, should be 7, skipping
Extracted 5 characters in 143.jpg, should be 7, skipping
Extracted 5 characters in 148.jpg, should be 7, skipping
Extracted 6 characters in 186.jpg, should be 7, skipping
Extracted 6 characters in 24.jpg, should be 7, skipping
Extracted 2 characters in 35.jpg, should be 4, skipping
Extracted 8 characters in 52.jpg, should be 7, skipping
Extracted 1 characters in 6.jpg, should be 7, skipping
Extracted 5 characters in 8.jpg, should be 7, skipping
Extracted 6 characters in 81.jpg, should be 7, skipping
Extracted 9 characters in 84.jpg, should be 8, skipping
Udało się odczytać prawidłową liczbę znaków z 92% rejestracji
0: 65/70
1: 78/83
2: 77/79
3: 54/

## Nauka modelu klasyfikującego pojedyncze znaki z tablicy rejestracyjnej

In [None]:
# torch-summary provides the `torchsummary.summary` helper imported below.
# %pip installs into the running kernel's environment; the version is pinned
# to the one this notebook was executed with.
%pip install -q torch-summary==1.4.5

Collecting torch-summary
  Downloading torch_summary-1.4.5-py3-none-any.whl.metadata (18 kB)
Downloading torch_summary-1.4.5-py3-none-any.whl (16 kB)
Installing collected packages: torch-summary
Successfully installed torch-summary-1.4.5


In [2]:
import torch
from torch.utils.data import random_split
import math
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets
import torchvision.transforms.v2 as v2
import os, sys, time
from torchsummary import summary
import pandas as pd
from torchvision.io import read_image, ImageReadMode
from torchvision.models import efficientnet_b1, EfficientNet_B1_Weights

class CustomDataset(Dataset):
    """Image dataset described by a text file of ``<file name>, <label>`` rows.

    Args:
        csv_file: Path to the label file; column 0 holds the image file name,
            column 1 the integer class label.
        img_dir: Folder containing the image files.
        transform: Optional callable applied to every image tensor.
        target_transform: Optional callable applied to every label.
        header: Passed to ``pandas.read_csv``. Defaults to ``None`` (no header
            row), so the first line of the file is kept as a sample instead of
            being consumed as column names. Pass ``0`` for files that do start
            with a header row.
    """

    def __init__(self, csv_file, img_dir, transform=None, target_transform=None, header=None):
        # skipinitialspace drops the blank that follows the comma in each row
        self.img_labels = pd.read_csv(csv_file, header=header, skipinitialspace=True)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    # number of samples
    def __len__(self):
        return len(self.img_labels)

    # returns the image and its label
    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = read_image(img_path, ImageReadMode.GRAY)
        # the model expects 3-channel input, so the single gray channel is
        # repeated three times
        # NOTE(review): after .float() the values keep the range read from the
        # file rather than 0..1; confirm this is what the preprocessing
        # applied later expects.
        image = image.repeat(3, 1, 1).float()
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

# trains the model for one pass over the training data, then evaluates it
def trainAndTest(model, optimizer, train_data_loader, test_data_loader, loss_fn,
                 device, preprocess):
    """Run one training epoch followed by one evaluation pass.

    Args:
        model: Network to train; left in eval mode on return.
        optimizer: Optimizer updating ``model``'s parameters.
        train_data_loader: Iterable of ``(images, labels)`` training batches.
        test_data_loader: Iterable of ``(images, labels)`` evaluation batches.
        loss_fn: Loss callable taking ``(output, labels)``.
        device: Device the batches are moved to.
        preprocess: Callable applied to every image batch, in training and in
            evaluation alike, before it is fed to the model.

    Returns:
        ``(loss_total, guesses_right)``: the sum of the per-batch evaluation
        losses and the number of correctly classified evaluation samples.
    """
    model.train()
    for images, labels in train_data_loader:
        # bring the images into the form the model expects
        images = preprocess(images)
        images = images.to(device)
        labels = labels.to(device)

        output = model(images)
        loss = loss_fn(output, labels)

        # clear gradients left over from the previous iteration so they are
        # not applied again
        optimizer.zero_grad()
        # backpropagation: compute the gradients of the loss
        loss.backward()
        # update the weights from the gradients
        optimizer.step()

    loss_total = 0
    guesses_right = 0
    model.eval()
    # no learning happens here, so gradients are not needed
    with torch.no_grad():
        for images, labels in test_data_loader:
            # evaluation must see inputs prepared exactly like training inputs
            images = preprocess(images)
            images = images.to(device)
            labels = labels.to(device)

            output = model(images)
            loss = loss_fn(output, labels)

            # class with the highest score for every sample
            predictions = torch.max(output, 1)[1]

            # .item() keeps the counter a plain Python int
            guesses_right += (predictions == labels).sum().item()
            loss_total += loss.item()

    return loss_total, guesses_right

In [None]:
# whether to print the model summary and the per-class dataset statistics
print_summary = True
print_datasets = True

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# training hyperparameters and the pretrained model
batch_size = 32
max_epoch = 100
lr = 0.001
loss_function = nn.CrossEntropyLoss()
weights = EfficientNet_B1_Weights.DEFAULT
model = efficientnet_b1(weights=weights)
# the pretrained classifier head is replaced by a single linear layer with one
# output per character class (len(CHARS))
features_in = model.classifier[1].in_features
model.classifier = nn.Linear(features_in, len(CHARS))
model = model.to(device)
preprocess = weights.transforms()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
if print_summary:
    summary(model, torch.randn(1, 3, 256, 256).to(device))

# load the dataset and split it into training and test parts; the generator
# is seeded so that the split is the same on every run
dataset = CustomDataset("labels.txt", "characters")
split_generator = torch.Generator().manual_seed(42)
train_dataset, test_dataset = random_split(dataset, [0.8, 0.2], generator=split_generator)
train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
# evaluation does not depend on sample order, so no shuffling is needed
test_data_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

if print_datasets:
    train_chars_count = [0 for _ in CHARS]
    test_chars_count = [0 for _ in CHARS]
    # labels are read straight from the label table through the split indices,
    # which avoids decoding every image just to count classes
    for subset, counts in ((train_dataset, train_chars_count), (test_dataset, test_chars_count)):
        for label in dataset.img_labels.iloc[subset.indices, 1]:
            counts[int(label)] += 1
    total_chars_count = [train_count + test_count
                         for train_count, test_count in zip(train_chars_count, test_chars_count)]
    print("char: train_count|test_count/total_count")
    for char, train_count, test_count, total_count in \
            zip(CHARS, train_chars_count, test_chars_count, total_chars_count):
        print(f"{char}: {train_count}|{test_count}/{total_count}")

# bookkeeping for training and evaluation
lowest_loss = math.inf
total_test_batches = len(test_data_loader)
total_test_data = len(test_dataset)
total_time = 0

# training and evaluation loop
for epoch in range(max_epoch):
    t0 = time.time()
    print(f'Epoch: {epoch+1}/{max_epoch}')

    total_loss, right_guesses = trainAndTest(model, optimizer, train_data_loader,
                                             test_data_loader, loss_function, device, preprocess)

    # statistics of this epoch
    epoch_time = time.time() - t0
    total_time += epoch_time
    avg_val_loss = total_loss/total_test_batches
    print('avg_val_loss: {:.4f}, val_acc: {:.4f}\nEpoch Time = {:.2f}, Cumulative Time = {:.2f}\n'.format(avg_val_loss, right_guesses/total_test_data, epoch_time, total_time))

    # keep the weights with the lowest validation loss seen so far
    if avg_val_loss < lowest_loss:
        lowest_loss = avg_val_loss
        torch.save({
            'lowest_loss': lowest_loss,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, 'best_weights.pt')

# also keep the weights from the final epoch
torch.save({
    'lowest_loss': lowest_loss,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
}, 'last_weights.pt')

Layer (type:depth-idx)                        Output Shape              Param #
├─Sequential: 1-1                             [-1, 1280, 8, 8]          --
|    └─Conv2dNormActivation: 2-1              [-1, 32, 128, 128]        --
|    |    └─Conv2d: 3-1                       [-1, 32, 128, 128]        864
|    |    └─BatchNorm2d: 3-2                  [-1, 32, 128, 128]        64
|    |    └─SiLU: 3-3                         [-1, 32, 128, 128]        --
|    └─Sequential: 2-2                        [-1, 16, 128, 128]        --
|    |    └─MBConv: 3-4                       [-1, 16, 128, 128]        1,448
|    |    └─MBConv: 3-5                       [-1, 16, 128, 128]        612
|    └─Sequential: 2-3                        [-1, 24, 64, 64]          --
|    |    └─MBConv: 3-6                       [-1, 24, 64, 64]          6,004
|    |    └─MBConv: 3-7                       [-1, 24, 64, 64]          10,710
|    |    └─MBConv: 3-8                       [-1, 24, 64, 64]          10,710
|   

## Zapisanie datasetu i modelu na dysku

In [4]:
# Copy previously saved weights from Google Drive into the working directory.
# The relative drive/ path presumably resolves only after the Drive mount cell
# further down has been run — confirm the execution order.
!scp drive/MyDrive/Sem5/AO/best_weights.pt ./

In [9]:
# Re-save the checkpoint with only the model weights, leaving out whatever
# other entries best_weights.pt contains.
checkpoint = torch.load("best_weights.pt", map_location=torch.device('cpu'), weights_only=True)
model_weights_only = {'model_state_dict': checkpoint["model_state_dict"]}
torch.save(model_weights_only, 'best_weights_only.pt')

In [1]:
# Mount Google Drive at /content/drive so the cells that read from or write to
# drive/MyDrive/... can reach it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
import locale
# Force the reported preferred encoding to UTF-8.
# NOTE(review): presumably a workaround so the shell commands below run in this
# runtime — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"
# Copy the extracted characters, their labels and the stripped weights to Drive.
!scp -r characters drive/MyDrive/Sem5/AO
!scp labels.txt drive/MyDrive/Sem5/AO
!scp best_weights_only.pt drive/MyDrive/Sem5/AO