In [10]:
!pip install opencv-python pytesseract
!sudo apt-get install -y tesseract-ocr


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  tesseract-ocr-eng tesseract-ocr-osd
The following NEW packages will be installed:
  tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
0 upgraded, 3 newly installed, 0 to remove and 49 not upgraded.
Need to get 4,816 kB of archives.
After this operation, 15.6 MB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-eng all 1:4.00~git30-7274cfa-1.1 [1,591 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr-osd all 1:4.00~git30-7274cfa-1.1 [2,990 kB]
Get:3 http://archive.ubuntu.com/ubuntu jammy/universe amd64 tesseract-ocr amd64 4.1.1-2.1build1 [236 kB]
Fetched 4,816 kB in 2s (2,738 kB/s)
debconf: unable to initialize frontend: Dialog
debconf: (No usable dialog-like program is installed, so the dialog based frontend cannot be used. at /usr/share/perl5/Debc

In [12]:
!sudo apt-get install -y tesseract-ocr
!pip install pytesseract

Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
tesseract-ocr is already the newest version (4.1.1-2.1build1).
0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.


In [13]:
import cv2
import pytesseract
import os

def process_image(image_path, debug=False, debug_folder="debug"):
    """Find a licence-plate-like region in an image and read it with Tesseract.

    Pipeline: grayscale -> 5x5 Gaussian blur -> adaptive Gaussian threshold ->
    external contours. The first contour whose bounding box satisfies
    ``2 < width / height < 5`` and ``height > 20`` is taken as the plate. The
    plate crop is blurred (3x3), binarised with Otsu's method and passed to
    pytesseract in single-line mode (``--psm 7``).

    Args:
        image_path: Path of the image file to process.
        debug: When True, intermediate images (and per-character crops) are
            written into ``debug_folder``.
        debug_folder: Directory for the debug images. Files have fixed names,
            so processing several images with the same folder overwrites the
            previous image's debug output.

    Returns:
        str: The stripped OCR text, or one of the sentinel messages
        "Nie można wczytać obrazu" (image could not be read) or
        "Tablica nie znaleziona" (no contour matched the plate heuristic).
    """
    if debug:
        os.makedirs(debug_folder, exist_ok=True)

    # cv2.imread reports failure by returning None instead of raising;
    # without this guard cvtColor fails with an opaque OpenCV error.
    image = cv2.imread(image_path)
    if image is None:
        return "Nie można wczytać obrazu"

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if debug:
        cv2.imwrite(os.path.join(debug_folder, "gray.jpg"), gray)

    # Noise reduction
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    if debug:
        cv2.imwrite(os.path.join(debug_folder, "blurred.jpg"), blurred)

    # Adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    if debug:
        cv2.imwrite(os.path.join(debug_folder, "thresh.jpg"), thresh)

    # Find contours and take the first one with plate-like proportions
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    license_plate = None
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if 2 < aspect_ratio < 5 and h > 20:
            license_plate = gray[y:y+h, x:x+w]
            if debug:
                plate_debug = image[y:y+h, x:x+w]
                cv2.imwrite(os.path.join(debug_folder, "license_plate.jpg"), plate_debug)
            break

    if license_plate is None:
        return "Tablica nie znaleziona"

    # Clean up the plate crop: light blur, then Otsu binarisation
    license_plate = cv2.GaussianBlur(license_plate, (3, 3), 0)
    license_plate = cv2.threshold(license_plate, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    if debug:
        cv2.imwrite(os.path.join(debug_folder, "license_plate_processed.jpg"), license_plate)

    # Character segmentation. In this version the crops are only used as
    # debug output (Tesseract reads the whole plate), so the work is skipped
    # unless debugging is enabled.
    # NOTE(review): findContours treats non-zero pixels as foreground; with
    # dark glyphs on a light plate the glyphs are zero after THRESH_BINARY, so
    # these contours may describe the background rather than the characters.
    # Confirm against the debug crops.
    if debug:
        characters_folder = os.path.join(debug_folder, "characters")
        os.makedirs(characters_folder, exist_ok=True)
        char_contours, _ = cv2.findContours(license_plate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        # Left-to-right order by the x coordinate of each bounding box
        for contour in sorted(char_contours, key=lambda ctr: cv2.boundingRect(ctr)[0]):
            x, y, w, h = cv2.boundingRect(contour)
            if h > 10:  # Drop small contours (noise)
                char = cv2.resize(license_plate[y:y+h, x:x+w], (30, 50))  # Normalise crop size
                # The file name depends only on x, so crops sharing an x overwrite each other
                cv2.imwrite(os.path.join(characters_folder, f"char_{x}.jpg"), char)

    # Read the text with pytesseract (to be replaced by the trained model later)
    config = "--psm 7"  # Page segmentation mode: a single line of text
    text = pytesseract.image_to_string(license_plate, config=config)
    return text.strip()

def main(dataset_path='.', output_file='results.txt'):
    """Run ``process_image`` on every JPEG in a folder and save the results.

    Args:
        dataset_path: Directory that is scanned (non-recursively) for images.
        output_file: Text file that receives one ``<file name>: <text>`` line
            per processed image.

    Files are processed in sorted name order so the output is reproducible
    (``os.listdir`` returns entries in arbitrary order), and the extension
    check is case-insensitive and also accepts ``.jpeg``.
    """
    if not os.path.isdir(dataset_path):
        print(f"Folder {dataset_path} nie istnieje!")
        return

    results = []
    for file_name in sorted(os.listdir(dataset_path)):
        if file_name.lower().endswith(('.jpg', '.jpeg')):
            image_path = os.path.join(dataset_path, file_name)
            print(f"Przetwarzanie: {image_path}")
            text = process_image(image_path, debug=True)
            results.append(f"{file_name}: {text}")

    # Explicit UTF-8 so non-ASCII characters in the recognised text are
    # written the same way on every platform.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(result + '\n' for result in results)

    print(f"Wyniki zapisano w pliku {output_file}")

# Run the batch when this code is executed directly (as a notebook cell or
# as a script) rather than imported.
if __name__ == "__main__":
    main()


Przetwarzanie: ./polen17.jpg
Przetwarzanie: ./polen35.jpg
Przetwarzanie: ./polen25.jpg
Przetwarzanie: ./polen33.jpg
Wyniki zapisano w pliku results.txt


In [15]:
!pip install kaggle
from google.colab import files

# files.upload()

import os
os.environ['KAGGLE_CONFIG_DIR'] = "/root/.kaggle"

!kaggle datasets download -d kentvejrupmadsen/letter-images-dataset
!unzip letter-images-dataset.zip -d /content/data/




[1;30;43mStrumieniowane dane wyjściowe obcięte do 5000 ostatnich wierszy.[0m
  inflating: /content/data/dataset/Å/ids_0737.jpg  
  inflating: /content/data/dataset/Å/ids_0738.jpg  
  inflating: /content/data/dataset/Å/ids_0739.jpg  
  inflating: /content/data/dataset/Å/ids_0740.jpg  
  inflating: /content/data/dataset/Å/ids_0741.jpg  
  inflating: /content/data/dataset/Å/ids_0742.jpg  
  inflating: /content/data/dataset/Å/ids_0743.jpg  
  inflating: /content/data/dataset/Å/ids_0744.jpg  
  inflating: /content/data/dataset/Å/ids_0745.jpg  
  inflating: /content/data/dataset/Å/ids_0746.jpg  
  inflating: /content/data/dataset/Å/ids_0747.jpg  
  inflating: /content/data/dataset/Å/ids_0748.jpg  
  inflating: /content/data/dataset/Å/ids_0749.jpg  
  inflating: /content/data/dataset/Å/ids_0750.jpg  
  inflating: /content/data/dataset/Å/ids_0751.jpg  
  inflating: /content/data/dataset/Å/ids_0752.jpg  
  inflating: /content/data/dataset/Å/ids_0753.jpg  
  inflating: /content/data/dataset/Å/

In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import joblib

# Data preparation
def load_data(data_dir):
    """Load a folder-per-class image dataset as flat pixel vectors.

    Every sub-directory of ``data_dir`` is treated as one class whose name is
    the label. Each image is read as grayscale, resized with
    ``cv2.resize(img, (30, 50))`` and flattened.

    Args:
        data_dir: Root directory that contains one sub-directory per label.

    Returns:
        tuple: ``(X, y)`` as NumPy arrays - ``X`` holds one flattened image per
        row and ``y`` the matching directory names. Both are empty arrays when
        no image could be loaded.
    """
    X = []
    y = []
    # Sorted listings make the sample order (and therefore any seeded split
    # of the result) reproducible; os.listdir order is arbitrary.
    for label in sorted(os.listdir(data_dir)):
        label_path = os.path.join(data_dir, label)
        if not os.path.isdir(label_path):
            continue
        for img_name in sorted(os.listdir(label_path)):
            img_path = os.path.join(label_path, img_name)
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                # cv2.imread returns None for files it cannot decode;
                # skip them instead of crashing inside cv2.resize.
                continue
            img = cv2.resize(img, (30, 50))  # Normalise size
            X.append(img.flatten())          # Flatten the image into a vector
            y.append(label)
    return np.array(X), np.array(y)

# Load the data
data_dir = "/content/data/dataset/"
X, y = load_data(data_dir)

# Split into training and validation sets (80/20, fixed seed)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Create the model and fit it on the flattened pixel vectors
model = SVC(kernel='linear', probability=True)
model.fit(X_train, y_train)

# Validation: accuracy on the held-out 20 %
y_pred = model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f"Dokładność walidacji: {accuracy * 100:.2f}%")

# Save the fitted model to character_classifier.pkl in the working directory
joblib.dump(model, "character_classifier.pkl")


In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import transforms
import numpy as np

# Data preparation
class CharacterDataset(Dataset):
    """Folder-per-class character images, loaded into memory at construction.

    Every sub-directory of ``data_dir`` is one class. Images are read as
    grayscale with ``cv2`` (which must already be imported in the notebook
    namespace) and resized with ``cv2.resize(img, (30, 50))``.

    Attributes:
        data: List of image arrays.
        labels: List of integer class indices, parallel to ``data``.
        transform: Optional callable applied to an image in ``__getitem__``.
        label_to_idx: Mapping from directory name to class index.
        idx_to_label: List such that ``idx_to_label[i]`` is the name of class ``i``.
    """

    def __init__(self, data_dir, transform=None):
        """Scan ``data_dir`` and load every readable image.

        Args:
            data_dir: Root directory that contains one sub-directory per class.
            transform: Optional callable applied to each image on access.
        """
        self.data = []
        self.labels = []
        self.transform = transform
        self.label_to_idx = {}
        self.idx_to_label = []

        # Sorted so the label -> index mapping is the same on every run.
        for label in sorted(os.listdir(data_dir)):
            label_path = os.path.join(data_dir, label)
            if not os.path.isdir(label_path):
                continue
            # Number classes by how many were accepted so far. Enumerating
            # the raw listing would leave gaps for non-directory entries, so
            # indices could exceed len(label_to_idx) and idx_to_label would
            # no longer line up with them.
            label_idx = len(self.idx_to_label)
            self.label_to_idx[label] = label_idx
            self.idx_to_label.append(label)
            for img_name in sorted(os.listdir(label_path)):
                img_path = os.path.join(label_path, img_name)
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
                if img is None:
                    # cv2.imread returns None for files it cannot decode
                    continue
                img = cv2.resize(img, (30, 50))  # Normalise size
                self.data.append(img)
                self.labels.append(label_idx)

    def __len__(self):
        """Return the number of loaded images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, class_index)`` for ``idx``, applying ``transform`` if set."""
        img = self.data[idx]
        label = self.labels[idx]
        if self.transform:
            img = self.transform(img)
        return img, label

# Model definition
class CharacterModel(nn.Module):
    """Small CNN classifier for single-channel character images.

    Two 3x3 convolutions (16 and 32 channels, padding 1), each followed by
    ReLU and 2x2 max pooling, then a 128-unit hidden layer with dropout and a
    linear output layer producing ``num_classes`` logits.
    """

    def __init__(self, num_classes, input_size=(50, 30)):
        """Build the layers.

        Args:
            num_classes: Number of output classes.
            input_size: ``(height, width)`` of the input images. The default
                matches arrays produced by ``cv2.resize(img, (30, 50))``,
                which are 50 rows by 30 columns.
        """
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        # The padded convolutions keep the spatial size; each of the two 2x2
        # pools halves it (rounding down), so the feature map is
        # (height // 4) x (width // 4) with 32 channels: 12 x 7 for 50 x 30.
        height, width = input_size
        self.fc1 = nn.Linear(32 * (height // 4) * (width // 4), 128)
        self.fc2 = nn.Linear(128, num_classes)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return class logits for a batch shaped (batch, 1, height, width)."""
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)  # Flatten everything except the batch dimension
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

# Data preparation
data_dir = "/content/data/dataset/"
transform = transforms.Compose([
    transforms.ToTensor(),                # 2-D uint8 array -> float tensor (1, H, W) in [0, 1]
    transforms.Normalize((0.5,), (0.5,))  # rescale to [-1, 1]
])
dataset = CharacterDataset(data_dir, transform=transform)

# Split into training and validation sets (80/20); the seeded generator makes
# the split reproducible between runs.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(
    dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42)
)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Model, loss and optimiser
num_classes = len(dataset.label_to_idx)
model = CharacterModel(num_classes=num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for images, labels in train_loader:
        # ToTensor already adds the channel dimension, so batches arrive as
        # (batch, 1, H, W); an extra unsqueeze(1) would make them 5-D, which
        # Conv2d rejects.
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # Mean loss per batch for this epoch
    print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {total_loss / len(train_loader):.4f}")

# Validation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in val_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)  # index of the largest logit = predicted class
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = correct / total * 100
print(f"Dokładność walidacji: {accuracy:.2f}%")

# Save the weights together with the label mapping needed to decode predictions
torch.save({
    'model_state_dict': model.state_dict(),
    'label_to_idx': dataset.label_to_idx
}, "character_model.pth")


In [1]:
import cv2
import pytesseract
import os
import numpy as np
import joblib

def process_image(image_path, debug=False, debug_folder="debug", model=None):
    """Find a licence-plate-like region, segment it and classify each character.

    Pipeline: grayscale -> 5x5 Gaussian blur -> adaptive Gaussian threshold ->
    external contours. The first contour whose bounding box satisfies
    ``2 < width / height < 5`` and ``height > 20`` is taken as the plate. The
    plate crop is blurred (3x3) and binarised with Otsu's method; its external
    contours taller than 10 px are cropped left to right, resized with
    ``cv2.resize(char, (30, 50))``, flattened and classified.

    Args:
        image_path: Path of the image file to process.
        debug: When True, intermediate images and character crops are written
            into ``debug_folder``.
        debug_folder: Directory for the debug images. Files have fixed names,
            so processing several images with the same folder overwrites the
            previous image's debug output.
        model: Optional fitted classifier with a ``predict`` method. When
            omitted, ``character_classifier.pkl`` is loaded from the working
            directory on every call; pass a preloaded model to avoid that.

    Returns:
        str: The predicted labels concatenated and stripped, or one of the
        sentinel messages "Nie można wczytać obrazu" (image could not be
        read), "Tablica nie znaleziona" (no contour matched the plate
        heuristic) or "Model klasyfikacji znaków nie został znaleziony"
        (no model passed and the model file is missing).
    """
    if debug:
        os.makedirs(debug_folder, exist_ok=True)

    # cv2.imread reports failure by returning None instead of raising;
    # without this guard cvtColor fails with an opaque OpenCV error.
    image = cv2.imread(image_path)
    if image is None:
        return "Nie można wczytać obrazu"

    # Convert to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    if debug:
        cv2.imwrite(os.path.join(debug_folder, "gray.jpg"), gray)

    # Noise reduction
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    if debug:
        cv2.imwrite(os.path.join(debug_folder, "blurred.jpg"), blurred)

    # Adaptive thresholding
    thresh = cv2.adaptiveThreshold(blurred, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 11, 2)
    if debug:
        cv2.imwrite(os.path.join(debug_folder, "thresh.jpg"), thresh)

    # Find contours and take the first one with plate-like proportions
    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    license_plate = None
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        aspect_ratio = w / h
        if 2 < aspect_ratio < 5 and h > 20:
            license_plate = gray[y:y+h, x:x+w]
            if debug:
                plate_debug = image[y:y+h, x:x+w]
                cv2.imwrite(os.path.join(debug_folder, "license_plate.jpg"), plate_debug)
            break

    if license_plate is None:
        return "Tablica nie znaleziona"

    # Clean up the plate crop: light blur, then Otsu binarisation
    license_plate = cv2.GaussianBlur(license_plate, (3, 3), 0)
    license_plate = cv2.threshold(license_plate, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    if debug:
        cv2.imwrite(os.path.join(debug_folder, "license_plate_processed.jpg"), license_plate)

    # Character segmentation
    # NOTE(review): findContours treats non-zero pixels as foreground; with
    # dark glyphs on a light plate the glyphs are zero after THRESH_BINARY, so
    # these contours may describe the background rather than the characters.
    # Confirm against the debug crops.
    contours, _ = cv2.findContours(license_plate, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    characters_folder = os.path.join(debug_folder, "characters")
    if debug:
        os.makedirs(characters_folder, exist_ok=True)

    characters = []
    # Left-to-right order by the x coordinate of each bounding box
    for contour in sorted(contours, key=lambda ctr: cv2.boundingRect(ctr)[0]):
        x, y, w, h = cv2.boundingRect(contour)
        if h > 10:  # Drop small contours (noise)
            char = license_plate[y:y+h, x:x+w]
            char = cv2.resize(char, (30, 50))  # Normalise crop size
            characters.append(char.flatten())  # Same flat layout the classifier was fitted on
            if debug:
                # The file name depends only on x, so crops sharing an x overwrite each other
                char_debug_path = os.path.join(characters_folder, f"char_{x}.jpg")
                cv2.imwrite(char_debug_path, char)

    # Load the trained model unless the caller supplied one
    if model is None:
        model_path = "character_classifier.pkl"
        if not os.path.exists(model_path):
            return "Model klasyfikacji znaków nie został znaleziony"
        # joblib.load unpickles the file, which can execute arbitrary code:
        # only load model files from a trusted source.
        model = joblib.load(model_path)

    if not characters:
        return ""

    # Classify all characters with a single predict call
    predictions = model.predict(np.array(characters))
    return "".join(predictions).strip()

def main(dataset_path='.', output_file='results.txt'):
    """Run ``process_image`` on every JPEG in a folder and save the results.

    Args:
        dataset_path: Directory that is scanned (non-recursively) for images.
        output_file: Text file that receives one ``<file name>: <text>`` line
            per processed image.

    Files are processed in sorted name order so the output is reproducible
    (``os.listdir`` returns entries in arbitrary order), and the extension
    check is case-insensitive and also accepts ``.jpeg``.
    """
    if not os.path.isdir(dataset_path):
        print(f"Folder {dataset_path} nie istnieje!")
        return

    results = []
    for file_name in sorted(os.listdir(dataset_path)):
        if file_name.lower().endswith(('.jpg', '.jpeg')):
            image_path = os.path.join(dataset_path, file_name)
            print(f"Przetwarzanie: {image_path}")
            text = process_image(image_path, debug=True)
            results.append(f"{file_name}: {text}")

    # Explicit UTF-8: the classifier's labels are dataset folder names, which
    # include non-ASCII letters, so the platform default encoding is not safe.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(result + '\n' for result in results)

    print(f"Wyniki zapisano w pliku {output_file}")

# Run the batch when this code is executed directly (as a notebook cell or
# as a script) rather than imported.
if __name__ == "__main__":
    main()


Przetwarzanie: ./polen17.jpg
Przetwarzanie: ./polen35.jpg
Przetwarzanie: ./polen25.jpg
Przetwarzanie: ./polen33.jpg
Wyniki zapisano w pliku results.txt
