In [58]:
import torchvision
import torch.nn as nn
import torch
import json
from PIL import Image
from torchvision.transforms import Resize
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
import torchvision.transforms as T
from tqdm.notebook import tqdm


In [59]:
class TrainingDataset(torch.utils.data.Dataset):
    def __init__(self, data, validation=False):
        self.images = []
        self.boxes = []


        x_final, y_final = 600, 1200
        transform = Resize((x_final, y_final))
        to_tensor = ToTensor()
        for item in data:
            file_name = item['data']['image'].split('-')[-1]

            image_path = file_name  # Zmień odpowiednio ścieżkę do katalogu ze zdjęciami
            image = Image.open(image_path)
            image = to_tensor(transform(image))*255
            image = image.type(torch.uint8)
            self.images.append(image)
            points = item['annotations'][0]['result'][0]['value']['points']

            xmin = min(point[0] for point in points)/100*x_final
            ymin = min(point[1] for point in points)/100*y_final
            xmax = max(point[0] for point in points)/100*x_final
            ymax = max(point[1] for point in points)/100*y_final
            box = torch.tensor([[xmin, ymin, xmax, ymax]])
            self.boxes.append(box)

        self.labels = torch.zeros((len(data), 1), dtype=torch.int64)


        transforms = [#T.ElasticTransform(alpha=5.0), #T.Grayscale(),
                    #   T.ColorJitter(brightness=.5, hue=.3),
                    #   T.RandomInvert(p=0.3), T.RandomPosterize(bits=2),
                    #   T.RandomSolarize(threshold=0.2),
                    #   T.RandomAdjustSharpness(sharpness_factor=1),
                    #   T.RandomAutocontrast(),
                    #   T.GaussianBlur(kernel_size=(5, 9), sigma=(0.1, 5)),
                       T.RandomGrayscale(p=0.5),
                    #   T.RandomSolarize(1, p=0.5)



                      ]
        # self.transforms = T.Compose([*[T.RandomApply([transform], p=0.5) for transform in transforms]])
        # if validation:
        #     self.transforms = None
        transforms = transforms * 5

        self.transforms = T.Compose([T.RandomApply(transforms, p=0.5)])

    # def crop_img_with_box(self, img, box):
    #     # TODO: crop image with box
    #     return img, box

    def __getitem__(self, idx):
        out_img = self.images[idx]
        if self.transforms is not None:
            out_img = self.transforms(out_img)

        out_box = self.boxes[idx]
        # out_img, out_box = self.crop_img_with_box(out_img, out_box)
        return out_img/255, out_box, self.labels[idx]

    def __len__(self):
        return len(self.images)

In [60]:
model = torchvision.models.detection.fasterrcnn_resnet50_fpn()
model.roi_heads.box_predictor.cls_score = nn.Linear(1024, 1, bias=True)
model.roi_heads.box_predictor.bbox_pred = nn.Linear(1024, 4, bias=True)

from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights

#Opcjonalne zamrożenie wag
weights_dict = dict(model.named_parameters())
for k, v in weights_dict.items():
    if "box_predictor" not in k:
        v.requires_grad = False

In [61]:
with open('labelj.json') as f:
    data = json.load(f)

In [62]:

#Hiperparametr
epochs = 20
#lr, momentum - hiperparametry
#można spróbować torch.optim.Adam
# optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)
training_dataset = TrainingDataset(data)
#batch_size = 2 - hiperparametr
training_dataloader = torch.utils.data.DataLoader(training_dataset, batch_size=2, shuffle=True)
val_dataset = TrainingDataset(data, validation=True)
val_dataloader = torch.utils.data.DataLoader(training_dataset, batch_size=2, shuffle=True)

for epoch in tqdm(list(range(epochs))):
    for images, boxes, labels in training_dataloader:
        images = list(image.to(device) for image in images)
        targets = []
        for i in range(len(images)):
            d = {}
            d['boxes'] = boxes[i].to(device)
            d['labels'] = labels[i].to(device)
            targets.append(d)
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        losses.backward()
        optimizer.step()
        optimizer.zero_grad()
        print(losses, d['boxes'])
    with torch.no_grad():
        for images, boxes, labels in val_dataloader:
            images = list(image.to(device) for image in images)
            targets = []
            for i in range(len(images)):
                d = {}
                d['boxes'] = boxes[i].to(device)
                d['labels'] = labels[i].to(device)
                targets.append(d)
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            print(f"Val loss: {losses.item()}")

#Zapisanie modelu
torch.save(model.state_dict(), "model.pth")


  0%|          | 0/20 [00:00<?, ?it/s]

tensor(0.7060, device='cuda:0', grad_fn=<AddBackward0>) tensor([[  71.8495,  152.1838,  483.4316, 1010.5036]], device='cuda:0')
tensor(0.7044, device='cuda:0', grad_fn=<AddBackward0>) tensor([[224.9832, 224.9093, 557.9157, 915.3127]], device='cuda:0')
tensor(0.7119, device='cuda:0', grad_fn=<AddBackward0>) tensor([[  71.7688,  139.2857,  558.0228, 1129.5232]], device='cuda:0')
tensor(0.7048, device='cuda:0', grad_fn=<AddBackward0>) tensor([[ 74.2857, 109.2857, 594.2857, 996.4286]], device='cuda:0')
tensor(0.7039, device='cuda:0', grad_fn=<AddBackward0>) tensor([[ 109.0909,  249.5455,  535.0649, 1135.9091]], device='cuda:0')
tensor(0.7179, device='cuda:0', grad_fn=<AddBackward0>) tensor([[  38.5714,  135.0000,  512.8571, 1127.1428]], device='cuda:0')
tensor(0.7043, device='cuda:0', grad_fn=<AddBackward0>) tensor([[  71.4286,  175.7143,  484.2857, 1035.0000]], device='cuda:0')
Val loss: 0.7107003927230835
Val loss: 0.7131628394126892
Val loss: 0.7021306753158569
Val loss: 0.7113913297653

In [63]:
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

import torch

model.to("cpu")
model.eval()
for img, boxes, labels in val_dataset:
    preds = model([img])
    img = img*255
    img = img.type(torch.uint8)
    box_im = draw_bounding_boxes(img, boxes=boxes,

                              colors="red",
                              width=4)
    # boxes = [box for box, label, score in el.items() if label == 84 for el in preds]
    boxes = []
    for el in preds:
        for box, label, score in zip(el['boxes'], el['labels'], el['scores']):
            if label == 84:
                boxes.append(box)
    # if boxes:
    #     boxes = torch.stack(boxes)
    #     box_im = draw_bounding_boxes(box_im, boxes=boxes,

    #                           colors="green",
    #                           width=4)
    im = to_pil_image(box_im.detach())
    im.show()

KeyboardInterrupt: 

In [None]:
# from torchvision.utils import draw_bounding_boxes
# from torchvision.transforms.functional import to_pil_image
# model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, box_score_thresh=0.5)
# model.to("cpu")
# model.eval()


# from PIL import Image
# from torchvision.transforms import ToTensor

# # Ścieżka do testowego zdjęcia spoza bazy danych
# image_path = r'C:\Users\Boows\Desktop\sieci\test1.jpg'

# # Wczytanie obrazu jako obiekt PIL.Image
# image = Image.open(image_path)

# # Przekształcenie obrazu na tensor
# tensor_image = ToTensor()(image)

# # Wykonanie predykcji na modelu
# #preds = model([tensor_image])


# for img, boxes, labels in val_dataset:
#     preds = model([img])
#     img = img*255
#     img = img.type(torch.uint8)
#     box_im = draw_bounding_boxes(img, boxes=boxes,

#                               colors="red",
#                               width=4)
#     # boxes = [box for box, label, score in el.items() if label == 84 for el in preds]
#     boxes = []
#     for el in preds:
#         for box, label, score in zip(el['boxes'], el['labels'], el['scores']):
#             if label == 84:
#                 boxes.append(box)
#     if boxes:
#         boxes = torch.stack(boxes)
#         box_im = draw_bounding_boxes(box_im, boxes=boxes,

#                               colors="green",
#                               width=4)
#     im = to_pil_image(box_im.detach())
#     im.show()


KeyboardInterrupt: 

In [71]:
#WYŚWIETLA WSZYSTKIE ZDJĘCIA
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, box_score_thresh=0.7)
model.to("cpu")
model.eval()


from PIL import Image
from torchvision.transforms import ToTensor

# Ścieżka do testowego zdjęcia spoza bazy danych
image_path = r'C:\Users\Boows\Desktop\sieci\test1.jpg'

# Wczytanie obrazu jako obiekt PIL.Image
image = Image.open(image_path)

# Przekształcenie obrazu na tensor
tensor_image = ToTensor()(image)

# Wykonanie predykcji na modelu
#preds = model([tensor_image])


for img, boxes, labels in val_dataset:
    preds = model([img])
    img = img * 255
    img = img.type(torch.uint8)

    # Rysowanie zielonych ramkach dla przewidywanych bounding boxów
    boxes = []
    for el in preds:
        for box, label, score in zip(el['boxes'], el['labels'], el['scores']):
            if label == 84:
                boxes.append(box)
    if boxes:
        boxes = torch.stack(boxes)
        box_im = draw_bounding_boxes(img, boxes=boxes,
                                     colors="green",
                                     width=4)
        im = to_pil_image(box_im.detach())
        im.show()



KeyboardInterrupt: 

In [94]:
#WYŚWIETLA PODANE ZDJĘCIE ZE ŚCIEŻKI
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
model = fasterrcnn_resnet50_fpn_v2(weights=FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT, box_score_thresh=0.8)
model.to("cpu")
model.eval()


from PIL import Image
from torchvision.transforms import ToTensor

# Ścieżka do testowego zdjęcia spoza bazy danych
image_path = r'C:\Users\Boows\Desktop\sieci\proba3.jpg'

# Wczytanie obrazu jako obiekt PIL.Image
image = Image.open(image_path)

# Przekształcenie obrazu na tensor
tensor_image = ToTensor()(image)

# Wykonanie predykcji na modelu
preds = model([tensor_image])

# Rysowanie zielonych ramkach dla przewidywanych bounding boxów
img = tensor_image * 255
img = img.type(torch.uint8)
boxes = []
for el in preds:
    for box, label, score in zip(el['boxes'], el['labels'], el['scores']):
        if label == 84:
            boxes.append(box)
if boxes:
    boxes = torch.stack(boxes)
    box_im = draw_bounding_boxes(img, boxes=boxes,
                                 colors="green",
                                 width=4)
    im = to_pil_image(box_im.detach())
    im.show()


In [98]:
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
import torch

model = fasterrcnn_resnet50_fpn_v2(pretrained=True)
model.to("cpu")
model.eval()

from PIL import Image
from torchvision.transforms import ToTensor

# Ścieżka do testowego zdjęcia spoza bazy danych
image_path = r'C:\Users\Boows\Desktop\sieci\test1.jpg'

# Wczytanie obrazu jako obiekt PIL.Image
image = Image.open(image_path)

# Przekształcenie obrazu na tensor
tensor_image = ToTensor()(image)

# Wykonanie predykcji na modelu
preds = model([tensor_image])

# Wyciągnięcie pierwszego bounding boxa o etykiecie 84
target_box = None
for box, label in zip(preds[0]['boxes'], preds[0]['labels']):
    if label == 84:
        target_box = box
        break

# Jeżeli znaleziono bounding box o etykiecie 84, wycięcie wnętrza i wyświetlenie
if target_box is not None:
    xmin, ymin, xmax, ymax = target_box.tolist()
    xmin, ymin, xmax, ymax = int(xmin), int(ymin), int(xmax), int(ymax)
    cropped_img = image.crop((xmin, ymin, xmax, ymax))
    cropped_img.show()





In [97]:
import pytesseract
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2
from PIL import Image


def read_text_from_image(image_path):
    # Wczytanie obrazu
    image = Image.open(image_path)

    # Konwersja obrazu na tekst
    text = pytesseract.image_to_string(image)

    return text

# Ścieżka do pliku z obrazem
image_path = r'C:\Users\Boows\Desktop\sieci\test1.jpg'

# Odczytanie tekstu z obrazu
text = read_text_from_image(image_path)

# Wyświetlenie odczytanego tekstu
print(text)


Poprawnie zlutowany projekt

Zasady dziatania oraz napotkane problemy:

Dzlatanie powy2szego projektu nie jest skomplikowane. Po podtaczeniu
zasilania zapal sie dioda LED ktora poinformuje nas o tym, iz zasilanie jest sprawne.
Nastepnie wystarczy nacisnaé guzik aby otrzymac dawigk z brzgczyka. Wysokos¢
wydawanego déwieku jest Zalezna z ulozeniem guzikow, im dalej w prawo tym
Gawigk jest wy2szy, 2 Wiec skrajnie lewy guzik wydaje najnizszy dwigk a skrajnie
prawy najwy2szy. Jednak mozna rowniez samemu ustawié wysokosé kazdego
Géwigku poprzez zmiany wartosci na potencjometrze z jednoczesnym
przytrzymaniem wybranego guzika.

“Jedynym napotkanym problem, Ktéry mégl zawazyé na niewlaéciwym
dziataniem projektu bylo, niepoprawne zaprojektowanie Sciezek do ukladu scalonego
NESS, Jednak po szybkiej Konsultacji z prowadzacym, udalo sie doprowadzie
projekt do jego poprawnego dzialania poprzez odgiecie jedne| z ndzek




In [101]:
# import pytesseract
# from PIL import Image


# def read_text_from_image(cropped_img):
#     # Konwersja obrazu na tekst
#     text = pytesseract.image_to_string(cropped_img)

#     return text

# # Odczytanie tekstu z wyciętego zdjęcia
# text = read_text_from_image(cropped_img)

# # Wyświetlenie odczytanego tekstu
# print(text)

import pytesseract
from PIL import Image

def read_text_from_image(cropped_img):
    # Konwersja obrazu na tekst
    text = pytesseract.image_to_string(cropped_img)

    return text

# Odczytanie tekstu z wyciętego zdjęcia
text = read_text_from_image(cropped_img)

# Ścieżka do pliku tekstowego, w którym chcemy zapisać odczytany tekst
output_file = "output.txt"

# Zapisanie odczytanego tekstu do pliku
with open(output_file, "w", encoding="utf-8") as file:
    file.write(text)

print("Odczytany tekst został zapisany do pliku:", output_file)



Odczytany tekst został zapisany do pliku: output.txt
