In [11]:
import cv2
import threading
import time
import numpy as np

import os
import xml.etree.ElementTree as ET

In [12]:
def capture_frame_and_annotation(frame, bbox, image_filename):
    image_dir = 'dataset/images/'
    annotation_dir = 'dataset/annotations/'
    # Save image
    image_path = os.path.join(image_dir, image_filename)
    print(image_path)
   
    cv2.imwrite(image_path, frame)

    # Create XML annotation
    annotation = ET.Element('annotation')
    filename = ET.SubElement(annotation, 'filename')
    filename.text = image_filename

    size = ET.SubElement(annotation, 'size')
    width_elem = ET.SubElement(size, 'width')
    width_elem.text = str(frame.shape[1])  # Assuming frame is BGR format
    height_elem = ET.SubElement(size, 'height')
    height_elem.text = str(frame.shape[0])
    depth_elem = ET.SubElement(size, 'depth')
    depth_elem.text = str(frame.shape[2])

    object_elem = ET.SubElement(annotation, 'object')
    name = ET.SubElement(object_elem, 'name')
    name.text = 'barcode'
    bndbox = ET.SubElement(object_elem, 'bndbox')
    xmin = ET.SubElement(bndbox, 'xmin')
    xmin.text = str(bbox[0])
    ymin = ET.SubElement(bndbox, 'ymin')
    ymin.text = str(bbox[1])
    xmax = ET.SubElement(bndbox, 'xmax')
    xmax.text = str(bbox[2])
    ymax = ET.SubElement(bndbox, 'ymax')
    ymax.text = str(bbox[3])

    # Save XML annotation file
    annotation_file = os.path.join(annotation_dir, os.path.splitext(image_filename)[0] + '.xml')
    tree = ET.ElementTree(annotation)
    tree.write(annotation_file)

    print(f"Saved image: {image_path} and annotation: {annotation_file}")

In [13]:
class VideoCaptureThread:
    def __init__(self, src=0):
        x_resolution = 2600
        y_resolution = 2600
        self.capture = cv2.VideoCapture(src)
        self.capture .set(cv2.CAP_PROP_FOURCC, cv2.VideoWriter_fourcc(*'MJPG'))
    
        self.capture.set(cv2.CAP_PROP_FPS, 30)
        self.capture.set(cv2.CAP_PROP_FRAME_WIDTH, x_resolution)
        self.capture.set(cv2.CAP_PROP_FRAME_HEIGHT, y_resolution)

        #check actual Data
        actual_width = self.capture.get(cv2.CAP_PROP_FRAME_WIDTH)
        actual_height = self.capture.get(cv2.CAP_PROP_FRAME_HEIGHT)
        actual_fps = self.capture.get(cv2.CAP_PROP_FPS)
        print(f"Resolution: {actual_width} x {actual_height}")
        print(f"Frame Rate: {actual_fps}")
        self.ret, self.frame = self.capture.read()
        self.running = True
        self.thread = threading.Thread(target=self.update, args=())
        self.thread.start()

    def update(self):
        while self.running:
            self.ret, self.frame = self.capture.read()

    def read(self):
        return self.ret, self.frame

    def stop(self):
        self.running = False
        self.thread.join()
        self.capture.release()

In [14]:
def detect_boundingbox(frame, K_pre, K, K_post,threshold_value,K_morph):
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    
    blurred = cv2.GaussianBlur(gray, K_pre,0)
        
    # Compute the gradient in the x and y direction
    gradX = cv2.Sobel(blurred, ddepth=cv2.CV_32F, dx=1, dy=0, ksize=-1)
    gradY = cv2.Sobel(blurred, ddepth=cv2.CV_32F, dx=0, dy=1, ksize=-1)

    # Subtract the y-gradient from the x-gradient
    gradient = cv2.subtract(gradX, gradY)
    gradient = cv2.convertScaleAbs(gradient)
    #v2.imshow('gradient', gradient)
    # Blur the gradient image
    blurred = cv2.GaussianBlur(gradient, K_post, 0)
    #cv2.imshow('blurr', blurred)
    # Apply a binary threshold to the blurred image
    _,  thresh = cv2.threshold(blurred, threshold_value , 255, cv2.THRESH_BINARY)
    #cv2.imshow('thresh', thresh)
    # Construct a closing kernel and apply it to the thresholded image
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, K_morph)
    closed = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel)
    # Perform a series of erosions and dilations to remove small blobs
    closed = cv2.erode(closed, None, iterations=10)
    closed = cv2.dilate(closed, None, iterations=10)

    #cv2.imshow('closed', closed)
    
    # Find contours in the thresholded image
    contours, _ = cv2.findContours(closed.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    # Sort the contours by area, keeping only the largest one
    contours = sorted(contours, key=cv2.contourArea, reverse=True)
    barcodeContour = None
  
    if contours:
        # Assume the largest contour is the barcode
        barcodeContour = contours[0]

        # Compute the bounding box of the barcode region and draw it on the image
        rect = cv2.minAreaRect(barcodeContour)
        box = cv2.boxPoints(rect)
        
        box = np.int32(box)
    
        #cut reigon
        width = int(rect[1][0])
        height = int(rect[1][1])

        # Get the rotation matrix   
        angle = rect[2]
      
        if width < height:
            angle = angle + 90

  
        M = cv2.getRotationMatrix2D(rect[0], angle, 1.0)
        # Rotate the entire image
        rotated = cv2.warpAffine(gray, M, (gray.shape[1], gray.shape[0]))

        ###Rotate Box#############
        rotated_box = cv2.transform(np.array([box]), M)[0]

        # Convert the rotated box points to integer coordinates
        rotated_box = np.int32(rotated_box)
        #print(rotated_box[0], rotated_box[1], rotated_box[2], rotated_box[3], )
        #cv2.drawContours(rotated, [rotated_box], -1, (0, 255, 0), 2)
         #########Test#############3
        # Extract the rotated bounding box coordinates
        x, y, w, h = cv2.boundingRect(rotated_box)
        #dim= max(w,h) #+ 100
        cropped = rotated[y:y+h, x:x+w]
    
        if cropped.size > 0:
            cv2.imshow('Cropped Image', cropped)


           
       
            
     
                    
    return rotated_box,gray

In [15]:
K= 7
K_pre = (3,3)
K_post = (K,K)
threshold_value = 255/2
K_morph = (21,21)


def main():
    cap_thread = VideoCaptureThread()
    i = 0
    while True:
 
        ret, frame = cap_thread.read()

        if not ret:
            print("Error: Failed to capture frame.")
            break
        
        box,gray  = detect_boundingbox(frame, K_pre, K, K_post, threshold_value, K_morph)
        
        if cv2.waitKey(1) & 0xFF == ord('s'):
            capture_frame_and_annotation(frame,box,f'image{i}.jpg')
            i +=1
        scale_factor = 0.3 # This will scale the image to half its original size

        # Calculate the new dimensions of the image
        new_width = int(frame.shape[1] * scale_factor)
        new_height = int(frame.shape[0] * scale_factor)
        resized_image = cv2.resize(frame, (new_width, new_height))
        cv2.imshow('Frame', resized_image)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break


    cap_thread.stop()
    cv2.destroyAllWindows()

if __name__ == "__main__":
    main()

Resolution: 2592.0 x 1944.0
Frame Rate: 30.0
dataset/images/image0.jpg
Saved image: dataset/images/image0.jpg and annotation: dataset/annotations/image0.xml
dataset/images/image1.jpg
Saved image: dataset/images/image1.jpg and annotation: dataset/annotations/image1.xml
dataset/images/image2.jpg
Saved image: dataset/images/image2.jpg and annotation: dataset/annotations/image2.xml
dataset/images/image3.jpg
Saved image: dataset/images/image3.jpg and annotation: dataset/annotations/image3.xml
dataset/images/image4.jpg
Saved image: dataset/images/image4.jpg and annotation: dataset/annotations/image4.xml
dataset/images/image5.jpg
Saved image: dataset/images/image5.jpg and annotation: dataset/annotations/image5.xml
dataset/images/image6.jpg
Saved image: dataset/images/image6.jpg and annotation: dataset/annotations/image6.xml
dataset/images/image7.jpg
Saved image: dataset/images/image7.jpg and annotation: dataset/annotations/image7.xml
dataset/images/image8.jpg
Saved image: dataset/images/image

UnboundLocalError: cannot access local variable 'rotated_box' where it is not associated with a value

: 