In [10]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw, ImageFont


class Panel:
    """Axis-aligned rectangle (comic panel / frame) that sorts into reading order.

    Instances only implement ``__lt__``; that is what ``list.sort()`` and
    ``sorted()`` use, so a list of panels can be sorted directly.

    Attributes:
        x1, y1: left and top edge.
        x2, y2: right and bottom edge.
        wt, ht: absolute width and height derived from the edges.

    The class attribute ``numbering`` selects the horizontal reading
    direction for *all* panels: ``'rtl-ttb'`` (right-to-left, top-to-bottom)
    or ``'ltr-ttb'`` (left-to-right, top-to-bottom).
    """

    numbering = 'rtl-ttb'  # Right-to-left and top-to-bottom by default

    def __str__(self):
        """Return the panel as ``"(x1, y1) --- (x2, y2)"``."""
        return "({}, {}) --- ({}, {})".format(self.x1, self.y1, self.x2, self.y2)

    @staticmethod
    def set_numbering(numbering):
        """Set the reading direction used by every panel comparison.

        Declared as a static method so that it can be called both as
        ``Panel.set_numbering(...)`` and on an instance.

        Args:
            numbering: ``'ltr-ttb'`` or ``'rtl-ttb'``.

        Raises:
            ValueError: if ``numbering`` is not one of the two known values.
        """
        if numbering not in ('ltr-ttb', 'rtl-ttb'):
            raise ValueError('Fatal error, unknown numbering: {}'.format(numbering))
        Panel.numbering = numbering

    def __init__(self, x1, y1, x2, y2):
        """Store the four edges and derive the panel's width and height."""
        self.x1 = x1  # panel's left edge
        self.y1 = y1  # panel's top edge
        self.x2 = x2  # panel's right edge
        self.y2 = y2  # panel's bottom edge
        self.wt = abs(x2 - x1)
        self.ht = abs(y2 - y1)

    def overlapping_segment(self, a, b):
        """Return ``(start, end)`` of the intersection of two 1-D intervals.

        ``a`` and ``b`` are ``(low, high)`` pairs. The intervals overlap only
        when the returned ``start`` is smaller than ``end``.
        """
        s = max(a[0], b[0])
        e = min(a[1], b[1])
        return s, e

    def __lt__(self, other):
        """Return True when this panel is read before ``other``.

        The decision is a fixed sequence of geometric rules; the first rule
        that matches wins, so their order matters.

        NOTE(review): the rules are pairwise heuristics; nothing here checks
        that they form a consistent total order for every layout.
        """
        rtl = Panel.numbering == 'rtl-ttb'

        x_start, x_end = self.overlapping_segment((self.x1, self.x2), (other.x1, other.x2))
        y_start, y_end = self.overlapping_segment((self.y1, self.y2), (other.y1, other.y2))
        x_overlap = x_start < x_end
        y_overlap = y_start < y_end

        # identical rectangles: neither one comes first (keeps "<" irreflexive,
        # otherwise the "completely inside" rule below would answer True)
        if (self.x1, self.y1, self.x2, self.y2) == (other.x1, other.y1, other.x2, other.y2):
            return False

        # panel is completely inside the other
        if self.y1 >= other.y1 and self.y2 <= other.y2 \
                and self.x1 >= other.x1 and self.x2 <= other.x2:
            return True

        # panel is completely surrounding the other
        if self.y1 <= other.y1 and self.y2 >= other.y2 \
                and self.x1 <= other.x1 and self.x2 >= other.x2:
            return False

        # panel is absolutely above the other
        if other.y1 >= self.y2:
            return True

        # panel is absolutely below the other
        if self.y1 >= other.y2:
            return False

        # one horizontal extent contains the other / one vertical extent
        # contains the other
        x_nested = (self.x1 <= other.x1 and self.x2 >= other.x2) \
            or (self.x1 >= other.x1 and self.x2 <= other.x2)
        y_nested = (other.y1 <= self.y1 and other.y2 >= self.y2) \
            or (other.y1 >= self.y1 and other.y2 <= self.y2)

        # panel is directly above the other and contains part of the other or vice versa (with overlapping)
        if y_overlap and y_end == self.y2 and x_overlap and x_nested:
            return True

        # panel is directly below the other and contains part of the other or vice versa (with overlapping)
        if y_overlap and y_start == self.y1 and x_overlap and x_nested:
            return False

        # panel is between or vice versa and left from the other
        if y_overlap and y_nested and x_overlap and self.x2 == x_end:
            return not rtl

        # panel is between or vice versa and right from the other
        if y_overlap and y_nested and x_overlap and self.x1 == x_start:
            return rtl

        # panel is above and right from other
        if y_overlap and y_end == self.y2 and x_overlap and x_start == self.x1:
            return rtl

        # panel is below and left from the other
        if y_overlap and y_start == self.y1 and x_overlap and x_end == self.x2:
            return not rtl

        # panel is above and left from other: decided by how much of this
        # panel's width is shared with the other one
        if y_overlap and y_end == self.y2 and x_overlap and x_end == self.x2:
            if abs(x_end - x_start) / self.wt >= 0.5:
                return rtl
            return not rtl

        # panel is below and right from the other: same width-share test
        if y_overlap and y_start == self.y1 and x_overlap and x_start == self.x1:
            if abs(x_end - x_start) / self.wt < 0.5:
                return rtl
            return not rtl

        # panel is right from the other (without overlapping)
        if self.x1 >= other.x2:
            return rtl

        # panel is left from the other (without overlapping)
        if other.x1 >= self.x2:
            return not rtl

        return False  # should not happen, TODO: raise an exception?

    
def read_voc_annotation(data_path, anno_path, data_basename):
    """Read the 'frame' boxes of one VOC-style XML annotation, sorted for reading.

    The file ``<anno_path>/<data_basename>.xml`` is parsed; every ``<object>``
    whose ``<name>`` is ``frame`` (case-insensitive, surrounding whitespace
    ignored) becomes a ``Panel`` built from its ``<bndbox>`` coordinates.
    Objects with any other name, or with no name at all, are skipped.
    Progress information is printed to stdout.

    Args:
        data_path: unused; kept so existing callers keep working.
        anno_path: directory that holds the annotation file.
        data_basename: annotation file name without the ``.xml`` extension.

    Returns:
        list of ``Panel`` objects, sorted in place with ``list.sort()``.
    """
    anno_file = os.path.join(anno_path, data_basename + ".xml")

    print("anno_file = ", anno_file)
    root = ET.parse(anno_file).getroot()

    objects = root.findall('object')
    print("{} has {} objects.".format(anno_file, len(objects)))

    frames = []
    for obj in objects:
        # findtext() yields None for a missing <name>, '' for an empty one
        label = (obj.findtext('name') or '').strip().lower()
        if label != 'frame':
            continue
        bbox = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            int(bbox.find(tag).text.strip())
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        frames.append(Panel(xmin, ymin, xmax, ymax))
    print("{} has {} frames:".format(anno_file, len(frames)))
    frames.sort()
    return frames


# Draw every annotated frame, together with its reading-order number, onto the
# page image and save the result as a JPEG into output_path.
input_path = "img"
output_path = "img_out"
files = os.listdir(input_path)
# Walk over every entry of the input folder
file_count = 0
for file in files:
    print(file)
    # splitext() copes with extensions of any length, unlike slicing off 4 characters
    basename = os.path.splitext(os.path.basename(file))[0]
    # NOTE: the annotation XML is looked up in output_path, not input_path
    sorted_frames = read_voc_annotation(input_path, output_path, basename)

    image_file = os.path.join(input_path, basename + ".jpg")
    image = Image.open(image_file)
    # font size is 3% of the image height, rounded to the nearest integer
    font = ImageFont.truetype(font='model_data/simhei.ttf', size=int(np.floor(3e-2 * image.size[1] + 0.5)))
    draw = ImageDraw.Draw(image)  # one drawing context per page is enough
    for i, element in enumerate(sorted_frames):
        print(i+1, " : ", element)

        left = element.x1
        top = element.y1
        right = element.x2
        bottom = element.y2

        # four nested 1-pixel rectangles give a 4-pixel wide red border
        for j in range(4):
            draw.rectangle([left+j, top+j, right-j, bottom-j], outline="red")
        # reading-order number, anchored at the centre of the frame
        draw.text((round((left + right)/2), round((top + bottom)/2)), str(i+1), fill="blue", font=font)
    image.save(os.path.join(output_path, basename + ".jpg"), quality=100, subsampling=0)
    file_count += 1
    # For a quick trial run, break here once file_count exceeds a small limit.

    

cover.jpg
anno_file =  img_out\cover.xml
img_out\cover.xml has 0 objects.
img_out\cover.xml has 0 frames:
kuchie-001.jpg
anno_file =  img_out\kuchie-001.xml
img_out\kuchie-001.xml has 0 objects.
img_out\kuchie-001.xml has 0 frames:
kuchie-002.jpg
anno_file =  img_out\kuchie-002.xml
img_out\kuchie-002.xml has 0 objects.
img_out\kuchie-002.xml has 0 frames:
p-colophon.jpg
anno_file =  img_out\p-colophon.xml
img_out\p-colophon.xml has 0 objects.
img_out\p-colophon.xml has 0 frames:
p-titlepage.jpg
anno_file =  img_out\p-titlepage.xml
img_out\p-titlepage.xml has 0 objects.
img_out\p-titlepage.xml has 0 frames:
sashie151.jpg
anno_file =  img_out\sashie151.xml
img_out\sashie151.xml has 0 objects.
img_out\sashie151.xml has 0 frames:
sashie152.jpg
anno_file =  img_out\sashie152.xml
img_out\sashie152.xml has 0 objects.
img_out\sashie152.xml has 0 frames:
sashie153.jpg
anno_file =  img_out\sashie153.xml
img_out\sashie153.xml has 0 objects.
img_out\sashie153.xml has 0 frames:
sashie154.jpg
anno_f

#### The following code block crops the JPEG files first and then combines all of the crops into a video file.

In [55]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET
#from PIL import Image, ImageDraw, ImageFont


class Panel:
    """Axis-aligned rectangle (comic panel / frame) that sorts into reading order.

    Instances only implement ``__lt__``; that is what ``list.sort()`` and
    ``sorted()`` use, so a list of panels can be sorted directly.

    Attributes:
        x1, y1: left and top edge.
        x2, y2: right and bottom edge.
        wt, ht: absolute width and height derived from the edges.

    The class attribute ``numbering`` selects the horizontal reading
    direction for *all* panels: ``'rtl-ttb'`` (right-to-left, top-to-bottom)
    or ``'ltr-ttb'`` (left-to-right, top-to-bottom).
    """

    numbering = 'rtl-ttb'  # Right-to-left and top-to-bottom by default

    def __str__(self):
        """Return the panel as ``"(x1, y1) --- (x2, y2)"``."""
        return "({}, {}) --- ({}, {})".format(self.x1, self.y1, self.x2, self.y2)

    @staticmethod
    def set_numbering(numbering):
        """Set the reading direction used by every panel comparison.

        Declared as a static method so that it can be called both as
        ``Panel.set_numbering(...)`` and on an instance.

        Args:
            numbering: ``'ltr-ttb'`` or ``'rtl-ttb'``.

        Raises:
            ValueError: if ``numbering`` is not one of the two known values.
        """
        if numbering not in ('ltr-ttb', 'rtl-ttb'):
            raise ValueError('Fatal error, unknown numbering: {}'.format(numbering))
        Panel.numbering = numbering

    def __init__(self, x1, y1, x2, y2):
        """Store the four edges and derive the panel's width and height."""
        self.x1 = x1  # panel's left edge
        self.y1 = y1  # panel's top edge
        self.x2 = x2  # panel's right edge
        self.y2 = y2  # panel's bottom edge
        self.wt = abs(x2 - x1)
        self.ht = abs(y2 - y1)

    def overlapping_segment(self, a, b):
        """Return ``(start, end)`` of the intersection of two 1-D intervals.

        ``a`` and ``b`` are ``(low, high)`` pairs. The intervals overlap only
        when the returned ``start`` is smaller than ``end``.
        """
        s = max(a[0], b[0])
        e = min(a[1], b[1])
        return s, e

    def __lt__(self, other):
        """Return True when this panel is read before ``other``.

        The decision is a fixed sequence of geometric rules; the first rule
        that matches wins, so their order matters.

        NOTE(review): the rules are pairwise heuristics; nothing here checks
        that they form a consistent total order for every layout.
        """
        rtl = Panel.numbering == 'rtl-ttb'

        x_start, x_end = self.overlapping_segment((self.x1, self.x2), (other.x1, other.x2))
        y_start, y_end = self.overlapping_segment((self.y1, self.y2), (other.y1, other.y2))
        x_overlap = x_start < x_end
        y_overlap = y_start < y_end

        # identical rectangles: neither one comes first (keeps "<" irreflexive,
        # otherwise the "completely inside" rule below would answer True)
        if (self.x1, self.y1, self.x2, self.y2) == (other.x1, other.y1, other.x2, other.y2):
            return False

        # panel is completely inside the other
        if self.y1 >= other.y1 and self.y2 <= other.y2 \
                and self.x1 >= other.x1 and self.x2 <= other.x2:
            return True

        # panel is completely surrounding the other
        if self.y1 <= other.y1 and self.y2 >= other.y2 \
                and self.x1 <= other.x1 and self.x2 >= other.x2:
            return False

        # panel is absolutely above the other
        if other.y1 >= self.y2:
            return True

        # panel is absolutely below the other
        if self.y1 >= other.y2:
            return False

        # one horizontal extent contains the other / one vertical extent
        # contains the other
        x_nested = (self.x1 <= other.x1 and self.x2 >= other.x2) \
            or (self.x1 >= other.x1 and self.x2 <= other.x2)
        y_nested = (other.y1 <= self.y1 and other.y2 >= self.y2) \
            or (other.y1 >= self.y1 and other.y2 <= self.y2)

        # panel is directly above the other and contains part of the other or vice versa (with overlapping)
        if y_overlap and y_end == self.y2 and x_overlap and x_nested:
            return True

        # panel is directly below the other and contains part of the other or vice versa (with overlapping)
        if y_overlap and y_start == self.y1 and x_overlap and x_nested:
            return False

        # panel is between or vice versa and left from the other
        if y_overlap and y_nested and x_overlap and self.x2 == x_end:
            return not rtl

        # panel is between or vice versa and right from the other
        if y_overlap and y_nested and x_overlap and self.x1 == x_start:
            return rtl

        # panel is above and right from other
        if y_overlap and y_end == self.y2 and x_overlap and x_start == self.x1:
            return rtl

        # panel is below and left from the other
        if y_overlap and y_start == self.y1 and x_overlap and x_end == self.x2:
            return not rtl

        # panel is above and left from other: decided by how much of this
        # panel's width is shared with the other one
        if y_overlap and y_end == self.y2 and x_overlap and x_end == self.x2:
            if abs(x_end - x_start) / self.wt >= 0.5:
                return rtl
            return not rtl

        # panel is below and right from the other: same width-share test
        if y_overlap and y_start == self.y1 and x_overlap and x_start == self.x1:
            if abs(x_end - x_start) / self.wt < 0.5:
                return rtl
            return not rtl

        # panel is right from the other (without overlapping)
        if self.x1 >= other.x2:
            return rtl

        # panel is left from the other (without overlapping)
        if other.x1 >= self.x2:
            return not rtl

        return False  # should not happen, TODO: raise an exception?

    
def read_voc_annotation(anno_path, data_basename):
    """Read the 'frame' boxes of one VOC-style XML annotation, sorted for reading.

    The file ``<anno_path>/<data_basename>.xml`` is parsed; every ``<object>``
    whose ``<name>`` is ``frame`` (case-insensitive, surrounding whitespace
    ignored) becomes a ``Panel`` built from its ``<bndbox>`` coordinates.
    Objects with any other name, or with no name at all, are skipped.
    Progress information is printed to stdout.

    Args:
        anno_path: directory that holds the annotation file.
        data_basename: annotation file name without the ``.xml`` extension.

    Returns:
        list of ``Panel`` objects, sorted in place with ``list.sort()``.
    """
    anno_file = os.path.join(anno_path, data_basename + ".xml")

    print("anno_file = ", anno_file)
    root = ET.parse(anno_file).getroot()

    objects = root.findall('object')
    print("{} has {} objects.".format(anno_file, len(objects)))

    frames = []
    for obj in objects:
        # findtext() yields None for a missing <name>, '' for an empty one
        label = (obj.findtext('name') or '').strip().lower()
        if label != 'frame':
            continue
        bbox = obj.find('bndbox')
        xmin, ymin, xmax, ymax = (
            int(bbox.find(tag).text.strip())
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        frames.append(Panel(xmin, ymin, xmax, ymax))
    print("{} has {} frames:".format(anno_file, len(frames)))
    frames.sort()
    return frames


# Crop every annotated frame out of the numbered page images (in reading
# order), scale each crop to a fixed size and write all of them into one video.
input_path = "img_out"
output_path = "video_out"
file_prefix = "img"
file_zfill_count = 4
file_start_no = 1
file_end_no = 17
frame_size = (640, 480)  # size passed to cv2.resize and to the video writer

files = [os.path.join(input_path, file_prefix + str(no).zfill(file_zfill_count) + ".jpg") for no in range(file_start_no, file_end_no + 1)]
print(files)

# make sure the target folder exists before the video writer is opened
os.makedirs(output_path, exist_ok=True)

file_count = 0  # number of cropped frames collected so far
crop_image_storage = []
for file in files:
    print(file)
    # splitext() copes with extensions of any length, unlike slicing off 4 characters
    basename = os.path.splitext(os.path.basename(file))[0]
    sorted_frames = read_voc_annotation(input_path, basename)

    image_file = os.path.join(input_path, basename + ".jpg")
    image = cv2.imread(image_file)
    if image is None:
        # fail with the offending path instead of an opaque error inside cv2.resize
        raise OSError("cv2.imread could not read image: " + image_file)
    if not sorted_frames:
        # page without any annotated frame: use the whole page as one video frame
        resized_crop_img = cv2.resize(image, frame_size, interpolation=cv2.INTER_CUBIC)
        crop_image_storage.append(resized_crop_img)
        continue
    for i, element in enumerate(sorted_frames):
        print(i+1, " : ", element)

        left = element.x1
        top = element.y1
        right = element.x2
        bottom = element.y2

        # +1 so that the right/bottom edge pixels are part of the crop
        crop_img = image[top:bottom+1, left:right+1]
        resized_crop_img = cv2.resize(crop_img, frame_size, interpolation=cv2.INTER_CUBIC)
        crop_image_storage.append(resized_crop_img)
        file_count += 1

fourcc = cv2.VideoWriter_fourcc(*"XVID")  # video codec
# writer arguments: file name, codec, frames per second, frame size
videoWriter = cv2.VideoWriter(os.path.join(output_path, 'new.avi'), fourcc, 1, frame_size)
for video_frame in crop_image_storage:
    videoWriter.write(video_frame)
videoWriter.release()
cv2.destroyAllWindows()

    

['img_out\\img0001.jpg', 'img_out\\img0002.jpg', 'img_out\\img0003.jpg', 'img_out\\img0004.jpg', 'img_out\\img0005.jpg', 'img_out\\img0006.jpg', 'img_out\\img0007.jpg', 'img_out\\img0008.jpg', 'img_out\\img0009.jpg', 'img_out\\img0010.jpg', 'img_out\\img0011.jpg', 'img_out\\img0012.jpg', 'img_out\\img0013.jpg', 'img_out\\img0014.jpg', 'img_out\\img0015.jpg', 'img_out\\img0016.jpg', 'img_out\\img0017.jpg']
img_out\img0001.jpg
anno_file =  img_out\img0001.xml
img_out\img0001.xml has 0 objects.
img_out\img0001.xml has 0 frames:
img_out\img0002.jpg
anno_file =  img_out\img0002.xml
img_out\img0002.xml has 6 objects.
img_out\img0002.xml has 6 frames:
1  :  (709, 105) --- (993, 433)
2  :  (705, 447) --- (993, 769)
3  :  (88, 106) --- (685, 770)
4  :  (89, 786) --- (989, 1110)
5  :  (489, 1122) --- (994, 1444)
6  :  (87, 1122) --- (470, 1445)
img_out\img0003.jpg
anno_file =  img_out\img0003.xml
img_out\img0003.xml has 10 objects.
img_out\img0003.xml has 10 frames:
1  :  (722, 104) --- (998, 42