In [7]:
import Utils
import matplotlib.pyplot as plt
import os
import cv2
import math
import random
import numpy as np
from skimage import util
import json

In [8]:
def resize(img, label, size=1600):
    """Resize ``img`` to a ``size`` x ``size`` square and rescale its boxes.

    Args:
        img: image array whose ``shape`` unpacks to (height, width, channels).
        label: iterable of ``(x1, y1, x2, y2)`` boxes in pixel coordinates
            of the input image.
        size: edge length, in pixels, of the square output image.

    Returns:
        ``(resized_img, new_label)`` where ``new_label`` is a list of
        ``(x1, y1, x2, y2)`` tuples rounded to integers in output pixels.
    """
    src_h, src_w, _ = img.shape
    resized = cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)

    def _rescale(value, extent):
        # Map a coordinate from [0, extent] onto [0, size].
        return round(value / extent * size)

    scaled_boxes = [
        (
            _rescale(left, src_w),
            _rescale(top, src_h),
            _rescale(right, src_w),
            _rescale(bottom, src_h),
        )
        for left, top, right, bottom in label
    ]
    return resized, scaled_boxes

# Translation and scaling transform (with an optional small rotation)
def warp(img, label, rotate=False):
    """Randomly translate, scale and optionally rotate an image and its boxes.

    The image is first shifted by a random offset and then scaled (and, when
    ``rotate`` is true, rotated by a small random angle) about its centre.
    Areas uncovered by either warp are filled with white.

    Args:
        img: image array whose ``shape`` unpacks to (height, width, channels).
        label: list of ``(x1, y1, x2, y2)`` boxes. The first box supplies the
            top-left extent and the last box the bottom-right extent used to
            bound the random shift.
        rotate: when true, draw a rotation angle from ``randrange(-5, 5)``
            degrees; otherwise the angle is 0.

    Returns:
        ``(warped_img, new_label)``. If ``label`` is empty the inputs are
        returned untouched.
    """
    # shape is (height, width, channels); x runs along width, y along height.
    height, width, _ = img.shape
    if not label:
        return img, label

    left, top, _, _ = label[0]
    _, _, right, bottom = label[-1]

    # Shift limits: up to (extent - 100) px towards the top-left, and up to the
    # remaining space minus 100 px (at least 5) towards the bottom-right.
    shift_left = max(0, left - 100)
    shift_up = max(0, top - 100)
    shift_right = max(5, width - right - 100)
    shift_down = max(5, height - bottom - 100)
    dx = random.randrange(-shift_left, shift_right)
    dy = random.randrange(-shift_up, shift_down)

    translation = np.float32([[1, 0, dx], [0, 1, dy]])
    shifted = cv2.warpAffine(img, translation, (width, height), borderValue=(255, 255, 255))

    angle = random.randrange(-5, 5) if rotate else 0
    scale = random.randrange(80, 110) / 100
    centre_x, centre_y = width // 2, height // 2
    theta = math.pi / 180 * angle
    a, b = scale * math.cos(theta), scale * math.sin(theta)
    # 2x3 affine matrix for scale + rotation about the image centre.
    M = [[a, b, (1 - a) * centre_x - b * centre_y],
         [-b, a, b * centre_x + (1 - a) * centre_y]]
    warped = cv2.warpAffine(shifted, np.array(M), (width, height), borderValue=(255, 255, 255))

    (m00, m01, m02), (m10, m11, m12) = M

    def _transform(px, py):
        # Apply the same affine matrix to a single (x, y) point.
        return round(px * m00 + py * m01 + m02), round(px * m10 + py * m11 + m12)

    # x is horizontal, y is vertical.
    new_label = []
    for x1, y1, x2, y2 in label:
        nx1, ny1 = _transform(x1 + dx, y1 + dy)
        nx2, ny2 = _transform(x2 + dx, y2 + dy)
        new_label.append((nx1, ny1, nx2, ny2))

    return warped, new_label

# Add a stamp overlay
def add_stamp(stamp_path, img):
    """Paste a random stamp, randomly scaled and rotated, onto ``img`` in place.

    A file is picked at random from ``stamp_path``, scaled by a factor drawn
    from ``randrange(80, 150) / 100``, rotated about its own centre by an
    angle drawn from ``randrange(-90, 90)`` degrees (uncovered corners are
    filled with white), and combined with a random region of ``img`` using a
    bitwise AND.

    Args:
        stamp_path: directory containing the stamp image files.
        img: target image array with shape (height, width, channels); it is
            modified in place.

    Returns:
        The same ``img`` object, after the stamp has been blended in.

    Raises:
        ValueError: from ``random.randrange`` if ``stamp_path`` is empty or
            the scaled stamp does not fit inside ``img``.
    """
    stamps = os.listdir(stamp_path)
    i = random.randrange(0, len(stamps))
    s_path = os.path.join(stamp_path, stamps[i])

    stamp = cv2.imread(s_path)
    h_s, w_s, _ = stamp.shape
    scale = random.randrange(80, 150) / 100
    h_n, w_n = int(h_s * scale), int(w_s * scale)
    stamp = cv2.resize(stamp, (w_n, h_n))

    angle = random.randrange(-90, 90)
    # x runs along the width and y along the height, so the centre is
    # (w_n // 2, h_n // 2). The previous code swapped the two, which rotated
    # non-square stamps about the wrong point and clipped them.
    center_x, center_y = w_n // 2, h_n // 2
    a, b = math.cos(math.pi / 180 * angle), math.sin(math.pi / 180 * angle)
    # 2x3 affine matrix for a pure rotation about (center_x, center_y).
    M = [[a, b, (1 - a) * center_x - b * center_y],
         [-b, a, b * center_x + (1 - a) * center_y]]
    matrix = np.array(M)
    stamp = cv2.warpAffine(stamp, matrix, (w_n, h_n), borderValue=(255, 255, 255))

    h, w, _ = img.shape
    x, y = random.randrange(1, w - w_n), random.randrange(1, h - h_n)
    img_p = img[y:y + h_n, x:x + w_n]

    # Bitwise AND with the white (255) stamp background leaves the page as is.
    img[y:y + h_n, x:x + w_n] = cv2.bitwise_and(stamp, img_p)

    return img

# Add a handwriting overlay
def add_handwriting(hand_path, img):
    """Blend a random handwriting sample into ``img`` at a random position.

    A file is picked at random from ``hand_path``, enlarged by a factor drawn
    from ``randrange(100, 200) / 100`` and combined with a random region of
    ``img`` using a bitwise AND.

    Args:
        hand_path: directory containing the handwriting image files.
        img: target image array with shape (height, width, channels); it is
            modified in place.

    Returns:
        The same ``img`` object with the sample blended in.
    """
    candidates = os.listdir(hand_path)
    chosen = candidates[random.randrange(0, len(candidates))]
    overlay = cv2.imread(os.path.join(hand_path, chosen))
    src_h, src_w, _ = overlay.shape

    factor = random.randrange(100, 200) / 100
    new_h = int(src_h * factor)
    new_w = int(src_w * factor)
    overlay = cv2.resize(overlay, (new_w, new_h))

    page_h, page_w, _ = img.shape
    # Draw the horizontal offset first, then the vertical one.
    left = random.randrange(1, page_w - new_w)
    top = random.randrange(1, page_h - new_h)

    rows = slice(top, top + new_h)
    cols = slice(left, left + new_w)
    img[rows, cols] = cv2.bitwise_and(overlay, img[rows, cols])

    return img

# Add breakpoints (white dots next to cell corners)
def add_breakpoint(img, label, n=2):
    """Draw ``n`` white filled circles next to random box corners.

    For each circle a random box is chosen from ``label``, then one of its
    four corners, then one of the four axis directions. The circle has a
    radius drawn from ``randrange(5, 10)`` and its centre is offset from the
    corner by that radius in the chosen direction, so its edge touches the
    corner.

    Args:
        img: image to draw on; modified in place by ``cv2.circle``.
        label: list of ``(x1, y1, x2, y2)`` boxes with integer coordinates.
        n: number of circles to draw.

    Returns:
        The same ``img`` object. It is returned unchanged if ``label`` is
        empty.
    """
    if not label:
        return img

    directions = [(-1, 0), (1, 0), (0, 1), (0, -1)]

    for _ in range(n):
        a, b, c, d = random.choice(label)

        # random.choice covers all four entries. The previous
        # randrange(0, 3) could never select the last corner (c, d) or the
        # last direction (0, -1).
        x, y = random.choice([(a, b), (a, d), (c, b), (c, d)])
        dx, dy = random.choice(directions)

        r = random.randrange(5, 10)
        x += dx * r
        y += dy * r

        cv2.circle(img, (x, y), r, (255, 255, 255), -1)
    return img

In [15]:
# Load the cell labels
def get_label(dir_path):
    """Collect the cell boxes for every file in ``dir_path``.

    Each file is passed to ``Utils.get_table_pos``. Iterating over its result
    must yield strings of the form ``"y1-x1_y2-x2"``; these are parsed into
    integer ``(x1, y1, x2, y2)`` tuples.

    Args:
        dir_path: directory whose entries are all processed.

    Returns:
        Dict mapping each file name to its list of ``(x1, y1, x2, y2)`` boxes.
    """
    def _parse_cell(key):
        # "y1-x1_y2-x2" -> (x1, y1, x2, y2); the row (y) comes first in the key.
        first, second = key.split('_')
        y1, x1 = map(int, first.split('-'))
        y2, x2 = map(int, second.split('-'))
        return (x1, y1, x2, y2)

    return {
        name: [
            _parse_cell(key)
            for key in Utils.get_table_pos(os.path.join(dir_path, name))
        ]
        for name in os.listdir(dir_path)
    }

def generate(dir_path, stamp_path, hand_path, path_save, label_path, img_size=800, times=5):
    """Create augmented images and matching label files for every raw image.

    For each file in ``dir_path`` (as listed by ``get_label``), ``times``
    variants are produced. Each variant is warped, stamped, overlaid with
    handwriting, given breakpoints and resized to ``img_size``. The image is
    written to ``path_save`` as ``<stem>_<t><ext>`` and its boxes to
    ``label_path`` as ``<stem>_<t>.txt``.

    Each label line is ``0 cx cy w h``, with the box centre and size divided
    by the output image width/height. Boxes that are inverted or extend
    outside the image are skipped.

    Args:
        dir_path: directory with the raw images.
        stamp_path: directory with stamp images for ``add_stamp``.
        hand_path: directory with handwriting images for ``add_handwriting``.
        path_save: output directory for images (created if missing).
        label_path: output directory for label files (created if missing).
        img_size: edge length of the square output images.
        times: number of augmented variants per raw image.
    """
    d = get_label(dir_path)

    # Make sure both output directories exist; otherwise open() below fails.
    for out_dir in (path_save, label_path):
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    for f, label in d.items():
        # splitext keeps dots inside the stem ("scan.v2.png" -> "scan.v2", ".png").
        stem, ext = os.path.splitext(f)
        path_raw = os.path.join(dir_path, f)

        for t in range(times):
            # Re-read for every variant: the augmentation steps modify the
            # image in place.
            img = cv2.imread(path_raw)

            path_noise = os.path.join(path_save, '%s_%s%s' % (stem, t, ext))
            path_label = os.path.join(label_path, '%s_%s.txt' % (stem, t))
            new_label = label

            # Translate and scale
            img, new_label = warp(img, new_label)
            # Add a stamp
            img = add_stamp(stamp_path, img)
            # Add handwriting
            img = add_handwriting(hand_path, img)
            # Add breakpoints
            img = add_breakpoint(img, new_label)
            # Resize to the output size
            img, new_label = resize(img, new_label, size=img_size)

            cv2.imwrite(path_noise, img)

            h, w, _ = img.shape
            with open(path_label, 'w', encoding='UTF-8') as fw:
                for x1, y1, x2, y2 in new_label:
                    # Skip boxes that are inverted or fall outside the image.
                    if x1 > x2 or y1 > y2 or x1 < 0 or y1 < 0 or x2 > w or y2 > h:
                        continue
                    a, b, c, d_h = (x1 + x2) / 2 / w, (y1 + y2) / 2 / h, (x2 - x1) / w, (y2 - y1) / h
                    fw.write('0 %.6f %.6f %.6f %.6f\n' % (a, b, c, d_h))



In [4]:
# Training split: 10 augmented variants per raw image, written to the
# YOLO train image/label directories.
dir_path, stamp_path, hand_path = './rawdata_withtable', './stamp', './handwriting'
path_save, label_path = '../DATA/YOLO/images/train', '../DATA/YOLO/labels/train'
generate(
    dir_path=dir_path,
    stamp_path=stamp_path,
    hand_path=hand_path,
    path_save=path_save,
    label_path=label_path,
    img_size=800,
    times=10,
)

In [5]:
# Validation split: one augmented variant per raw image, written to the
# YOLO val image/label directories.
dir_path, stamp_path, hand_path = './rawdata_withtable', './stamp', './handwriting'
path_save, label_path = '../DATA/YOLO/images/val', '../DATA/YOLO/labels/val'
generate(
    dir_path=dir_path,
    stamp_path=stamp_path,
    hand_path=hand_path,
    path_save=path_save,
    label_path=label_path,
    img_size=800,
    times=1,
)

In [6]:
# Test split: one augmented variant per raw image, written to the
# YOLO test image/label directories.
dir_path, stamp_path, hand_path = './rawdata_withtable', './stamp', './handwriting'
path_save, label_path = '../DATA/YOLO/images/test', '../DATA/YOLO/labels/test'
generate(
    dir_path=dir_path,
    stamp_path=stamp_path,
    hand_path=hand_path,
    path_save=path_save,
    label_path=label_path,
    img_size=800,
    times=1,
)

In [16]:
# Segmentation data: five augmented variants per raw image, written to the
# ../segment/data image and groundTruth directories.
dir_path, stamp_path, hand_path = './rawdata_withtable', './stamp', './handwriting'
path_save, label_path = '../segment/data/images', '../segment/data/groundTruth'
generate(
    dir_path=dir_path,
    stamp_path=stamp_path,
    hand_path=hand_path,
    path_save=path_save,
    label_path=label_path,
    img_size=800,
    times=5,
)

false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
false
