In [1]:
import random

from PIL import Image,ImageFilter
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import cv2

In [2]:
#generate the yolov5 dataset
object_img_path="pic1/1.png"
img=Image.open(object_img_path)
img=img.convert("RGBA")
#recolor the image

In [3]:
def colorize(img, color):
    tar=np.array(img)
    for c in range(3):
        tar[:,:,c]=np.where(tar[:,:,3]>0,color[c],tar[:,:,c])
    return tar

In [4]:
i1=colorize(img,(255,128,0))
#save the image
#Image.fromarray(i1).save("1_color.png")

In [5]:
#scale the image and perspective transform
def scale_img(img,scaled_size):

    w,h=img.size
    img=img.resize((scaled_size,scaled_size))
    return img
def horizontal_perspective_transform(img,max_offset=0.1):

    h,w=img.shape[:2]
    offset=np.random.uniform(-max_offset,max_offset)
    x1=0
    x2=w
    y1=0
    y2=h
    x1_offset=int(x1+offset*w)
    x2_offset=int(x2+offset*w)
    pts1=np.float32([[x1,y1],[x2,y1],[x1,y2],[x2,y2]])
    pts2=np.float32([[x1_offset,y1],[x2_offset,y1],[x1,y2],[x2,y2]])
    M=cv2.getPerspectiveTransform(pts1,pts2)
    img=cv2.warpPerspective(np.array(img),M,(w,h))
    return Image.fromarray(img)

In [6]:
i2=horizontal_perspective_transform(np.array(i1))
i2.save("2_perspective.png")

In [7]:
import cv2
import numpy as np
import random
def apply_trapezoidal_perspective(img_np, max_offset_ratio=0.1):
    """
    将正方形图像转为等腰梯形，左右边保持不变，上下边倾斜。
    :param img_np: 输入图像的 NumPy 数组（RGBA 格式）
    :param max_offset_ratio: 最大垂直方向的偏移比例，默认为图像高度的30%
    :return: 应用透视变换后的图像 NumPy 数组
    """
    h, w = img_np.shape[:2]

    # 原始四个角点
    pts1 = np.float32([[0, 0], [w, 0], [0, h], [w, h]])

    # 计算最大垂直方向的偏移量
    max_offset = h * max_offset_ratio

    # 上下边进行垂直缩放，左右边保持不变
    top_offset = np.random.uniform(0, max_offset)  # 上边垂直缩放
    bottom_offset = np.random.uniform(0, max_offset)  # 下边垂直缩放

    # 透视变换后的角点（左右边保持不变，上下边进行垂直缩放）
    pts2 = np.float32([[0, 0],  # 左上角垂直缩放
                       [w, top_offset],  # 右上角垂直缩放
                       [0, h ],  # 左下角垂直缩放
                       [w, h - bottom_offset]])  # 右下角垂直缩放
    #maybe pt2 will vertical flip
    pts3=np.float32([[0, top_offset],  # 左上角垂直缩放
                       [w, 0],  # 右上角垂直缩放
                       [0, h - bottom_offset],  # 左下角垂直缩放
                       [w, h]])  # 右下角垂直缩放
    # 计算透视变换矩阵
    #random choose the pts2 or pts3
    pts2=pts2 if random.random()>0.5 else pts3
    M = cv2.getPerspectiveTransform(pts1, pts2)

    # 应用透视变换
    transformed_img = cv2.warpPerspective(img_np, M, (w, h))

    return transformed_img


In [8]:
i3=apply_trapezoidal_perspective(np.array(i1))
Image.fromarray(i3).save("3_camera.png")

In [9]:
#packed the function
def random_transform(img,scale=0.1):
    #angle=np.random.uniform(-max_angle,max_angle)
    #img=random_rotate(img,angle)
    img=horizontal_perspective_transform(np.array(img))
    img=apply_trapezoidal_perspective(np.array(img))
    return img

In [10]:
Image.fromarray(random_transform(i1)).save("4_random.png")

In [11]:
def rot_pr(imgpath,max_angle=5):
    img=Image.open(imgpath)
    img=img.convert("RGBA")
    angle=np.random.uniform(-max_angle,max_angle)
    img=img.rotate(angle)
    #random color
    color=np.random.randint(0,255,(3,))
    img=colorize(img,color)
    
    img=np.array(img)
    img=random_transform(img)
    img=Image.fromarray(img).convert("RGBA")
    return img

In [12]:
ii=rot_pr("pic1/2.png")
Image.fromarray(ii).save("5_rot_pr.png")

TypeError: expected string or buffer

In [13]:
def random_background():
    #for yolov5 dataset, the background should be 640*640, generate the background through the random color, and the random noise or random filter
    #generate the random color
    color=np.random.randint(0,255,(3,))
    img=np.ones((640,640,3),dtype=np.uint8)
    img=img*color
    #generate the random noise
    noise=np.random.normal(0,64,(640,640,3))
    img=img+noise
    img=np.clip(img,0,255)
    img=img.astype(np.uint8)
    #generate the random filter
    img=Image.fromarray(img)
    img=img.filter(ImageFilter.GaussianBlur(radius=random.randint(0,5)))
    return img

In [89]:
random_background().save("8_background.png")

In [14]:
data_path="yolo"
def generate_random_yolo(type):
    bg=random_background().convert('RGBA')
    img=rot_pr(f"pic1/{type-10}.png")
    #scale the image
    w=random.randint(150,270)
    h=random.randint(150,270)
    img=img.resize((w,h))
    #random position
    x=random.randint(0,640-w)
    y=random.randint(0,640-h)
    bg.paste(img,(x,y),img)
    id=random.randint(0,100000)
    bg.save(f"{data_path}/pic/{type}_{id}.png")
    #generate the label
    with open(f"{data_path}/label/{type}_{id}.txt","w") as f:
        f.write(f"{type} {(1.0*x+w/2)/640} {(1.0*y+h/2)/640} {w*1.0/640} {h*1.0/640}")
        

In [105]:
for i in range(11,41):
    for _ in range(4):
        generate_random_yolo(i)

In [38]:
def bg(bname,oid):
    bg=Image.open(f"background/{bname}").convert("RGBA")
    obj=Image.open(f"pic1/{oid}.png").convert("RGBA")
    #resize the object and obj to 640*640
    obj=obj.resize((640,640))
    bg=bg.resize((640,640))
    r_color=np.random.randint(0,255,(3,))
    obj=colorize(obj,r_color)
    #to image
    obj=Image.fromarray(obj)
    #align the object to the center of the background
    x=(640-obj.width)//2
    y=(640-obj.height)//2
    bg.paste(obj,(x,y),obj)
    bg.save(f"pic2/{bname.split('.')[0]}_{oid}.png")

In [39]:
import os
bn=os.listdir(f"background")
bn

['1.jpeg', '2.jpg', '3.jpg', '4.jpg', '5.jpg']

In [40]:
bg(bn[0],11)

In [41]:
for b in bn:
    for i in range(1,31):
        bg(b,i)