In [None]:
# генерация с параметрами

import os, random
from PIL import Image, ImageDraw
import json

def generate_circle_dataset(out_dir, n_samples=5000, img_size=224, min_r=10, max_r=80):
    os.makedirs(out_dir, exist_ok=True)
    meta = []
    for i in range(n_samples):
        width = height = img_size
        # параметры
        r = random.randint(min_r, max_r)
        x = random.randint(r, img_size - r)
        y = random.randint(r, img_size - r)


        svg_content = f'''<?xml version="1.0" encoding="utf-8"?>
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
    width="{width}px" height="{height}px" viewBox="0 0 {width} {height}" style="enable-background:new 0 0 {width} {height};" xml:space="preserve">
    <rect width="{width}" height="{height}" fill="white"/>
    <g>
        <circle cx="{x}" cy="{y}" r="{r}" fill="black"/>
    </g>
</svg>'''
        # рисуем
        im = Image.new("RGB", (img_size, img_size), "white")
        draw = ImageDraw.Draw(im)
        draw.ellipse((x-r, y-r, x+r, y+r), fill="black")
        pngname = f"{i:05d}.png"
        im.save(os.path.join(out_dir, "png/", f"{i:05d}.png"))
        with open(os.path.join(out_dir, "svg/" f"{i:05d}.svg"), 'w') as file:
            file.write(svg_content) 
        meta.append((pngname, x, y, r))
    # сохраняем метаданные
    with open(os.path.join(out_dir, "labels.json"), "w") as f:
        json.dump(meta, f)


In [2]:
# генерация со строкой

import os, random
from PIL import Image, ImageDraw
import json

def generate_circle_dataset(out_dir, n_samples=5000, img_size=224, min_r=10, max_r=80):
    os.makedirs(out_dir, exist_ok=True)
    meta = []
    for i in range(n_samples):
        width = height = img_size
        # параметры
        r = random.randint(min_r, max_r)
        x = random.randint(r, img_size - r)
        y = random.randint(r, img_size - r)

        circle_content = f'''<circle cx="{x}" cy="{y}" r="{r}" fill="black"/>'''

        svg_content = f'''<?xml version="1.0" encoding="utf-8"?>
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
    width="{width}px" height="{height}px" viewBox="0 0 {width} {height}" style="enable-background:new 0 0 {width} {height};" xml:space="preserve">
    <rect width="{width}" height="{height}" fill="white"/>
    <g>
        {circle_content}
    </g>
</svg>'''
        # рисуем
        im = Image.new("RGB", (img_size, img_size), "white")
        draw = ImageDraw.Draw(im)
        draw.ellipse((x-r, y-r, x+r, y+r), fill="black")
        pngname = f"{i:05d}.png"
        im.save(os.path.join(out_dir, "png/", f"{i:05d}.png"))
        with open(os.path.join(out_dir, "svg/" f"{i:05d}.svg"), 'w') as file:
            file.write(svg_content) 
        meta.append((pngname, circle_content))
    # сохраняем метаданные
    with open(os.path.join(out_dir, "png/labels.json"), "w") as f:
        json.dump(meta, f)


In [7]:
generate_circle_dataset("../datasets/circles_str", n_samples=5000)

In [16]:
# генерация со строкой на несколько окружностей

import os, random
from PIL import Image, ImageDraw
import json

def generate_random_hex():
    return ''.join(random.choice('0123456789ABCDEF') for _ in range(6))

def generate_circle_dataset(out_dir, n_samples=5000, img_size=224, min_r=10, max_r=80, max_n = 1, min_n = 1):
    os.makedirs(out_dir, exist_ok=True)
    meta = []
    for i in range(n_samples):
        im = Image.new("RGB", (img_size, img_size), "white")
        draw = ImageDraw.Draw(im)
        width = height = img_size
        svg_content = f'''<?xml version="1.0" encoding="utf-8"?>
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
    width="{width}px" height="{height}px" viewBox="0 0 {width} {height}" style="enable-background:new 0 0 {width} {height};" xml:space="preserve">
    <rect width="{width}" height="{height}" fill="white"/>
    <g>'''
        circles_num = random.randint(min_n, max_n)
        for j in range(circles_num):

            r = random.randint(min_r, max_r)
            x = random.randint(r, img_size - r)
            y = random.randint(r, img_size - r)
            color = generate_random_hex()
            
            draw.ellipse((x-r, y-r, x+r, y+r), fill=f"#{color}")

            circle_content = f'''<circle cx="{x}" cy="{y}" r="{r}" fill="#{color}"/>'''

        
            add_content = f'''
        {circle_content}'''
            svg_content += add_content
        end_content = '''
    </g>
</svg>'''
        svg_content += end_content

        pngname = f"{i:05d}.png"
        im.save(os.path.join(out_dir, "png/", f"{i:05d}.png"))
        with open(os.path.join(out_dir, "svg/" f"{i:05d}.svg"), 'w') as file:
            file.write(svg_content) 
        meta.append((pngname, circle_content))
    # сохраняем метаданные
    with open(os.path.join(out_dir, "png/labels.json"), "w") as f:
        json.dump(meta, f)


In [17]:
generate_circle_dataset("../datasets/n_circles", n_samples=5000, max_n = 5)

In [None]:
# генерация с квадратами

import os, random
from PIL import Image, ImageDraw
import json


def generate_random_hex():
    return ''.join(random.choice('0123456789ABCDEF') for _ in range(6))


def generate_rect_dataset(out_dir, n_samples=5000, img_size=224, min_r=10, max_r=80, max_n = 1, min_n = 1):

    os.makedirs(out_dir, exist_ok=True)
    meta = []
    for i in range(n_samples):
        im = Image.new("RGB", (img_size, img_size), "white")
        draw = ImageDraw.Draw(im)
        width = height = img_size
        svg_content = f'''<?xml version="1.0" encoding="utf-8"?>
<svg version="1.1" id="Layer_1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0px" y="0px"
    width="{width}px" height="{height}px" viewBox="0 0 {width} {height}" style="enable-background:new 0 0 {width} {height};" xml:space="preserve">
    <rect width="{width}" height="{height}" fill="white"/>
    <g>'''
        for l in range(max_n):
            figure_type = random.randint(0, 1)
            if figure_type == 0:
                r = random.randint(min_r, max_r)
                x = random.randint(r, img_size - r)
                y = random.randint(r, img_size - r)
                color = generate_random_hex()
                
                draw.ellipse((x-r, y-r, x+r, y+r), fill=f"#{color}")

                circle_content = f'''<circle cx="{x}" cy="{y}" r="{r}" fill="#{color}"/>'''

                add_content = f'''
        {circle_content}'''
                svg_content += add_content

            elif figure_type == 1:
                r = random.randint(min_r, max_r)
                x = random.randint(r, img_size - r)
                y = random.randint(r, img_size - r)

                color = generate_random_hex()

                draw.rectangle((x-r, y-r, x+r, y+r), fill=f"#{color}")

                rect_content = f'''<rect x="{x}" y="{y}" width="{r}" height="{r}" fill="#{color}/>'''
                add_content = f'''
        {rect_content}'''

        end_content = '''
    </g>
</svg>'''
        svg_content += end_content

        pngname = f"{i:05d}.png"
        im.save(os.path.join(out_dir, "png/", f"{i:05d}.png"))
        with open(os.path.join(out_dir, "svg/" f"{i:05d}.svg"), 'w') as file:
            file.write(svg_content) 
        meta.append((pngname, circle_content))
    # сохраняем метаданные
    with open(os.path.join(out_dir, "png/labels.json"), "w") as f:
        json.dump(meta, f)


In [20]:
generate_rect_dataset("../datasets/circles_and_rect", n_samples=5000, max_n = 5)

In [None]:
import os
from PIL import Image
import numpy as np
import cairosvg

def compare_images(img1_path, img2_path):
    # Open images
    img1 = Image.open(img1_path)
    img2 = Image.open(img2_path)
    
    # Resize images to the same size
    img1 = img1.resize((224, 224))
    img2 = img2.resize((224, 224))
    
    arr1 = np.array(img1)
    arr2 = np.array(img2)

    difference = np.abs(arr1.astype(np.float32) - arr2.astype(np.float32))

    mse = np.mean(difference ** 2)
    max_pixel_value = 255.0
    percent_difference = (mse / (max_pixel_value ** 2)) * 100
    return percent_difference <= 5.0

folder1 = '../datasets/circles_str/png/'
folder2 = '../datasets/circles_str/pngg/'
equal = not_equal = 0
for i in range(5000):
    img1_path = os.path.join(folder1, f'{i:05d}.png')
    img2_path = os.path.join(folder2, f'{i:05d}.png')
    if os.path.exists(img1_path) and os.path.exists(img2_path):
        if compare_images(img1_path, img2_path):
            equal += 1
        else:
            not_equal += 1
    else:
        print(f'Одно из изображений {i:05d} отсутствует.')

print(f"совпадает: {equal} изображений")
print(f"не совпадает: {not_equal} изображений")
print(f"совпадает: {equal/(equal+not_equal)} процентов изображений")

совпадает: 5000 изображений
не совпадает: 0 изображений
совпадает: 1.0 процентов изображений


In [41]:
for i in range(5000):
    svg_file = os.path.join('../datasets/circles/svg/', f'{i:05d}.svg')
    png_file = os.path.join('../datasets/circles/pngg/', f'{i:05d}.png')
    # Convert SVG to PNG
    try:
        cairosvg.svg2png(url=svg_file, write_to=png_file)
    except Exception as e:
        print(f"Error converting {svg_file}: {e}")
        continue  # Skip to the next iteration if there's an error
    # Open the PNG file with Pillow
    try:
        with Image.open(png_file) as image:
            # Create a new image with a white background
            background = Image.new('RGB', image.size, (255, 255, 255))
            # Paste the original image on top of the background
            background.paste(image.convert('RGBA'), (0, 0), image.convert('RGBA'))
            # Save the final image
            background.save(png_file)
    except Exception as e:
        print(f"Error processing {png_file}: {e}")

In [8]:
import cairosvg
def convert_svg_to_png(svg_folder, png_folder, num_images=5000):
    os.makedirs(png_folder, exist_ok=True)
    for i in range(num_images):
        svg_file = os.path.join(svg_folder, f'{i:05d}.svg')
        png_file = os.path.join(png_folder, f'{i:05d}.png')
        
        try:
            cairosvg.svg2png(url=svg_file, write_to=png_file)
            with Image.open(png_file) as image:
                background = Image.new('RGB', image.size, (255, 255, 255))
                background.paste(image, (0, 0), image)
                background.save(png_file)
        except Exception as e:
            print(f'Error processing {svg_file}: {e}')

In [10]:
svg_folder = '../datasets/circles_str/svg/'
png_folder = '../datasets/circles_str/pngg/'
convert_svg_to_png(svg_folder, png_folder)

Error processing ../datasets/circles_str/svg/00000.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00001.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00002.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00003.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00004.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00005.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00006.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00007.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00008.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00009.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00010.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00011.svg: bad transparency mask
Error processing ../datasets/circles_str/svg/00012.svg: bad tran

In [None]:
import os
from PIL import Image
import numpy as np

def compare_images(img1_path, img2_path):
    with Image.open(img1_path) as img1, Image.open(img2_path) as img2:
        # Resize images to the same size
        img1 = img1.resize((224, 224))
        img2 = img2.resize((224, 224))
        
        arr1 = np.array(img1)
        arr2 = np.array(img2)
        difference = np.abs(arr1.astype(np.float32) - arr2.astype(np.float32))
        mse = np.mean(difference ** 2)
        max_pixel_value = 255.0
        percent_difference = (mse / (max_pixel_value ** 2)) * 100
        
        return percent_difference <= 15.0
    
def compare_image_folders(folder1, folder2, num_images=5000):
    equal = not_equal = 0
    for i in range(num_images):
        img1_path = os.path.join(folder1, f'{i:05d}.png')
        img2_path = os.path.join(folder2, f'{i:05d}.png')
        
        if os.path.exists(img1_path) and os.path.exists(img2_path):
            if compare_images(img1_path, img2_path):
                equal += 1
            else:
                not_equal += 1
        else:
            print(f'One of the images {i:05d} is missing.')
    
    total = equal + not_equal
    print(f"Matches: {equal} images")
    print(f"Mismatches: {not_equal} images")
    if total > 0:
        print(f"Match percentage: {equal / total * 100:.2f}%")
    else:
        print("No images to compare.")
# Paths
svg_folder = '../datasets/circles/svg/'
png_folder = '../datasets/circles/pngg/'

# Compare images
compare_image_folders('../datasets/circles/png/', png_folder)

Matches: 2806 images
Mismatches: 2194 images
Match percentage: 56.12%
