In [None]:
import random
import json
import cv2
import numpy as np
import os
from PIL import Image, ImageDraw, ImageFont
from IPython.display import Image as ImageDisplay, display
from faker import Faker

# Shared Faker instance configured with the 'ko_KR' locale; the data-generation
# helpers in this file draw names, numbers, dates and addresses from it.
fake = Faker('ko_KR')  # Korean locale

def label_to_json(boxes, image_filename, json_path, img_width, img_height):
    """Write the bounding-box annotations of one image to a JSON file.

    Args:
        boxes: Iterable of ``(label, xmin, ymin, xmax, ymax)`` tuples.
        image_filename: Value stored under the ``"image"`` key.
        json_path: Destination file; it is created or overwritten.
        img_width: Value stored under ``"image_size" -> "width"``.
        img_height: Value stored under ``"image_size" -> "height"``.
    """
    payload = {
        "image": image_filename,
        "annotations": [
            {
                "label": tag,
                "coordinates": {
                    "xmin": left,
                    "ymin": top,
                    "xmax": right,
                    "ymax": bottom,
                },
            }
            for (tag, left, top, right, bottom) in boxes
        ],
        "image_size": {"width": img_width, "height": img_height},
    }
    with open(json_path, 'w') as handle:
        json.dump(payload, handle, indent=4)

def random_size(output_path, boxes):
    """Reload an image from disk, rescale it to a random width, and rescale its boxes.

    The new width is drawn uniformly from 400..1200 pixels (inclusive); the
    height is multiplied by the same factor and truncated to an integer.

    Args:
        output_path: Path of the image file, read with ``cv2.imread``.
        boxes: Iterable of ``(label, xmin, ymin, xmax, ymax)`` sequences in the
            pixel coordinates of the image on disk.

    Returns:
        A ``(image, boxes)`` pair: the resized image array and a list of
        ``(label, xmin, ymin, xmax, ymax)`` tuples whose coordinates have been
        multiplied by the resize factor and truncated to ``int``.

    Raises:
        FileNotFoundError: If the image is missing or cannot be decoded.
    """
    img = cv2.imread(output_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than on the .shape access.
        raise FileNotFoundError(
            f"Could not read image (missing file or unsupported format): {output_path}"
        )

    height, width = img.shape[:2]
    new_img_width = random.randint(400, 1200)
    resize_factor = new_img_width / width
    new_img_height = int(height * resize_factor)

    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(img, (new_img_width, new_img_height))

    corrected_box = []
    for box in boxes:
        label, xmin, ymin, xmax, ymax = box[:5]
        corrected_box.append((
            label,
            int(xmin * resize_factor),
            int(ymin * resize_factor),
            int(xmax * resize_factor),
            int(ymax * resize_factor),
        ))

    return resized, corrected_box

def perspective_warp(image, boxes, max_distortion=0.1):
    """Apply a random perspective warp to an image and to its bounding boxes.

    Each image corner is moved to a random point inside a margin of
    ``max_distortion`` times the image size measured from that corner. The
    output image keeps the input width and height.

    Args:
        image: Image array; only ``image.shape[:2]`` (height, width) is read
            here before the array is passed to ``cv2.warpPerspective``.
        boxes: Iterable of ``(label, xmin, ymin, xmax, ymax)`` sequences.
        max_distortion: Fraction of the width/height each corner may move.

    Returns:
        A ``(warped_image, boxes)`` pair. Each returned box is the
        axis-aligned rectangle enclosing the four warped corners of the
        original box, with coordinates truncated to ``int``.
    """
    height, width = image.shape[:2]

    src_points = np.float32([[0, 0], [width, 0], [width, height], [0, height]])

    near_x = int(width * max_distortion)
    near_y = int(height * max_distortion)
    far_x = int(width * (1 - max_distortion))
    far_y = int(height * (1 - max_distortion))

    # Destination corners, in the same order as src_points.
    dst_points = np.float32([
        [random.randint(0, near_x), random.randint(0, near_y)],            # top-left
        [random.randint(far_x, width), random.randint(0, near_y)],         # top-right
        [random.randint(far_x, width), random.randint(far_y, height)],     # bottom-right
        [random.randint(0, near_x), random.randint(far_y, height)],        # bottom-left
    ])

    perspective_matrix = cv2.getPerspectiveTransform(src_points, dst_points)
    warped_image = cv2.warpPerspective(image, perspective_matrix, (width, height))

    corrected_box = []
    for bbox in boxes:
        label, xmin, ymin, xmax, ymax = bbox[:5]
        corners = np.float32([
            [[xmin, ymin]], [[xmax, ymin]], [[xmax, ymax]], [[xmin, ymax]],
        ])
        warped = cv2.perspectiveTransform(corners, perspective_matrix).reshape(-1, 2)

        # A warped rectangle is a general quadrilateral, so the enclosing box
        # must take the extremes over all four corners on both axes.
        xs, ys = warped[:, 0], warped[:, 1]
        corrected_box.append((
            label,
            int(xs.min()),
            int(ys.min()),
            int(xs.max()),
            int(ys.max()),
        ))

    return warped_image, corrected_box

def add_random_noise(image):
    """Return ``image`` with random per-pixel noise added.

    A three-channel ``uint8`` noise array with values in 0..49 is generated
    with the same height and width as ``image`` and combined with it through
    ``cv2.add``.
    """
    rows, cols = image.shape[0], image.shape[1]
    speckle = np.random.randint(0, 50, (rows, cols, 3), dtype='uint8')
    return cv2.add(image, speckle)

def adjust_brightness(image):
    """Scale the brightness of a BGR image by a random factor in [0.5, 1.5].

    The image is converted to HSV, its value channel is multiplied by the
    factor and clipped to 0..255, and the result is converted back to BGR.

    Args:
        image: BGR image array (assumed 8-bit, since the HSV data is cast
            back to ``uint8`` before the reverse conversion).

    Returns:
        A new BGR image array with adjusted brightness.
    """
    brightness = random.uniform(0.5, 1.5)
    # Work in float so the multiplication cannot wrap around in uint8.
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV).astype(np.float64)
    # HSV channel order is hue, saturation, value; brightness is the value
    # channel (index 2), not saturation (index 1).
    hsv[:, :, 2] = np.clip(hsv[:, :, 2] * brightness, 0, 255)
    return cv2.cvtColor(hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)

def generate_random_data_korea():
    """Draw one fake identity record from the module-level ``fake`` generator.

    Returns:
        A ``(name, id_number, dob, address)`` tuple of strings:
        ``id_number`` is a 6-digit and a 7-digit random number joined by
        ``-``, ``dob`` is a birth date for an age of 18 to 90 formatted as
        ``YYYY.MM.DD``, and ``address`` has its newlines replaced by spaces.
    """
    person_name = fake.name()
    front_digits = fake.random_number(digits=6, fix_len=True)
    back_digits = fake.random_number(digits=7, fix_len=True)
    birth_date = fake.date_of_birth(minimum_age=18, maximum_age=90)
    raw_address = fake.address()

    return (
        person_name,
        f"{front_digits}-{back_digits}",
        birth_date.strftime("%Y.%m.%d"),
        raw_address.replace('\n', ' '),  # keep the address on a single line
    )

def fit_address(font, max_width):
    """Return a random address whose rendered length fits within ``max_width``.

    Fresh records are drawn from ``generate_random_data_korea`` until
    ``font.getlength`` of the address is no greater than ``max_width``.

    Args:
        font: Font object providing ``getlength(text)``.
        max_width: Largest accepted rendered text length.
    """
    record = generate_random_data_korea()
    candidate = record[3]
    while font.getlength(candidate) > max_width:
        # Too wide for the field: discard it and draw a new record.
        record = generate_random_data_korea()
        candidate = record[3].replace('\n', ' ')
    return candidate

# Function to create a dummy Korean ID
def create_dummy_korean_id(template_path, output_path, font_path="GowunBatang-Bold.ttf"):
    """Render a synthetic Korean ID card, augment it, and write image plus labels.

    Random name, ID number, address and date of birth are drawn onto the
    template at fixed positions. The result is saved as a JPEG, reloaded and
    randomly resized, then (each with probability 1/2) perspective-warped,
    given random noise, and brightness-adjusted. The final image overwrites
    the JPEG and the text bounding boxes are written next to it as JSON.

    Args:
        template_path: Path of the blank ID card template image.
        output_path: Output path without extension; ``.jpg`` and ``.json``
            are appended for the image and the annotation file.
        font_path: TrueType font file used for all text fields (should
            contain Korean glyphs).

    Returns:
        The path of the written JPEG image.

    Raises:
        OSError: If the final image cannot be written.
    """
    # os.makedirs("") raises, so only create a directory when the output
    # path actually contains one.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    image_path = output_path + ".jpg"
    json_path = output_path + ".json"

    # One font size per text field.
    font1 = ImageFont.truetype(font_path, size=24)
    font2 = ImageFont.truetype(font_path, size=22)
    font3 = ImageFont.truetype(font_path, size=19)
    font4 = ImageFont.truetype(font_path, size=17)

    name, id_number, dob, _ = generate_random_data_korea()
    # The address is drawn separately so that it fits the 222-unit wide field.
    fitted_address = fit_address(font3, 222)

    # Field label -> (nominal position, text, font)
    text_positions = {
        "Name": ((75, 89), name, font1),
        "IDNumber": ((62, 123), id_number, font2),
        "Address": ((54, 158), fitted_address, font3),
        "DOB": ((193, 225), dob, font4)
    }

    boxes = []
    # The context manager releases the template file once drawing is done.
    with Image.open(template_path) as template:
        draw = ImageDraw.Draw(template)
        for key, (position, text, font) in text_positions.items():
            # Text is drawn 5 px above the nominal position; the box is
            # measured at that same origin so it matches the drawn text.
            origin = (position[0], position[1] - 5)
            draw.text(origin, text, fill="black", font=font)
            boxes.append((key, *draw.textbbox(origin, text, font=font)))

        # Convert to RGB before saving; convert() returns a new image, so the
        # result stays usable after the template is closed.
        rendered = template.convert("RGB")

    rendered.save(image_path)

    # Reload at a random size, then optionally warp the image and its boxes.
    image, corrected_box = random_size(image_path, boxes)
    if random.randint(0, 1) == 1:
        image, corrected_box = perspective_warp(image, corrected_box, max_distortion=0.1)

    # Pixel-level augmentations do not move the boxes.
    if random.randint(0, 1) == 1:
        image = add_random_noise(image)
    if random.randint(0, 1) == 1:
        image = adjust_brightness(image)

    # cv2.imwrite reports failure through its return value.
    if not cv2.imwrite(image_path, image):
        raise OSError(f"Failed to write image: {image_path}")
    label_to_json(corrected_box, os.path.basename(image_path), json_path, image.shape[1], image.shape[0])
    return image_path

# Function to generate multiple images
def generate_images(num):
    """Generate ``num`` synthetic ID images and display each one.

    Files are written under ``synthetic_data/train/korea3/`` with the stem
    ``korean_id1_<index>``; every generated image is shown through IPython's
    ``display``.
    """
    base_output_path = "synthetic_data/train/korea3/"
    template_path = "south-korea-1_masked.jpg"
    os.makedirs(base_output_path, exist_ok=True)

    for index in range(num):
        stem = os.path.join(base_output_path, f"korean_id1_{index}")
        rendered_file = create_dummy_korean_id(template_path, stem)
        display(ImageDisplay(filename=rendered_file))

# Runs at cell/module execution time: generate and display 500 synthetic IDs.
generate_images(500)


In [None]:
import shutil

def zip_directory(directory, zip_filename):
    """Pack the contents of ``directory`` into ``<zip_filename>.zip``.

    Args:
        directory: Directory whose contents become the archive root.
        zip_filename: Archive path without the ``.zip`` extension, which
            ``shutil.make_archive`` appends.
    """
    shutil.make_archive(base_name=zip_filename, format='zip', root_dir=directory)

# Specify the directory to compress and the desired zip file name.
# The name is given without an extension; make_archive appends ".zip".
# NOTE(review): the archive is named "korea1" while the directory is
# "korea3" - confirm this naming is intended.
directory_to_compress = "synthetic_data/train/korea3/"
zip_file_name = "korea1"

# Call the function to compress the directory
zip_directory(directory_to_compress, zip_file_name)