In [1]:
import os
import cv2
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import random
random.seed(42)

from PIL import Image

In [2]:
# Load every training image listed in annotations.json together with its
# per-image label, grouped by the 'part' id stored in the image's JSON file.
key = 'oil'                                 # label field read from each per-image JSON
train_images = {0:[], 1:[], 2:[], 3:[]}     # part id -> list of RGB image arrays
train_jsons = {0:[], 1:[], 2:[], 3:[]}      # part id -> list of {key: value} dicts, parallel to train_images
train_path = './dataset/train'
with open(os.path.join(train_path, "annotations.json"), "r") as json_file:
    train_ann = json.load(json_file)
images_info = train_ann['images']

for image_info in images_info:
    image_name = image_info['file_name']
    # Swap only the file extension. str.replace('jpg', 'json') would also
    # rewrite a 'jpg' substring elsewhere in the name and would leave
    # '.JPG' / '.jpeg' names untouched.
    file_name = os.path.splitext(image_name)[0] + '.json'

    with open(os.path.join(train_path, file_name), "r") as json_file:
        img_json = json.load(json_file)
    label = {key: img_json[key]}
    # Negative label values are skipped (presumably "not annotated" — confirm).
    if label[key] < 0: continue

    image_path = os.path.join(train_path, image_name)   # path of the image file on disk
    image = cv2.imread(image_path)                      # load the image (BGR channel order)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        raise FileNotFoundError(f"cv2.imread could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)      # convert BGR -> RGB

    part = img_json['part']
    train_images[part].append(image)                    # store the image and its label
    train_jsons[part].append(label)

In [3]:
# Build a synthetic dataset: each sample stacks one randomly drawn image from
# part 1, part 2 and part 3 along axis 0. The sample's label is the rounded
# mean of the part-1 and part-2 labels; the part-3 label is not used
# (NOTE(review): confirm this is intended).
combined_images = {0: [], 1: [], 2: [], 3: [], 4: []}   # label -> list of stacked images

aim = 5000                                              # total number of samples to generate
# Per-label cap (1000 for aim=5000). Ceiling division keeps the total capacity
# >= aim, so the loop below can always reach `aim`.
per_label_cap = -(-aim // len(combined_images))

# The loop only terminates once every label bucket is full, so verify up front
# that each label can actually be produced from the available part-1/part-2
# labels. This also catches empty part lists. No random numbers are consumed
# here, so seeded results are unchanged.
part1_labels = {entry[key] for entry in train_jsons[1]}
part2_labels = {entry[key] for entry in train_jsons[2]}
reachable_labels = {round((a + b) / 2) for a in part1_labels for b in part2_labels}
missing_labels = sorted(set(combined_images) - reachable_labels)
if missing_labels:
    raise ValueError(
        f"labels {missing_labels} cannot be produced from the part 1 / part 2 "
        f"'{key}' values; the sampling loop would never terminate"
    )
if not train_images[3]:
    raise ValueError("no part 3 images available to combine")

# Highest valid index per part (random.randint is inclusive on both ends).
part1_num = len(train_images[1])-1
part2_num = len(train_images[2])-1
part3_num = len(train_images[3])-1

num = 0
while num < aim:
    part1_idx = random.randint(0, part1_num)
    part2_idx = random.randint(0, part2_num)
    part3_idx = random.randint(0, part3_num)
    # NOTE: Python's round() rounds halves to the nearest even integer
    # (0.5 -> 0, 1.5 -> 2, 2.5 -> 2, 3.5 -> 4).
    label = round((train_jsons[1][part1_idx][key]+train_jsons[2][part2_idx][key]) / 2)

    # Bucket already full: draw again so every label ends up equally represented.
    if len(combined_images[label]) >= per_label_cap: continue
    # Concatenating along axis 0 requires the three images to agree on all
    # other dimensions.
    combined_image = np.concatenate((train_images[1][part1_idx], train_images[2][part2_idx], train_images[3][part3_idx]), axis = 0)
    combined_images[label].append(combined_image)
    num += 1

In [None]:
# Check that the expected number of images was generated for each label.
labels_num = [len(combined_images[label_id]) for label_id in range(5)]

plt.bar(np.arange(5), labels_num)

In [5]:
# Write every combined image to disk as a zero-padded, sequentially numbered
# JPEG, with a JSON file of the same base name holding its {key: label} entry.
train_path = './dataset/train'
save_path = os.path.join(train_path, f'combined_random_{key}_images')
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create
# race of os.path.exists + os.mkdir; makedirs also creates missing parents.
os.makedirs(save_path, exist_ok=True)

image_num = 0   # running index across all labels, used as the file name

for label, images in combined_images.items():
    json_content = {key: label}   # identical for every image under this label
    for image in images:
        img_name = str(image_num).zfill(4)   # e.g. 0 -> '0000'
        img = Image.fromarray(image)
        img.save(os.path.join(save_path, img_name+'.jpg'))
        with open(os.path.join(save_path, img_name+'.json'), 'w') as json_file:
            json.dump(json_content, json_file)
        image_num += 1