In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
from glob import glob
from PIL import Image, ImageOps, ImageFile
from tqdm import tqdm

ImageFile.LOAD_TRUNCATED_IMAGES = True

In [2]:
def create_folder(folder_path):
    """Create the directory ``folder_path`` unless something already exists there.

    Missing parent directories are created as well, so nested output paths
    can be created with a single call.

    Args:
        folder_path: Path of the directory to create.
    """
    if not os.path.exists(folder_path):
        # makedirs also builds missing parents; exist_ok=True keeps the call
        # safe if the directory appears between the check above and here.
        os.makedirs(folder_path, exist_ok=True)


def get_json(json_path):
    """Read a UTF-8 encoded JSON file and return its parsed content.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the content is not valid UTF-8 or not valid JSON
            (``json.JSONDecodeError`` / ``UnicodeDecodeError``).
    """
    # The context manager closes the handle even when parsing fails
    with open(json_path, 'r', encoding='UTF8') as f1:
        return json.load(f1)


def get_image(source_path):
    """Open an image file and return it after ``ImageOps.exif_transpose``.

    Args:
        source_path: Path of the image file to open.

    Returns:
        The result of ``ImageOps.exif_transpose`` (Pillow's helper for
        applying the EXIF orientation tag) called on the opened image.
    """
    return ImageOps.exif_transpose(Image.open(source_path))


def get_meta_list(data, kind, apple_class):
    """Flatten one label record into a single metadata row.

    Args:
        data: Parsed label document; must provide ``images``,
            ``annotations`` and ``collection`` mappings with the keys
            read below.
        kind: Variety folder name, used for the path column.
        apple_class: Grade folder name, used for the path column.

    Returns:
        A list of 13 values: file name, ``"{kind}/{apple_class}"`` path,
        sugar grade, followed by the ``collection`` fields in the order of
        ``collection_keys``.

    Raises:
        KeyError: If any of the expected keys is missing from ``data``.
    """
    # Fields taken from data["collection"], in output order
    collection_keys = (
        "apple_kind",         # variety
        "sugar_content",      # sugar content measured from juice
        "sugar_content_nir",  # non-destructive sugar content
        "soil_ec",            # soil electrical conductivity
        "soil_temper",        # soil temperature
        "soil_humidty",       # soil humidity (key spelling as used by the labels)
        "soil_potential",     # soil water tension
        "temperature",        # temperature
        "humidity",           # humidity
        "sunshine",           # solar radiation
    )

    row = [
        data["images"]["img_file_name"],      # file name
        f'{kind}/{apple_class}',              # file path
        data["annotations"]["sugar_grade"],   # apple sugar-content quality class
    ]
    row.extend(data["collection"][key] for key in collection_keys)

    return row

In [5]:
# Variety and sugar-grade names; they are used as sub-folder names under both
# the label and the source-image directories, and for the output folders.
kind_list = ['시나노골드', '아리수', '홍로', '후지']
apple_class_list = ['당도A등급', '당도B등급', '당도C등급']
# NOTE(review): machine-specific absolute path; adjust before running elsewhere.
base_path = 'C:/Users/HP/Downloads/146.전북 장수 사과 당도 품질 데이터/01.데이터/2.Validation'
label_base_path = f'{base_path}/라벨링데이터'
source_base_path = f'{base_path}/원천데이터'

# Root output folder for the cropped images
create_folder('apple_bbox')

# Set to True to stop after the first successfully processed label file
test = False
cnt = 0
columns = [
    "파일이름", "파일경로", "사과_당도_품질_클래스", "품종", "착즙당도", "비파괴당도", "토양_전기전도도", "토양_온도",
    "토양_습도", "토양_수분장력", "온도", "습도", "일사량"
]
# result[0] is the header row; every following entry is one metadata row
result = [columns]
# (json_path, error) for every label file that could not be processed
skipped = []
for kind in tqdm(kind_list):
    kind_path = f'apple_bbox/{kind}'
    create_folder(kind_path)

    for apple_class in apple_class_list:
        # Sorted so the CSV row order does not depend on the OS listing order
        label_list = sorted(glob(f'{label_base_path}/{kind}/{apple_class}/*.json'))

        class_path = f'{kind_path}/{apple_class}'
        create_folder(class_path)

        for json_path in label_list:
            try:
                data = get_json(json_path)
                filename = data['images']['img_file_name']
                bbox = data['annotations']['bbox']
                # NOTE(review): bbox is read as (top, left, bottom, right);
                # confirm this order against the dataset's annotation spec.
                top, left, bottom, right = bbox
                # Build the metadata row before writing the image so a label
                # with missing fields does not leave a crop without a CSV row.
                meta_row = get_meta_list(data, kind, apple_class)
                img_path = f'{source_base_path}/{kind}/{apple_class}/{filename}'

                img = get_image(img_path)
                # Enlarge the annotated box by fixed pixel margins before cropping
                img2 = img.crop(
                    (left - 500, top - 300, right + 700, bottom + 850))
                img2.save(f'{class_path}/{filename}')
            except Exception as err:
                # Keep going on a bad record, but remember what failed and why
                # (KeyboardInterrupt is no longer swallowed, so the run can be stopped).
                skipped.append((json_path, repr(err)))
                continue

            result.append(meta_row)
            cnt += 1
            if test: break
        if test: break
    if test: break


# Use the header row as column names; passing it as data would write a
# numeric 0..12 header with the real names as the first record.
df = pd.DataFrame(result[1:], columns=result[0])
df.to_csv('apple.csv', index=False)

print('-' * 100)
print(f'total image count: {cnt}')
if skipped:
    print(f'skipped label files: {len(skipped)} (see `skipped` for details)')
print('apple.csv successfully saved!!')

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [35:15<00:00, 528.96s/it]

----------------------------------------------------------------------------------------------------
total image count: 48020
apple.csv successfully saved!!



