In [70]:
from PIL import Image
import cv2
import numpy as np
import os

# 이미지 패딩 및 리사이즈 함수
def pad_and_resize(image_path,  # .jpg or .jpeg
                   output_path,
                   target_size=(640, 640)):
    """Pad an image to a square with black borders, resize it, and save it as JPEG.

    The shorter side is zero-padded on both ends so the content stays
    centered; when the difference is odd, the extra pixel goes on the
    leading (top/left) side. The square result is resized to ``target_size``
    and written to ``output_path`` as JPEG data.

    Args:
        image_path: Path of the source image (.jpg or .jpeg).
        output_path: Destination file path. Its parent directory is created
            when missing. The data is always JPEG-encoded, whatever the
            extension of this path is.
        target_size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        None. Problems (missing file, unreadable image, encoding failure)
        are reported with ``print`` and the function returns early.
    """
    if not os.path.exists(image_path):
        print(f"Error: The file {image_path} does not exist.")
        return

    # Per the original note, cv2.imread() cannot open paths containing Korean
    # characters, so the raw bytes are read with NumPy and decoded in memory.
    img_array = np.fromfile(image_path, np.uint8)

    # A zero-byte file yields an empty buffer, which cv2.imdecode does not
    # accept; report it the same way as any other undecodable file.
    if img_array.size == 0:
        print(f"Error: Failed to load image from {image_path}")
        return

    # Decoded in BGR channel order.
    image_BGR = cv2.imdecode(img_array, cv2.IMREAD_COLOR)
    if image_BGR is None:
        print(f"Error: Failed to load image from {image_path}")
        return

    # No BGR<->RGB conversion is needed: zero padding and resizing treat each
    # channel independently, and the encoder below expects BGR anyway.
    height, width = image_BGR.shape[0:2]
    diff = abs(height - width)

    # Split the missing length over both ends; an odd remainder goes first.
    padding = [diff - diff // 2, diff // 2]

    # Pad whichever axis is shorter (rows when the image is landscape).
    if height < width:
        margin_list = [padding, [0, 0]]
    else:
        margin_list = [[0, 0], padding]

    # Color images carry a channel axis that must not be padded.
    if image_BGR.ndim == 3:
        margin_list.append([0, 0])

    padded_image = np.pad(image_BGR, margin_list, mode='constant')
    resized_image = cv2.resize(padded_image, target_size)

    # NOTE: per the original author's note, no manual EXIF-orientation
    # rotation is needed while OpenCV is used for decoding.

    # Make sure the destination folder exists before writing.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Encode in memory and write with ndarray.tofile so that Korean output
    # paths work as well.
    result, encoded_img = cv2.imencode('.jpg', resized_image)
    if result:
        encoded_img.tofile(output_path)
    else:
        print("이미지 인코딩 실패")

### test code ###
# Smoke test: one sample photo per device family, written to the current
# working directory.
images = [
    "2_09_1_1_1_1_20210716_0000037039.jpg",  # Samsung
    "2_09_1_1_1_1_20210719_0000006002.jpg",  # Apple
]

for image in images:
    image_path, output_path = f'./원천데이터/09.점자블럭/1.불량/{image}', f'./{image}'
    pad_and_resize(image_path, output_path)

In [73]:
import os

# Batch job: pad and resize every JPEG below root_path/<folder>/<sub_folder>
# into the sibling directory "<sub_folder>640".
root_path = "./원천데이터"

# Full set: ["09.점자블럭", "12.보도블록", "13.자전거도로"]; this run is
# limited to the two folders below.
folders = ["12.보도블록", "13.자전거도로"]
sub_folders = ["0.양호", "1.불량"]

for folder in folders:
    for sub_folder in sub_folders:
        image_path = f"{root_path}/{folder}/{sub_folder}"
        output_path = f"{root_path}/{folder}/{sub_folder}640"

        print(image_path)

        # The destination folder is not guaranteed to exist yet; create it so
        # the run does not depend on manual preparation.
        os.makedirs(output_path, exist_ok=True)

        # A very small number of files use the .jpeg extension.
        image_names = [f for f in os.listdir(image_path) if f.endswith(('.jpg', '.jpeg'))]
        total = len(image_names)

        for i, image_name in enumerate(image_names):
            # Progress line every 200 images.
            if i % 200 == 0:
                print(f"{i+1} / {total}")
            pad_and_resize(f"{image_path}/{image_name}", f"{output_path}/{image_name}")

./원천데이터/12.보도블록/0.양호
1 / 1416
201 / 1416
401 / 1416
601 / 1416
801 / 1416
1001 / 1416
1201 / 1416
1401 / 1416
./원천데이터/12.보도블록/1.불량
1 / 3624
201 / 3624
401 / 3624
601 / 3624
801 / 3624
1001 / 3624
1201 / 3624
1401 / 3624
1601 / 3624
1801 / 3624
2001 / 3624
2201 / 3624
2401 / 3624
2601 / 3624
2801 / 3624
3001 / 3624
3201 / 3624
3401 / 3624
3601 / 3624
./원천데이터/13.자전거도로/0.양호
1 / 1380
201 / 1380
401 / 1380
601 / 1380
801 / 1380
1001 / 1380
1201 / 1380
./원천데이터/13.자전거도로/1.불량
1 / 3540
201 / 3540
401 / 3540
601 / 3540
801 / 3540
1001 / 3540
1201 / 3540
1401 / 3540
1601 / 3540
1801 / 3540
2001 / 3540
2201 / 3540
2401 / 3540
2601 / 3540
2801 / 3540
3001 / 3540
3201 / 3540
3401 / 3540


### 누락된 파일이 없는지 개수 확인  

In [74]:
# Verify that every source image has a resized counterpart in "<sub_folder>640".
folders = ["09.점자블럭", "12.보도블록", "13.자전거도로"]
sub_folders = ["0.양호", "1.불량"]

root_path = "./원천데이터"

for folder in folders:
    for sub_folder in sub_folders:
        path = f"{root_path}/{folder}/{sub_folder}"
        output_path = f"{root_path}/{folder}/{sub_folder}640"

        # NOTE: the printed labels say "json"/"txt" although both counts are
        # image files; they are kept unchanged so the recorded output of this
        # cell still matches.
        print("\n",path)
        source_names = [f for f in os.listdir(path) if f.endswith(('.jpg', '.jpeg'))]
        source_total = len(source_names)
        print("original json file counts : ", source_total)

        print("\n", output_path)
        resized_names = [f for f in os.listdir(output_path) if f.endswith(('.jpg', '.jpeg'))]
        resized_total = len(resized_names)
        print("txt file counts : ", resized_total)

        # Equal counts alone do not prove that the same files exist, so compare
        # the names and report any source image that has no resized output.
        missing_names = sorted(set(source_names) - set(resized_names))
        if missing_names:
            print("missing resized files : ", len(missing_names), missing_names[:10])


 ./원천데이터/09.점자블럭/0.양호
original json file counts :  504

 ./원천데이터/09.점자블럭/0.양호640
txt file counts :  504

 ./원천데이터/09.점자블럭/1.불량
original json file counts :  1578

 ./원천데이터/09.점자블럭/1.불량640
txt file counts :  1578

 ./원천데이터/12.보도블록/0.양호
original json file counts :  1416

 ./원천데이터/12.보도블록/0.양호640
txt file counts :  1416

 ./원천데이터/12.보도블록/1.불량
original json file counts :  3624

 ./원천데이터/12.보도블록/1.불량640
txt file counts :  3624

 ./원천데이터/13.자전거도로/0.양호
original json file counts :  1380

 ./원천데이터/13.자전거도로/0.양호640
txt file counts :  1380

 ./원천데이터/13.자전거도로/1.불량
original json file counts :  3540

 ./원천데이터/13.자전거도로/1.불량640
txt file counts :  3540


### 이미지 사이즈 탐색  
이미지마다 크기가 다름  

In [40]:
import os
import cv2
import numpy as np

# Explore image dimensions: print (width, height) of every landscape image.
root_path = "./원천데이터"

# Full set: ["09.점자블럭", "12.보도블록", "13.자전거도로"] x ["0.양호", "1.불량"];
# narrowed to a single folder for this exploration.
folders = ["09.점자블럭"]
sub_folders = ["1.불량"]

for folder in folders:
    for sub_folder in sub_folders:
        image_path = f"{root_path}/{folder}/{sub_folder}"

        print(image_path)

        # A very small number of files use the .jpeg extension.
        image_names = [f for f in os.listdir(image_path) if f.endswith(('.jpg', '.jpeg'))]

        for image_name in image_names:
            # Per the original note, cv2.imread() cannot open paths containing
            # Korean characters, so read the bytes with NumPy and decode them.
            img_array = np.fromfile(f"{image_path}/{image_name}", np.uint8)

            # cv2.imdecode does not accept an empty buffer (zero-byte file).
            image_BGR = cv2.imdecode(img_array, cv2.IMREAD_COLOR) if img_array.size else None

            if image_BGR is None:
                # Name the failing file and skip it; continuing would crash on
                # the shape lookup below.
                print(f"Error: Failed to load image from {image_path}/{image_name}")
                continue

            # Only the dimensions are needed, so no color conversion is done.
            height, width = image_BGR.shape[0:2]

            # NOTE(review): the original comment said this edge case does not
            # occur, but the recorded output lists many landscape images -
            # confirm what was meant.
            if width > height:
                print(f"({width}, {height})")

./원천데이터/09.점자블럭/1.불량
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4000, 2250)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(3264, 1836)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(2560, 1440)
(2848, 1314)
(2848, 1314)
(2848, 1314)
(2848, 1314)
(4000, 2250)
(3264, 1836)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(2848, 1314)
(4032, 2272)
(4032, 2272)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2272)
(4000, 2250)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2272)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(4032, 2268)
(403

KeyboardInterrupt: 

애플 디바이스 사진과  
삼성 디바이스 사진은  
EXIF 태그 중 하나인 orientation 값에 차이가 존재한다.  

orientation
ref. https://feel5ny.github.io/2018/08/06/JS_13/

사진을 resize 하면서 기존 이미지 파일에 있던 이미지 메타데이터가 사라진다.  
원래 사진이 orientation : 6 일 경우,  
이미지 뷰어나 cv2 라이브러리에서는 이 점을 감안하여 orientation : 1 로 보정해서 처리한다.  

전처리를 거친 이미지는 orientation 값이 None 이 되므로 다음 둘 중 하나로 방법을 정해야 할 듯하다.
1. orientation 값을 이전 이미지의 값으로 추가  
2. 회전  

In [42]:
from PIL import Image
from PIL.ExifTags import TAGS
import os

# Inspect the EXIF Orientation tag of sample photos and rotate them upright.
image_path = './원천데이터/09.점자블럭/1.불량'

# To scan the whole folder instead of one sample:
# images = [f for f in os.listdir(image_path) if f.endswith(('.jpg', '.jpeg'))]
images = ["2_09_1_1_1_1_20210717_0000000343.jpg"]

ORIENTATION_TAG = 274  # EXIF "Orientation"
MODEL_TAG = 272        # EXIF "Model"

# Counter-clockwise angle for Image.rotate per orientation value.
# Per the original notes: None/1 need no rotation (Apple devices), 6 and 8
# need rotation (Samsung devices), 3 was not observed in this data.
ROTATION_BY_ORIENTATION = {3: 180, 6: 270, 8: 90}

for img in images:
    # The context manager closes the underlying file handle after each image.
    with Image.open(f"{image_path}/{img}") as image:
        # Public Pillow API; returns an empty mapping when there is no EXIF.
        exif_data = image.getexif()
        if not exif_data:
            continue

        orientation = exif_data.get(ORIENTATION_TAG)

        if orientation == 6:
            print(f"{image_path}/{img}")
            print(exif_data.get(MODEL_TAG)) # Model
            print("orientation : ", orientation)

        angle = ROTATION_BY_ORIENTATION.get(orientation)
        rotated = image if angle is None else image.rotate(angle, expand=True)

        # Saving is disabled on purpose; enable to write the upright copy:
        # rotated.save(f'./{img}')

./원천데이터/09.점자블럭/1.불량/2_09_1_1_1_1_20210717_0000000343.jpg
SM-A505N
orientation :  6
