# 라이브러리

In [1]:
import pandas as pd
from PIL import Image, ImageDraw
from tqdm import tqdm
from multiprocessing import Pool

import os
import random
import warnings
warnings.filterwarnings(action='ignore')

## 데이터가져오기, test는 절대 만지지 말것

In [3]:
train_df = pd.read_csv('./data/train.csv')

In [4]:
train_df.head()

Unnamed: 0,ID,img_path,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,TRAIN_00000,./train/TRAIN_00000.jpg,8,1,16,12,5,10,14,2,13,4,7,3,6,9,11,15
1,TRAIN_00001,./train/TRAIN_00001.jpg,3,7,2,13,1,5,10,4,11,14,9,15,16,12,8,6
2,TRAIN_00002,./train/TRAIN_00002.jpg,9,12,11,14,6,16,10,13,15,8,3,1,4,5,2,7
3,TRAIN_00003,./train/TRAIN_00003.jpg,14,1,6,15,9,4,8,5,16,10,13,7,2,3,11,12
4,TRAIN_00004,./train/TRAIN_00004.jpg,3,15,12,4,16,13,11,10,9,6,14,5,1,8,2,7


In [5]:
test_df.head()

NameError: name 'test_df' is not defined

In [14]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 70000 entries, 0 to 69999
Data columns (total 18 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   ID        70000 non-null  object
 1   img_path  70000 non-null  object
 2   1         70000 non-null  int64 
 3   2         70000 non-null  int64 
 4   3         70000 non-null  int64 
 5   4         70000 non-null  int64 
 6   5         70000 non-null  int64 
 7   6         70000 non-null  int64 
 8   7         70000 non-null  int64 
 9   8         70000 non-null  int64 
 10  9         70000 non-null  int64 
 11  10        70000 non-null  int64 
 12  11        70000 non-null  int64 
 13  12        70000 non-null  int64 
 14  13        70000 non-null  int64 
 15  14        70000 non-null  int64 
 16  15        70000 non-null  int64 
 17  16        70000 non-null  int64 
dtypes: int64(16), object(2)
memory usage: 9.6+ MB


# 원본 추출

In [None]:
from PIL import Image, ImageDraw
import pandas as pd
from tqdm import tqdm
from multiprocessing import Pool

def process_image(index, train_df):
    sample_df = train_df.iloc[index]

    train_path = sample_df['img_path'].split('/')[-1]
    train_img = Image.open('./train/' + train_path)

    width, height = train_img.size
    cell_width = width // 4
    cell_height = height // 4

    target_positions = list(sample_df)[2:]

    origin_img = Image.new("RGB", (width, height))

    # 각 타일을 올바른 위치로 이동
    for target_pos in range(1, 17):
        # 타일의 현재 위치 찾기
        current_pos = target_positions.index(target_pos) + 1
        current_row, current_col = divmod(current_pos - 1, 4)

        # 타일의 목표 위치
        target_row, target_col = divmod(target_pos - 1, 4)

        # 타일을 추출
        tile = train_img.crop((current_col * cell_width, current_row * cell_height, (current_col + 1) * cell_width, (current_row + 1) * cell_height))
        # 타일을 올바른 위치에 붙여넣기
        origin_img.paste(tile, (target_col * cell_width, target_row * cell_height))

    # 재구성된 이미지 저장
    origin_name = f'ORIGIN_{index:05}.jpg'
    origin_path = './origin/' + origin_name
    origin_img.save(origin_path)

    return {'ID': origin_name, 'img_path': origin_path}

def main(train_df):
    num_processes = 16
    pool = Pool(num_processes)

    # tqdm
    results = list(tqdm(pool.starmap(process_image, [(i, train_df) for i in range(len(train_df))]), total=len(train_df)))
    
    pool.close()
    pool.join()

    origin_df = pd.DataFrame(results)
    origin_df.to_csv('./origin.csv', index=False)
    print('./origin.csv 저장완료')

if __name__ == '__main__':
    train_df = pd.read_csv('./train.csv')  # 데이터 경로에 맞게 수정하세요
    main(train_df)


## 90도 회전

In [14]:
from PIL import Image
import pandas as pd
from multiprocessing import Pool
import random
from tqdm import tqdm

def rotate_and_shuffle_image(index, img_path):
    # 원본 이미지 불러오기
    img = Image.open(img_path)

    # 이미지 좌로 90도 회전
    img = img.rotate(90, expand=True)

    width, height = img.size
    cell_width = width // 4
    cell_height = height // 4

    # 타일 추출 및 재배치
    tiles = []
    for i in range(16):
        row, col = divmod(i, 4)
        tile = img.crop((col * cell_width, row * cell_height, (col + 1) * cell_width, (row + 1) * cell_height))
        tiles.append(tile)

    # 타일 임의로 재배치
    shuffled_indices = random.sample(range(16), 16)

    # 재배치된 이미지 생성
    shuffled_img = Image.new('RGB', (width, height))
    for i, idx in enumerate(shuffled_indices):
        row, col = divmod(i, 4)
        shuffled_img.paste(tiles[idx], (col * cell_width, row * cell_height))

    # 재배치된 이미지 저장
    aug_name = f'augment_2_left_90_{index:05}.jpg'
    aug_path = './augment_2_left_90/' + aug_name
    shuffled_img.save(aug_path)

    # 데이터프레임을 위한 정보 생성
    data = {'ID': aug_name, 'img_path': aug_path}
    for i, idx in enumerate(shuffled_indices, 1):
        data[str(i)] = idx + 1  # 인덱스를 1부터 시작하도록 조정

    return data

def main():
    origin_df = pd.read_csv('./origin.csv')  # 데이터 경로에 맞게 수정하세요
    num_processes = 16  # 코어 수

    with Pool(num_processes) as pool:
        args = [(index, row['img_path']) for index, row in origin_df.iterrows()]
        results = list(tqdm(pool.starmap(rotate_and_shuffle_image, args), total=len(origin_df)))

    aug_df = pd.DataFrame(results)
    aug_df.to_csv('./augment_2_left_90.csv', index=False)
    print('./augment_2_left_90.csv 저장완료')

if __name__ == '__main__':
    main()


100%|███████████████████████████████████████████████████████████████████████| 70000/70000 [00:00<00:00, 16123079.63it/s]


./augment_2_left_90.csv 저장완료


TypeError: 'NoneType' object is not callable

# 180도 회전

In [None]:
def rotate_and_shuffle_image_180(index, img_path):
    # 원본 이미지 불러오기
    img = Image.open(img_path)

    # 이미지 180도 회전
    img = img.rotate(180, expand=True)  # 180도 회전


    width, height = img.size
    cell_width = width // 4
    cell_height = height // 4

    # 타일 추출 및 재배치
    tiles = []
    for i in range(16):
        row, col = divmod(i, 4)
        tile = img.crop((col * cell_width, row * cell_height, (col + 1) * cell_width, (row + 1) * cell_height))
        tiles.append(tile)

    # 타일 임의로 재배치
    shuffled_indices = random.sample(range(16), 16)

    # 재배치된 이미지 생성
    shuffled_img = Image.new('RGB', (width, height))
    for i, idx in enumerate(shuffled_indices):
        row, col = divmod(i, 4)
        shuffled_img.paste(tiles[idx], (col * cell_width, row * cell_height))
    
    aug_name = f'augment_2_180_{index:05}.jpg'
    aug_path = './augment_2_180/' + aug_name
    shuffled_img.save(aug_path)

    # 데이터프레임을 위한 정보 생성
    data = {'ID': aug_name, 'img_path': aug_path}
    for i, idx in enumerate(shuffled_indices, 1):
        data[str(i)] = idx + 1  # 인덱스를 1부터 시작하도록 조정

    return data

def main_180():
    origin_df = pd.read_csv('./origin.csv')
    num_processes = 16  # 코어 수

    with Pool(num_processes) as pool:
        args = [(index, row['img_path']) for index, row in origin_df.iterrows()]
        results = list(tqdm(pool.starmap(rotate_and_shuffle_image_180, args), total=len(origin_df)))

    aug_df = pd.DataFrame(results)
    aug_df.to_csv('./augment_2_180.csv', index=False)
    print('./augment_2_180.csv 저장완료')

if __name__ == '__main__':
    main_180()

# 270도회전

In [21]:
def rotate_and_shuffle_image_right_90(index, img_path):
    # 원본 이미지 불러오기
    img = Image.open(img_path)

    # 이미지 오른쪽으로 90도 회전
    img = img.rotate(-90, expand=True)  # 오른쪽으로 90도 회전

    width, height = img.size
    cell_width = width // 4
    cell_height = height // 4

    # 타일 추출 및 재배치
    tiles = []
    for i in range(16):
        row, col = divmod(i, 4)
        tile = img.crop((col * cell_width, row * cell_height, (col + 1) * cell_width, (row + 1) * cell_height))
        tiles.append(tile)

    # 타일 임의로 재배치
    shuffled_indices = random.sample(range(16), 16)

    # 재배치된 이미지 생성
    shuffled_img = Image.new('RGB', (width, height))
    for i, idx in enumerate(shuffled_indices):
        row, col = divmod(i, 4)
        shuffled_img.paste(tiles[idx], (col * cell_width, row * cell_height))
    
    aug_name = f'augment_2_right_90_{index:05}.jpg'
    aug_path = './augment_2_right_90/' + aug_name
    shuffled_img.save(aug_path)

    # 데이터프레임을 위한 정보 생성
    data = {'ID': aug_name, 'img_path': aug_path}
    for i, idx in enumerate(shuffled_indices, 1):
        data[str(i)] = idx + 1  # 인덱스를 1부터 시작하도록 조정

    return data

def main_right_90():
    origin_df = pd.read_csv('./origin.csv')
    num_processes = 16  # 코어 수

    with Pool(num_processes) as pool:
        args = [(index, row['img_path']) for index, row in origin_df.iterrows()]
        results = list(tqdm(pool.starmap(rotate_and_shuffle_image_right_90, args), total=len(origin_df)))

    aug_df = pd.DataFrame(results)
    aug_df.to_csv('./augment_2_right_90.csv', index=False)
    print('./augment_2_right_90.csv 저장완료')

if __name__ == '__main__':
    main_right_90()

100%|███████████████████████████████████████████████████████████████████████| 70000/70000 [00:00<00:00, 14951432.50it/s]


./augment_2_right_90.csv 저장완료


# Augmentation이 잘 되었는지 확인하는용(path체크꼭)

In [33]:
def check_img_save_origin(train_df, show_num, save_origin=False):

    # 재정렬한 이미지 데이터 프레임 생성
    dict_origin = {'ID':[],
                   'img_path':[]}

    for i in range(1,16+1):
        dict_origin[str(i)] = [i]*len(train_df)

    # 출력할 이미지 개수 새기
    count = 0

    if save_origin == False:
       repeat = [i for i in range(show_num)]
    else:
       repeat = [i for i in range(len(train_df))]

    for index in tqdm(repeat):

      sample_df = train_df.iloc[index]

      # train 이미지 불러오기
      train_path = sample_df['img_path'].split('/')[-1]
      train_img = Image.open('./augment_2_right_90/'+train_path)
      raw_img = Image.open('./augment_2_right_90/'+train_path)

      # train 이미지에 숫자 표기
      draw = ImageDraw.Draw(train_img)

      width, height = train_img.size

      cell_width = width // 4
      cell_height = height // 4

      font_size = 50

      numbers = list(sample_df)[2:]

      for i, number in enumerate(numbers):
          row = i // 4
          col = i % 4
          x = col * cell_width + (cell_width - font_size) // 2
          y = row * cell_height + (cell_height - font_size) // 2
          draw.text((x, y), str(number), fill="red")

      # 정렬된 이미지 생성 및 저장
      i = 0
      dict_tile = {}

      for row in range(4):
          for col in range(4):
              left = col * cell_width
              upper = row * cell_height
              right = left + cell_width
              lower = upper + cell_height

              # 부분 이미지 추출
              tile = raw_img.crop((left, upper, right, lower))
              dict_tile[numbers[i]] = tile

              i += 1

      # 4x4 이미지 행렬 생성
      origin_img = Image.new("RGB", (width, height))

      # 각 부분 이미지 크기 계산
      tile_width = origin_img.width // 4
      tile_height = origin_img.height // 4

      # 16개 부분 이미지를 4x4 행렬로 배열
      i = 1
      for row in range(4):
          for col in range(4):
              tile = dict_tile[i]

              i += 1

              # 부분 이미지를 4x4 행렬 위치에 합성
              left = col * tile_width
              upper = row * tile_height
              right = left + tile_width
              lower = upper + tile_height
              origin_img.paste(tile, (left, upper, right, lower))

      # 재정렬된 이미지 저장
      if save_origin == False:
         pass
      else:
         origin_name = f'ORIGIN_{count:05}.jpg'
         origin_path = cloud_path+'/DATA/origin/'+origin_name
         origin_img.save(origin_path)

         dict_origin['ID'].append(origin_name)
         dict_origin['img_path'].append(origin_path)

      # train 및 재정렬된 이미지 출력
      fig = plt.figure()

      ax1 = fig.add_subplot(1, 2, 1)
      ax1.imshow(train_img)
      ax1.set_title('Train Image')
      ax1.axis('off')

      ax2 = fig.add_subplot(1, 2, 2)
      ax2.imshow(origin_img)
      ax2.set_title('Original Image')
      ax2.axis('off')

      if count > show_num:
         pass
      else:
         print(train_path)
         plt.show()
         print()

      count += 1

    # 재정렬한 이미지 데이터 프레임 저장
    if save_origin == False:
       pass

    else:
       origin_df = pd.DataFrame(dict_origin)
       origin_df.to_csv(data_path+'/origin.csv', index=False)