# Library

In [1]:
import pandas as pd
import numpy as np

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

In [4]:
from torchvision import transforms
import torchvision.models as models

In [5]:
import random
from tqdm.auto import tqdm
import os

In [6]:
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont

In [7]:
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
import warnings
# Install a blanket "ignore" filter: no warning category is given, so every
# warning raised after this cell is hidden.
# NOTE(review): this also hides deprecation/numerical warnings from the
# libraries imported above - consider narrowing it with `category=`.
warnings.filterwarnings(action='ignore')

# GPU

In [9]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


# Seed

In [10]:
def seed_everything(seed):
    """Seed every random number generator this notebook uses.

    Seeds Python's ``random`` module, NumPy's legacy global generator and
    PyTorch (CPU and CUDA), and configures cuDNN for reproducible runs.

    Args:
        seed: Integer seed shared by all generators.
    """
    random.seed(seed)
    # Only affects interpreters started after this point (e.g. worker
    # subprocesses); the hash seed of the running kernel is already fixed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select a different convolution algorithm from
    # run to run, which defeats the deterministic flag above, so it must be
    # disabled (the original set it to True).
    torch.backends.cudnn.benchmark = False

seed=45
seed_everything(seed) # Fix the seed

In [11]:
# Load the train and test tables from CSV files in the working directory.
# Later cells read an 'img_path' column from train_df and treat every column
# after the first two as tile numbers.
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')

In [None]:
# `data` is just another name for train_df; `df` is a new DataFrame object
# built from it.
data = train_df
df = pd.DataFrame(data)

# Function that slices a DataFrame by a positional row range
def split_dataframe(df, start=0, stop=70000):
    """Return the rows of ``df`` at positions ``start`` (inclusive) to ``stop`` (exclusive).

    The defaults reproduce the previously hard-coded ``0:70000`` range, so
    ``split_dataframe(df)`` behaves exactly as before.

    Args:
        df: DataFrame to slice.
        start: First row position to keep.
        stop: Row position to stop before; positions past the end of ``df``
            are clipped by ``iloc``.

    Returns:
        The selected rows as a DataFrame.
    """
    df1 = df.iloc[start:stop]

    return df1
# Call the function: df1 is the first 70,000 rows of df
df1 = split_dataframe(df)


In [None]:
def _reassemble_original(puzzle_img, numbers):
    """Return a new RGB image with the 4x4 tiles of ``puzzle_img`` put back in order.

    ``numbers[k]`` is the number attached to the tile found at row-major slot
    ``k`` of ``puzzle_img``; the tile numbered ``n`` is pasted at row-major
    slot ``n - 1`` of the result.
    """
    width, height = puzzle_img.size
    cell_width = width // 4
    cell_height = height // 4

    def slot_box(slot):
        # (left, upper, right, lower) of a row-major slot in the 4x4 grid.
        row, col = divmod(slot, 4)
        left = col * cell_width
        upper = row * cell_height
        return (left, upper, left + cell_width, upper + cell_height)

    # Cut the shuffled image into tiles, keyed by the number each tile carries.
    tiles_by_number = {}
    for slot in range(16):
        tiles_by_number[numbers[slot]] = puzzle_img.crop(slot_box(slot))

    # Lay the tiles out in numeric order; a missing number raises KeyError.
    restored = Image.new("RGB", (width, height))
    for slot in range(16):
        restored.paste(tiles_by_number[slot + 1], slot_box(slot))
    return restored


def check_img_save_origin(train_df, show_num, save_origin=False):
    """Rebuild the un-shuffled image for rows of ``train_df`` and optionally save it.

    For each processed row the image ``./train/<file name of img_path>`` is
    cut into a 4x4 grid and the tiles are rearranged according to the row's
    tile numbers (every column after the first two).

    When ``save_origin`` is true, every row is processed, each rebuilt image
    is written to ``./origin/ORIGIN_<index>.jpg`` and a manifest with the
    columns ``ID``, ``img_path`` and ``'1'``..``'16'`` (always 1..16, because
    the saved images are in order) is written to ``./origin.csv``.

    When ``save_origin`` is false, only the first ``show_num`` rows are
    processed and nothing is written.

    Changes from the previous version:
      * each image file is opened once and closed again instead of being
        opened twice and left open;
      * the tile numbers are no longer drawn onto a copy of the input: that
        copy was never displayed or saved, and drawing it reloaded
        ``arial.ttf`` on every iteration;
      * ``show_num`` larger than the frame no longer raises ``IndexError``;
      * ``./origin/`` is created if it does not exist;
      * ``save_origin`` is tested by truthiness, so ``None`` no longer
        triggers a save.

    Args:
        train_df: DataFrame whose rows hold ``ID``, ``img_path`` and 16 tile
            numbers, in that column order.
        show_num: Number of rows to process when ``save_origin`` is false.
        save_origin: Whether to write the rebuilt images and the manifest.

    Returns:
        None.
    """
    saving = bool(save_origin)
    n_rows = len(train_df) if saving else min(show_num, len(train_df))

    if saving:
        os.makedirs('./origin', exist_ok=True)

    saved_names = []
    saved_paths = []

    for index in tqdm(range(n_rows)):
        sample_df = train_df.iloc[index]

        # Only the file name of img_path is used; images are read from ./train/.
        train_path = sample_df['img_path'].split('/')[-1]
        numbers = list(sample_df)[2:]

        # Rebuild inside the `with` block so the pixel data is read before
        # the file handle is released.
        with Image.open('./train/' + train_path) as raw_img:
            origin_img = _reassemble_original(raw_img, numbers)

        if saving:
            origin_name = f'ORIGIN_{index:05}.jpg'
            origin_path = './origin/' + origin_name
            origin_img.save(origin_path)

            saved_names.append(origin_name)
            saved_paths.append(origin_path)

    if saving:
        # Size the constant tile-number columns from what was actually saved
        # so all columns always have the same length.
        dict_origin = {'ID': saved_names,
                       'img_path': saved_paths}
        for i in range(1, 16 + 1):
            dict_origin[str(i)] = [i] * len(saved_names)

        origin_df = pd.DataFrame(dict_origin)
        origin_df.to_csv('./origin.csv', index=False)

In [12]:
# Rebuild and save the ordered image for every row of df1. With
# save_origin=True the function walks the whole frame, so the show_num
# argument (15) has no effect here.
check_img_save_origin(df1, 15, save_origin=True)

NameError: name 'check_img_save_origin' is not defined

In [13]:
# Load the manifest CSV (check_img_save_origin writes a file with this name)
# and display it.
origin_df = pd.read_csv('origin.csv')
origin_df

Unnamed: 0,ID,img_path,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,ORIGIN_00000.jpg,./origin/ORIGIN_00000.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
1,ORIGIN_00001.jpg,./origin/ORIGIN_00001.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
2,ORIGIN_00002.jpg,./origin/ORIGIN_00002.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
3,ORIGIN_00003.jpg,./origin/ORIGIN_00003.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
4,ORIGIN_00004.jpg,./origin/ORIGIN_00004.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,ORIGIN_69995.jpg,./origin/ORIGIN_69995.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
69996,ORIGIN_69996.jpg,./origin/ORIGIN_69996.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
69997,ORIGIN_69997.jpg,./origin/ORIGIN_69997.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
69998,ORIGIN_69998.jpg,./origin/ORIGIN_69998.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16


In [103]:
# Display origin_df again.
origin_df

Unnamed: 0,ID,img_path,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
0,ORIGIN_00000.jpg,./origin/ORIGIN_00000.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
1,ORIGIN_00001.jpg,./origin/ORIGIN_00001.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
2,ORIGIN_00002.jpg,./origin/ORIGIN_00002.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
3,ORIGIN_00003.jpg,./origin/ORIGIN_00003.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
4,ORIGIN_00004.jpg,./origin/ORIGIN_00004.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
69995,ORIGIN_69995.jpg,./origin/ORIGIN_69995.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
69996,ORIGIN_69996.jpg,./origin/ORIGIN_69996.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
69997,ORIGIN_69997.jpg,./origin/ORIGIN_69997.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16
69998,ORIGIN_69998.jpg,./origin/ORIGIN_69998.jpg,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16


In [32]:
# Function that slices a DataFrame by a positional row range
def split_dataframe(origin_df, start=0, stop=10):
    """Return the rows of ``origin_df`` at positions ``start`` (inclusive) to ``stop`` (exclusive).

    The defaults reproduce the previously hard-coded ``0:10`` range, so
    ``split_dataframe(origin_df)`` behaves exactly as before.

    Args:
        origin_df: DataFrame to slice.
        start: First row position to keep.
        stop: Row position to stop before; positions past the end of the
            frame are clipped by ``iloc``.

    Returns:
        The selected rows as a DataFrame.
    """
    df1 = origin_df.iloc[start:stop]

    return df1
# Call the function: df1 is the first 10 rows of origin_df
df1 = split_dataframe(origin_df)


In [106]:
import random
from PIL import Image
import pandas as pd
from tqdm import tqdm

def _random_tile_layout(fixed):
    """Return 16 tile numbers in row-major slot order.

    Slot ``s`` (1-based) keeps tile ``s`` when ``s`` is in ``fixed``; every
    other slot receives one of the remaining numbers in random order. The
    draw uses the global ``random`` module exactly as the original code did
    (shuffle the ascending list of movable numbers, then pop from its end
    for each free slot), so seeded runs produce the same layouts.
    """
    movable = [number for number in range(1, 16 + 1) if number not in fixed]
    random.shuffle(movable)
    return [slot if slot in fixed else movable.pop()
            for slot in range(1, 16 + 1)]


def _shuffle_tiles(source_img, numbers, layout):
    """Return a new RGB image whose slot ``k`` shows the tile numbered ``layout[k]``.

    ``numbers[k]`` is the number attached to the tile found at row-major slot
    ``k`` of ``source_img``.
    """
    width, height = source_img.size
    cell_width = width // 4
    cell_height = height // 4

    def slot_box(slot):
        # (left, upper, right, lower) of a row-major slot in the 4x4 grid.
        row, col = divmod(slot, 4)
        left = col * cell_width
        upper = row * cell_height
        return (left, upper, left + cell_width, upper + cell_height)

    # Cut the source into tiles, keyed by the number each tile carries.
    tiles_by_number = {}
    for slot in range(16):
        tiles_by_number[numbers[slot]] = source_img.crop(slot_box(slot))

    shuffled = Image.new("RGB", (width, height))
    for slot in range(16):
        shuffled.paste(tiles_by_number[layout[slot]], slot_box(slot))
    return shuffled


def aug_img_save(train_df, show_num, save_aug=False, fixed_tiles=(1, 4, 13, 16)):
    """Create tile-shuffled copies of the images listed in ``train_df``.

    For each processed row the image ``./origin/<file name of img_path>`` is
    cut into a 4x4 grid and reassembled in a random layout in which the tiles
    listed in ``fixed_tiles`` stay at their own slot.

    When ``save_aug`` is true, every row is processed, each shuffled image is
    written to ``./augment/fixedAUGMENT_<index>.jpg`` and a manifest with the
    columns ``ID``, ``img_path`` and ``'1'``..``'16'`` (the tile number shown
    at each slot) is written to ``./augment_real_real_final.csv``.

    When ``save_aug`` is false, only the first ``show_num`` rows are
    processed and nothing is written.

    Changes from the previous version:
      * the source image is closed after use instead of being left open;
      * ``show_num`` larger than the frame no longer raises ``IndexError``;
      * ``./augment/`` is created if it does not exist;
      * the previously hard-coded fixed tiles (1, 4, 13, 16) are now the
        default of the new ``fixed_tiles`` parameter;
      * ``save_aug`` is tested by truthiness, so ``None`` no longer triggers
        a save.

    Args:
        train_df: DataFrame whose rows hold ``ID``, ``img_path`` and 16 tile
            numbers, in that column order.
        show_num: Number of rows to process when ``save_aug`` is false.
        save_aug: Whether to write the shuffled images and the manifest.
        fixed_tiles: Tile numbers (1-16) that must stay at their own slot.

    Raises:
        ValueError: If ``fixed_tiles`` contains a value outside 1..16.

    Returns:
        None.
    """
    fixed = frozenset(fixed_tiles)
    if not fixed <= frozenset(range(1, 16 + 1)):
        raise ValueError("fixed_tiles must only contain tile numbers from 1 to 16")

    saving = bool(save_aug)
    n_rows = len(train_df) if saving else min(show_num, len(train_df))

    # Manifest columns, in output order: ID, img_path, then '1'..'16'.
    dict_augment = {'ID': [],
                    'img_path': []}
    for i in range(1, 16 + 1):
        dict_augment[str(i)] = []

    if saving:
        os.makedirs('./augment', exist_ok=True)

    for index in tqdm(range(n_rows)):
        sample_df = train_df.iloc[index]

        # Only the file name of img_path is used; images are read from ./origin/.
        train_path = sample_df['img_path'].split('/')[-1]
        numbers = list(sample_df)[2:]

        # Build the shuffled image inside the `with` block so the pixel data
        # is read before the file handle is released.
        with Image.open('./origin/' + train_path) as source_img:
            random_numbers = _random_tile_layout(fixed)
            augment_img = _shuffle_tiles(source_img, numbers, random_numbers)

        if saving:
            augment_name = f'fixedAUGMENT_{index:05}.jpg'
            augment_path = './augment/' + augment_name
            augment_img.save(augment_path)

            dict_augment['ID'].append(augment_name)
            dict_augment['img_path'].append(augment_path)

            # Record which tile number ended up at each slot.
            for i in range(1, 16 + 1):
                dict_augment[str(i)].append(random_numbers[i - 1])

    if saving:
        augment_df = pd.DataFrame(dict_augment)
        augment_df.to_csv('./augment_real_real_final.csv', index=False)

# Example usage
# Assuming train_df is your DataFrame with appropriate columns (ID, img_path, 1, 2, ..., 16)
# aug_img_save(train_df, show_num=10, save_aug=True)


In [107]:
# Write one shuffled copy for every row of origin_df. With save_aug=True the
# function walks the whole frame, so the show_num argument (15) has no
# effect here.
aug_img_save(origin_df, 15, save_aug=True)

100%|███████████████████████████████████████████████████████████████████████████| 70000/70000 [08:38<00:00, 134.98it/s]


다시 df1 부터 df5 까지 증강 하기

In [None]:
# Scratch note: these are the non-zero entries of `fixed_positions` in
# aug_img_save, i.e. the tile numbers that are never moved.
1,4,13,16