In [16]:
import os
import numpy as np
import pandas as pd
from PIL import Image
import cv2

In [21]:
BASE_IMAGE_DIR_PATH = '/home/jupyter/datasphere/project/satellite_hack/train_updated_titiles/images/'
BASE_MASKS_DIR_PATH = '/home/jupyter/datasphere/project/satellite_hack/train_updated_titiles/masks/'

images_folder_path = '/home/jupyter/datasphere/project/satellite_hack/train_splitted/images/'
masks_folder_path = '/home/jupyter/datasphere/project/satellite_hack/train_splitted/masks/'


In [20]:
!rm -rf ../train_splitted

In [22]:
def init_folder(path: str):
  if not os.path.exists(path):
      os.makedirs(path)


# init folders for images and masks if does not exist
init_folder(images_folder_path)
init_folder(masks_folder_path)

In [23]:
def get_count_of_files_in_dir(dir_name):
    files = os.listdir(dir_name)
    return len(files) - 1


COUNT_OF_IMAGES = get_count_of_files_in_dir(BASE_IMAGE_DIR_PATH)
COUNT_OF_IMAGES

21

In [24]:
def add_lead_zeros(_id: int):
  if 0 <= _id <= 9:
    return '00' + str(_id)
  elif 10 <= _id <= 99:
    return '0' + str(_id)
  else:
    return str(_id)

def image_name(_id: int):
  return f'train_image_{add_lead_zeros(_id)}.png'

def mask_name(_id: int):
  return f'train_mask_{add_lead_zeros(_id)}.png'

def image_path(_id: int):
  return BASE_IMAGE_DIR_PATH + image_name(_id)

def mask_path(_id: int):
  return BASE_MASKS_DIR_PATH + mask_name(_id)

Augmentation functions

In [25]:
import random
import albumentations as A


def rotate_image(image, mask, rotate_angle):
  transform = A.ShiftScaleRotate(border_mode=cv2.BORDER_CONSTANT, 
                                scale_limit=0.3,
                                rotate_limit=rotate_angle,
                                p=0.7)
  augmented_image = transform(
    image=np.array(image),
    mask=np.array(mask),
  )
  
  rotated_image = augmented_image['image']
  rotated_mask = augmented_image['mask']
  return Image.fromarray(rotated_image), Image.fromarray(rotated_mask)


def transform_image(image):
  
  '''
  Optical transforms
  '''
  
  transform = A.Compose([
      A.OpticalDistortion(p=0.5),
      A.GaussianBlur(p=0.5),
      A.Equalize(p=0.5),
      A.RandomBrightnessContrast(p=0.5),
      A.RandomGamma(p=0.5)
  ])
  random.seed(42)
  transformed = transform(image=np.array(image))
  return Image.fromarray(transformed['image'])

In [26]:
def get_augmented_images(image_id: int, rotation):
  
  base_image = Image.open(image_path(image_id))
  base_mask = Image.open(mask_path(image_id))
    
  _rotation = next(rotation)
  
  # rotations
  base_image, base_mask = rotate_image(base_image, base_mask, _rotation)
  
  # transform image
  base_image = transform_image(base_image)
  
  _image_name = f'image_{image_id}.png'
  _mask_name = f'mask_{image_id}.png'
  path_image_name = images_folder_path + _image_name
  path_mask_name = masks_folder_path + _mask_name
  base_image.save(path_image_name, quality=95)
  base_mask.save(path_mask_name, quality=95)

  return path_image_name, path_mask_name

Сгенерируем набор предобработанных картинок

In [27]:
def generate_augmented_images(images_count: int):
  
  '''
  Generate fragments using augmentation transform
  '''
  
  augmented_images = []
  rotations = []
  random.seed(42)
  
  for _ in range(images_count):
    rotations.append(random.randint(-180, 180))
  
  def get_rotation():
    yield from rotations
  
  rotation_generator = get_rotation()
  
  for image_id in range(images_count):
    augmented_images.append(get_augmented_images(image_id, rotation_generator))
  
  return augmented_images

In [28]:
# generate
final_df = pd.DataFrame(generate_augmented_images(COUNT_OF_IMAGES))

# save
final_df.to_csv('./metadata.prepr.csv', index=False)

Измерим итоговую папку

In [29]:
import os
from pathlib import Path

#Вычисляет размер папки, количество файлов и количество итераций функции
def folderSize(path):
    fsize = 0
    numfile = 0
    iteration = 0
    for file in Path(path).rglob('*'):
        if (os.path.isfile(file)):
            fsize += os.path.getsize(file)
            numfile += 1
        iteration += 1
    return fsize, numfile, iteration
  
  
folder = './train_splitted' # train fragments dir path

print("Вычисление размера выбранной папки...")
size, numfile, iteration = folderSize(folder)
print(f'Выбрана папка: {folder}')
print(f'Найдено файлов: {numfile}')
print("Размер папки:")
print(f'{size} Bytes')       
print(f'{size/1048576:.2f} Mb')
print(f'{size/1073741824:.2f} Gb')

Вычисление размера выбранной папки...
Выбрана папка: ./train_splitted
Найдено файлов: 0
Размер папки:
0 Bytes
0.00 Mb
0.00 Gb
