In [12]:
import os
import sys
from os.path import dirname as up
from sklearn.model_selection import train_test_split
from pathlib import Path
from tqdm import tqdm
import shutil
import glob

In [13]:
# Project root: the parent of the current working directory.
maindir = os.path.dirname(os.path.abspath(os.getcwd()))

# Name of the imagery folder to process; edit this to work on a different folder.
folder = 'image_after_2010_1024'

# Patch directory, located two levels above the project root.
# For the MA imagery, swap the 'VABP' component for 'MA_ortho_2009'.
datadir = os.path.join(
    up(up(maindir)),
    'VIMS', 'VABP', 'paper', folder, 'train_patches',
)

In [14]:
# Collect the full path of every .tif patch in `datadir`.
# os.listdir() returns entries in arbitrary, filesystem-dependent order, so the
# result is sorted: the seeded train/val/test split that consumes this list is
# only repeatable when its input order is stable.
# The suffix test includes the dot so it selects the same files as the '*.tif'
# glob pattern used by the merge step.
allfiles = sorted(
    os.path.join(datadir, name)
    for name in os.listdir(datadir)
    if name.endswith('.tif')
)

# Fail early with a clear message rather than an opaque error from the split.
if not allfiles:
    raise FileNotFoundError(f"No .tif patches found in {datadir}")

In [15]:
# Split settings, named once so they can be tuned in a single place.
# For the MA groins data, set both fractions to 0.5.
TEST_FRACTION = 0.15  # share of all patches held out for testing
VAL_FRACTION = 0.2    # share of the remaining (non-test) patches used for validation
SPLIT_SEED = 25       # fixed seed shared by both splitting stages

# Two-stage split: carve off the test set first, then divide what is left
# into training and validation subsets.
training_data_temp, testing_data = train_test_split(
    allfiles, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)
training_data, val_data = train_test_split(
    training_data_temp, test_size=VAL_FRACTION, random_state=SPLIT_SEED
)

# Sanity check: no patch may be lost or duplicated across the three subsets.
assert len(training_data) + len(val_data) + len(testing_data) == len(allfiles)
assert len(set(training_data) | set(val_data) | set(testing_data)) == len(set(allfiles))

In [16]:
# Summarise the size of each subset, one line per subset.
print(
    f"No. of training examples: {len(training_data)}",
    f"No. of validation examples: {len(val_data)}",
    f"No. of testing examples: {len(testing_data)}",
    sep="\n",
)

No. of training examples: 346
No. of validation examples: 87
No. of testing examples: 77


In [17]:
# Create the three split directories as siblings of the patch directory.
# No error is raised if a directory already exists.
split_root = up(datadir)
for split_name in ('train', 'test', 'val'):
    Path(split_root, split_name).mkdir(parents=True, exist_ok=True)

In [18]:
# Map each split name to the list of patch paths assigned to it.
datasets = {'train': training_data, 'val': val_data, 'test': testing_data}

# Copy every patch into the directory named after its split.
# NOTE(review): files left behind by an earlier run with a different split are
# not removed here — confirm the split directories are clean before re-running.
for split_name, split_paths in datasets.items():
    for src_path in tqdm(split_paths):
        patch_dir, patch_name = os.path.split(src_path)
        # The split directory sits beside the directory that holds the patch.
        shutil.copyfile(src_path, os.path.join(up(patch_dir), split_name, patch_name))


100%|██████████| 346/346 [00:24<00:00, 14.34it/s]
100%|██████████| 87/87 [00:06<00:00, 14.39it/s]
100%|██████████| 77/77 [00:05<00:00, 14.36it/s]


In [20]:
# Display the resolved project root; the merge paths below are built from it.
maindir

'/rapids/notebooks/sciclone/geograd/Miranda/github/shoreline_structure'

In [21]:
# --- Merge the VA and MA datasets ---
# Stand-alone step, separate from the splitting cells above: it prepares the
# folder that receives the combined VA and MA data.

# Per-region source folders, each two levels above the project root.
# Other VA variants: image_after_2010_512, image_allyear_512
VA_folder = os.path.join(up(up(maindir)), 'VIMS', 'VABP', 'paper', 'image_after_2010_1024')
MA_folder = os.path.join(up(up(maindir)), 'VIMS', 'MA_ortho_2009', 'paper', 'image_1024')

# Destination inside the project.
# Other targets: Image_after_2010_merged_256, Image_allyear_merged_256
dest_folder = os.path.join(maindir, 'datasets', 'Image_after_2010_merged_1024')

# Make sure every destination sub-folder exists before copying into it.
for subdir_name in ('train', 'val', 'test', 'masks'):
    Path(dest_folder, subdir_name).mkdir(parents=True, exist_ok=True)

In [22]:
# Sub-folders to merge; each is read from both source roots and written to
# the matching sub-folder of the destination.
folders = ['train', 'val', 'test', 'masks']

# The loop variable is deliberately not called `folder`: that name holds the
# dataset folder chosen in the configuration cell and must not be overwritten.
for subfolder in tqdm(folders, desc='merge'):

    # Lists of .tif files found for this sub-folder in each source.
    MA_src_dir = glob.glob(os.path.join(MA_folder, subfolder, '*.tif'))
    VA_src_dir = glob.glob(os.path.join(VA_folder, subfolder, '*.tif'))

    # Both sources are copied into one directory, so a file name present in
    # both would be silently overwritten. Stop rather than lose data.
    clashes = (
        {os.path.basename(path) for path in MA_src_dir}
        & {os.path.basename(path) for path in VA_src_dir}
    )
    if clashes:
        raise ValueError(
            f"{subfolder}: {len(clashes)} file name(s) exist in both the MA and VA "
            f"sources and would overwrite each other, e.g. {sorted(clashes)[:5]}"
        )

    dest_dir = os.path.join(dest_folder, subfolder)

    # leave=False clears each inner bar when it finishes, so only the outer
    # progress bar remains in the cell output.
    for MA_file in tqdm(MA_src_dir, desc=f'MA {subfolder}', leave=False):
        shutil.copy(MA_file, dest_dir)

    for VA_file in tqdm(VA_src_dir, desc=f'VA {subfolder}', leave=False):
        shutil.copy(VA_file, dest_dir)
    
    

  0%|          | 0/4 [00:00<?, ?it/s]
  0%|          | 0/10 [00:00<?, ?it/s][A
 30%|███       | 3/10 [00:00<00:00, 27.01it/s][A
 60%|██████    | 6/10 [00:00<00:00, 26.42it/s][A
100%|██████████| 10/10 [00:00<00:00, 26.68it/s][A

  0%|          | 0/346 [00:00<?, ?it/s][A
  1%|          | 2/346 [00:00<00:22, 15.21it/s][A
  1%|          | 4/346 [00:00<00:22, 15.18it/s][A
  2%|▏         | 6/346 [00:00<00:22, 15.17it/s][A
  2%|▏         | 8/346 [00:00<00:22, 15.14it/s][A
  3%|▎         | 10/346 [00:00<00:22, 15.05it/s][A
  3%|▎         | 12/346 [00:00<00:22, 15.02it/s][A
  4%|▍         | 14/346 [00:00<00:22, 15.04it/s][A
  5%|▍         | 16/346 [00:01<00:22, 14.64it/s][A
  5%|▌         | 18/346 [00:01<00:22, 14.72it/s][A
  6%|▌         | 20/346 [00:01<00:21, 14.85it/s][A
  6%|▋         | 22/346 [00:01<00:21, 14.97it/s][A
  7%|▋         | 24/346 [00:01<00:21, 14.92it/s][A
  8%|▊         | 26/346 [00:01<00:21, 14.79it/s][A
  8%|▊         | 28/346 [00:01<00:21, 14.84it/s][A
  