# Split the dataset into training and test sets

## Import libraries

In [1]:
import os
import shutil
import random

from tqdm import tqdm

## Specify the dataset path

In [2]:
# Dataset root, relative to the notebook's working directory; later cells chdir into it.
Dataset_Path = 'CFA87_Semantic_Seg_Labelme'

## View the dataset directory structure

In [3]:
# seedir is a third-party package that prints a folder as a tree.
import seedir as sd
# depthlimit=1 shows only the first level below the dataset root.
sd.seedir(Dataset_Path, style='emoji', depthlimit=1)

📁 CFA87_Semantic_Seg_Labelme/
├─📁 ann_dir/
└─📁 img_dir/


## Create folders

In [4]:
# Work from inside the dataset root so the relative paths used afterwards resolve.
os.chdir(Dataset_Path)
# Temporary staging folders for the two splits (os.mkdir raises if one already exists).
for split_dir in ('train', 'val'):
    os.mkdir(split_dir)

In [5]:
# Number of entries currently in the image folder (shown as the cell output).
len(os.listdir('img_dir'))

73

In [6]:
# Number of entries currently in the annotation folder (shown as the cell output).
len(os.listdir('ann_dir'))

73

## Remove redundant files automatically generated by the system

### View redundant files to be deleted

In [7]:
# List every path named __MACOSX (case-insensitive) below the current directory; nothing is deleted here.
!find . -iname '__MACOSX'

In [8]:
# List every path named .DS_Store (case-insensitive) below the current directory.
!find . -iname '.DS_Store'

In [9]:
# List every path named .ipynb_checkpoints (case-insensitive) below the current directory.
!find . -iname '.ipynb_checkpoints'

### Remove redundant files

In [10]:
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done

In [11]:
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done

In [12]:
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done

### Verify that redundant files have been removed

In [13]:
# Re-run the search; no output means no __MACOSX entries remain.
!find . -iname '__MACOSX'

In [14]:
# Re-run the search; no output means no .DS_Store entries remain.
!find . -iname '.DS_Store'

In [15]:
# Re-run the search; no output means no .ipynb_checkpoints entries remain.
!find . -iname '.ipynb_checkpoints'

## In the image folder, split the files into a training set and a test set

In [16]:
test_frac = 0.2  # Fraction of the files held out as the test (`val`) split
random.seed(123) # Seed the global `random` generator used by the shuffle in a later cell

In [17]:
folder = 'img_dir'  # Source folder for the image split; later cells list and move files from it

In [None]:
# os.listdir() returns entries in arbitrary order, so sort before shuffling.
# Without the sort, the seeded shuffle can still give a different split on each machine.
img_paths = sorted(os.listdir(folder))
random.shuffle(img_paths) # Shuffle in place using the global `random` generator

val_number = int(len(img_paths) * test_frac) # Number of test set files (rounded down)
train_files = img_paths[val_number:]         # Training set file names: everything after the first val_number entries
val_files = img_paths[:val_number]           # Test set file names: the first val_number shuffled entries

print('Total number of dataset files', len(img_paths))
print('Number of training set files', len(train_files))
print('Number of test set files', len(val_files))

## Move training set images to `train` directory

In [19]:
# Relocate every training image from the source folder into ./train, keeping its file name.
for file_name in tqdm(train_files):
    shutil.move(os.path.join(folder, file_name), os.path.join('train', file_name))

100%|██████████| 59/59 [00:00<00:00, 2315.45it/s]


## Move test set images to `val` directory

In [20]:
# Relocate every test image from the source folder into ./val, keeping its file name.
for file_name in tqdm(val_files):
    shutil.move(os.path.join(folder, file_name), os.path.join('val', file_name))

100%|██████████| 14/14 [00:00<00:00, 1307.80it/s]


In [21]:
# Sanity check: entries in train plus val, to compare with the total listed from img_dir before the move.
len(os.listdir('train')) + len(os.listdir('val'))

73

## Move `train` and `val` into `img_dir`

In [22]:
# Put the two staging folders under img_dir, giving img_dir/train and img_dir/val.
# shutil.move returns the destination path, so the cell displays the last one.
shutil.move('train', 'img_dir/train')
shutil.move('val', 'img_dir/val')

'img_dir/val'

## In the annotation folder, split the files into a training set and a test set

In [23]:
# Point `folder` at the annotation folder; the move loops in later cells read from it.
folder = 'ann_dir'

In [24]:
# Recreate the temporary staging folders for the two splits (os.mkdir raises if one already exists).
for split_dir in ('train', 'val'):
    os.mkdir(split_dir)

## Move training set annotations to `train` directory

In [25]:
# Move the annotation that belongs to each training image into ./train.
# os.path.splitext removes only the final extension, so 'scan.v2.jpg' maps to
# 'scan.v2.png'; split('.')[0] would have truncated it to 'scan.png'.
# NOTE(review): assumes each mask is stored as <image stem>.png — confirm against the step that produced ann_dir.
for each in tqdm(train_files):
    mask_name = os.path.splitext(each)[0] + '.png'
    src_path = os.path.join(folder, mask_name)
    dst_path = os.path.join('train', mask_name)
    shutil.move(src_path, dst_path)

100%|██████████| 59/59 [00:00<00:00, 1435.84it/s]


## Move test set annotations to `val` directory

In [26]:
# Move the annotation that belongs to each test image into ./val.
# os.path.splitext removes only the final extension, so 'scan.v2.jpg' maps to
# 'scan.v2.png'; split('.')[0] would have truncated it to 'scan.png'.
# NOTE(review): assumes each mask is stored as <image stem>.png — confirm against the step that produced ann_dir.
for each in tqdm(val_files):
    mask_name = os.path.splitext(each)[0] + '.png'
    src_path = os.path.join(folder, mask_name)
    dst_path = os.path.join('val', mask_name)
    shutil.move(src_path, dst_path)

100%|██████████| 14/14 [00:00<00:00, 1415.39it/s]


In [27]:
# Sanity check: entries in train plus val, to compare with the total listed from ann_dir before the move.
len(os.listdir('train')) + len(os.listdir('val'))

73

## Move `train` and `val` into `ann_dir`

In [28]:
# Put the two staging folders under ann_dir, giving ann_dir/train and ann_dir/val.
# shutil.move returns the destination path, so the cell displays the last one.
shutil.move('train', 'ann_dir/train')
shutil.move('val', 'ann_dir/val')

'ann_dir/val'

## Remove redundant files automatically generated by the system

In [29]:
# Step back out of the dataset folder; the `find .` cells that follow therefore scan the parent directory.
os.chdir('../')

### View redundant files to be deleted

In [30]:
# List every path named __MACOSX (case-insensitive) below the current directory; nothing is deleted here.
!find . -iname '__MACOSX'

In [31]:
# List every path named .DS_Store (case-insensitive) below the current directory.
!find . -iname '.DS_Store'

In [32]:
# List every path named .ipynb_checkpoints (case-insensitive) below the current directory.
!find . -iname '.ipynb_checkpoints'

./.ipynb_checkpoints


### Remove redundant files

In [33]:
!for i in `find . -iname '__MACOSX'`; do rm -rf $i;done

In [34]:
!for i in `find . -iname '.DS_Store'`; do rm -rf $i;done

In [35]:
!for i in `find . -iname '.ipynb_checkpoints'`; do rm -rf $i;done

### Verify that redundant files have been removed

In [36]:
# Re-run the search; no output means no __MACOSX entries remain.
!find . -iname '__MACOSX'

In [37]:
# Re-run the search; no output means no .DS_Store entries remain.
!find . -iname '.DS_Store'

In [38]:
# Re-run the search; no output means no .ipynb_checkpoints entries remain.
!find . -iname '.ipynb_checkpoints'