In [None]:
import os

# check if `pytorch-CycleGAN-and-pix2pix` is already cloned
if not os.path.exists('../pytorch-CycleGAN-and-pix2pix'):
    os.chdir('../')
    !git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix
    os.chdir('./dataset')	# change directory back to `./dataset`

# Data Preprocessing for the Dataset

In [1]:
import os
import shutil

# Training Dataset

## Prepare Raw Training Data (Baseline)

The training dataset contains two subfolders:
- label_img: contains the draft images
- img: contains the corresponding ground truth images

In [None]:
import zipfile

# Archive holding the raw training data (path is relative to the working directory).
train_dataset_zip = '34_Competition 1_Training dataset.zip'

# Unpack every member of the archive into the current working directory;
# the archive is opened read-only (the default mode) and closed on exit.
with zipfile.ZipFile(train_dataset_zip) as training_archive:
    training_archive.extractall(path=None)

In [None]:
train_dir = 'training_dataset'

# Rename the extracted `Training dataset` folder to `training_dataset`.
# The rename is skipped when `training_dataset` already exists, so running
# this cell a second time does not raise. If neither folder exists,
# `os.rename` still raises FileNotFoundError to signal the missing extraction.
if not os.path.isdir(train_dir):
    os.rename('Training dataset', train_dir)

In [None]:
# Show the entries of the training folder (`train_dir` is set in the rename cell above).
training_entries = os.listdir(train_dir)
print(training_entries)

### Rename the subfolders as trainA and trainB
Map the folder names to the model inputs:
- `training_dataset/label_img` -> `training_dataset/trainA`
- `training_dataset/img` -> `training_dataset/trainB`

In [None]:
# Rename the raw sub-folders to the names the model expects:
#   label_img (draft images)        -> trainA
#   img       (ground truth images) -> trainB
# A rename is skipped when its target already exists, so re-running the cell
# is harmless; a missing source with a missing target still raises.
for raw_name, model_name in (('label_img', 'trainA'), ('img', 'trainB')):
    renamed_path = train_dir + '/' + model_name
    if not os.path.isdir(renamed_path):
        os.rename(train_dir + '/' + raw_name, renamed_path)

### Copy the folders to the input folder

Copy the `training_dataset` folder to `../pytorch-CycleGAN-and-pix2pix/datasets`.

In [None]:
# Copy the training folder into the repository's `datasets` folder.
target_dir = '../pytorch-CycleGAN-and-pix2pix/datasets'
train_target = target_dir + '/' + train_dir

# `shutil.copytree` raises FileExistsError when the destination exists, so a
# copy left behind by an earlier run is removed first (the same approach the
# testing-dataset copy cell uses).
if os.path.exists(train_target):
    shutil.rmtree(train_target)
shutil.copytree(train_dir, train_target)

### Align the trainA and trainB images

In [None]:
os.chdir('../pytorch-CycleGAN-and-pix2pix/datasets')

! python make_dataset_aligned.py --dataset-path training_dataset
print('Done!')

## Prepare 2 domain datasets (Enhanced)

### Extract the Images from Raw Training Data and create 2 folders
Each folder contains 2 types of images:
- River images: e.g. TRA_RI_1000000.png
- Road images: e.g. TRA_RO_1000000.png

so we need to create 2 folders:
- River (contains `img` and `label_img` subfolders, each contains river images)
- Road (contains `img` and `label_img` subfolders, each contains road images)

In [None]:
river_dir = 'RIVER'
road_dir = 'ROAD'

# Create the two domain folders (no error if they already exist).
os.makedirs(river_dir, exist_ok=True)
os.makedirs(road_dir, exist_ok=True)

# Mirror every sub-folder of `train_dir` under RIVER/ and ROAD/, copying each
# image into the domain named in its file name (`RI` -> river, `RO` -> road).
for subdir in os.listdir(train_dir):
    source_subdir = os.path.join(train_dir, subdir)
    # Skip stray files sitting next to the sub-folders: calling os.listdir
    # on a regular file would raise NotADirectoryError.
    if not os.path.isdir(source_subdir):
        continue

    river_subdir = os.path.join(river_dir, subdir)
    road_subdir = os.path.join(road_dir, subdir)
    os.makedirs(river_subdir, exist_ok=True)
    os.makedirs(road_subdir, exist_ok=True)

    # Copy (not move) so the original training folder stays intact.
    for file in os.listdir(source_subdir):
        source_file = os.path.join(source_subdir, file)
        if 'RI' in file:
            shutil.copy(source_file, os.path.join(river_subdir, file))
        elif 'RO' in file:
            shutil.copy(source_file, os.path.join(road_subdir, file))
        else:
            print('ERROR: file name not recognized: ' + file)

### Rename the subfolders as trainA and trainB
Map the folder names to the model inputs:
- `RIVER/label_img` -> `RIVER/trainA`
- `RIVER/img` -> `RIVER/trainB`
- `ROAD/label_img` -> `ROAD/trainA`
- `ROAD/img` -> `ROAD/trainB`

In [None]:
river_dir = 'RIVER'
road_dir = 'ROAD'

# In both domain folders rename label_img -> trainA and img -> trainB.
# A rename is skipped when its target already exists, which covers re-running
# the cell as well as domain folders whose sub-folders already carry the
# trainA/trainB names. A missing source with a missing target still raises.
for domain_dir in (river_dir, road_dir):
    for raw_name, model_name in (('label_img', 'trainA'), ('img', 'trainB')):
        renamed_path = domain_dir + '/' + model_name
        if not os.path.isdir(renamed_path):
            os.rename(domain_dir + '/' + raw_name, renamed_path)

### Copy the folders to the input folder

Copy the `RIVER` and `ROAD` folders to `../pytorch-CycleGAN-and-pix2pix/datasets`.

In [None]:
# Copy both domain folders into the repository's `datasets` folder.
target_dir = '../pytorch-CycleGAN-and-pix2pix/datasets'

for domain_dir in (river_dir, road_dir):
    domain_target = target_dir + '/' + domain_dir
    # `shutil.copytree` raises FileExistsError when the destination exists,
    # so a copy left behind by an earlier run is removed first (the same
    # approach the testing-dataset copy cell uses).
    if os.path.exists(domain_target):
        shutil.rmtree(domain_target)
    shutil.copytree(domain_dir, domain_target)

### Align the trainA and trainB images

In [None]:
os.chdir('../pytorch-CycleGAN-and-pix2pix/datasets')

! python make_dataset_aligned.py --dataset-path RIVER
! python make_dataset_aligned.py --dataset-path ROAD
print('Done!')

# Testing Dataset

## Prepare Raw Testing Data (Baseline)

The zip file only contains a `label_img` folder, so we extract it into a new parent folder, `public_testing_dataset_1`, which then holds `label_img`.

In [15]:
import os

# Return to the notebook's `dataset` folder before preparing the testing data.
# `../dataset` resolves when the kernel is already in `dataset` (or a sibling
# folder); `../../dataset` covers a kernel that an alignment cell left inside
# `../pytorch-CycleGAN-and-pix2pix/datasets`.
for candidate_dir in ('../dataset', '../../dataset'):
    if os.path.isdir(candidate_dir):
        os.chdir(candidate_dir)
        break
else:
    # Best effort, as before: report instead of raising, but say what happened.
    print("Could not find the `dataset` folder; staying in " + os.getcwd())

In [3]:
import zipfile

# Archive with the public testing data and the folder it is unpacked into.
test_dataset_zip = 'public_testing_dataset_1.zip'
test_dir = 'public_testing_dataset_1'

# Unpack every member of the archive below `test_dir`; the archive is opened
# read-only (the default mode) and closed on exit.
with zipfile.ZipFile(test_dataset_zip) as testing_archive:
    testing_archive.extractall(path=test_dir)

In [4]:
# Show the entries of the extracted testing folder.
test_dir = 'public_testing_dataset_1'
testing_entries = os.listdir(test_dir)
print(testing_entries)

['label_img']


Since the ground truth images are not provided, we only need to rename the `label_img` folder to `testA`.

In [None]:
# # create the subfolders img
# if not os.path.exists(test_dir + '/img'):
# 	os.makedirs(test_dir + '/img')

# # copy the images from label_img to img
# for file in os.listdir(test_dir + '/label_img'):
# 	shutil.copy(test_dir + '/label_img/' + file, test_dir + '/img/' + file)

### Rename the subfolder as testA

Map the folder name to the model input:
- `public_testing_dataset_1/label_img` -> `public_testing_dataset_1/testA`
<!-- - `public_test/img` -> `public_test/testB` -->

In [5]:
# Rename `label_img` to `testA`, the input folder name the model expects.
# The rename is skipped when `testA` already exists, so re-running the cell
# does not raise; a missing `label_img` with a missing `testA` still raises.
test_inputs_dir = test_dir + '/testA'
if not os.path.isdir(test_inputs_dir):
    os.rename(test_dir + '/label_img', test_inputs_dir)
# No `testB` folder is created: ground truth images are not provided for the test set.

### Copy the folders to the input folder

Copy the `public_testing_dataset_1` folder to `../pytorch-CycleGAN-and-pix2pix/datasets`.

In [7]:
# Copy the testing folder into the repository's `datasets` folder,
# replacing any copy that is already there.
target_dir = '../pytorch-CycleGAN-and-pix2pix/datasets'
test_target = target_dir + '/' + test_dir

# Drop a stale copy first; the fresh copy is then made unconditionally.
if os.path.exists(test_target):
    shutil.rmtree(test_target)
shutil.copytree(test_dir, test_target);

### Align the testA and testB images

In [None]:
# os.chdir('../pytorch-CycleGAN-and-pix2pix/datasets')

In [None]:
# ! python make_dataset_aligned.py --dataset-path {test_dir}
# print('Done!')