In [None]:
import os
from efficient_v2_unet.utils.data_generation import split_folder_files_to_train_val_test
from efficient_v2_unet.model.efficient_v2_unet import create_and_train

# Split training data and train a model (and evaluate)

This notebook guides you through semi-automatic splitting of your training data into train, validation and test images.

Please check the `1-1_train_notebook` notebook for more details and explanations.

Basically, this notebook allows you to **randomly** split a folder of images (and the corresponding mask folder).

## Data splitting

Splitting the data will move (not copy) your images into corresponding subfolders, named `train`, `val` and/or `test`.

Further, you can decide to:
- create only train and validation sets
- create only train and test sets
- create train, validation and test sets
- choose the split percentage (default is 15% for validation, 15% for test, and the rest is train)
    - (it will not allow you to have more than 80% of validation and test images combined)

If you have already split your train, validation and test images, you can skip the split function,
and directly specify the image and mask paths in the `create_and_train` function.

## Training

Please see the `1-1_train_notebook` notebook for details.

In [None]:
# User settings -- adjust these before running the cells below.

# Input data: the folder with the images, and the folder with the
# corresponding masks [0=background, 1=foreground].
image_folder = 'path/to/images'
mask_folder = 'path/to/masks'

# Output: base directory to save the model to.
basedir = 'path/to/saving_location'

# EfficientNet base model; any of ['b0', 'b1', 'b2', 'b3', 's', 'm', 'l'].
efficientnet_basemodel = 'b0'


## Splitting your training data

In [None]:
# Randomly split the images (and their masks) into subsets and keep the
# returned folder paths for the training step.
# NOTE: per the description in this notebook, the files are moved, not copied.
(train_image_path, train_mask_path,
 validation_image_path, validation_mask_path,
 test_image_path, test_mask_path) = split_folder_files_to_train_val_test(
    # where the data lives
    image_dir=image_folder,
    mask_dir=mask_folder,
    file_ext='.tif',
    # validation subset: whether to create it, and its share (0.15 = 15%)
    do_val=True,
    split_val=0.15,
    # test subset: whether to create it, and its share (0.15 = 15%)
    do_test=True,
    split_test=0.15,
)

## Train your model (and evaluate)

In [None]:
# Create the model and train it on the split data.
model = create_and_train(
    # Model identity and saving location.
    name=None,          # if not specified it is named 'myEfficeintUNet_<efficientnet_basemodel>'
    basedir=basedir,    # if not specified it will be placed in the current working directory
    efficientnet=efficientnet_basemodel,

    # Data folders: each path (str) is the one returned by the
    # split_folder_files_to_train_val_test function.
    train_img_dir=train_image_path,
    train_mask_dir=train_mask_path,
    val_img_dir=validation_image_path,
    val_mask_dir=validation_mask_path,
    test_img_dir=test_image_path,
    test_mask_dir=test_mask_path,
    file_ext='.tif',    # default

    # Training settings (both are the defaults).
    epochs=100,
    batch_size=64,
)