### script - do not run these cells 

Relative imports fail when a file is run as a script, so the script cell stays above the library cells.

In [1]:
from fastai.vision import *
from fastai.distributed import *
from fastai.script import *
from fastai.utils.mem import *

from local.segmentation.dataset import *
from local.segmentation import metrics
from local.segmentation import losses
from local.distributed import *
from local.optimizers import *

ModuleNotFoundError: No module named 'local.distributed'

In [None]:
# Always keep this cell at index position 2: the export cell writes cells[2] to segmentation_training.py
from fastai.vision import *
from fastai.distributed import *
from fastai.script import *
from fastai.utils.mem import *

from local.segmentation.dataset import *
from local.segmentation import metrics
from local.segmentation import losses
from local.callbacks import *
from local.optimizers import *

# https://stackoverflow.com/questions/8299270/ultimate-answer-to-relative-python-imports
@call_parse
def main(
    PATH:Param("Path which have data", str)="",
    IMAGES:Param("images folder path name", str)="images",
    MASKS:Param("mask folder path name", str)="masks",
    CODES:Param("codes.txt with pixel codes", str)="",
    TRAIN:Param("train.txt with training image names", str)="",
    VALID:Param("valid.txt with validation image names", str)=None,
    TEST:Param("test.txt with test image names", str)=None,
    sample_size:Param("", int)=None,
    bs:Param("Batch size", int)=80,
    size:Param("Image size", int)=224,
    imagenet_pretrained:Param("Use imagenet weights for DynamicUnet", int)=1,
    max_lr:Param("Learning Rate", float)=3e-3,
    model_name:Param("Model name for save", str)="mybestmodel",
    epochs:Param("Number of max epochs to train", int)=10,
    tracking_metric:Param("Which metric to use for tracking and evaluation", str)="dice",
    void_name:Param("Background class name", str)=None,
    loss_function:Param("Loss function for training", str)="crossentropy",
    opt:Param("Optimizer for training", str)=None,
    arch_name:Param("Architecture backbone for training", str)="resnet34",
    EXPORT_PATH:Param("Where to export trained model", str)=".",

    gpu:Param("GPU to run on, can handle multi gpu", str)=None):

    """
    Train a U-Net segmentation learner, dump raw predictions and export the model.

    For Multi GPU Run: python ../fastai/fastai/launch.py {--gpus=0123} ./training.py {--your args}
    For Single GPU Run: python ./training.py {--your args}
    bs: 80 size: 224 , bs: 320 size: 112

    Steps: build the data with SemanticSegmentationData, normalize it, create a
    unet_learner on the requested backbone, attach metric / loss / optimizer,
    train with one-cycle schedules (three progressively unfrozen stages when
    `imagenet_pretrained` is set, a single stage otherwise), pickle the raw
    validation (and optionally test) predictions into EXPORT_PATH, then reload
    the best checkpoint saved under `model_name` and export the learner.

    Raises ValueError when `opt` is not one of the supported optimizer names or
    when `void_name` does not identify exactly one class for `foreground_acc`.
    """

    # Setup init
    gpu = setup_distrib(gpu)

    # Args
    # Progress messages are printed only when `gpu` is falsy (None or 0) so a
    # multi-process run does not repeat every line once per process.
    if not gpu: print(f"Print args here: ")

    # Get data
    PATH = Path(PATH)
    # VALID may be a number given as text (e.g. "0.2") or a file name / None;
    # only the numeric form is converted, anything else is passed through as is.
    try: VALID = float(VALID)
    except (TypeError, ValueError): pass
    ssdata = SemanticSegmentationData(PATH, IMAGES, MASKS, CODES, TRAIN, VALID, TEST, sample_size, bs, size)
    data = ssdata.get_data()
    if imagenet_pretrained: data.normalize(imagenet_stats)
    else: data.normalize()

    # learn - models: 'resnet101', 'resnet152', 'resnet18', 'resnet34', 'resnet50',
    arch = getattr(models, arch_name)
    if not gpu: print(f"Training with arch: {arch}")
    # Honour the flag: the "from scratch" path must not start from pretrained weights.
    learn = unet_learner(data, arch=arch, pretrained=bool(imagenet_pretrained))
    learn.path, learn.model_dir = Path(EXPORT_PATH), 'models'

    # metric
    metric = getattr(metrics, tracking_metric)
    if not gpu: print(f"Tracking metric: {metric}")
    if tracking_metric in ["multilabel_dice", "multilabel_iou"]: metric = partial(metric, c=learn.data.c)
    if tracking_metric == "foreground_acc":
        # asarray makes the element-wise comparison work for plain lists as well as arrays
        matches = np.where(np.asarray(learn.data.classes) == void_name)[0]
        if len(matches) != 1:
            raise ValueError(f"void_name {void_name!r} must match exactly one class, "
                             f"found {len(matches)} matches")
        void_code = matches.item()
        metric = partial(metric, void_code=void_code)
    learn.metrics = [metric]

    # loss
    # Unknown loss names silently keep the learner's default loss function.
    loss = getattr(losses, loss_function, None)
    if loss: learn.loss_func = loss
    if not gpu: print(f"Training with loss: {learn.loss_func}")

    # callbacks
    save_cb = SaveDistributedModelCallback(learn, tracking_metric, "max", name=model_name, gpu=gpu)
    csvlog_cb = CSVLogger(learn, 'training_log', append=True)
    cbs = [save_cb, csvlog_cb]

    # optimizer / scheduler
    alpha=0.99; mom=0.9; eps=1e-8

    if opt:
        # The chain is kept lazy on purpose: only the selected optimizer name is looked up.
        if   opt=='adam' : opt_func = partial(optim.Adam, betas=(mom,alpha), eps=eps)
        elif opt=='radam' : opt_func = partial(RAdam, betas=(mom,alpha), eps=eps)
        elif opt=='novograd' : opt_func = partial(Novograd, betas=(mom,alpha), eps=eps)
        elif opt=='rms'  : opt_func = partial(optim.RMSprop, alpha=alpha, eps=eps)
        elif opt=='sgd'  : opt_func = partial(optim.SGD, momentum=mom)
        elif opt=='ranger'  : opt_func = partial(Ranger,  betas=(mom,alpha), eps=eps)
        elif opt=='ralamb'  : opt_func = partial(Ralamb,  betas=(mom,alpha), eps=eps)
        elif opt=='rangerlars'  : opt_func = partial(RangerLars,  betas=(mom,alpha), eps=eps)
        elif opt=='lookahead'  : opt_func = partial(LookaheadAdam, betas=(mom,alpha), eps=eps)
        elif opt=='lamb'  : opt_func = partial(Lamb, betas=(mom,alpha), eps=eps)
        else: raise ValueError(f"Unknown optimizer: {opt!r}")
        learn.opt_func = opt_func

    # distributed
    if gpu is not None and num_distrib() > 1: learn.to_distributed(gpu)

    # to_fp16
    learn.to_fp16()

    # train
    if not gpu: print(f"Starting training with max_lr: {max_lr}")
    if imagenet_pretrained:
        if not gpu: print("Training with transfer learning")
        # stage-1
        learn.freeze_to(-1)
        learn.fit_one_cycle(epochs, max_lr, callbacks=cbs)

        # stage-2
        lrs = slice(max_lr/100,max_lr/4)
        learn.freeze_to(-2)
        learn.fit_one_cycle(epochs, lrs, pct_start=0.8, callbacks=cbs)

        # stage-3
        lrs = slice(max_lr/100,max_lr/4)
        learn.unfreeze()
        learn.fit_one_cycle(epochs, lrs, pct_start=0.8, callbacks=cbs)
    else:
        if not gpu: print("Training from scratch")
        learn.fit_one_cycle(epochs, max_lr, callbacks=cbs)

    # save valid and test preds
    # Pair every split with its own dataset so the saved file names line up with
    # the predictions; the test dataset is only touched when TEST was given.
    # NOTE(review): predictions come from the weights at the end of training, the
    # best checkpoint is only loaded afterwards for the export - confirm intended.
    splits = [("Valid", data.valid_ds)]
    if TEST: splits.append(("Test", data.test_ds))
    for dtype, ds in splits:
        if not gpu: print(f"Generating Raw Predictions for {dtype}...")
        preds, targs = learn.get_preds(getattr(DatasetType, dtype))
        fnames = list(ds.items)
        try_save({"fnames":fnames, "preds":to_cpu(preds), "targs":to_cpu(targs)},
                 path=Path(EXPORT_PATH), file=f"{dtype}_raw_preds.pkl")

    # to_fp32 + export learn
    learn.to_fp32()
    learn.load(model_name) # load best saved model
    if not gpu: print(f"Exporting model to: {EXPORT_PATH}")
    learn.export(f"{model_name}_export.pkl")

In [2]:
from local.notebook.export import *
# Export the training script: the cell at index 2 of this notebook holds the
# full script source, which is written verbatim to segmentation_training.py.
notebook = read_nb("segmentation_training.ipynb")
cells = notebook['cells']
src = cells[2]['source']
with open("segmentation_training.py", "w") as f:
    f.write(src)

###  `run_command`

In [3]:
import fastai
from local.script import run_command

In [5]:
run_command??

In [9]:
# Smoke-test the helper by listing the working directory; the command is
# passed as a list of arguments here.
run_command(["ls", "-l"])

total 452
-rw-rw-r--. 1 turgutluk turgutluk   2168 Sep 13 17:40 00_testing_notebook_export.ipynb
-rw-rw-r--. 1 turgutluk turgutluk 135347 Sep 13 22:47 00_test.ipynb
-rw-rw-r--. 1 turgutluk turgutluk   2870 Sep 15 20:40 01_script.ipynb
-rw-rw-r--. 1 turgutluk turgutluk  58279 Sep 15 19:59 02_scheduler.ipynb
-rw-rw-r--. 1 turgutluk turgutluk  32124 Sep 15 20:30 03_callbacks.ipynb
-rw-r--r--. 1 turgutluk turgutluk 158973 Sep 13 22:05 10_segmentation_dataset.ipynb
-rw-rw-r--. 1 turgutluk turgutluk   2657 Sep 13 22:27 11_segmentation_losses.ipynb
-rw-rw-r--. 1 turgutluk turgutluk   5016 Sep 14 16:48 12_segmentation_metrics.ipynb
drwxrwxr-x. 4 turgutluk turgutluk   4096 Sep 15 20:38 experiment_export
drwxrwxr-x. 2 turgutluk turgutluk   4096 Sep 13 22:46 images
-rw-rw-r--. 1 turgutluk turgutluk    895 Sep 13 17:38 lib.pkl
drwxrwxr-x. 7 turgutluk turgutluk   4096 Sep 15 20:25 local
drwxrwxr-x. 2 turgutluk turgutluk   4096 Sep 15 20:28 models
-rw-r--r--. 1 turgutluk turgutluk  15001 Sep 15 20:4

(b'', b'')

In [10]:
# Launch the exported training script through fastai's launch.py on the CamVid data.
# Every line of the command ends with a backslash continuation so the string
# holds one single-line invocation and does not rely on run_command treating
# embedded newlines as argument separators.
stdout, stderr = run_command(f"""
python {Path(fastai.__file__).parent}/launch.py \
--gpus=0123 segmentation_training.py \
--PATH=/home/turgutluk/.fastai/data/camvid \
--IMAGES=images \
--MASKS=labels \
--CODES=codes.txt \
--TRAIN=train.txt \
--VALID=0.2 \
--TEST=test.txt \
--bs=4 \
--size=112 \
--imagenet_pretrained=1 \
--max_lr=3e-3 \
--model_name=mybestmodel \
--epochs=1 \
--tracking_metric=foreground_acc \
--void_name=Void \
--loss_function=xentropy \
--opt=radam \
--EXPORT_PATH=./experiment_export
""")

To use this log_lamb_rs, please run 'pip install tensorboardx'. Also you must have Tensorboard running to see results
To use this log_lamb_rs, please run 'pip install tensorboardx'. Also you must have Tensorboard running to see results
To use this log_lamb_rs, please run 'pip install tensorboardx'. Also you must have Tensorboard running to see results
To use this log_lamb_rs, please run 'pip install tensorboardx'. Also you must have Tensorboard running to see results
Print args here:
Training with arch: <function resnet34 at 0x7f4b31a4b0d0>
Tracking metric: <function foreground_acc at 0x7f4a3a131620>
Training with loss: FlattenedLoss of CrossEntropyLoss()
Starting training with max_lr: 0.003
Training with transfer learning
Initializing self.best
epoch     train_loss  valid_loss  foreground_acc  time
Initializing self.best
Initializing self.best
Initializing self.best
Total time: 00:11
0         129.438187  2.394941    0.331523        00:11
Better model found at epoch 0 with foreground_

In [12]:
# Display the stderr value returned by the training launch.
stderr

b''