In [None]:
#default_exp distributed

In [1]:
#export
from fastai.vision import *
from fastai.callbacks import *

In [2]:
#export
class SaveDistributedModelCallback(TrackerCallback):
    """SaveModelCallback modified for distributed transfer learning - remove torch.load

    Saves the learner after every epoch (`every='epoch'`, checkpoints named `{name}_{epoch}`)
    or only when the monitored value improves (`every='improvement'`, checkpoint named `{name}`).
    This class defines no `on_train_end` hook, so it never reloads a checkpoint itself.

    Arguments:
    - `learn`, `monitor`, `mode`: forwarded unchanged to `TrackerCallback.__init__`.
    - `every`: 'improvement' or 'epoch'; any other value triggers a warning and falls back to 'improvement'.
    - `name`: base name used for the saved checkpoints.
    - `gpu`: when falsy (`None` or `0`) the "Better model found" message is printed; any other
      value silences it. NOTE(review): presumably the process's GPU/rank index - confirm with callers.
    """
    def __init__(self, learn:Learner, monitor:str='val_loss', mode:str='auto', every:str='improvement',
                 name:str='bestmodel', gpu=None):
        super().__init__(learn, monitor=monitor, mode=mode)
        self.every,self.name = every,name
        if self.every not in ['improvement', 'epoch']:
            warn(f'SaveModel every {self.every} is invalid, falling back to "improvement".')
            self.every = 'improvement'
        self.gpu = gpu

    def on_train_begin(self, **kwargs:Any)->None:
        "Initialize the best value once; later fits reusing this instance keep the previous `best`."
        if not hasattr(self, 'best'):
            print("Initializing self.best")
            # Start from the worst possible score for the comparison operator in use.
            self.best = float('inf') if self.operator == np.less else -float('inf')

    def jump_to_epoch(self, epoch:int)->None:
        "Best-effort load of the checkpoint written for the previous epoch (`{name}_{epoch-1}`)."
        checkpoint = f'{self.name}_{epoch-1}'
        try:
            self.learn.load(checkpoint, purge=False)
            print(f"Loaded {checkpoint}")
        # Catch `Exception` rather than everything, so KeyboardInterrupt/SystemExit still
        # propagate while any ordinary load failure keeps the original best-effort message.
        except Exception: print(f'Model {checkpoint} not found.')

    def on_epoch_end(self, epoch:int, **kwargs:Any)->None:
        "Compare the value monitored to its best score and maybe save the model."
        if self.every=="epoch": self.learn.save(f'{self.name}_{epoch}')
        else:
            current = self.get_monitor_value()
            if current is not None and self.operator(current, self.best):
                # Falsy `gpu` (None or 0) is the only case that reports the improvement.
                if not self.gpu: print(f'Better model found at epoch {epoch} with {self.monitor} value: {current}.')
                self.best = current
                self.learn.save(f'{self.name}')

In [3]:
from local.segmentation.dataset import SemanticSegmentationData
from local.segmentation.metrics import *
# test data creation
# Resolve the CamVid folder from the current user's home directory instead of
# hard-coding one user's absolute path, so the cell runs on other machines too.
PATH = Path.home()/".fastai"/"data"/"camvid"
IMAGES = "images"
MASKS = "labels"
CODES = "codes.txt"
TRAIN, VALID, TEST = "train.txt", "valid.txt", "test.txt"
# NOTE(review): `bs` and `size` are presumably batch size and image size - confirm
# against local.segmentation.dataset.SemanticSegmentationData.
ssdata = SemanticSegmentationData(PATH, IMAGES, MASKS, CODES, TRAIN, VALID, TEST, sample_size=None, bs=4, size=112)
data = ssdata.get_data()

In [4]:
# Build the U-Net learner, then attach the single metric in its own statement.
learn = unet_learner(data, models.resnet34)
# NOTE(review): void_code=30 is presumably the index of the void class in codes.txt - confirm.
learn.metrics = [partial(foreground_acc, void_code=30)]

In [5]:
def _get_metric_name(f):
    try: return f.func.__name__
    except: return f.__name__

In [6]:
# Monitor the learner's first metric by name; `gpu` is left at its default (None).
save_cb = SaveDistributedModelCallback(learn, monitor=_get_metric_name(learn.metrics[0]))

In [7]:
# First fit with the checkpoint callback attached (one cycle).
learn.fit_one_cycle(1, callbacks=[save_cb])

Initializing self.best


epoch,train_loss,valid_loss,foreground_acc,time
0,1.220616,1.063044,0.645511,00:18


Better model found at epoch 0 with foreground_acc value: 0.6455112099647522.


In [10]:
# Best monitored value the callback has recorded so far.
save_cb.best

tensor(0.6455)

In [11]:
# Fit again with the same callback instance: `best` already exists, so on_train_begin
# does not reset it and only a value better than the previous fit triggers a save.
learn.fit_one_cycle(1, callbacks=[save_cb])

epoch,train_loss,valid_loss,foreground_acc,time
0,0.924665,0.784912,0.799802,00:15


Better model found at epoch 0 with foreground_acc value: 0.7998024225234985.


In [14]:
from local.notebook.export import notebook2script
# Convert this notebook with the project's helper.
# NOTE(review): presumably writes the `#export` cells to a module - confirm in local.notebook.export.
notebook2script("03_distributed.ipynb")

Converted 03_distributed.ipynb.
