In [1]:
from pl_model import YOLOv4PL

In [2]:
import pytorch_lightning as pl
from argparse import Namespace
from pytorch_lightning.callbacks import LearningRateLogger

In [3]:
# Hyperparameters handed to YOLOv4PL (wrapped in a Namespace in a later cell).
# NOTE(review): the exact meaning of each key is defined in pl_model.YOLOv4PL,
# which is not visible from this notebook; the grouping below is a reading aid.
hparams = {
    # Dataset list files.
    "train_ds": "train.txt",
    "valid_ds": "valid.txt",
    # Optimisation settings ("bs" is presumably the batch size and "wd" the
    # weight decay -- confirm against pl_model).
    "bs": 1,
    "momentum": 0.9,
    "wd": 0.001,
    "lr": 1e-8,
    "epochs": 100,
    "pct_start": 10 / 100,  # i.e. 0.1, written as a fraction of the 100 epochs
    "optimizer": "SGD",
    # Optional model/training feature switches, all disabled here.
    "SAT": False,
    "epsilon": 0.1,
    "SAM": False,
    "ECA": False,
    "WS": False,
    "Dropblock": False,
}

In [4]:
# Wrap the hyperparameter dict in an argparse.Namespace so the values are
# reachable as attributes (hparams.lr, hparams.bs, ...), then build the model.
#
# The isinstance guard keeps this cell idempotent: without it, running the cell
# a second time would evaluate Namespace(**<Namespace>) and raise a TypeError,
# because a Namespace is not a mapping and cannot be **-unpacked.
if not isinstance(hparams, Namespace):
    hparams = Namespace(**hparams)
m = YOLOv4PL(hparams)

In [5]:
# TensorBoard logger for this experiment: runs are written below "logs/" under
# the experiment name "yolov4".
tb_logger = pl.loggers.TensorBoardLogger(save_dir="logs/", name="yolov4")

In [6]:
# Checkpoint callback: tracks "training_loss_epoch" and treats lower values as
# better (mode="min"). The "{epoch:02d}" placeholder in the path is filled in
# by Lightning when a checkpoint is written.
# NOTE(review): "training_loss_epoch" is presumably a metric logged by
# YOLOv4PL -- confirm the name matches what the module actually reports.
checkpoint_callback = pl.callbacks.ModelCheckpoint(
    monitor="training_loss_epoch",
    mode="min",
    filepath="model_checkpoints/yolov4{epoch:02d}",
    verbose=True,
)

In [7]:
# Build the Lightning trainer: single GPU, 32-bit precision, TensorBoard
# logging, the checkpoint callback defined above, and learning-rate logging.
t = pl.Trainer(
    logger=tb_logger,
    checkpoint_callback=checkpoint_callback,
    callbacks=[LearningRateLogger()],
    gpus=1,
    precision=32,
    benchmark=True,
    min_epochs=100,
    # Optional settings, currently disabled:
    # resume_from_checkpoint="model_checkpoints/yolov4epoch=82.ckpt",
    # auto_lr_find=True,
    # auto_scale_batch_size='binsearch',
    # fast_dev_run=True,
)


GPU available: True, used: True
No environment variable for node rank defined. Set as 0.
CUDA_VISIBLE_DEVICES: [0]


In [8]:
# Run the trainer's learning-rate finder over [1e-10, 1e-3] and plot the
# result. early_stop_threshold=None is intended to switch off the finder's
# divergence-based early stop (see the Lightning lr_find documentation).
r = t.lr_find(
    m,
    min_lr=1e-10,
    max_lr=1e-3,
    early_stop_threshold=None,
)
r.plot()


    | Name                                          | Type              | Params
--------------------------------------------------------------------------------
0   | model                                         | YOLOv4            | 63 M  
1   | model.backbone                                | Backbone          | 26 M  
2   | model.backbone.d1                             | DownSampleFirst   | 61 K  
3   | model.backbone.d1.c1                          | ConvBlock         | 928   
4   | model.backbone.d1.c1.module                   | Sequential        | 928   
5   | model.backbone.d1.c1.module.0                 | Conv2dWS          | 864   
6   | model.backbone.d1.c1.module.1                 | BatchNorm2d       | 64    
7   | model.backbone.d1.c1.module.2                 | Mish              | 0     
8   | model.backbone.d1.c2                          | ConvBlock         | 18 K  
9   | model.backbone.d1.c2.module                   | Sequential        | 18 K  
10  | model.backbone.d1.c2.

Max:  tensor(4.5239, device='cuda:0') min:  tensor(-0.3088, device='cuda:0')
Max:  tensor(38.3290, device='cuda:0') min:  tensor(-0.3088, device='cuda:0')
Max:  tensor(181.8175, device='cuda:0') min:  tensor(-0.3088, device='cuda:0')
Max:  tensor(737.9935, device='cuda:0') min:  tensor(-0.3088, device='cuda:0')
Max:  tensor(7428.8398, device='cuda:0') min:  tensor(-0.3088, device='cuda:0')
Max:  tensor(38176.8789, device='cuda:0') min:  tensor(-0.3088, device='cuda:0')
Max:  tensor(194.6959, device='cuda:0') min:  tensor(-0.3088, device='cuda:0')
Max:  tensor(207033.6094, device='cuda:0') min:  tensor(-0.3088, device='cuda:0')
Max:  tensor(2507236.2500, device='cuda:0') min:  tensor(-0.3061, device='cuda:0')
Max:  tensor(14705994., device='cuda:0') min:  tensor(-0.1797, device='cuda:0')
Max:  tensor(80379104., device='cuda:0') min:  tensor(-3.0404e-25, device='cuda:0')
Max:  tensor(8.1883e+08, device='cuda:0') min:  tensor(0., device='cuda:0')
Max:  tensor(3.9135e+09, device='cuda:0') 

HBox(children=(FloatProgress(value=0.0, description='Finding best initial lr', style=ProgressStyle(description…

Max:  tensor(9.4533, device='cuda:0', grad_fn=<MaxBackward1>) min:  tensor(-0.3088, device='cuda:0', grad_fn=<MinBackward1>)
Max:  tensor(10.4358, device='cuda:0', grad_fn=<MaxBackward1>) min:  tensor(-0.3088, device='cuda:0', grad_fn=<MinBackward1>)
Max:  tensor(9.0428, device='cuda:0', grad_fn=<MaxBackward1>) min:  tensor(-0.3088, device='cuda:0', grad_fn=<MinBackward1>)
Max:  tensor(9.1704, device='cuda:0', grad_fn=<MaxBackward1>) min:  tensor(-0.3088, device='cuda:0', grad_fn=<MinBackward1>)
Max:  tensor(8.1803, device='cuda:0', grad_fn=<MaxBackward1>) min:  tensor(-0.3088, device='cuda:0', grad_fn=<MinBackward1>)
Max:  tensor(9.4565, device='cuda:0', grad_fn=<MaxBackward1>) min:  tensor(-0.3088, device='cuda:0', grad_fn=<MinBackward1>)
Max:  tensor(11.2166, device='cuda:0', grad_fn=<MaxBackward1>) min:  tensor(-0.3088, device='cuda:0', grad_fn=<MinBackward1>)
Max:  tensor(9.7651, device='cuda:0', grad_fn=<MaxBackward1>) min:  tensor(-0.3088, device='cuda:0', grad_fn=<MinBackward1>

Detected KeyboardInterrupt, attempting graceful shutdown...
LR finder stopped early due to diverging loss.
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "D:\Apps\Anaconda\lib\site-packages\IPython\core\interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-e4602f59d843>", line 1, in <module>
    r = t.lr_find(m, min_lr=1e-10, max_lr=1e-3, early_stop_threshold=None)
  File "D:\Apps\Anaconda\lib\site-packages\pytorch_lightning\trainer\lr_finder.py", line 182, in lr_find
    self.restore(str(save_path), on_gpu=self.on_gpu)
  File "D:\Apps\Anaconda\lib\site-packages\pytorch_lightning\trainer\training_io.py", line 300, in restore
    model.cuda(self.root_gpu)
  File "D:\Apps\Anaconda\lib\site-packages\pytorch_lightning\core\properties.py", line 109, in cuda
    return super().cuda(device=device)
  File "D:\Apps\Anaconda\lib\site-packages\torch\nn\modules\module.py", line 307, in cuda
    return self._apply(lambda t: t.cuda(device))
  File "D:\Apps\Anaconda\lib\site-packages\torch\nn\modules\module.py", line 203, in _apply
    modul

TypeError: object of type 'NoneType' has no len()

In [None]:
# Launch training of the YOLOv4 Lightning module with the trainer built above.
t.fit(model=m)