# Training

In [2]:
# imports
import os
import pathlib

import albumentations as A
import numpy as np
from pytorch_lightning import Trainer
from pytorch_lightning import seed_everything
from pytorch_lightning.callbacks import ModelCheckpoint, LearningRateMonitor, EarlyStopping
from pytorch_lightning.loggers.neptune import NeptuneLogger
from torch.utils.data import DataLoader

from pytorch_faster_rcnn_tutorial.datasets import ObjectDetectionDataSet
from pytorch_faster_rcnn_tutorial.faster_RCNN import FasterRCNN_lightning
from pytorch_faster_rcnn_tutorial.faster_RCNN import get_fasterRCNN_resnet
from pytorch_faster_rcnn_tutorial.transformations import Clip, ComposeDouble
from pytorch_faster_rcnn_tutorial.transformations import AlbumentationWrapper
from pytorch_faster_rcnn_tutorial.transformations import FunctionWrapperDouble
from pytorch_faster_rcnn_tutorial.transformations import normalize_01
from pytorch_faster_rcnn_tutorial.utils import get_filenames_of_path, collate_double
from pytorch_faster_rcnn_tutorial.utils import log_mapping_neptune
from pytorch_faster_rcnn_tutorial.utils import log_model_neptune
from pytorch_faster_rcnn_tutorial.utils import log_packages_neptune

import neptune.new as neptune

# Start a Neptune run for this notebook session.
# The API token is taken from the 'NEPTUNE' environment variable instead of
# being embedded in the notebook, so the credential is never committed.
# If 'NEPTUNE' is unset, None is passed and the Neptune client falls back to
# its own default lookup (the NEPTUNE_API_TOKEN environment variable).
# NOTE(review): the token that used to be hard-coded here has been exposed and
# should be revoked/rotated in the Neptune UI.
run = neptune.init(
    project="hpnestler/zafrens-image",
    api_token=os.environ.get("NEPTUNE"),
)

  warn(f"Failed to load image Python extension: {e}")


ModuleNotFoundError: No module named 'neptunecontrib'

In [2]:
# hyper-parameters
# Single place for every tunable value used by the cells below.
params = dict(
    BATCH_SIZE=2,
    OWNER='hpnestler',  # set your name here, e.g. johndoe22
    SAVE_DIR=None,  # checkpoints will be saved to cwd
    LOG_MODEL=False,  # whether to log the model to neptune after training
    GPU=None,  # set to None for cpu training
    LR=0.001,
    PRECISION=32,
    CLASSES=2,
    SEED=42,
    PROJECT='zafrens-image',
    EXPERIMENT='heads',
    MAXEPOCHS=10,
    BACKBONE='resnet34',
    FPN=False,
    ANCHOR_SIZE=((32, 64, 128, 256, 512),),
    ASPECT_RATIOS=((0.5, 1.0, 2.0),),
    MIN_SIZE=1024,
    MAX_SIZE=1024,
    IMG_MEAN=[0.485, 0.456, 0.406],
    IMG_STD=[0.229, 0.224, 0.225],
    IOU_THRESHOLD=0.5,
)

In [3]:
# api key
# The Neptune API token is a secret: it must be supplied through the 'NEPTUNE'
# environment variable (set it in the shell before starting the kernel) and is
# deliberately never written into this notebook.
# NOTE(review): the token previously stored in this cell has been exposed and
# should be revoked/rotated in the Neptune UI.
try:
    api_key = os.environ['NEPTUNE']
except KeyError as err:
    # Same exception type as a plain lookup, but with an actionable message.
    raise KeyError(
        "Environment variable 'NEPTUNE' is not set; export your Neptune API "
        "token before starting the notebook kernel."
    ) from err

In [4]:
# save directory
# Always bind save_dir: use the configured SAVE_DIR when one is given and fall
# back to the current working directory when it is None/empty. (A bare
# `if not ...:` assignment would leave the name undefined for a set SAVE_DIR.)
save_dir = params['SAVE_DIR'] or os.getcwd()

In [5]:
# root directory
# Relative location of the data set; resolved against the working directory.
heads_dir = 'pytorch_faster_rcnn_tutorial/data/heads'
root = pathlib.Path(heads_dir)

In [6]:
# input and target files
# Both listings are sorted so that the i-th input lines up with the i-th
# target (assumes matching file names in both folders — TODO confirm).
inputs = sorted(get_filenames_of_path(root / 'input'))
targets = sorted(get_filenames_of_path(root / 'target'))


In [7]:
# mapping
# Class name -> integer label handed to the datasets below.
mapping = dict(head=1)

In [8]:
# training transformations and augmentations
# Steps are applied in list order: clip, random flip/scale, channel axis to
# the front, then normalize_01.
# (A vertical flip, A.VerticalFlip(p=0.5), is currently disabled.)
training_steps = [
    Clip(),
    AlbumentationWrapper(albumentation=A.HorizontalFlip(p=0.5)),
    AlbumentationWrapper(albumentation=A.RandomScale(p=0.5, scale_limit=0.5)),
    FunctionWrapperDouble(np.moveaxis, source=-1, destination=0),
    FunctionWrapperDouble(normalize_01),
]
transforms_training = ComposeDouble(training_steps)

In [9]:
# validation transformations
# No augmentation here: clip, move the channel axis to the front, normalize_01.
validation_steps = [
    Clip(),
    FunctionWrapperDouble(np.moveaxis, source=-1, destination=0),
    FunctionWrapperDouble(normalize_01),
]
transforms_validation = ComposeDouble(validation_steps)

In [10]:
# test transformations
# Same deterministic pipeline as validation: clip, channel axis first, normalize_01.
test_steps = [
    Clip(),
    FunctionWrapperDouble(np.moveaxis, source=-1, destination=0),
    FunctionWrapperDouble(normalize_01),
]
transforms_test = ComposeDouble(test_steps)

In [11]:
# random seed
# Seed the run with the value configured in params so it can be repeated.
seed_everything(seed=params['SEED'])

Global seed set to 42


42

In [12]:
# training validation test split
# Fixed positional split over the sorted file lists:
# first 12 samples -> train, next 4 -> validation, everything after -> test.
train_part, valid_part, test_part = slice(None, 12), slice(12, 16), slice(16, None)

inputs_train, targets_train = inputs[train_part], targets[train_part]
inputs_valid, targets_valid = inputs[valid_part], targets[valid_part]
inputs_test, targets_test = inputs[test_part], targets[test_part]

In [13]:
# dataset training
# Pairs the training inputs/targets with the augmenting transform pipeline.
train_dataset_kwargs = dict(
    inputs=inputs_train,
    targets=targets_train,
    transform=transforms_training,
    use_cache=True,
    convert_to_format=None,
    mapping=mapping,
)
dataset_train = ObjectDetectionDataSet(**train_dataset_kwargs)

In [14]:
# dataset validation
# Pairs the validation inputs/targets with the validation transform pipeline.
valid_dataset_kwargs = dict(
    inputs=inputs_valid,
    targets=targets_valid,
    transform=transforms_validation,
    use_cache=True,
    convert_to_format=None,
    mapping=mapping,
)
dataset_valid = ObjectDetectionDataSet(**valid_dataset_kwargs)

In [15]:
# dataset test
# Pairs the held-out test inputs/targets with the test transform pipeline.
test_dataset_kwargs = dict(
    inputs=inputs_test,
    targets=targets_test,
    transform=transforms_test,
    use_cache=True,
    convert_to_format=None,
    mapping=mapping,
)
dataset_test = ObjectDetectionDataSet(**test_dataset_kwargs)

In [16]:
# dataloader training
# Shuffled batches of params['BATCH_SIZE'] samples, loaded in the main process.
dataloader_train = DataLoader(
    dataset_train,
    batch_size=params['BATCH_SIZE'],
    shuffle=True,
    num_workers=0,
    collate_fn=collate_double,
)

In [17]:
# dataloader validation
# One sample per batch, fixed order, loaded in the main process.
dataloader_valid = DataLoader(
    dataset_valid,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_double,
)

In [18]:
# dataloader test
# One sample per batch, fixed order, loaded in the main process.
dataloader_test = DataLoader(
    dataset_test,
    batch_size=1,
    shuffle=False,
    num_workers=0,
    collate_fn=collate_double,
)

In [19]:
# neptune logger
# Build the "<owner>/<project>" identifier from params so that editing
# params['OWNER'] / params['PROJECT'] actually redirects the logging target
# (with the current params this evaluates to 'hpnestler/zafrens-image').
neptune_project = f"{params['OWNER']}/{params['PROJECT']}"

neptune_logger = NeptuneLogger(
    api_key=api_key,
    project=neptune_project,
    name="tester",
)

assert neptune_logger.name  # http GET request to check if the project exists

In [20]:
# model init
# Every architectural choice is read from params, so the model is fully
# described by the hyper-parameter cell.
model_kwargs = dict(
    num_classes=params['CLASSES'],
    backbone_name=params['BACKBONE'],
    anchor_size=params['ANCHOR_SIZE'],
    aspect_ratios=params['ASPECT_RATIOS'],
    fpn=params['FPN'],
    min_size=params['MIN_SIZE'],
    max_size=params['MAX_SIZE'],
)
model = get_fasterRCNN_resnet(**model_kwargs)

In [21]:
# lightning init
# Wrap the detection model in the Lightning module together with the learning
# rate and IoU threshold from params.
task = FasterRCNN_lightning(
    model=model,
    lr=params['LR'],
    iou_threshold=params['IOU_THRESHOLD'],
)

In [22]:
# callbacks
# Checkpointing and early stopping both watch the same metric and treat
# higher values as better.
monitored_metric = 'Validation_mAP'

checkpoint_callback = ModelCheckpoint(monitor=monitored_metric, mode='max')
learningrate_callback = LearningRateMonitor(logging_interval='step', log_momentum=False)
early_stopping_callback = EarlyStopping(monitor=monitored_metric, patience=50, mode='max')

In [23]:
# trainer init
# Callbacks are gathered first so the Trainer call itself stays short.
trainer_callbacks = [
    checkpoint_callback,
    learningrate_callback,
    early_stopping_callback,
]

trainer = Trainer(
    gpus=params['GPU'],
    precision=params['PRECISION'],  # try 16 with enable_pl_optimizer=False
    callbacks=trainer_callbacks,
    default_root_dir=params['SAVE_DIR'],  # where checkpoints are saved to
    logger=neptune_logger,
    log_every_n_steps=1,
    num_sanity_val_steps=0,
    max_epochs=params['MAXEPOCHS'],
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [24]:
# start training
# The training loader is passed positionally: the keyword for this argument
# was renamed between Lightning releases (`train_dataloader` ->
# `train_dataloaders`), while the positional slot right after the module is
# the training loader in both spellings.
trainer.fit(task,
            dataloader_train,
            val_dataloaders=dataloader_valid)

https://app.neptune.ai/hpnestler/zafrens-image/e/ZAF-10
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.



  | Name  | Type       | Params
-------------------------------------
0 | model | FasterRCNN | 50.4 M
-------------------------------------
50.4 M    Trainable params
0         Non-trainable params
50.4 M    Total params
201.736   Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [25]:
# start testing
# Evaluate the best checkpoint recorded during fit on the held-out test set.
# model=None and the loader are passed positionally because the loader keyword
# was renamed between Lightning releases (`test_dataloaders` -> `dataloaders`);
# the second positional slot is the test loader under both names.
trainer.test(None, dataloader_test, ckpt_path='best')

Restoring states from the checkpoint path at /mnt/raid1/Python_projects/Zafrens_Image/.neptune/tester/ZAF-10/checkpoints/epoch=9-step=59.ckpt
Loaded model weights from checkpoint at /mnt/raid1/Python_projects/Zafrens_Image/.neptune/tester/ZAF-10/checkpoints/epoch=9-step=59.ckpt


Testing: 0it [00:00, ?it/s]

--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Test_AP_1': 0.3490897475693392, 'Test_mAP': 0.3490897475693392}
--------------------------------------------------------------------------------


[{'Test_mAP': 0.3490897475693392, 'Test_AP_1': 0.3490897475693392}]

In [26]:
# log packages
# Hands the Neptune logger to the project helper imported from
# pytorch_faster_rcnn_tutorial.utils.
# NOTE(review): in the run recorded below this call raised
# NeptunePossibleLegacyUsageException ("trying to use legacy API, but imported
# the new one"), so the helper presumably still targets the legacy Neptune
# client — confirm and migrate the helper before relying on this cell.
log_packages_neptune(neptune_logger)

NeptunePossibleLegacyUsageException: 
[95m
----NeptunePossibleLegacyUsageException----------------------------------------------------------------
[0m
It seems you are trying to use legacy API, but imported the new one.

Simply update your import statement to:
    [96mimport neptune[0m

You may want to check the Legacy API docs:
    - https://docs-legacy.neptune.ai

If you want to update your code with the new API we prepared a handy migration guide:
    - https://docs.neptune.ai/migration-guide

You can read more about neptune.new in the release blog post:
    - https://neptune.ai/blog/neptune-new

You may also want to check the following docs pages:
    - https://docs-legacy.neptune.ai/getting-started/integrate-neptune-into-your-codebase.html

[92mNeed help?[0m-> https://docs.neptune.ai/getting-started/getting-help


In [27]:
# log mapping as table
# Hands the class mapping and the Neptune logger to the project helper imported
# from pytorch_faster_rcnn_tutorial.utils.
# NOTE(review): in the run recorded below this call raised
# NeptunePossibleLegacyUsageException ("trying to use legacy API, but imported
# the new one"), so the helper presumably still targets the legacy Neptune
# client — confirm and migrate the helper before relying on this cell.
log_mapping_neptune(mapping, neptune_logger)

NeptunePossibleLegacyUsageException: 
[95m
----NeptunePossibleLegacyUsageException----------------------------------------------------------------
[0m
It seems you are trying to use legacy API, but imported the new one.

Simply update your import statement to:
    [96mimport neptune[0m

You may want to check the Legacy API docs:
    - https://docs-legacy.neptune.ai

If you want to update your code with the new API we prepared a handy migration guide:
    - https://docs.neptune.ai/migration-guide

You can read more about neptune.new in the release blog post:
    - https://neptune.ai/blog/neptune-new

You may also want to check the following docs pages:
    - https://docs-legacy.neptune.ai/getting-started/integrate-neptune-into-your-codebase.html

[92mNeed help?[0m-> https://docs.neptune.ai/getting-started/getting-help


In [28]:
# log model
# Optional step, switched on via params['LOG_MODEL']: hand the best checkpoint
# found by the checkpoint callback to the Neptune model-logging helper.
should_log_model = params['LOG_MODEL']
if should_log_model:
    checkpoint_path = pathlib.Path(checkpoint_callback.best_model_path)
    log_model_neptune(
        checkpoint_path=checkpoint_path,
        save_directory=pathlib.Path.home(),
        name='best_model.pt',
        neptune_logger=neptune_logger,
    )

Run ZAF-9 received stop signal. Exiting


Shutting down background jobs, please wait a moment...
Done!


Waiting for the remaining 3 operations to synchronize with Neptune. Do not kill this process.


All 3 operations synced, thanks for waiting!
