## Notebook 2.

### 16-bit precision on a single GPU, with a batch_size tuning step before training and testing

Created by: Jacob A Rose  
Created on: Monday July 5th, 2021

### Scaling model training series

A collection of notebooks meant to demonstrate minimal-complexity examples for:
* Integrating new training methods for scaling up to large numbers of experiments run in parallel, and
* Making maximum use of hardware resources

1. 16bit precision, single gpu, train -> test
2. 16bit precision, single gpu, batch_size tune -> train -> test


In [None]:
import os
from typing import Any, List, Tuple

import pytorch_lightning as pl
import torch
from pytorch_lightning.metrics.classification import Accuracy
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST
# from lightning_hydra_classifiers.models.modules.simple_dense_net import SimpleDenseNet

# Replace the default download URLs (Yann LeCun's website) with the official AWS S3 bucket mirror
# new_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist'
# MNIST.resources = [
#                    ('/'.join([new_mirror, url.split('/')[-1]]), md5)
#                    for url, md5 in MNIST.resources
#                    ]

# Use the hard-coded default path only when TOY_DATA_DIR is not already set
# in the environment; an existing value is left untouched.
os.environ.setdefault('TOY_DATA_DIR', "/media/data_cifs/projects/prj_fossils/data/toy_data")

default_root_dir = os.environ['TOY_DATA_DIR']

In [None]:
    
    
DEFAULT_CONFIG_PATH = "/media/data/jacob/GitHub/lightning-hydra-classifiers/configs/experiment/2_16bit-precision-single-gpu_tune-batchsize.yaml"


def main(config_path: str = DEFAULT_CONFIG_PATH):
    """
    Run the full experiment: tune batch_size -> train -> test.

    Args:
        config_path: Path to the hydra experiment yaml file. Its parent
            directory is used as the hydra config dir and its stem as the
            config name. Defaults to the experiment config this notebook
            was written for.

    Returns:
        A tuple `(test_results, best_bsz)` where `test_results` is whatever
        `trainer.test` returns and `best_bsz` is `model.batch_size` as read
        right after `trainer.tune` has run.
    """
    config_file = Path(config_path)
    config = read_hydra_config(config_dir=str(config_file.parent),
                               job_name="test_app",
                               config_name=config_file.stem)

    template_utils.extras(config)

    if "seed" in config:
        pl.seed_everything(config.seed)

    # configure_datamodule hands back the config as well, because it may have
    # written the datamodule's classes / num_classes into it.
    datamodule, config = configure_datamodule(config)
    model = configure_model(config)
    trainer = configure_trainer(config)

    # The value returned by tune() is not used; the tuned batch size is read
    # back from the model instead.
    # NOTE(review): this assumes the tuner stores its result on
    # `model.batch_size` (rather than e.g. `model.hparams.batch_size`) -- confirm.
    trainer.tune(model, datamodule=datamodule)
    best_bsz = model.batch_size

    print(f'[START] Training with tuned batch_size = {best_bsz}')

    trainer.fit(model, datamodule=datamodule)

    test_results = trainer.test(datamodule=datamodule)

    return test_results, best_bsz

### Function definitions

1. Configure logger (using python's logging module)
2. Configure experiment Config (using hydra + omegaconf.DictConfig)
3. Configure datamodule (using custom LightningDataModule)
4. Configure model (using custom LightningModule)
5. Configure trainer (using pl.Trainer, as well as pytorch lightning loggers & callbacks)

In [None]:
import sys
import logging
from lightning_hydra_classifiers.utils import template_utils
import hydra
from hydra.experimental import compose, initialize_config_dir
from omegaconf import OmegaConf, DictConfig
from pathlib import Path


def get_standard_python_logger(name: str='notebook'):
    """
    Configure python's logging module to write to stdout and return a named logger.

    Root logging is set up via `logging.basicConfig` at INFO level with a
    timestamped "<time> [<level>] <logger name> - <message>" format.

    Args:
        name: Name of the logger to fetch with `logging.getLogger`.

    Returns:
        The `logging.Logger` registered under `name`.
    """
    basic_config_kwargs = dict(
        stream=sys.stdout,
        level=logging.INFO,
        datefmt='%Y-%m-%d %H:%M:%S',
        format='%(asctime)s [%(levelname)s] %(name)s - %(message)s',
    )
    logging.basicConfig(**basic_config_kwargs)

    return logging.getLogger(name)

# Module-level logger used by the configure_* functions in this notebook.
log = get_standard_python_logger('notebook')


def read_hydra_config(config_dir: str,
                      job_name: str="test_app",
                      config_name: str="experiment") -> DictConfig:
    """
    Compose a hydra config from a yaml file on disk and return it as a DictConfig.

    Side effect: the process working directory is changed to `config_dir`
    and is not restored afterwards.

    Args:
        config_dir: Directory containing the yaml config; passed to
            `initialize_config_dir` and used as the new working directory.
        job_name: Job name passed to `initialize_config_dir`.
        config_name: Name of the config to compose (file stem, no extension).

    Returns:
        The composed config. If it has a truthy `print_config` entry, the
        config is also printed through `template_utils.print_config`.
    """
    os.chdir(config_dir)

    hydra_context = initialize_config_dir(config_dir=config_dir, job_name=job_name)
    with hydra_context:
        composed = compose(config_name=config_name)

    wants_printout = composed.get("print_config")
    if wants_printout:
        template_utils.print_config(composed, resolve=True)

    return composed

In [None]:
def configure_datamodule(config: DictConfig) -> Tuple[pl.LightningDataModule, DictConfig]:
    """
    Instantiate the datamodule described by `config.datamodule`.

    After instantiation the datamodule is set up for the "fit" stage and its
    `classes` (plus the derived `num_classes`) are written back into
    `config.datamodule`. That step is best-effort: if it fails, a warning
    with the traceback is logged and the config is returned without those
    fields being (fully) updated.

    Args:
        config: Experiment config containing a `datamodule` node with a
            `_target_` entry that hydra can instantiate.

    Returns:
        A tuple `(datamodule, config)`: the instantiated datamodule and the
        (possibly updated) config.
    """
    log.info(f"Instantiating datamodule <{config.datamodule._target_}>")
    datamodule: pl.LightningDataModule = hydra.utils.instantiate(config.datamodule)

    try:
        datamodule.setup(stage="fit")
        config.datamodule.classes = datamodule.classes
        config.datamodule.num_classes = len(config.datamodule.classes)
    except Exception:
        # Deliberately non-fatal: not every datamodule is guaranteed to expose
        # `classes`. Log with traceback instead of a bare print so the cause
        # is not lost.
        log.warning("Could not populate config.datamodule.classes / num_classes "
                    "from the datamodule; continuing without them.",
                    exc_info=True)

    return datamodule, config

In [None]:
def configure_model(config: DictConfig) -> pl.LightningModule:
    """
    Instantiate the model described by `config.model`.

    Args:
        config: Experiment config containing a `model` node with a
            `_target_` entry that hydra can instantiate.

    Returns:
        The object produced by `hydra.utils.instantiate(config.model)`.
    """
    log.info(f"Instantiating model <{config.model._target_}>")
    return hydra.utils.instantiate(config.model)

In [None]:
from typing import List
# from pytorch_lightning import LightningModule, LightningDataModule, Callback, Trainer
# from pytorch_lightning.loggers import LightningLoggerBase
# from pytorch_lightning import seed_everything

def configure_trainer(config: DictConfig) -> pl.Trainer:
    """
    Build the trainer described by `config.trainer`, with its callbacks and loggers.

    Every entry under `config["callbacks"]` and `config["logger"]` that has a
    `_target_` key is instantiated through hydra; entries without one are
    skipped. The callback named "wandb" additionally receives the whole
    experiment config, resolved and converted to a plain container, as its
    `config` argument.

    Args:
        config: Experiment config with a `trainer` node and optional
            `callbacks` / `logger` nodes.

    Returns:
        The trainer instantiated from `config.trainer`, given the collected
        callbacks and loggers.
    """
    # Collect Lightning callbacks
    callback_objs: List[pl.Callback] = []
    if "callbacks" in config:
        for cb_name, cb_conf in config["callbacks"].items():
            if "_target_" not in cb_conf:
                continue
            log.info(f"Instantiating callback <{cb_conf._target_}>")
            extra_kwargs = {}
            if cb_name == "wandb":
                # Only the wandb callback is handed the full resolved config.
                extra_kwargs["config"] = OmegaConf.to_container(config, resolve=True)
            callback_objs.append(hydra.utils.instantiate(cb_conf, **extra_kwargs))

    # Collect Lightning loggers
    logger_objs: List[pl.loggers.LightningLoggerBase] = []
    if "logger" in config:
        for lg_conf in config["logger"].values():
            if "_target_" not in lg_conf:
                continue
            log.info(f"Instantiating logger <{lg_conf._target_}>")
            logger_objs.append(hydra.utils.instantiate(lg_conf))

    log.info(f"Instantiating trainer <{config.trainer._target_}>")
    # NOTE(review): `_convert_="partial"` is a hydra instantiate option;
    # see the hydra docs for its exact conversion semantics.
    return hydra.utils.instantiate(config.trainer,
                                   callbacks=callback_objs,
                                   logger=logger_objs,
                                   _convert_="partial")

# trainer = configure_trainer(config)

### Main

In [None]:
# IPython magics: (re)load the tensorboard notebook extension, then start
# TensorBoard inline on the lightning_logs/ directory.
# NOTE(review): `--port 0` presumably lets TensorBoard pick any free port -- confirm.
%reload_ext tensorboard
%tensorboard --port 0 --logdir lightning_logs/


# Run the whole pipeline (tune batch_size -> train -> test); main() returns
# (test_results, best_bsz).
main()

## Scratch

In [None]:
# class CoolSystem(pl.LightningModule):

#     def __init__(self,
#                  classes: int=10,
#                  batch_size: int=32):
#         super().__init__()
#         self.batch_size = batch_size
#         self.save_hyperparameters()

#         self.l1 = torch.nn.Linear(28 * 28, self.hparams.classes)

#     def forward(self, x):
#         return torch.relu(self.l1(x.view(x.size(0), -1)))

#     def training_step(self, batch, batch_idx):
#         x, y = batch
#         y_hat = self(x)
#         loss = F.cross_entropy(y_hat, y)
#         tensorboard_logs = {'train_loss': loss}
#         return {'loss': loss, 'log': tensorboard_logs}

#     def configure_optimizers(self):
#         return torch.optim.Adam(self.parameters(), lr=0.001)

#     def prepare_data(self):
#         MNIST(default_root_dir, train=True, download=True, transform=transforms.ToTensor())
#         MNIST(default_root_dir, train=False, download=True,  transform=transforms.ToTensor())

#     def train_dataloader(self):
#         return self.trainer.datamodule.train_dataloader()
#         mnist_train = MNIST(default_root_dir, train=True, download=False, transform=transforms.ToTensor())
#         loader = DataLoader(mnist_train, batch_size=self.batch_size, num_workers=4)
#         return loader

#     def test_dataloader(self):
#         return self.trainer.datamodule.test_dataloader()
#         mnist_test = MNIST(default_root_dir, train=False, download=False, transform=transforms.ToTensor())
#         loader = DataLoader(mnist_test, batch_size=self.batch_size, num_workers=4)
#         return loader

    

In [None]:
# from rich import print as pp
# import yaml
# def read_config(path:  str) -> dict:
#     try:
#         with open (path, 'r') as file:
#             config = yaml.safe_load(file)
#         return config
#     except Exception as e:
#         print('Error reading the config file')
#         print(e)

In [None]:
# config_path = "/media/data/jacob/GitHub/lightning-hydra-classifiers/configs/experiment/2_16bit-precision-single-gpu_tune-batchsize.yaml"
# config = read_hydra_config(config_dir = str(Path(config_path).parent),
#                            job_name="test_app",
#                            config_name=Path(config_path).stem)

# template_utils.extras(config)

# if "seed" in config:
#     pl.seed_everything(config.seed)

# datamodule, config = configure_datamodule(config)


# dir(datamodule)

# datamodule.batch_size=16

# datamodule.show_batch()

In [None]:
# config_path = "/media/data/jacob/GitHub/lightning-hydra-classifiers/configs/experiment/2_16bit-precision-single-gpu_tune-batchsize.yaml"
# config = read_hydra_config(config_dir = str(Path(config_path).parent),
#                            job_name="test_app",
#                            config_name=Path(config_path).stem)

# template_utils.extras(config)
# # OmegaConf.set_struct(config, False)