## Basic experiment tools dev notebook

Created on: Tuesday March 22nd, 2022  
Created by: Jacob Alexander Rose  

In [1]:
%load_ext autoreload
%autoreload 2

import os

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

from IPython.display import display
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import pandas as pd
from pathlib import Path
from icecream import ic
from rich import print as pp
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# from imutils.big.datamodule import Herbarium2022DataModule, Herbarium2022Dataset
from imutils.ml.data.datamodule import Herbarium2022DataModule, Herbarium2022Dataset
from imutils.ml.utils.etl_utils import ETL

import pytorch_lightning as pl
from torchvision import transforms as T
import argparse
import imutils

from hydra.experimental import compose, initialize, initialize_config_dir
import hydra

from omegaconf import DictConfig, OmegaConf
from typing import *

Importing imutils
ASSETS_DIR: /media/data/jacob/GitHub/image-utils/assets
SAMPLE_IMAGE_PATHS: [PosixPath('/media/data/jacob/GitHub/image-utils/assets/Ericaceae_Zenobia_pulverulenta_7984 {WolfeUSGS} [1.96x].jpg'), PosixPath('/media/data/jacob/GitHub/image-utils/assets/Ericaceae_Arbutus_densiflora_1440 {WolfeUSGS} [1.96x].jpg')]
IMUTILS_ML_ROOT: /media/data/jacob/GitHub/image-utils/imutils/ml


### helper display func

In [2]:
def display_train_timing_info(batches_per_epoch: int,
                              batches_per_second: float,
                              batch_size: int):
    
    samples_per_epoch = batches_per_epoch*batch_size
    seconds_per_epoch = batches_per_epoch * batches_per_second
    min_per_epoch = seconds_per_epoch / 60
    hrs_per_epoch = min_per_epoch / 60
    
    samples_per_second = batches_per_second * batch_size

    batches_per_min = batches_per_second * 60
    batches_per_hr = batches_per_min * 60
    
    samples_per_min = samples_per_second * 60
    samples_per_hr = samples_per_min * 60


    pp([f"seconds_per_epoch: {seconds_per_epoch:>,}",
       f"min_per_epoch: {min_per_epoch:.4f}",
       f"hrs_per_epoch: {hrs_per_epoch:.4f}",
       f"epochs_per_second: {1/seconds_per_epoch:.4f}",
       f"epochs_per_min: {1/min_per_epoch:.4f}",
       f"epochs_per_hr: {1/hrs_per_epoch:.4f}",
       f"batches_per_epoch: {batches_per_epoch:.4g}",
       f"samples_per_epoch: {samples_per_epoch:.4g}",
       f"seconds_per_batch: {1/batches_per_second:.4f}",
       f"batches_per_second: {batches_per_second:.4f}",
       f"batches_per_min: {batches_per_min:.4f}",
       f"batches_per_hr: {batches_per_hr:.4f}",
       f"samples_per_second: {samples_per_second:.4f}",
       f"samples_per_min: {samples_per_min:.4f}",
       f"samples_per_hr: {samples_per_hr:.4g}",
       f"batch_size: {batch_size}"])

### Experiment #2


In [3]:
batches_per_second = (1/1.7)
batches_per_epoch = 4374
batch_size=48

print(f"Experiment #2: batch_size={batch_size}, num_processes=4, num_devices=2")
print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #2: batch_size=48, num_processes=4, num_devices=2
Using 50% of samples


(Extrapolated prediction) Using 100% of samples


### Experiment #3

In [4]:
batches_per_second = (1/2.15)
batches_per_epoch = 3280
batch_size=64
print(f"Experiment #3: batch_size=64, num_processes=4, num_devices=2")
print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #3: batch_size=64, num_processes=4, num_devices=2
Using 50% of samples


(Extrapolated prediction) Using 100% of samples


### Experiment #4

In [5]:
batches_per_second = (1/3.3)
batches_per_epoch = 2187
batch_size=96

print(f"Experiment #4: batch_size={batch_size}, num_processes=4, num_devices=2")
print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #4: batch_size=96, num_processes=4, num_devices=2
Using 50% of samples


(Extrapolated prediction) Using 100% of samples


### Experiment #5

Started: 3:15 AM - 2022-03-23  
Ended:   4:30 AM - 2022-03-23  

In [6]:
batches_per_second = (1/4.3)
batches_per_epoch = 1640
batch_size=128

print(f"Experiment #5: batch_size={batch_size}, num_processes=4, num_devices=2")
print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #5: batch_size=128, num_processes=4, num_devices=2
Using 50% of samples


(Extrapolated prediction) Using 100% of samples


### Experiment #6

Started: 4:30 AM - 2022-03-23  
Ended:   5:45 AM - 2022-03-23  

In [7]:
batches_per_second = (1/5.15)
batches_per_epoch = 1458
batch_size=144

print(f"Experiment #6: batch_size={batch_size}, num_processes=4, num_devices=2")
print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #6: batch_size=144, num_processes=4, num_devices=2
Using 50% of samples


(Extrapolated prediction) Using 100% of samples


In [8]:
batches_per_second = (1/4.87)
batches_per_epoch = 1458
batch_size=144

print(f"Experiment #6: batch_size={batch_size}, num_processes=4, num_devices=2")
print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #6: batch_size=144, num_processes=4, num_devices=2
Using 50% of samples


(Extrapolated prediction) Using 100% of samples


### Experiment #7

- Using Accumulate_grad_batches=2

Started: 5:45 AM - 2022-03-23  
Ended:   x:xx AM - 2022-03-23  

In [9]:
batches_per_second = (1/4.81)
batches_per_epoch = 1458
batch_size=144

print(f"Experiment #7: batch_size={batch_size}, num_processes=4, num_devices=2")
print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #7: batch_size=144, num_processes=4, num_devices=2
Using 50% of samples


(Extrapolated prediction) Using 100% of samples


### Experiment #8

- Using Accumulate_grad_batches=2
- lr=1e-2

- Removed base_callbacks.yaml:
        -train.callbacks.lr_monitor \
        -train.callbacks.early_stopping \
        -train.callbacks.model_checkpoint


Started: 9:00 AM - 2022-03-23  
Ended:   x:xx AM - 2022-03-23  

In [10]:
batches_per_second = (1/ 4.26)
batches_per_epoch = 229
batch_size=128

print(f"Experiment #8: batch_size={batch_size}, num_processes=4, num_devices=2")


print("Using 1% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)


print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*50,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*100,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #8: batch_size=128, num_processes=4, num_devices=2
Using 1% of samples


Using 50% of samples


(Extrapolated prediction) Using 100% of samples


### Experiment #11

- Using Accumulate_grad_batches=1
- lr=0.5e-3
- freeze_backbone_up_to=-4


Started: 12:25 PM - 2022-03-23  
Ended:   2:55 PM - 2022-03-23  

In [11]:
batches_per_second = (1/ 4.53)
batches_per_epoch = 3282
batch_size=128

print(f"Experiment #11: batch_size={batch_size}, num_processes=4, num_devices=2")


print("Using 1% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)


print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*50,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch*100,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #11: batch_size=128, num_processes=4, num_devices=2
Using 1% of samples


Using 50% of samples


(Extrapolated prediction) Using 100% of samples


### Experiment #12

- Using Accumulate_grad_batches=1
- lr=1e-2
- freeze_backbone_up_to=-4
- batch_size=128
- preprocess_size=256
- resolution=224


Started: 3:00 PM - 2022-03-23  
Ended:   x:xx AM - 2022-03-23  

In [12]:
batches_per_second = (1/ 2.58)
batches_per_epoch = 3282
batch_size=128

print(f"Experiment #8: batch_size={batch_size}, num_processes=4, num_devices=2")


print("Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)


print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch/2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 1% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch/100,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #8: batch_size=128, num_processes=4, num_devices=2
Using 100% of samples


Using 50% of samples


(Extrapolated prediction) Using 1% of samples


### Experiment #13
(Running in parallel to #12, since 4 GPUs just opened up.Tried doubling the scaling of the lr to accomodate the doubling of the # of GPUs


- Increased num_devices from 2->4
- Using Accumulate_grad_batches=1
- lr=2e-2
- freeze_backbone_up_to=-4
- batch_size=128
- preprocess_size=256
- resolution=224


Started: 3:52 PM - 2022-03-23  
Ended:   x:xx AM - 2022-03-23  

In [13]:
batches_per_second = (1/ 4.2)
batches_per_epoch = 1642
batch_size=128

print(f"Experiment #8: batch_size={batch_size}, num_processes=4, num_devices=2")


print("Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)


print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch/2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 1% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch/100,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #8: batch_size=128, num_processes=4, num_devices=2
Using 100% of samples


Using 50% of samples


(Extrapolated prediction) Using 1% of samples


### Experiment #14


- Increased num_devices from 4
- Using Accumulate_grad_batches=2
- lr=1e-3
- freeze_backbone=False
- batch_size=64
- preprocess_size=256
- resolution=224


Started: 5:00 PM - 2022-03-23  
Ended:   x:xx AM - 2022-03-24

In [24]:
print(f"{1/((3282)/(90*60)):.2f}")

1.65


In [15]:
batches_per_second = (1/1.65 )
batches_per_epoch = 3282
batch_size=64

print(f"Experiment #8: batch_size={batch_size}, num_processes=4, num_devices=2")


print("Using 100% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)


print("Using 50% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch/2,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

print("(Extrapolated prediction) Using 1% of samples")
display_train_timing_info(batches_per_epoch=batches_per_epoch/100,
                          batches_per_second=batches_per_second,
                          batch_size=batch_size)

Experiment #8: batch_size=64, num_processes=4, num_devices=2
Using 100% of samples


Using 50% of samples


(Extrapolated prediction) Using 1% of samples


In [125]:
229/16/60

0.23854166666666668

In [14]:
27.96*2

55.92