Merge branch 'dev/0.5.4' into 'master'

Begin netharn 0.5.4 See merge request computer-vision/netharn!4
Erotemic · Feb 19, 2020 · 57cff9a · 57cff9a
2 parents 599866d + a5b0bb0
commit 57cff9a
Show file tree

Hide file tree

Showing 46 changed files with 2,158 additions and 629 deletions.
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
@@ -218,6 +218,35 @@ stages:
 ### JOBS ###
 # Define the actual jobs
 
+# ---------------
+# Python 3.8 Jobs
+
+build/cp38-cp38-linux:
+    <<: 
+        - *build_template
+    image:
+        python:3.8
+
+test_full/cp38-cp38-linux:
+    <<: 
+        - *test_full_template
+    image:
+        python:3.8
+
+
+# for universal builds we only need to gpg sign once
+gpgsign/cp38-cp38-linux:
+    <<: 
+        - *gpgsign_template
+    image:
+        python:3.8
+
+deploy/cp38-cp38-linux:
+    <<: 
+        - *deploy_template
+    image:
+        python:3.8
+
 
 # ---------------
 # Python 3.7 Jobs

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,24 @@
 This changelog follows the specifications detailed in: [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html), although we have not yet reached a `1.0.0` release.
 
+## Version 0.5.4
+
+### Added
+* EfficientNet backbone and Swish activation
+
+### Fixed 
+* Handle "No running processes found" case in `XPU.coerce('auto')`
+* Resize now works with newer `imgaug` versions
+* Fixed incorrect use of the word "logit"; what I was calling logits are
+  actually log probabilities.
+
+### Changed 
+* Using new mode in `gpu_info`, this is more stable
+* Examples are now in the netharn.examples directory, which means you can run
+  them without having to git clone netharn.
+* Moved data grabbers into netharn.data
+* Moved unfinished examples to dev
+
 
 ## Version 0.5.3
 

diff --git a/README.rst b/README.rst
@@ -53,18 +53,18 @@ KNOWN BUGS:
    * The metrics for computing detection mAP / AP might not be correct.
    * The YOLO example gets to about 70% mAP (using Girshick's mAP code) whereas we should be hitting 74-76%
 AUTHORS COMMENTS:
-   * My MNIST, CIFAR, and VOC examples will download the data as needed.
-   * My CIFAR example for ResNet50 achieves 95.72% accuracy, outperforming the
+   * The MNIST, CIFAR, and VOC examples will download the data as needed.
+   * The CIFAR example for ResNet50 achieves 95.72% accuracy, outperforming the
      best DPN92 result (95.16%) that I'm aware of.
      This result seems real, I do not believe I've made an error in measurement
      (but this has not been peer-reviewed so, caveat emptor).  I've reproduced
      this result a few times. You can use the code in examples/cifar.py to see
      if you can too (please tell me if you cannot). 
-   * My YOLO example is based of of EAVise's excellent lightnet (https://gitlab.com/EAVISE/lightnet/) package.
+   * The YOLO example is based off of EAVise's excellent lightnet (https://gitlab.com/EAVISE/lightnet/) package.
    * I reimplemented the CocoAPI (see nh.data.coco_api), because I had some
      (probably minor) issue with the original implementation. I've extended it
      quite a bit, and I'd recommend using it.
-   * My metric-learning example requires code requires the ibeis software:
+   * The metric-learning example requires the ibeis software:
      `https://github.com/Erotemic/ibeis`.
 DEPENDENCIES:
     * torch
@@ -475,11 +475,6 @@ it would produce this more detailed description of what it was doing:
 
 
 ]
-
-
-Acknowledgements:
-=================
-The authors would like to thank AFRL for their support of this work via `SBIR Contract FA8650-18-C-1075 <https://govtribe.com/award/federal-contract-award/definitive-contract-fa865018c1075>`_. This library is approved for public release via 88ABW.
 
 
 .. |Pypi| image:: https://img.shields.io/pypi/v/netharn.svg

diff --git a/dev/_devcheck_optim_without_bias_decay.py b/dev/_devcheck_optim_without_bias_decay.py
@@ -0,0 +1,99 @@
+"""
+Dev check for AdamW when bias and norm parameters are exempt from weight
+decay. Run this file directly to watch the loss, parameters and buffers.
+"""
+
+
+def main():
+    """
+    Train a tiny model with per-group weight decay and print its state.
+
+    Parameter names are partitioned by suffix into ``norm``, ``bias``,
+    ``weight`` and ``other`` groups. The ``norm`` and ``bias`` groups are
+    given ``weight_decay=0``; the rest fall back to the optimizer defaults.
+    Each iteration regresses the model output towards small random targets
+    and prints the loss, the parameters and the remaining state-dict
+    entries.
+    """
+    import time
+
+    import netharn as nh
+    import torch
+    import ubelt as ub
+
+    # NOTE(review): inputs below are (3, 3, 2, 2) and targets (3, 1, 2, 2),
+    # so this is presumably a 2-D conv from 3 to 1 channels - confirm.
+    model = nh.layers.Sequential(*[
+        nh.layers.ConvNormNd(2, 3, 1),
+        # nh.layers.ConvNormNd(2, 1, 1),
+        # nh.layers.ConvNormNd(2, 1, 1),
+    ])
+
+    params = dict(model.named_parameters())
+
+    # Assign each parameter name to exactly one group. Norm parameters also
+    # end in '.weight' / '.bias', so they must be claimed first.
+    key_groups = {}
+    other_keys = set(params)
+    key_groups['norm'] = {
+        p for p in other_keys if p.endswith(('.norm.weight', '.norm.bias'))
+    }
+    other_keys -= key_groups['norm']
+    key_groups['bias'] = {p for p in other_keys if p.endswith('.bias')}
+    other_keys -= key_groups['bias']
+    key_groups['weight'] = {p for p in other_keys if p.endswith('.weight')}
+    other_keys -= key_groups['weight']
+    key_groups['other'] = other_keys
+
+    named_param_groups = {}
+    for group_name, keys in key_groups.items():
+        if keys:
+            # Sets of strings iterate in hash order, which can change from
+            # run to run; sort so the optimizer sees a reproducible order.
+            named_param_groups[group_name] = {
+                'params': [params[key] for key in sorted(keys)],
+            }
+
+    # Biases and normalization parameters are exempt from weight decay
+    for group_name in ('bias', 'norm'):
+        if group_name in named_param_groups:
+            named_param_groups[group_name]['weight_decay'] = 0
+
+    param_groups = list(named_param_groups.values())
+    optim_defaults = {
+        'lr': 1e-3,
+        'weight_decay': 1e1,
+    }
+    optim = torch.optim.AdamW(param_groups, **optim_defaults)
+
+    learn = True
+    model = model.train(learn)
+
+    with torch.set_grad_enabled(learn):
+        for _ in range(10000):
+            if learn:
+                optim.zero_grad()
+            inputs = torch.rand(3, 3, 2, 2)
+            outputs = model(inputs)
+            # Regress towards tiny random targets
+            target = torch.rand(3, 1, 2, 2) * 1e-3
+            loss = ((outputs - target) ** 2).sum()
+
+            if learn:
+                loss.backward()
+                optim.step()
+                optim.zero_grad()
+
+            # State-dict entries that are not parameters (printed as buffers)
+            state = ub.dict_diff(model.state_dict(), params)
+            time.sleep(0.01)
+            print('loss = {!r}'.format(float(loss.item())))
+            print('param_state = ' + ub.repr2(params) + '\n' +
+                  'buffer_state = ' + ub.repr2(state, nl=3))
+
+            # Throttle the output so it stays readable
+            time.sleep(0.1)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/dev/debug_optimizer.py b/dev/debug_optimizer.py
@@ -0,0 +1,105 @@
+"""
+Scratch checks for debugging optimizer state / snapshot mismatches.
+"""
+
+
+def _find_param_name(model, param):
+    """
+    Return the name ``model`` registers ``param`` under, or None if absent.
+
+    Parameters are matched by identity (``is``), not by value.
+    """
+    for name, model_param in model.named_parameters():
+        if model_param is param:
+            return name
+    return None
+
+
+def debug_optimizer(harn, snapshot_state):
+    """
+    debugging an issue where the param groups were created in different orders
+    each time.
+
+    Every check is currently disabled (``if False`` / ``if 0``), so calling
+    this is a no-op until a guard is flipped by hand.
+
+    Args:
+        harn: harness providing the ``optimizer``, ``model`` and ``xpu``
+            attributes and the snapshot helpers used below.
+        snapshot_state: currently unused; the snapshot check reloads its
+            own state from disk.
+    """
+    if False:
+        import torch
+
+        # DEBUG: check that all optimizer params exist in the model
+        optimizer = harn.optimizer
+        for param_group in optimizer.param_groups:
+            print('-----')
+            print(param_group['weight_decay'])
+            print('-----')
+            for p in param_group['params']:
+                found = _find_param_name(harn.model, p)
+                assert found is not None
+                print('found = {!r}'.format(found))
+
+                state = optimizer.state[p]
+                if state:
+                    # Report when optimizer state and parameter shapes differ
+                    avg_shape = tuple(state['exp_avg'].shape)
+                    p_shape = tuple(p.shape)
+                    if avg_shape == p_shape:
+                        print('avg_shape = {!r}'.format(avg_shape))
+                    else:
+                        print('p_shape = {!r}'.format(p_shape))
+                        print('avg_shape = {!r}'.format(avg_shape))
+
+        if 0:
+            # Check which optimizer params get a gradient from a demo batch
+            for param_group in optimizer.param_groups:
+                for p in param_group['params']:
+                    print(p.grad is None)
+
+            for _, mp in harn.model.named_parameters():
+                assert mp.requires_grad
+                if mp.grad is not None:
+                    mp.grad.detach_()
+                    mp.grad.zero_()
+
+            batch = harn._demo_batch()
+            outputs = harn.model(batch['im'])
+            loss = outputs['class_energy'].mean()
+
+            optimizer.zero_grad()
+            loss.backward()
+
+            for param_group in optimizer.param_groups:
+                for param in param_group['params']:
+                    if param.grad is None:
+                        found = _find_param_name(harn.model, param)
+                        print('no grad for found = {!r}'.format(found))
+
+            optimizer.step()
+
+        if 0:
+            # Write the live snapshot to disk and load it back, then load
+            # the most recent saved snapshot for inspection
+            snapshot_state_old = harn.get_snapshot_state()
+            torch.save(snapshot_state_old, 'foo.pt')
+            snapshot_state = harn.xpu.load('foo.pt')
+
+            prev_states = harn.prev_snapshots()
+            snapshot_state = harn.xpu.load(prev_states[-1])
+
+            print(snapshot_state_old['optimizer_state_dict']['state'].keys())
+            print(snapshot_state['optimizer_state_dict']['state'].keys())
+            state_dict = snapshot_state['optimizer_state_dict']
+
+            for group in optimizer.param_groups:
+                for param in group['params']:
+                    print(param.shape)
+
+            for group in state_dict['param_groups']:
+                for paramid in group['params']:
+                    state = state_dict['state'][paramid]
+                    print(state['exp_avg'].shape)
diff --git a/examples/ggr_matching.py → dev/ggr_matching.py b/examples/ggr_matching.py → dev/ggr_matching.py
@@ -735,7 +735,7 @@ def setup_harn(**kwargs):
         },
     })
     harn = MatchingHarness(hyper=hyper)
-    harn.config['prog_backend'] = 'progiter'
+    harn.preferences['prog_backend'] = 'progiter'
     harn.intervals['log_iter_train'] = 1
     harn.intervals['log_iter_test'] = None
     harn.intervals['log_iter_vali'] = None

diff --git a/examples/imagenet.py → dev/imagenet.py b/examples/imagenet.py → dev/imagenet.py
@@ -325,7 +325,7 @@ def setup_harn(cmdline=True, **kwargs):
     # Create harness
     harn = ImageClfHarn(hyper=hyper)
     harn.classes = torch_datasets['train'].classes
-    harn.config.update({
+    harn.preferences.update({
         'num_keep': 5,
         'keyboard_debug': True,
         # 'export_modules': ['netharn'],
@@ -334,5 +334,5 @@ def setup_harn(cmdline=True, **kwargs):
         'vali': 1,
         'test': 10,
     })
-    harn._custom_config = config
+    harn.script_config = config
     return harn
diff --git a/examples/mnist_matching.py → dev/mnist_matching.py b/examples/mnist_matching.py → dev/mnist_matching.py
@@ -609,7 +609,7 @@ def trycast(x, type):
     )
 
     harn = MNIST_MatchingHarness(hyper=hyper)
-    harn.config
+    harn.preferences
     return harn
 
 

diff --git a/netharn/__init__.py b/netharn/__init__.py
@@ -4,7 +4,7 @@
 mkinit netharn --noattrs --dry
 mkinit netharn --noattrs
 """
-__version__ = '0.5.3'
+__version__ = '0.5.4'
 
 try:
     # PIL 7.0.0 removed PIL_VERSION, which breaks torchvision, monkey patch it