In [1]:
%matplotlib inline
from fastai import *
from fastai.vision import *
from fastai.callbacks.tracker import *
from fastai.vision.models.wrn import wrn_22


# Turn on cuDNN benchmark mode (lets cuDNN auto-tune which convolution algorithms it uses).
torch.backends.cudnn.benchmark = True

In [2]:
# Adapted from the Init27 notebook (generic training setup); original notebook:
#https://github.com/EricPerbos/RTX-2080Ti-Vs-GTX-1080Ti-CIFAR-100-Benchmarks/blob/master/1080Ti%20Notebook.ipynb

import functools
import traceback
def get_ref_free_exc_info():
    """Return ``sys.exc_info()`` for the exception being handled, with its traceback frames cleared.

    Clearing the frames drops the references to local variables that the
    traceback would otherwise keep alive, so objects held only by those frames
    can be reclaimed.

    Returns:
        The ``(exception type, exception value, traceback)`` triple reported by
        ``sys.exc_info()``.
    """
    exc_info = sys.exc_info()
    # Index 2 is the traceback object; clear_frames walks it and clears each frame's locals.
    traceback.clear_frames(exc_info[2])
    return exc_info

def gpu_mem_restore(func):
    """Decorator that releases traceback-held references when `func` fails or is interrupted.

    If the wrapped call raises (including ``KeyboardInterrupt``), the frames of
    the exception's traceback are cleared so the local variables they hold are
    no longer referenced, a garbage collection pass is run, and the *same*
    exception instance is re-raised with exception chaining suppressed.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper with the same signature and metadata as `func`.

    Raises:
        Whatever `func` raises; the original exception object is propagated
        unchanged (type, args and custom attributes are preserved).
    """
    import gc  # local import: only needed on the failure path

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except BaseException as exc:  # deliberately broad: interruptions must be handled too
            # Drop the references to locals kept alive by the traceback frames,
            # then collect anything that was only reachable through them.
            traceback.clear_frames(exc.__traceback__)
            gc.collect()
            # Re-raise the original instance. Rebuilding it via type(exc)(exc) fails for
            # exception classes whose constructor needs several arguments and loses
            # attributes such as errno. `from None` hides the chained context.
            raise exc from None
    return wrapper

In [3]:
class gpu_mem_restore_ctx():
    """Context manager that releases traceback-held references when its body fails or is interrupted.

    On a clean exit nothing happens. If the body raises, the traceback frames are
    cleared (dropping their references to local variables), a garbage collection
    pass is run, and the same exception instance is re-raised with exception
    chaining suppressed.
    """
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Compare against None rather than using truthiness: an exception instance can be
        # falsy (e.g. it defines __len__ returning 0) and must not be silently suppressed.
        if exc_val is None:
            return False
        import gc  # local import: only needed on the failure path
        traceback.clear_frames(exc_tb)
        gc.collect()
        # Re-raise the original instance; rebuilding it with exc_type(exc_val) breaks for
        # exception classes with multi-argument constructors and loses custom attributes.
        raise exc_val.with_traceback(exc_tb) from None

In [4]:
# Select CUDA device 0 as the current device, then echo the current device index as the cell output.
torch.cuda.set_device(0)
torch.cuda.current_device()

0

In [5]:
# Display the name of CUDA device 0, i.e. the GPU the timings below were taken on.
torch.cuda.get_device_name(0)

'GeForce GTX 1080 Ti'

In [6]:
# Fetch/extract the archive referenced by URLs.CIFAR and keep the resulting dataset folder in `path`.
path = untar_data(URLs.CIFAR)
# Echo the folder as the cell output.
path

PosixPath('/home/eric/Link_fastaiV1/data/cifar10')

## ResNet 18 in FP32

In [7]:
# ResNet-18 / FP32 run: batch size consumed by the DataBunch cell that follows.
bs = 512
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [8]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-18 FP32 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [9]:
# ResNet-18 learner on `data`, tracking accuracy.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet18, metrics=accuracy)

In [10]:
# One-cycle training for 60 epochs (ResNet-18, FP32); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 09:18
epoch  train_loss  valid_loss  accuracy
1      2.200201    1.770271    0.387200  (00:10)
2      1.873866    1.563576    0.458600  (00:09)
3      1.660982    1.422434    0.501100  (00:09)
4      1.505127    1.311411    0.533300  (00:09)
5      1.369722    1.225595    0.563500  (00:08)
6      1.275731    1.140750    0.590400  (00:09)
7      1.194634    1.080315    0.621100  (00:09)
8      1.123352    1.019682    0.642200  (00:09)
9      1.075948    0.969278    0.659500  (00:09)
10     1.027185    0.938903    0.669800  (00:09)
11     0.992491    0.893126    0.683300  (00:09)
12     0.959926    0.868093    0.695300  (00:09)
13     0.929484    0.841902    0.702600  (00:09)
14     0.898935    0.824227    0.708000  (00:09)
15     0.887263    0.809403    0.716600  (00:09)
16     0.858852    0.788126    0.719500  (00:09)
17     0.845001    0.776555    0.729900  (00:09)
18     0.824738    0.769374    0.728700  (00:09)
19     0.820285    0.766737    0.728100  (00:09)
20     0.80

## ResNet 18 in FP16

In [11]:
# ResNet-18 / FP16 run: same batch size as the ResNet-18 FP32 run.
bs = 512
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [12]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-18 FP16 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [13]:
# ResNet-18 learner on `data`, tracking accuracy; .to_fp16() switches it to half-/mixed-precision training.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet18, metrics=accuracy).to_fp16()

In [14]:
# One-cycle training for 60 epochs (ResNet-18, FP16); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 09:05
epoch  train_loss  valid_loss  accuracy
1      2.218122    1.786876    0.378600  (00:10)
2      1.882517    1.572523    0.451000  (00:08)
3      1.666607    1.420917    0.499100  (00:08)
4      1.501447    1.308564    0.533800  (00:09)
5      1.381089    1.221739    0.564000  (00:09)
6      1.273666    1.147675    0.587600  (00:09)
7      1.190675    1.079463    0.616000  (00:09)
8      1.128813    1.028087    0.633300  (00:09)
9      1.072469    0.975964    0.654600  (00:09)
10     1.024608    0.937199    0.668500  (00:08)
11     0.984324    0.899542    0.684700  (00:09)
12     0.949098    0.871396    0.689500  (00:09)
13     0.929929    0.849434    0.700000  (00:08)
14     0.899533    0.828132    0.710900  (00:09)
15     0.880750    0.807545    0.716400  (00:09)
16     0.859833    0.804393    0.715800  (00:08)
17     0.842105    0.775370    0.725900  (00:09)
18     0.830966    0.767316    0.728600  (00:09)
19     0.814234    0.751041    0.733200  (00:09)
20     0.80

## WideResNet_22 in FP32
https://docs.fast.ai/vision.models.html

In [15]:
# WideResNet-22 / FP32 run: batch size consumed by the DataBunch cell that follows.
bs = 512
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [16]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# WideResNet-22 FP32 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [17]:
# Plain Learner around a freshly constructed wrn_22() model on `data`, tracking accuracy.
learn = Learner(data, wrn_22(), metrics=accuracy)

In [18]:
# One-cycle training for 60 epochs (WideResNet-22, FP32); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 45:37
epoch  train_loss  valid_loss  accuracy
1      1.536300    1.470832    0.462300  (00:52)
2      1.160262    1.107029    0.602500  (00:47)
3      0.928368    0.914776    0.679100  (00:47)
4      0.760256    0.931047    0.685600  (00:47)
5      0.652168    1.042342    0.677400  (00:47)
6      0.568983    0.951829    0.694600  (00:46)
7      0.507437    1.096947    0.672100  (00:45)
8      0.472213    1.027268    0.697300  (00:45)
9      0.430894    0.765118    0.775200  (00:45)
10     0.397878    0.559144    0.819000  (00:45)
11     0.378650    0.499458    0.839700  (00:45)
12     0.349510    0.528496    0.829800  (00:45)
13     0.331769    0.611600    0.810800  (00:45)
14     0.315343    0.511550    0.840100  (00:45)
15     0.289794    0.543875    0.838100  (00:45)
16     0.281369    0.585915    0.826900  (00:45)
17     0.268900    0.399356    0.877100  (00:45)
18     0.253308    0.520942    0.848000  (00:45)
19     0.226497    0.461053    0.867800  (00:45)
20     0.21

## WideResNet_22 in FP16 (Mixed-Precision)
https://docs.fast.ai/vision.models.html

In [19]:
# WideResNet-22 / FP16 run: larger batch than the WideResNet-22 FP32 run (768 vs 512),
# so the two timings are not taken at equal batch size.
bs = 768
# Seed NumPy's global RNG, as every other section of this notebook does before building its data
# (this section was the only one missing the call). Python's `random` and torch are not seeded here.
np.random.seed(42)

In [20]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# WideResNet-22 FP16 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [21]:
# Plain Learner around a freshly constructed wrn_22() model on `data`, tracking accuracy;
# .to_fp16() switches it to half-/mixed-precision training.
learn = Learner(data, wrn_22(), metrics=accuracy).to_fp16()

In [22]:
# One-cycle training for 60 epochs (WideResNet-22, FP16); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 40:45
epoch  train_loss  valid_loss  accuracy
1      1.688091    1.504960    0.440300  (00:47)
2      1.332519    1.226433    0.563000  (00:40)
3      1.078594    1.116559    0.606400  (00:40)
4      0.899793    0.842620    0.706200  (00:40)
5      0.755262    0.869375    0.697700  (00:40)
6      0.649841    1.110025    0.670800  (00:40)
7      0.575567    0.794060    0.738400  (00:40)
8      0.513926    0.790122    0.742900  (00:40)
9      0.471724    0.787445    0.752700  (00:40)
10     0.430403    1.031972    0.711800  (00:40)
11     0.405307    0.680322    0.779800  (00:40)
12     0.379110    0.650501    0.789000  (00:40)
13     0.355727    0.721022    0.780000  (00:40)
14     0.334592    0.677938    0.788400  (00:40)
15     0.321955    0.880525    0.743300  (00:40)
16     0.306603    0.427979    0.860000  (00:40)
17     0.283275    0.987854    0.756600  (00:40)
18     0.271497    0.671504    0.802000  (00:40)
19     0.260273    0.483802    0.850600  (00:40)
20     0.23

## ResNet 34 in FP32


In [23]:
# ResNet-34 / FP32 run: batch size consumed by the DataBunch cell that follows.
bs = 256
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [24]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-34 FP32 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [25]:
# ResNet-34 learner on `data`, tracking accuracy.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet34, metrics=accuracy)

In [26]:
# One-cycle training for 60 epochs (ResNet-34, FP32); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 10:14
epoch  train_loss  valid_loss  accuracy
1      1.981860    1.655630    0.442400  (00:10)
2      1.652082    1.412889    0.509100  (00:09)
3      1.430169    1.261970    0.555100  (00:09)
4      1.278686    1.132325    0.596700  (00:09)
5      1.158788    1.041950    0.629000  (00:09)
6      1.075928    0.964396    0.654400  (00:09)
7      1.000220    0.895359    0.682300  (00:09)
8      0.948396    0.856373    0.694500  (00:10)
9      0.891566    0.813588    0.711600  (00:10)
10     0.862627    0.788951    0.717100  (00:10)
11     0.847679    0.746719    0.737100  (00:10)
12     0.815115    0.730587    0.739800  (00:09)
13     0.784083    0.711565    0.746500  (00:10)
14     0.778181    0.703461    0.748900  (00:10)
15     0.766937    0.679527    0.758100  (00:10)
16     0.741391    0.669256    0.766900  (00:10)
17     0.742755    0.666908    0.762600  (00:10)
18     0.723425    0.661421    0.766000  (00:10)
19     0.703250    0.652840    0.763500  (00:10)
20     0.69

## ResNet 34 in FP16


In [27]:
# ResNet-34 / FP16 run: batch size is double the ResNet-34 FP32 run's 256,
# so the two timings are not taken at equal batch size.
bs = 512
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [28]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-34 FP16 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [30]:
# ResNet-34 learner on `data`, tracking accuracy; .to_fp16() switches it to half-/mixed-precision training.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet34, metrics=accuracy).to_fp16()

In [31]:
# One-cycle training for 60 epochs (ResNet-34, FP16); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 09:56
epoch  train_loss  valid_loss  accuracy
1      2.193574    1.740933    0.415300  (00:10)
2      1.825156    1.493526    0.487800  (00:09)
3      1.599781    1.338490    0.529700  (00:09)
4      1.423133    1.218901    0.567200  (00:09)
5      1.291676    1.119297    0.598700  (00:09)
6      1.188651    1.045843    0.623500  (00:09)
7      1.106800    0.980501    0.647000  (00:09)
8      1.039319    0.919941    0.674200  (00:09)
9      0.979433    0.885615    0.683700  (00:09)
10     0.930135    0.827495    0.705400  (00:09)
11     0.896852    0.812888    0.713000  (00:09)
12     0.860437    0.773284    0.725600  (00:09)
13     0.838502    0.754553    0.731600  (00:09)
14     0.812123    0.740891    0.736000  (00:09)
15     0.785818    0.719981    0.743400  (00:09)
16     0.771483    0.701794    0.752300  (00:09)
17     0.755891    0.699555    0.755300  (00:09)
18     0.740510    0.688925    0.755900  (00:09)
19     0.719554    0.672567    0.759800  (00:09)
20     0.71

## ResNet 50 in FP32

In [33]:
# ResNet-50 / FP32 run: batch size consumed by the DataBunch cell that follows.
bs = 256
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [34]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-50 FP32 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [35]:
# ResNet-50 learner on `data`, tracking accuracy.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet50, metrics=accuracy)

In [36]:
# One-cycle training for 60 epochs (ResNet-50, FP32); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 16:08
epoch  train_loss  valid_loss  accuracy
1      1.790080    1.484671    0.486300  (00:18)
2      1.490789    1.274267    0.553500  (00:14)
3      1.292323    1.119700    0.604700  (00:14)
4      1.127707    1.001376    0.647400  (00:14)
5      1.034666    0.918210    0.677500  (00:15)
6      0.933903    0.834426    0.703900  (00:15)
7      0.858165    0.761041    0.729400  (00:16)
8      0.802099    0.727623    0.743000  (00:16)
9      0.748941    0.664229    0.766900  (00:16)
10     0.711753    0.632384    0.782000  (00:16)
11     0.673642    0.618663    0.784600  (00:16)
12     0.644519    0.594132    0.793200  (00:16)
13     0.635958    0.575863    0.800900  (00:16)
14     0.606586    0.577193    0.799000  (00:16)
15     0.579893    0.549878    0.805000  (00:16)
16     0.582904    0.536953    0.810500  (00:16)
17     0.567764    0.529449    0.817000  (00:16)
18     0.550154    0.524807    0.819600  (00:16)
19     0.543873    0.514189    0.820600  (00:16)
20     0.53

## ResNet 50 in FP16

In [37]:
# ResNet-50 / FP16 run: batch size is double the ResNet-50 FP32 run's 256,
# so the two timings are not taken at equal batch size.
bs = 512
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [38]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-50 FP16 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [39]:
# ResNet-50 learner on `data`, tracking accuracy; .to_fp16() switches it to half-/mixed-precision training.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet50, metrics=accuracy).to_fp16()

In [40]:
# One-cycle training for 60 epochs (ResNet-50, FP16); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 14:20
epoch  train_loss  valid_loss  accuracy
1      1.925760    1.526584    0.473300  (00:20)
2      1.629402    1.341644    0.532100  (00:13)
3      1.422359    1.204565    0.577500  (00:14)
4      1.269420    1.086304    0.619700  (00:14)
5      1.149365    1.004452    0.648000  (00:14)
6      1.052686    0.918193    0.671800  (00:14)
7      0.962639    0.849272    0.698300  (00:14)
8      0.895580    0.784770    0.719000  (00:14)
9      0.832468    0.751411    0.733900  (00:14)
10     0.786680    0.707686    0.753300  (00:14)
11     0.737892    0.672041    0.765900  (00:14)
12     0.698360    0.644888    0.776900  (00:14)
13     0.675478    0.624919    0.785100  (00:14)
14     0.648885    0.606992    0.790200  (00:14)
15     0.629905    0.596147    0.795700  (00:14)
16     0.603017    0.574590    0.803500  (00:14)
17     0.592635    0.547783    0.810100  (00:14)
18     0.578149    0.551960    0.811800  (00:14)
19     0.563611    0.544691    0.811600  (00:14)
20     0.54

## ResNet 101 in FP32

In [41]:
# ResNet-101 / FP32 run: batch size consumed by the DataBunch cell that follows.
bs = 256
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [42]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-101 FP32 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [43]:
# ResNet-101 learner on `data`, tracking accuracy.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet101, metrics=accuracy)

In [44]:
# One-cycle training for 60 epochs (ResNet-101, FP32); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 24:01
epoch  train_loss  valid_loss  accuracy
1      1.767628    1.496406    0.477800  (00:24)
2      1.470542    1.274939    0.555500  (00:25)
3      1.262356    1.126148    0.604800  (00:24)
4      1.124002    1.010184    0.650700  (00:24)
5      1.014872    0.897323    0.686500  (00:24)
6      0.919713    0.830348    0.708700  (00:23)
7      0.847648    0.774356    0.729700  (00:23)
8      0.768236    0.709311    0.756100  (00:23)
9      0.730965    0.667238    0.770800  (00:23)
10     0.688476    0.645233    0.775000  (00:23)
11     0.662864    0.612320    0.788600  (00:23)
12     0.628203    0.610274    0.786600  (00:23)
13     0.607273    0.582524    0.800000  (00:23)
14     0.591641    0.557656    0.807100  (00:23)
15     0.569198    0.561227    0.807400  (00:23)
16     0.561833    0.545152    0.810100  (00:23)
17     0.554600    0.531829    0.817800  (00:23)
18     0.541353    0.551791    0.807200  (00:23)
19     0.538480    0.533611    0.814700  (00:23)
20     0.51

## ResNet 101 in FP16

In [45]:
# ResNet-101 / FP16 run: batch size is double the ResNet-101 FP32 run's 256,
# so the two timings are not taken at equal batch size.
bs = 512
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [46]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-101 FP16 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [47]:
# ResNet-101 learner on `data`, tracking accuracy; .to_fp16() switches it to half-/mixed-precision training.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet101, metrics=accuracy).to_fp16()

In [48]:
# One-cycle training for 60 epochs (ResNet-101, FP16); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 20:59
epoch  train_loss  valid_loss  accuracy
1      1.948015    1.562223    0.460400  (00:20)
2      1.631433    1.358770    0.525600  (00:20)
3      1.414180    1.210732    0.574700  (00:21)
4      1.254014    1.088459    0.617500  (00:21)
5      1.136890    1.005783    0.645800  (00:21)
6      1.040242    0.928468    0.674000  (00:21)
7      0.953884    0.849812    0.702400  (00:21)
8      0.883903    0.791834    0.723100  (00:21)
9      0.815515    0.750282    0.743000  (00:20)
10     0.765203    0.704044    0.758200  (00:20)
11     0.726844    0.673116    0.768900  (00:20)
12     0.692003    0.643419    0.776500  (00:21)
13     0.659547    0.611102    0.787600  (00:21)
14     0.635328    0.607005    0.792900  (00:20)
15     0.608404    0.589703    0.797500  (00:20)
16     0.596698    0.572484    0.802400  (00:20)
17     0.580117    0.556514    0.808700  (00:20)
18     0.563573    0.555280    0.814300  (00:20)
19     0.551752    0.538028    0.814200  (00:20)
20     0.53

## ResNet 152 in FP32

In [49]:
# ResNet-152 / FP32 run: batch size consumed by the DataBunch cell that follows.
bs = 256
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [50]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-152 FP32 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [52]:
# ResNet-152 learner on `data`, tracking accuracy.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet152, metrics=accuracy)

In [53]:
# One-cycle training for 60 epochs (ResNet-152, FP32); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 31:50
epoch  train_loss  valid_loss  accuracy
1      1.795876    1.442577    0.502500  (00:30)
2      1.434135    1.211324    0.574500  (00:30)
3      1.219598    1.054491    0.627500  (00:31)
4      1.065201    0.925101    0.672400  (00:32)
5      0.947252    0.835237    0.705300  (00:32)
6      0.852417    0.769818    0.731300  (00:32)
7      0.779233    0.701564    0.758700  (00:32)
8      0.718954    0.643932    0.768700  (00:32)
9      0.679028    0.611182    0.786300  (00:31)
10     0.643206    0.726441    0.786100  (00:31)
11     0.616436    0.553895    0.808500  (00:31)
12     0.599687    0.566333    0.802800  (00:31)
13     0.569029    0.539677    0.814800  (00:31)
14     0.541109    0.512112    0.825000  (00:31)
15     0.535076    0.500172    0.824700  (00:32)
16     0.525116    0.509861    0.819600  (00:31)
17     0.514518    0.501379    0.826300  (00:31)
18     0.499004    0.497418    0.826500  (00:32)
19     0.488863    0.494906    0.829700  (00:31)
20     0.48

## ResNet 152 in FP16

In [54]:
# ResNet-152 / FP16 run: batch size is double the ResNet-152 FP32 run's 256,
# so the two timings are not taken at equal batch size.
bs = 512
# Seed NumPy's global RNG only (Python's `random` and torch are not seeded here).
np.random.seed(42)

In [55]:
# ds_tfms pairs two transform lists: [rand_pad(4, 32) padding/crop, horizontal flip with p=0.5] and an empty list
# (presumably train / validation transforms, per fastai's ds_tfms convention — verify).
ds_tfms = ([*rand_pad(4, 32), flip_lr(p=0.5)], [])
# ResNet-152 FP16 data: built from the folders under `path`, with the `test` folder as validation data,
# then normalized with cifar_stats.
data = ImageDataBunch.from_folder(path, valid='test', ds_tfms=ds_tfms, bs=bs).normalize(cifar_stats)

In [56]:
# ResNet-152 learner on `data`, tracking accuracy; .to_fp16() switches it to half-/mixed-precision training.
# NOTE(review): create_cnn runs with its default arguments; confirm those defaults (e.g. pretrained weights) suit a benchmark.
learn = create_cnn(data, models.resnet152, metrics=accuracy).to_fp16()

In [57]:
# One-cycle training for 60 epochs (ResNet-152, FP16); gpu_mem_restore_ctx clears the traceback frames
# before re-raising if the run fails or is interrupted.
with gpu_mem_restore_ctx():
    learn.fit_one_cycle(60)

Total time: 25:58
epoch  train_loss  valid_loss  accuracy
1      1.911248    1.496594    0.478800  (00:25)
2      1.581300    1.275785    0.554700  (00:26)
3      1.357416    1.121515    0.609300  (00:26)
4      1.191497    1.019990    0.641000  (00:26)
5      1.073234    0.926011    0.674500  (00:25)
6      0.966677    0.839877    0.704300  (00:25)
7      0.880152    0.776991    0.724100  (00:26)
8      0.807180    0.717119    0.747800  (00:26)
9      0.744560    0.680570    0.759500  (00:26)
10     0.704918    0.644767    0.775300  (00:25)
11     0.661834    0.615626    0.786200  (00:26)
12     0.630952    0.573346    0.800500  (00:26)
13     0.600699    0.559540    0.808100  (00:26)
14     0.579632    0.558865    0.808900  (00:25)
15     0.557303    0.549953    0.810800  (00:26)
16     0.535317    0.527162    0.816700  (00:26)
17     0.523816    0.500287    0.826800  (00:26)
18     0.508728    0.520647    0.824200  (00:26)
19     0.495821    0.510365    0.826000  (00:26)
20     0.48