In [1]:
#hide
!pip install -Uqq fastbook
import fastbook
fastbook.setup_book()

[K     |████████████████████████████████| 727kB 5.2MB/s 
[K     |████████████████████████████████| 51kB 5.1MB/s 
[K     |████████████████████████████████| 194kB 8.2MB/s 
[K     |████████████████████████████████| 1.2MB 7.5MB/s 
[K     |████████████████████████████████| 61kB 6.5MB/s 
[?25hMounted at /content/gdrive


In [2]:
#hide
from fastbook import *

# Image Classification

## From Dogs and Cats to Pet Breeds

In [3]:
from fastai.vision.all import *
# Fetch and extract the Oxford-IIIT Pet archive; `path` is the local dataset folder.
path = untar_data(URLs.PETS)

### memo    
`untar_data()` downloads the dataset archive and extracts it into Colab session storage.  
The files are gone once the Colab instance is closed.  
The name comes from the Unix `tar` command, which packs and unpacks archive files.  

https://www.robots.ox.ac.uk/~vgg/data/pets/  
Oxford University Visual Geometry Group (VGG) and IIIT Hyderabad (International Institute of Information Technology).  
Curated data on dogs and cats breeds.  Some are confusing even for humans to identify.  
Manually download to repo on C drive.  Move to Ubuntu and untar (uncompress).  Explore.  

In [4]:
# IPython help query: shows the type, string form and docstring of `path`.
path?


Type:        PosixPath
String form: /root/.fastai/data/oxford-iiit-pet
File:        /usr/lib/python3.6/pathlib.py
Docstring:  
Path subclass for non-Windows systems.

#### memo: 
`pathlib` represents filesystem paths as objects that can be manipulated without touching the disk.  
`path.parts` (a property, not a method call) splits a path into its components on Windows or Unix.  
```
>>> p = PurePath('/usr/bin/python3')
>>> p.parts
('/', 'usr', 'bin', 'python3')

>>> p = PureWindowsPath('c:/Program Files/PSF')
>>> p.parts
('c:\\', 'Program Files', 'PSF')

Usage: 
from pathlib import Path 
>>> p = Path('.')  # current directory
>>> [x for x in p.iterdir() if x.is_dir()]  # lists the subdirectories directly under p (not recursive)
```

In [5]:
#hide
# Display paths relative to the dataset root (later outputs read e.g. Path('images')).
Path.BASE_PATH = path

In [6]:
# List the top-level contents of the dataset folder.
path.ls()

(#2) [Path('images'),Path('annotations')]

In [7]:
# List the image files; names look like <breed>_<number>.jpg.
(path/"images").ls()

(#7393) [Path('images/saint_bernard_118.jpg'),Path('images/saint_bernard_31.jpg'),Path('images/Bombay_188.jpg'),Path('images/japanese_chin_82.jpg'),Path('images/Sphynx_90.jpg'),Path('images/Persian_262.jpg'),Path('images/pug_139.jpg'),Path('images/keeshond_120.jpg'),Path('images/keeshond_146.jpg'),Path('images/english_setter_78.jpg')...]

In [11]:
# IPython help query on the BASE_PATH attribute set earlier in the notebook.
Path.BASE_PATH?

In [None]:
# Take the first image path as a sample for trying out the label regex.
fname = (path/"images").ls()[0]

In [None]:
# Capture the breed name: everything before the trailing "_<digits>.jpg".
# The dot is escaped so it matches only a literal "." before the extension
# (an unescaped "." would accept any character there).
re.findall(r'(.+)_\d+\.jpg$', fname.name)

In [None]:
# Recipe for the pet-breed dataset:
#   - inputs are images, targets are categories
#   - items come from get_image_files, split at random with a fixed seed
#   - the label is parsed from the file name ("<breed>_<number>.jpg"); the dot
#     is escaped so it matches only a literal "." before the extension
#   - presizing: resize each item to 460, then augment each batch down to 224
pets = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=RandomSplitter(seed=42),
    get_y=using_attr(RegexLabeller(r'(.+)_\d+\.jpg$'), 'name'),
    item_tfms=Resize(460),
    batch_tfms=aug_transforms(size=224, min_scale=0.75),
)
dls = pets.dataloaders(path/"images")

## Presizing

In [None]:
# Presizing demo: apply the same rotate/zoom/warp to one image in two different
# ways and show the results side by side.
dblock1 = DataBlock(blocks=(ImageBlock(), CategoryBlock()),
                   get_y=parent_label,
                   item_tfms=Resize(460))
# Toy dataset: the same grizzly picture repeated 100 times.
# NOTE(review): expects images/grizzly.jpg under the current working directory —
# confirm the file exists in this environment.
dls1 = dblock1.dataloaders([(Path.cwd()/'images'/'grizzly.jpg')]*100, bs=8)
# NOTE(review): this overrides the *training* loader's index sampling, but the
# batch below is drawn from the validation loader — confirm this is intended.
dls1.train.get_idxs = lambda: Inf.ones
x,y = dls1.valid.one_batch()
_,axs = subplots(1, 2)

# Right panel: resize to 224 first, then apply each transform as a separate call.
x1 = TensorImage(x.clone())
x1 = x1.affine_coord(sz=224)
x1 = x1.rotate(draw=30, p=1.)
x1 = x1.zoom(draw=1.2, p=1.)
x1 = x1.warp(draw_x=-0.2, draw_y=0.2, p=1.)

# Left panel: the same three transforms, each targeting size 224, run through
# one pipeline built by setup_aug_tfms.
# (presumably these get composed into a single interpolation step — TODO confirm)
tfms = setup_aug_tfms([Rotate(draw=30, p=1, size=224), Zoom(draw=1.2, p=1., size=224),
                       Warp(draw_x=-0.2, draw_y=0.2, p=1., size=224)])
x = Pipeline(tfms)(x)
#x.affine_coord(coord_tfm=coord_tfm, sz=size, mode=mode, pad_mode=pad_mode)
TensorImage(x[0]).show(ctx=axs[0])
TensorImage(x1[0]).show(ctx=axs[1]);

### Checking and Debugging a DataBlock

In [None]:
# Eyeball a few labelled samples to check the DataBlock produced sensible data.
dls.show_batch(nrows=1, ncols=3)

In [None]:
# Same recipe as `pets` but without the resize/augmentation transforms, used
# here to exercise DataBlock.summary() as a debugging aid.
# The dot in the label regex is escaped so it matches only a literal ".".
pets1 = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    splitter=RandomSplitter(seed=42),
    get_y=using_attr(RegexLabeller(r'(.+)_\d+\.jpg$'), 'name'),
)
pets1.summary(path/"images")

In [None]:
# Baseline model: a ResNet-34 learner reporting error rate, fine-tuned for two epochs.
learn = cnn_learner(dls, arch=resnet34, metrics=error_rate)
learn.fine_tune(epochs=2)

## Cross-Entropy Loss

### Viewing Activations and Labels

In [None]:
# Pull one batch: x holds the inputs, y the targets.
x,y = dls.one_batch()

In [None]:
# Display the batch targets.
y

In [None]:
# Run the model on that single batch (wrapped as a one-element list standing in
# for a data loader) and look at the predictions for the first image.
preds,_ = learn.get_preds(dl=[(x,y)])
preds[0]

In [None]:
# Number of values predicted for one image, and their total.
len(preds[0]),preds[0].sum()

### Softmax

In [None]:
# Plot the sigmoid curve over [-4, 4].
plot_function(torch.sigmoid, min=-4,max=4)

In [None]:
#hide
# Fix the global torch RNG seed so the random activations below are repeatable.
torch.manual_seed(42);

In [None]:
# Six rows of two standard-normal activations, scaled by 2.
acts = 2 * torch.randn(6, 2)
acts

In [None]:
# Element-wise sigmoid: each value lands in (0, 1), but rows need not sum to 1.
acts.sigmoid()

In [None]:
# Sigmoid of the column difference; for two classes this equals the first
# softmax column, since e^a / (e^a + e^b) = sigmoid(a - b).
(acts[:,0]-acts[:,1]).sigmoid()

In [None]:
# Softmax across the two columns of each row, so every row sums to 1.
sm_acts = acts.softmax(dim=1)
sm_acts

### Log Likelihood

In [None]:
# Target column index (0 or 1) for each of the six rows.
targ = tensor([0,1,0,1,1,0])

In [None]:
# Re-display the softmax activations for reference.
sm_acts

In [None]:
# Row indices 0..5; paired with `targ` this picks, for every row, the
# activation in that row's target column.
idx = range(6)
sm_acts[idx, targ]

In [None]:
from IPython.display import HTML
# Tabulate the softmax outputs next to their targets for display.
df = pd.DataFrame(sm_acts, columns=["3","7"])
df['targ'] = targ
df['idx'] = idx
# Activation of the target column for each row (reusing `idx` rather than
# repeating range(6)). The column keeps its 'loss' label even though the value
# has not been negated or logged at this point.
df['loss'] = sm_acts[idx, targ]
# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0; use
# Styler.hide(axis="index") where available and fall back on older pandas.
styler = df.style
t = styler.hide(axis="index") if hasattr(styler, "hide") else styler.hide_index()
#To have html code compatible with our script
html = t._repr_html_().split('</style>')[1]
# Strip the auto-generated table id from the opening <table> tag.
html = re.sub(r'<table id="([^"]+)"\s*>', r'<table >', html)
display(HTML(html))

In [None]:
# Negated target-column activation for each row.
-sm_acts[idx, targ]

In [None]:
# F.nll_loss does not take the log itself: fed plain softmax outputs it returns
# the negated target-column value per row (compare with the previous cell).
F.nll_loss(sm_acts, targ, reduction='none')

### Taking the Log

In [None]:
# Plot log(x) over [0, 4].
# NOTE(review): log(0) is -inf, so the left edge of the range has no finite value.
plot_function(torch.log, min=0,max=4)

In [None]:
# Cross-entropy loss in its module (class) form.
loss_func = nn.CrossEntropyLoss()

In [None]:
# Apply it to the raw activations (not the softmax outputs) and the targets.
loss_func(acts, targ)

In [None]:
# Functional form of the same loss.
F.cross_entropy(acts, targ)

In [None]:
# reduction='none' returns one loss per row instead of a single reduced value.
nn.CrossEntropyLoss(reduction='none')(acts, targ)

## Model Interpretation

In [None]:
# Build an interpretation object from the trained learner and plot its
# confusion matrix.
interp = ClassificationInterpretation.from_learner(learn)
interp.plot_confusion_matrix(figsize=(12,12), dpi=60)

In [None]:
# Show only the confusions that occur at least min_val times.
interp.most_confused(min_val=5)

## Improving Our Model

### The Learning Rate Finder

In [None]:
# Fresh learner trained for one epoch with a deliberately large base learning rate (0.1).
learn = cnn_learner(dls, arch=resnet34, metrics=error_rate)
learn.fine_tune(epochs=1, base_lr=0.1)

In [None]:
# Fresh learner, then run the learning rate finder and keep its two suggestions.
# NOTE(review): newer fastai releases return a single 'valley' suggestion by
# default, which would break this two-value unpacking; there the call would need
# suggest_funcs=(minimum, steep) — confirm against the installed version.
learn = cnn_learner(dls, resnet34, metrics=error_rate)
lr_min,lr_steep = learn.lr_find()

In [None]:
# Report both suggestions in scientific notation.
print(f"Minimum/10: {lr_min:.2e}, steepest point: {lr_steep:.2e}")

In [None]:
# Fresh learner fine-tuned for two epochs at a base learning rate of 3e-3.
learn = cnn_learner(dls, arch=resnet34, metrics=error_rate)
learn.fine_tune(epochs=2, base_lr=3e-3)

### Unfreezing and Transfer Learning

In [None]:
# IPython source query: `??` prints the implementation of fine_tune.
learn.fine_tune??

In [None]:
# Manual version of fine-tuning, step 1: fresh learner, three one-cycle epochs
# at a peak learning rate of 3e-3 before any unfreezing.
learn = cnn_learner(dls, arch=resnet34, metrics=error_rate)
learn.fit_one_cycle(n_epoch=3, lr_max=3e-3)

In [None]:
# Step 2: make all layers trainable.
learn.unfreeze()

In [None]:
# Re-run the learning rate finder now that the model is unfrozen.
learn.lr_find()

In [None]:
# Step 3: six more one-cycle epochs at a much lower peak learning rate.
learn.fit_one_cycle(6, lr_max=1e-5)

### Discriminative Learning Rates

In [None]:
# Discriminative learning rates: three epochs before unfreezing, then twelve
# epochs over the unfrozen model with the peak learning rate given as a slice
# from 1e-6 to 1e-4.
learn = cnn_learner(dls, arch=resnet34, metrics=error_rate)
learn.fit_one_cycle(n_epoch=3, lr_max=3e-3)
learn.unfreeze()
learn.fit_one_cycle(n_epoch=12, lr_max=slice(1e-6, 1e-4))

In [None]:
# Plot the losses recorded during the last training run.
learn.recorder.plot_loss()

### Selecting the Number of Epochs

### Deeper Architectures

In [None]:
# NOTE(review): this import may already be covered by `from fastai.vision.all import *` — confirm.
from fastai.callback.fp16 import *
# Deeper architecture (ResNet-50) switched to 16-bit floating point via to_fp16().
learn = cnn_learner(dls, resnet50, metrics=error_rate).to_fp16()
# Three epochs with the body frozen, then six epochs unfrozen.
learn.fine_tune(6, freeze_epochs=3)

## Conclusion

## Questionnaire

1. Why do we first resize to a large size on the CPU, and then to a smaller size on the GPU?
   * ans: to get squares. PyTorch uses square images.  
1. If you are not familiar with regular expressions, find a regular expression tutorial, and some problem sets, and complete them. Have a look on the book's website for suggestions.
   * ans: do more exercises.  re.py
1. What are the two ways in which data is most commonly provided, for most deep learning datasets?
   * ans: individual files where each file is a data item, such as images, with file names perhaps indicate organization.  
     Tabular format (csv) where each row is a data item, with possible names associated with image or document files.  
     Can also be binary format files, for large dump, such as medical imaging data.  
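1. Look up the documentation for `L` and try using a few of the new methods that it adds.
   * ans: `L` is fastcore's enhanced list class (a drop-in replacement for Python's `list`), not a PyTorch object. It adds methods such as `map`, `itemgot` and `attrgot`; the `(#2) [...]` outputs of `path.ls()` above are `L` objects. 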
1. Look up the documentation for the Python `pathlib` module and try using a few methods of the `Path` class.
   * ans: symbolic directory path manipulation. https://docs.python.org/3/library/pathlib.html
   
1. Give two examples of ways that image transformations can degrade the quality of the data.
   * ans: interpolated image can become fuzzy, can have unrelated artifacts (parts of image missing or taken over by other objects)
1. What method does fastai provide to view the data in a `DataLoaders`?
   * ans: `show_batch`, e.g. `dls.show_batch(nrows=1, ncols=3)` as used above.
1. What method does fastai provide to help you debug a `DataBlock`?
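   * ans: `summary`, e.g. `pets1.summary(path/"images")` as used above; it steps through the pipeline on a sample so you can see where it breaks. 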
1. Should you hold off on training a model until you have thoroughly cleaned your data? 
   * ans: No, train as soon as possible.  Use trained model to look for error in data. Exp. unique()
1. What are the two pieces that are combined into cross-entropy loss in PyTorch?
   * ans: softmax (applied as `log_softmax`) and negative log likelihood (`nll_loss`). 
1. What are the two properties of activations that softmax ensures? Why is this important?
   * ans: vanishing weights, so later layers will continue to improve learning. 
   * ? 
1. When might you want your activations to not have these two properties?
1. Calculate the `exp` and `softmax` columns of <<bear_softmax>> yourself (i.e., in a spreadsheet, with a calculator, or in a notebook). 
   * Later -- do. 
1. Why can't we use `torch.where` to create a loss function for datasets where our label can have more than two categories?
   * ans: We can, but use one-hot encoding to specify multiclass (dummy) variables. 
1. What is the value of log(-2)? Why?
   * ans: ln(2) + i*pi in the complex numbers (since -2 = 2e^(i*pi)), but undefined in the real numbers. 
1. What are two good rules of thumb for picking a learning rate from the learning rate finder? 
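   * ans: one order of magnitude below the learning rate where the loss is at its minimum (minimum/10), or the point where the loss declines most steeply.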
1. What two steps does the `fine_tune` method do?
   * ans: trains the randomly initialized final layers for one epoch with the other layers frozen, 
        then unfreezes all layers and trains all of them for the N epochs requested. 
1. In Jupyter Notebook, how do you get the source code for a method or function?
   * ans: ??  function??  
1. What are discriminative learning rates?
   * ans: Use a different learning rate for different layers, depending on how closely the user's data matches the data the model was pretrained on. Usually earlier layers learn primitive shapes that transfer readily to the user's data, but later layers learn complex shapes that do not transfer as well to the user's new data. 
1. How is a Python `slice` object interpreted when passed as a learning rate to fastai?
   * ans: start number, end number, interpolate between with geometric growth. learning rate starts low at initial layer, that is already well trained, and gets high towards final layer that has not been trained (our data).
1. Why is early stopping a poor choice when using 1cycle training? 
   * ans: the final random layer has not had enough epochs to get accurate.  
1. What is the difference between `resnet50` and `resnet101`?  
   * ans: more layers. Pre-trained ImageNet models are available for the standard numbers of layers. resnet18 and resnet34 are smaller and good to start with; larger ones are good for trying to improve accuracy. 
1. What does `to_fp16` do?  
   * ans: reduce byte size to floating point 16bit precision, rounds numbers, reduce memory usage.

#### Regular Expression  
`re.search()`  
`re.sub()`  # substitutes  
`re.findall()`  
`re.match()`  
`*` zero or more matches (`+` is one or more)  
`?` zero or one match  
https://learnbyexample.github.io/python-regex-cheatsheet/   

### Further Research

1. Find the paper by Leslie Smith that introduced the learning rate finder, and read it.
1. See if you can improve the accuracy of the classifier in this chapter. What's the best accuracy you can achieve? Look on the forums and the book's website to see what other students have achieved with this dataset, and how they did it.