In [1]:
%%capture
!pip install -Uqq fastai gradio
!apt-get install tree

In [2]:
from fastai.vision.all import *
from fastai import *
from torch import nn, optim
import torch

from IPython.display import display, YouTubeVideo

In [3]:
# Width/height keyword arguments forwarded to the embedded video player.
SIZE = dict(width=1000, height=600)

In [4]:
# Fetch the dataset referenced by URLs.PETS through fastai's untar_data and
# keep the returned location; later cells read the images from path/'images'.
path = untar_data(URLs.PETS)

In [5]:
# Show the top-level entries of the dataset folder.
list(path.ls())

In [6]:
# Print 10 randomly chosen lines of the `tree` listing of the images folder
# that match ".jpg", to get a feel for the file naming scheme.
!tree {str(path/'images')} | grep .jpg | shuf -n 10

In [7]:
def label_func(filename):
    """Return the class label encoded in an image file name.

    The label is everything that precedes the last underscore, e.g.
    ``'great_pyrenees_12.jpg'`` -> ``'great_pyrenees'``. A name that contains
    no underscore at all yields the empty string.

    Args:
        filename: file name as a string.

    Returns:
        The label portion of ``filename``.
    """
    label, _sep, _tail = filename.rpartition('_')
    return label

In [8]:
# Build the train/validation DataLoaders directly from the image file names.
dls = ImageDataLoaders.from_name_func(
    path=path,
    fnames=get_image_files(path/'images'),
    label_func=label_func,  # the class label is parsed from each file name
    valid_pct=0.2,  # hold out 20% of the images for validation
    seed=42,  # fix the random split so every rerun validates on the same images
    item_tfms=Resize(224),
    batch_tfms=Normalize.from_stats(*imagenet_stats),
)

In [9]:
# Display the statistics that were handed to Normalize.from_stats when building dls.
imagenet_stats

In [10]:
# Visual sanity check: display a sample of items from the DataLoaders.
dls.show_batch()

In [11]:
# Embed the YouTube video with this id in the cell output, sized by SIZE.
vid = YouTubeVideo('RYth6EbBUqM', **SIZE)
display(vid)

In [12]:
# Early stopping with a patience of 3.
cbs = [EarlyStoppingCallback(patience=3)]

# Baseline learner: a ResNet-34 trained from scratch, i.e. without fetching
# the ImageNet weights, reporting accuracy and error rate.
learn = cnn_learner(
    dls,
    models.resnet34,
    metrics=[accuracy, error_rate],
    cbs=cbs,
    pretrained=False,
)

In [13]:
# Report the number of classes and the accuracy expected from uniform random
# guessing (one over the number of classes), as a baseline for the metrics.
print(f"""
There are {learn.dls.c} categories.
If we guessed randomly, we would have about a {100. * 1/learn.dls.c:.02f}% chance of guessing correctly.
""")

In [14]:
# Train the from-scratch model for 5 epochs with the one-cycle schedule,
# using 1e-3 as the learning-rate argument.
learn.fit_one_cycle(5, 1e-3)

In [15]:
# Early stopping with a patience of 3.
cbs = [EarlyStoppingCallback(patience=3)]

# Transfer-learning learner: same ResNet-34 architecture, but this time the
# ImageNet weights are fetched; accuracy and error rate are reported.
learn = cnn_learner(
    dls,
    models.resnet34,
    metrics=[accuracy, error_rate],
    cbs=cbs,
    pretrained=True,
)

In [16]:
# Inspect the module at index 1 of the model (presumably the classifier head
# that cnn_learner attached on top of the backbone; confirm from the output).
learn.model[1]

In [17]:
# Base learning rate shared by the fine-tuning cells that follow.
LR = 1e-3

In [18]:
# Train the classifier head for one epoch at LR.
# NOTE(review): this relies on the pretrained learner starting with its
# backbone frozen, so that only the head is updated here; confirm.
learn.fit_one_cycle(1, LR)

In [19]:
# Unfreeze the backbone so its weights are updated by the next training run too.
learn.unfreeze()

In [20]:
# Train everything together for 5 epochs. The slice gives a learning-rate
# range from LR/10 up to LR (presumably lower rates for earlier layers; confirm).
learn.fit_one_cycle(5, lr_max=slice(LR/10, LR))

In [21]:
# Early stopping with a patience of 3.
cbs = [EarlyStoppingCallback(patience=3)]

# Fresh pretrained ResNet-34 learner for the fine_tune shortcut below.
# No loss_func is passed, so the learner falls back to its default; supply
# e.g. loss_func=nn.CrossEntropyLoss() here to override it.
learn = cnn_learner(
    dls,
    resnet34,
    metrics=[accuracy, error_rate],
    cbs=cbs,
    pretrained=True,
)

In [22]:
# Fine-tune for 5 epochs with LR as the base learning rate. Presumably this
# wraps the manual "train head, unfreeze, train all" sequence shown earlier;
# see the fastai docs for the exact schedule.
learn.fine_tune(5, base_lr=LR)

In [23]:
import gradio as gr

In [24]:
# Grab the first (image, label) pair from the training dataset, print the
# label as stored by the dataset, and show the image as the cell output.
img, cat = dls.train_ds[0]
print(cat)
img

In [25]:
# Run the current learner on the sample image and unpack the three values
# that predict returns.
breed, cat_id, logits = learn.predict(img)
print(breed, cat_id, sep='\n')
# NOTE(review): fastai documents the third value as per-class probabilities
# rather than raw logits; if so, this sum should print ~1. Confirm.
print(logits.sum())

In [26]:
def recognize_pet(img):
    """Classify a pet image and report the predicted breed with its probability.

    Uses the notebook-level ``learn`` object.

    Args:
        img: an image in any form accepted by ``learn.predict``.

    Returns:
        A multi-line string with the predicted breed and the probability
        the model assigns to it.
    """
    # Learner.predict returns (decoded label, class index, per-class
    # probabilities). The third value has already been passed through the loss
    # function's activation, so applying softmax again would flatten the
    # distribution and under-report the model's confidence.
    breed, cat_id, probs = learn.predict(img)
    proba = float(probs.max())
    return f"""
    Breed: {breed},
    Probability: {proba:.04f}
    """

# Smoke-test the helper on the sample image before wiring it into the UI.
print(recognize_pet(img))

# Exercise 12.1

Run the block of code below.
Find a few images of pets to upload and observe the inferences made by the model.
Are the model's predictions reasonably accurate?

<!-- startquestion -->

In [27]:
# Serve recognize_pet behind a simple web UI with an image input and a label output.
# `source='upload'` is intentionally not passed: uploading is already allowed
# by default, and newer Gradio releases (which the unpinned `-U` install at
# the top of the notebook pulls in) no longer accept that keyword.
gr.Interface(recognize_pet, inputs=gr.Image(), outputs=gr.Label()).launch(inline=True, share=True)

In [28]:
def conv_block(in_channels, out_channels, stride=1):
    """Return a 3x3 Conv -> PReLU -> BatchNorm -> Dropout(0.2) stack.

    Args:
        in_channels: number of channels of the input feature map.
        out_channels: number of channels produced by the convolution.
        stride: stride of the convolution; with the fixed padding of 1, a
            stride of 1 keeps the spatial size.

    Returns:
        An ``nn.Sequential`` holding the four layers in that order.
    """
    conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1, stride=stride)
    layers = [conv, nn.PReLU(), nn.BatchNorm2d(out_channels), nn.Dropout(0.2)]
    return nn.Sequential(*layers)

In [29]:
class ResidualConvBlock(nn.Module):
    """Two stacked ``conv_block``s wrapped in a residual (skip) connection.

    The input is added to the output of the two convolutional blocks and the
    sum is passed through a final PReLU. When the block changes the number of
    channels or downsamples (``stride > 1``), the input cannot be added
    directly, so it is first mapped through a 1x1 convolution with the same
    stride to make both shapes agree.

    Args:
        in_channels: channels of the incoming feature map.
        out_channels: channels of the returned feature map.
        stride: stride of the first convolution (and of the projection).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        # A projection on the skip path is needed whenever the main path
        # alters the channel count or the spatial resolution.
        self.projection = in_channels != out_channels or stride > 1
        if self.projection:
            # 1x1 convolution: remaps channels and, through its stride,
            # downsamples spatially by the same factor as the main path.
            self.proj = nn.Conv2d(
                in_channels, out_channels, kernel_size=1, padding=0, stride=stride
            )

        # Main path: only the first block may change channels / resolution.
        self.conv1 = conv_block(in_channels, out_channels, stride)
        self.conv2 = conv_block(out_channels, out_channels)
        self.final_act = nn.PReLU()

    def forward(self, x):
        """Return ``final_act(skip(x) + conv2(conv1(x)))``."""
        # Skip path: x itself, or its projection when the shapes differ.
        shortcut = self.proj(x) if self.projection else x
        # Main path always starts from the original, unprojected x.
        residual = self.conv2(self.conv1(x))
        return self.final_act(shortcut + residual)

In [30]:
# Random stand-in input for trying out the block: 2 samples with 32 channels
# of size 28x28 (the 32 matches in_channels of the blocks built below).
fake_batch = torch.randn(2, 32, 28, 28)

In [31]:
# Same channel count and stride 1: no projection layer is created, so the
# input is added back unchanged.
res = ResidualConvBlock(32, 32)
res

In [32]:
# Expected: torch.Size([2, 32, 28, 28]). 3x3 convs with padding 1 and
# stride 1 keep the spatial size.
res(fake_batch).shape

In [33]:
# Channel count changes (32 -> 64): the 1x1 projection is created for the
# skip path.
res = ResidualConvBlock(32, 64)
res

In [34]:
# Expected: torch.Size([2, 64, 28, 28]). More channels, same spatial size.
res(fake_batch).shape

In [35]:
# Channel change plus stride 2: both the first conv and the projection
# downsample.
res = ResidualConvBlock(32, 64, 2)
res

In [36]:
# Expected: torch.Size([2, 64, 14, 14]). Stride 2 halves 28 -> 14 on both paths.
res(fake_batch).shape

In [37]:
def build_model(*args, num_classes=37, **kwargs):
    """Build a small residual CNN classifier for 3-channel images.

    Any positional or extra keyword arguments are accepted and ignored,
    presumably so the function can be handed to ``cnn_learner`` in place of
    an architecture constructor.

    Args:
        num_classes: number of outputs of the final linear layer. Defaults
            to 37, the value that used to be hard-coded.

    Returns:
        An ``nn.Sequential`` made of an input BatchNorm, four residual blocks
        (3 -> 16 -> 16 -> 32 with stride 2 -> 64 channels), global max
        pooling, and a two-layer fully connected head.
    """
    model = nn.Sequential(
        # Normalise the raw input channels before the first convolution.
        nn.BatchNorm2d(3),
        ResidualConvBlock(3, 16),
        ResidualConvBlock(16, 16),
        ResidualConvBlock(16, 32, 2),
        ResidualConvBlock(32, 64),
        # Collapse each of the 64 feature maps to a single value.
        nn.AdaptiveMaxPool2d(1),
        nn.Flatten(),
        nn.Linear(64, 256),
        nn.BatchNorm1d(256),
        nn.ReLU(),
        nn.Dropout(0.2),
        nn.Linear(256, num_classes)
    )
    return model

In [38]:
# Train the hand-written residual network from scratch.
# NOTE(review): cnn_learner is designed for backbone architectures and may
# cut the model at its last pooling layer and attach its own head, in which
# case the Linear layers defined in build_model are not used. Confirm by
# inspecting learn.model; wrapping build_model() in a plain Learner would
# keep the model intact.
learn = cnn_learner(dls, build_model, pretrained=False, metrics=[accuracy, error_rate])

In [39]:
# Train the custom model for 20 epochs with the one-cycle schedule, lr_max=1e-3.
learn.fit_one_cycle(20, lr_max=1e-3)