<a href="https://colab.research.google.com/github/Davilirio/Neural-Nets/blob/master/resnets_scratch_fastai.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Writing ResNets and DenseNets in PyTorch

In [None]:
from fastai.vision import *

In [None]:
# fetch the MNIST dataset through fastai's untar_data helper and keep the
# returned location in `path` for the cells below
path = untar_data(URLs.MNIST)

In [None]:
# list what is inside the dataset folder
path.ls()

In [None]:
# build an ImageList from the images found under `path`;
# convert_mode='L' opens each file as a PIL 'L' (grayscale) image
il = ImageList.from_folder(path, convert_mode='L')

In [None]:
# use matplotlib's reversed binary colormap ('binary_r') as the default
# colormap when images are shown
defaults.cmap='binary_r'

In [None]:
# display the first image of the list as a quick sanity check
il[0].show()

In [None]:
# in fastai v1 the validation data has to be pointed out explicitly:
# the 'training' folder is used as train set, the 'testing' folder as validation set
sd = il.split_by_folder(train='training',valid='testing')

In [None]:
# label every item from the folder it sits in
# (presumably one folder per digit class -- confirm with the path.ls() output),
# then display the resulting label lists
lbl_list = sd.label_from_folder()
lbl_list

In [None]:
# the data block API objects can be indexed: take the first training item
# as an (image, label) pair and show both
x, y = lbl_list.train[0]
y, x.show()

In [None]:
# training transforms:
# - rand_pad is asked for a padding of 3 and an output size of 28
# - mode='zeros' pads with zeros, i.e. black, like the background of the
#   1-channel digit images
# - rand_pad returns more than one transform, so its whole result is expanded
#   into the list of training transforms
# - the second, empty list means no transforms for the validation set

tfms = (list(rand_pad(padding=3, size=28, mode='zeros')), [])

In [None]:
# defining the batch size for our problem
bs = 128

# applying the transforms, creating the databunch and normalizing the data;
# normalize() is called without explicit statistics here
data = (lbl_list.transform(tfms)
                .databunch(bs=bs)
                .normalize())

In [None]:
# any dataset inside the databunch can be indexed too:
# first (image, label) pair of the training dataset
x, y = data.train_ds[0]
y, x.show()

In [None]:
# show 4 rows of samples from a batch in a 6x6 figure
data.show_batch(rows=4, figsize=(6, 6))

In [None]:
# number of classes of the databunch (used later as the size of the model output)
data.c

### Basic CNN with batch normalization

In [None]:
def conv(ni, nf):
    """Return a 3x3 convolution with stride 2 and padding 1.

    ni: number of input channels
    nf: number of output channels (features)
    """
    return nn.Conv2d(in_channels=ni, out_channels=nf,
                     kernel_size=3, stride=2, padding=1)

# four "stride-2 conv -> batch norm -> ReLU" stages; every stage halves the
# image: 28 -> 14 -> 7 -> 4 -> 2
widths = [1, 8, 16, 32, 16]
body = [layer
        for ni, nf in zip(widths, widths[1:])
        for layer in (conv(ni, nf), nn.BatchNorm2d(nf), nn.ReLU())]

model = nn.Sequential(
    *body,
    conv(16, 10), # size=1, one activation per class
    nn.BatchNorm2d(10),
    Flatten() # turns the (10, 1, 1) rank 3 tensor into the vector of predictions
)


In [None]:
# defining a fastai learner with this model:
# cross-entropy as the loss function, accuracy as the reported metric

learner = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [None]:
learner.lr_find()

In [None]:
learner.recorder.plot(end_lr=100)

In [None]:
# train for 3 epochs with the one-cycle policy and a maximum learning rate of 0.1
learner.fit_one_cycle(3,max_lr=0.1)

In [None]:
# fit_one_cycle() needs the number of epochs as its first argument and fails
# without it; train 3 more epochs (same count as the previous run), leaving
# max_lr at its default as the original call did
learner.fit_one_cycle(3)

#### Refactoring

In [None]:
def conv2(ni, nf):
    """Stride-2 fastai conv_layer from `ni` to `nf` channels.

    conv_layer() creates the conv2d, ReLU, batch norm sequence that was
    spelled out layer by layer in the previous model.
    """
    return conv_layer(ni, nf, stride=2)

# five stride-2 stages described by their channel widths;
# image size per stage: 14, 7, 4, 2, 1
widths = [1, 8, 16, 32, 16, 10]
model = nn.Sequential(
    *[conv2(ni, nf) for ni, nf in zip(widths, widths[1:])],
    Flatten()
)

In [None]:
# new learner for the refactored model, same loss function and metric as before
learner = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [None]:
# print a summary of the model held by the learner
learner.summary()

In [None]:
# train the refactored model for 6 epochs, maximum learning rate 0.1
learner.fit_one_cycle(6,max_lr=0.1)

### Kind of ResNet-ish

In [None]:
# residual block: two convolutions whose result is added back onto the input
class ResBlock(nn.Module):
  """Two stacked `conv_layer(nf, nf)` with an additive skip connection."""

  def __init__(self, nf):
    # initialise nn.Module first so the layers below get registered
    super().__init__()
    # both layers map `nf` features to `nf` features, so the channel count of
    # the activations stays equal to the one of the input they are added to
    self.conv1 = conv_layer(nf, nf)
    self.conv2 = conv_layer(nf, nf)

  def forward(self, input):
    # residual path: the input goes through both conv layers ...
    residual = self.conv1(input)
    residual = self.conv2(residual)
    # ... and the untouched input is summed with the result
    return input + residual

# small factory around ResBlock, just for testing
def resblck(nf):
  return ResBlock(nf)

In [None]:
# model using the residual learning block: the stride-2 stages of the model
# above, with a resblock inserted after every stage except the last one
widths = [1, 8, 16, 32, 16]
stages = [layer
          for ni, nf in zip(widths, widths[1:])
          for layer in (conv2(ni, nf), resblck(nf))]

model = nn.Sequential(
    *stages, # sizes 14, 7, 4, 2
    conv2(16, 10), # size=1
    Flatten()
)

In [None]:
# learner for the residual model, same loss function and metric as before
learn = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [None]:
# learning-rate range test for the residual model, then plot what was recorded
learn.lr_find()
learn.recorder.plot()

In [None]:
# 12 epochs of one-cycle training with a maximum learning rate of 0.2
learn.fit_one_cycle(12, max_lr=0.2)

#### Refactoring

In [None]:
def res_conv(ni, nf):
    """One stage of the residual model: `conv2(ni, nf)` followed by `resblck(nf)`."""
    stage = [conv2(ni, nf), resblck(nf)]
    return nn.Sequential(*stage)

In [None]:
# channel widths of the five res_conv stages; the last width is the number of
# classes of the databunch
widths = [1, 8, 16, 32, 16, data.c]
model = nn.Sequential(
    *[res_conv(ni, nf) for ni, nf in zip(widths, widths[1:])], # sizes 14, 7, 4, 2, 1
    Flatten() # vectorizes the (10,1,1) rank 3 tensor
)

In [None]:
# learner for the res_conv version of the model
learn = Learner(data, model, loss_func=nn.CrossEntropyLoss(), metrics=accuracy)

In [None]:
# print a summary of the model held by the learner
learn.summary()

In [None]:
# learning-rate range test for this model, then plot what was recorded
learn.lr_find()
learn.recorder.plot()

In [None]:
# 12 epochs of one-cycle training with a maximum learning rate of 0.2
learn.fit_one_cycle(12, max_lr=0.2)

In [None]:
# the same stack of res_conv stages, this time wrapped in fastai's
# SequentialEx ("sequential extended") module instead of nn.Sequential
widths = [1, 8, 16, 32, 16, 10]
model_fst = SequentialEx(
    *[res_conv(ni, nf) for ni, nf in zip(widths, widths[1:])],
    Flatten()
)

In [None]:
# grab one batch from the databunch and look at the shapes of inputs and targets
xb, yb = data.one_batch()
xb.shape, yb.shape

In [None]:
# shape of the predictions for that batch; if it has the expected size,
# the model is wired correctly
model_fst(xb).shape

### DenseNet-ish

In [None]:
# a DenseNet differs from a ResNet mostly in the way the skip connection
# interacts with the activations: instead of being summed, the two are
# concatenated.

class DenseBlock(nn.Module):
  """Two stacked `conv_layer(nf, nf)` whose output is concatenated with the block input."""

  def __init__(self, nf):
    # initialise nn.Module first so the layers below get registered
    super().__init__()
    self.conv1 = conv_layer(nf, nf)
    self.conv2 = conv_layer(nf, nf)

  def forward(self, input):
    # the input goes through both conv layers ...
    out = self.conv1(input)
    out = self.conv2(out)
    # ... and the untouched input is stacked after the new activations along dim 1
    return torch.cat((out, input), dim=1)

## NOTE: because the input is concatenated onto the activations, the block
## outputs more channels than its conv layers produce, so the number of
## inputs of the next layer must be changed accordingly

#### DenseNet in Sequential Extended


In [None]:
# creating func that has torch.cat in the merging process
def denselayer(ni, nf): return nn.Sequential(conv_layer(ni, nf),
                                             conv_layer(nf, nf),
                                             conv_layer(nf, nf),
                                             MergeLayer(dense=True))

In [None]:
## creating model with the simplified func above
## errors to be corrected here!
model = SequentialEx(
    denselayer(1, 8), # sz 14
    denselayer(8, 16), # sz 7
    denselayer(16, 32), # sz 4
    denselayer(32, 16), # sz 2
    denselayer(16, 10), # sz 1
    Flatten()
)