In [1]:
from comet_ml import Experiment
%matplotlib inline
import matplotlib.pyplot as plt
from fastai.vision import *
import torch
import warnings
from torchsummary import summary
from models.custom_resnet import _resnet, Bottleneck
# torch.cuda.set_device(0)
torch.manual_seed(1)
torch.cuda.manual_seed(1)

# stage should be in 0 to 5 (5 for classifier stage)
hyper_params = {
    "stage": 5,
    "repeated": 0,
    "num_classes": 10,
    "batch_size": 16,
    "num_epochs": 2,
    "learning_rate": 1e-4
}

In [2]:
path = untar_data(URLs.IMAGENETTE)

In [3]:
tfms = get_transforms(do_flip=False)
data = ImageDataBunch.from_folder(path, train = 'train', valid = 'val', bs = hyper_params["batch_size"], size = 224, ds_tfms = tfms).normalize(imagenet_stats)

In [7]:
learn = cnn_learner(data, models.resnet50, metrics = accuracy, pretrained = True)
learn.load('resnet50_imagenette_bs32')
learn.freeze()
# learn.summary()

net = _resnet('resnet50', Bottleneck, [2, 2, 2, 1], pretrained = False, progress = False)

if torch.cuda.is_available() : 
    net = net.cuda()
    print('Model on GPU')
    
# for name, param in net.named_parameters() : 
#     print(name, param.shape)
#     param.requires_grad = False
#     if name[5] == str(hyper_params['stage']) and hyper_params['stage'] != 0 :
#         param.requires_grad = True
#     elif (name[0] == 'b' or name[0] == 'c') and hyper_params['stage'] == 0 : 
#         param.requires_grad = True
#     elif name[0] == 'f' and hyper_params['stage'] == 5 : 
#         param.requires_grad = True
#     print(param.requires_grad)

Model on GPU


In [5]:
net

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [6]:
# x, y = next(iter(data.train_dl))
# net(torch.autograd.Variable(x).cuda())
# summary(net, (3, 224, 224))
# summary(learn.model, (3, 224, 224))
# print(learn.summary())
net.layer1

Sequential(
  (0): Bottleneck(
    (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace)
    (downsample): Sequential(
      (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
  )
  (1): Bottleneck(
    (conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (conv2): Conv2d(64, 64, kernel_

In [5]:
class SaveFeatures :
    def __init__(self, m) : 
        self.handle = m.register_forward_hook(self.hook_fn)
    def hook_fn(self, m, inp, outp) : 
        self.features = outp
    def remove(self) :
        self.handle.remove()
        
mdl = learn.model
del learn

In [6]:
sf = [SaveFeatures(m) for m in [mdl[0][2], mdl[0][4], mdl[0][5], mdl[0][6], mdl[0][7]]]
sf2 = [SaveFeatures(m) for m in [net.relu2, net.layer1, net.layer2, net.layer3, net.layer4]]

### For testing hooks 
# x, y = next(iter(data.train_dl))
# x = torch.autograd.Variable(x).cuda()
# out1 = mdl(x)
# out2 = net(x)
# for i in range(4) : 
#     print('resnet50 : ', sf[i].features.shape)
#     print('custom model : ', sf2[i].features.shape)
# del x, y, sf, sf2
# del out1, out2

#### Stage-wise training

In [8]:
if hyper_params['stage'] == 0 : 
    filename = '../saved_models/large_stage' + str(hyper_params['stage']) + '/model' + str(hyper_params['repeated']) + '.pt'
else : 
    filename = '../saved_models/large_stage' + str(hyper_params['stage'] + 1) + '/model' + str(hyper_params['repeated']) + '.pt'
optimizer = torch.optim.Adam(net.parameters(), lr = hyper_params["learning_rate"])
total_step = len(data.train_ds) // hyper_params["batch_size"]
train_loss_list = list()
val_loss_list = list()
min_val = 100
for epoch in range(hyper_params["num_epochs"]):
    trn = []
    net.train()
    for i, (images, labels) in enumerate(data.train_dl) :
        if torch.cuda.is_available():
            images = torch.autograd.Variable(images).cuda().float()
            labels = torch.autograd.Variable(labels).cuda()
        else : 
            images = torch.autograd.Variable(images).float()
            labels = torch.autograd.Variable(labels)

        y_pred = net(images)
        y_pred2 = mdl(images)

        loss = F.mse_loss(sf2[hyper_params["stage"]].features, sf[hyper_params["stage"]].features)
        trn.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
#         torch.nn.utils.clip_grad_value_(net.parameters(), 10)
        optimizer.step()

        if i % 50 == 49 :
            print('epoch = ', epoch + 1, ' step = ', i + 1, ' of total steps ', total_step, ' loss = ', loss.item())

    train_loss = (sum(trn) / len(trn))
    train_loss_list.append(train_loss)

    net.eval()
    val = []
    with torch.no_grad() :
        for i, (images, labels) in enumerate(data.valid_dl) :
            if torch.cuda.is_available():
                images = torch.autograd.Variable(images).cuda().float()
                labels = torch.autograd.Variable(labels).cuda()
            else : 
                images = torch.autograd.Variable(images).float()
                labels = torch.autograd.Variable(labels)

            # Forward pass
            y_pred = net(images)
            y_pred2 = mdl(images)
            loss = F.mse_loss(sf[hyper_params["stage"]].features, sf2[hyper_params["stage"]].features)
            val.append(loss.item())

    val_loss = sum(val) / len(val)
    val_loss_list.append(val_loss)
    print('epoch : ', epoch + 1, ' / ', hyper_params["num_epochs"], ' | TL : ', train_loss, ' | VL : ', val_loss)

    if val_loss < min_val :
        print('saving model')
        min_val = val_loss
        torch.save(net.state_dict(), filename)

epoch =  1  step =  50  of total steps  805  loss =  0.15482433140277863
epoch =  1  step =  100  of total steps  805  loss =  0.12196048349142075
epoch =  1  step =  150  of total steps  805  loss =  0.14741818606853485
epoch =  1  step =  200  of total steps  805  loss =  0.156645268201828
epoch =  1  step =  250  of total steps  805  loss =  0.1329699158668518
epoch =  1  step =  300  of total steps  805  loss =  0.1337122768163681
epoch =  1  step =  350  of total steps  805  loss =  0.11640556156635284
epoch =  1  step =  400  of total steps  805  loss =  0.11518991738557816
epoch =  1  step =  450  of total steps  805  loss =  0.11609910428524017
epoch =  1  step =  500  of total steps  805  loss =  0.11605231463909149
epoch =  1  step =  550  of total steps  805  loss =  0.10304179042577744
epoch =  1  step =  600  of total steps  805  loss =  0.10243804007768631
epoch =  1  step =  650  of total steps  805  loss =  0.10527341067790985
epoch =  1  step =  700  of total steps  80

In [None]:
# learn = Learner(data, net, metrics = accuracy)
net.cpu()
net.load_state_dict(torch.load('../saved_models/stage5/model0.pt', map_location = 'cpu'))
net = net.cuda()

In [None]:
plt.plot(range(hyper_params["num_epochs"]), train_loss_list, label = 'train_loss') 
plt.plot(range(hyper_params["num_epochs"]), val_loss_list, label = 'val_loss')
plt.legend()
plt.savefig('../figures/stage5/train_loss.jpg')

#### Training the classifier only after stage-wise training

In [None]:
hyper_params = {
    "stage": 5,
    "repeated": 3,
    "num_classes": 10,
    "batch_size": 64,
    "num_epochs": 100,
    "learning_rate": 1e-4
}

class Flatten(nn.Module) :
    def forward(self, input):
        return input.view(input.size(0), -1)

def conv2(ni, nf) : 
    return conv_layer(ni, nf, stride = 2)

class ResBlock(nn.Module):
    def __init__(self, nf):
        super().__init__()
        self.conv1 = conv_layer(nf,nf)
        
    def forward(self, x): 
        return (x + self.conv1(x))

def conv_and_res(ni, nf): 
    return nn.Sequential(conv2(ni, nf), ResBlock(nf))

def conv_(nf) : 
    return nn.Sequential(conv_layer(nf, nf), ResBlock(nf))
    
net = nn.Sequential(
    conv_layer(3, 64, ks = 7, stride = 2, padding = 3),
    nn.MaxPool2d(3, 2, padding = 1),
    conv_(64),
    conv_and_res(64, 128),
    conv_and_res(128, 256),
    AdaptiveConcatPool2d(),
    Flatten(),
    nn.Linear(2 * 256, 128),
    nn.Linear(128, hyper_params["num_classes"])
)

net.cpu()
net.load_state_dict(torch.load('../saved_models/small_stage4/model1.pt', map_location = 'cpu'))

if torch.cuda.is_available() : 
    net = net.cuda()
    print('Model on GPU')
    
for name, param in net.named_parameters() : 
    print(name, param.shape)
    param.requires_grad = False
    if name[0] == '7' or name[0] == '8':
        param.requires_grad = True
    print(param.requires_grad)

In [9]:
def _get_accuracy(dataloader, Net):
    total = 0
    correct = 0
    Net.eval()
    for i, (images, labels) in enumerate(dataloader):
        images = torch.autograd.Variable(images).float()
        labels = torch.autograd.Variable(labels).float()
        
        if torch.cuda.is_available() : 
            images = images.cuda()
            labels = labels.cuda()

        outputs = Net.forward(images)
        outputs = F.log_softmax(outputs, dim = 1)

        _, pred_ind = torch.max(outputs, 1)
        
        # converting to numpy arrays
        labels = labels.data.cpu().numpy()
        pred_ind = pred_ind.data.cpu().numpy()
        
        # get difference
        diff_ind = labels - pred_ind
        # correctly classified will be 1 and will get added
        # incorrectly classified will be 0
        correct += np.count_nonzero(diff_ind == 0)
        total += len(diff_ind)

    accuracy = correct / total
    # print(len(diff_ind))
    return accuracy

net.cpu()
net.load_state_dict(torch.load('../saved_models/large_classifier/model0.pt', map_location = 'cpu'))
net.cuda()
print(_get_accuracy(data.valid_dl, net))
net.cpu()
net.load_state_dict(torch.load('../saved_models/large_no_teacher/model0.pt', map_location = 'cpu'))
net.cuda()
print(_get_accuracy(data.valid_dl, net))

0.982
0.934


In [None]:
optimizer = torch.optim.Adam(net.parameters(), lr = hyper_params["learning_rate"])
total_step = len(data.train_ds) // hyper_params["batch_size"]
train_loss_list = list()
val_loss_list = list()
min_val = 0
for epoch in range(hyper_params["num_epochs"]):
    trn = []
    net.train()
    for i, (images, labels) in enumerate(data.train_dl) :
        if torch.cuda.is_available():
            images = torch.autograd.Variable(images).cuda().float()
            labels = torch.autograd.Variable(labels).cuda()
        else : 
            images = torch.autograd.Variable(images).float()
            labels = torch.autograd.Variable(labels)

        y_pred = net(images)

        loss = F.cross_entropy(y_pred, labels)
        trn.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
#         torch.nn.utils.clip_grad_value_(net.parameters(), 10)
        optimizer.step()

        if i % 50 == 49 :
            print('epoch = ', epoch, ' step = ', i + 1, ' of total steps ', total_step, ' loss = ', loss.item())

    train_loss = (sum(trn) / len(trn))
    train_loss_list.append(train_loss)

    net.eval()
    val = []
    with torch.no_grad() :
        for i, (images, labels) in enumerate(data.valid_dl) :
            if torch.cuda.is_available():
                images = torch.autograd.Variable(images).cuda().float()
                labels = torch.autograd.Variable(labels).cuda()
            else : 
                images = torch.autograd.Variable(images).float()
                labels = torch.autograd.Variable(labels)

            # Forward pass
            y_pred = net(images)
            
            loss = F.cross_entropy(y_pred, labels)
            val.append(loss.item())

    val_loss = sum(val) / len(val)
    val_loss_list.append(val_loss)
    val_acc = _get_accuracy(data.valid_dl, net)

    print('epoch : ', epoch + 1, ' / ', hyper_params["num_epochs"], ' | TL : ', train_loss, ' | VL : ', val_loss, ' | VA : ', val_acc * 100)

    if (val_acc * 100) > min_val :
        print('saving model')
        min_val = val_acc * 100
        torch.save(net.state_dict(), '../saved_models/small_classifier/model1.pt')savename

In [None]:
net.cpu()
net.load_state_dict(torch.load('../saved_models/small_classifier/model4.pt', map_location = 'cpu'))
net.cuda()

learn = cnn_learner(data, models.resnet34, metrics = accuracy)
learn = learn.load('unfreeze_imagenet_bs64')
learn.freeze()

print(_get_accuracy(data.valid_dl, net))
print(_get_accuracy(data.valid_dl, learn.model))

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
from fastai.vision import *
import torch
from torchsummary import summary
torch.cuda.set_device(0)

for repeated in range(0, 1) : 
    torch.manual_seed(repeated)
    torch.cuda.manual_seed(repeated)

    # stage should be in 0 to 5 (5 for classifier stage)
    hyper_params = {
        "stage": 5,
        "repeated": repeated,
        "num_classes": 10,
        "batch_size": 64,
        "num_epochs": 100,
        "learning_rate": 1e-4
    }

    path = untar_data(URLs.IMAGENETTE)
    tfms = get_transforms(do_flip=False)
    data = ImageDataBunch.from_folder(path, train = 'train', valid = 'val', bs = hyper_params["batch_size"], size = 224, ds_tfms = tfms).normalize(imagenet_stats)
    
    class Flatten(nn.Module) :
        def forward(self, input):
            return input.view(input.size(0), -1)

    def conv2(ni, nf) : 
        return conv_layer(ni, nf, stride = 2)

    class ResBlock(nn.Module):
        def __init__(self, nf):
            super().__init__()
            self.conv1 = conv_layer(nf,nf)

        def forward(self, x): 
            return (x + self.conv1(x))

    def conv_and_res(ni, nf): 
        return nn.Sequential(conv2(ni, nf), ResBlock(nf))

    def conv_(nf) : 
        return nn.Sequential(conv_layer(nf, nf), ResBlock(nf))

    net = nn.Sequential(
        conv_layer(3, 64, ks = 7, stride = 2, padding = 3),
        nn.MaxPool2d(3, 2, padding = 1),
        conv_(64),
        conv_and_res(64, 128),
        conv_and_res(128, 256),
        AdaptiveConcatPool2d(),
        Flatten(),
        nn.Linear(2 * 256, 128),
        nn.Linear(128, hyper_params["num_classes"])
    )

    net.cpu()
    filename = '../saved_models/small_stage4/model' + str(repeated) + '.pt'
    net.load_state_dict(torch.load(filename, map_location = 'cpu'))

    if torch.cuda.is_available() : 
        net = net.cuda()
        print('Model on GPU')

    for name, param in net.named_parameters() : 
        param.requires_grad = False
        if name[0] == '7' or name[0] == '8':
            param.requires_grad = True
        
    optimizer = torch.optim.Adam(net.parameters(), lr = hyper_params["learning_rate"])
    total_step = len(data.train_ds) // hyper_params["batch_size"]
    train_loss_list = list()
    val_loss_list = list()
    min_val = 0
    savename = '../saved_models/small_classifier/model' + str(repeated) + '.pt'
    for epoch in range(hyper_params["num_epochs"]):
        trn = []
        net.train()
        for i, (images, labels) in enumerate(data.train_dl) :
            if torch.cuda.is_available():
                images = torch.autograd.Variable(images).cuda().float()
                labels = torch.autograd.Variable(labels).cuda()
            else : 
                images = torch.autograd.Variable(images).float()
                labels = torch.autograd.Variable(labels)

            y_pred = net(images)

            loss = F.cross_entropy(y_pred, labels)
            trn.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
    #         torch.nn.utils.clip_grad_value_(net.parameters(), 10)
            optimizer.step()

            if i % 50 == 49 :
                print('epoch = ', epoch, ' step = ', i + 1, ' of total steps ', total_step, ' loss = ', loss.item())

        train_loss = (sum(trn) / len(trn))
        train_loss_list.append(train_loss)

        net.eval()
        val = []
        with torch.no_grad() :
            for i, (images, labels) in enumerate(data.valid_dl) :
                if torch.cuda.is_available():
                    images = torch.autograd.Variable(images).cuda().float()
                    labels = torch.autograd.Variable(labels).cuda()
                else : 
                    images = torch.autograd.Variable(images).float()
                    labels = torch.autograd.Variable(labels)

                # Forward pass
                y_pred = net(images)

                loss = F.cross_entropy(y_pred, labels)
                val.append(loss.item())

        val_loss = sum(val) / len(val)
        val_loss_list.append(val_loss)
        val_acc = _get_accuracy(data.valid_dl, net)

        print('epoch : ', epoch + 1, ' / ', hyper_params["num_epochs"], ' | TL : ', train_loss, ' | VL : ', val_loss, ' | VA : ', val_acc * 100)

        if (val_acc * 100) > min_val :
            print('saving model')
            min_val = val_acc * 100
            torch.save(net.state_dict(), savename)