In [24]:
import torch
import torchvision
from torchsummary import summary
import torch.nn as nn
from collections import OrderedDict
from torchvision import transforms
from torch.utils.data import DataLoader
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [25]:
def create_model(model_choice, channels=64):
    #load resnet18
    if model_choice==1:
        # Build a resnet50 model from scratch
        model = torchvision.models.vgg13(pretrained=True)
        model.classifier[6] = nn.Linear(4096, 10)
        

    else:
        model=nn.Sequential(OrderedDict([
            ("conv1",nn.Conv2d(3,channels,7,3,3,bias=False)),
            ("bn1",nn.BatchNorm2d(channels)),
            ("relu1",nn.ReLU(inplace=True)),
            ("maxpool1",nn.MaxPool2d(2,2,1)),
            
            ("conv2",nn.Conv2d(channels,channels,3,1,1,bias=False)),
            ("bn2",nn.BatchNorm2d(channels)),
            ("relu2",nn.ReLU(inplace=True)),
            ("maxpool2",nn.MaxPool2d(2,2,1)),

            ("conv3",nn.Conv2d(channels,channels,3,1,1,bias=False)),
            ("bn3",nn.BatchNorm2d(channels)),
            ("relu3",nn.ReLU(inplace=True)),
            ("maxpool3",nn.MaxPool2d(2,2,1)),
            ("conv4",nn.Conv2d(channels,channels,3,1,1,bias=False)),
            ("bn4",nn.BatchNorm2d(channels)),
            ("relu4",nn.ReLU(inplace=True)),
            ("maxpool4",nn.MaxPool2d(2,2,1)),
            ("conv5",nn.Conv2d(channels,channels,3,1,1,bias=False)),
            ("bn5",nn.BatchNorm2d(channels)),
            ("relu5",nn.ReLU(inplace=True)),
            ("maxpool5",nn.MaxPool2d(2,2,1)),
            ("conv6",nn.Conv2d(channels,channels,3,1,1,bias=False)),
            ("bn6",nn.BatchNorm2d(channels)),
            ("relu6",nn.ReLU(inplace=True)), 
            ("maxpool6",nn.MaxPool2d(2,2,1)),
            ("conv7",nn.Conv2d(channels,channels,3,1,1,bias=False)),
            ("bn7",nn.BatchNorm2d(channels)),
            ("relu7",nn.ReLU(inplace=True)), 
            ("maxpool7",nn.MaxPool2d(2,2,1)),
            ("fl",nn.Flatten()),
            ("fc",nn.Linear(4*channels,10)),
            ("softmax",nn.Softmax(dim=1))
        ]))
    return model


In [26]:
#device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#model=model.to(device)

cnt = 0
conv_dict = dict()
def hook_fn(module, input, output):
    global cnt
    conv_dict[conv_list[cnt]] = module.weight
    cnt += 1
    return None


def get_hook(model):
    dic=model.state_dict()
    bn_layers = dict()
    conv_dict = dict()
    conv_list = []
    flag=False
    layer=None
    hook=[]
    for keys, values in dic.items():
        if 'conv' in keys:
            name=keys.replace('.weight','')
            layer=model.get_submodule(name)
            for i in range(100):
                name=name.replace("."+str(i)+".","["+str(i)+"].")
            layer.name = name
            hook.append(layer.register_forward_hook(hook_fn))
            conv_list.append(name)
            flag=True
        elif flag:
            if 'bn' in keys:
                bn_layers[name]=keys.replace('.weight','')
                for i in range(100):
                    bn_layers[name]=bn_layers[name].replace("."+str(i)+".","["+str(i)+"].")
            else:
                bn_layers[name]=None
            flag=False
    return hook, conv_list, bn_layers

def get_variance(tensor):
    var=torch.mean(torch.var(tensor, dim=(0)))
    print("variance:",var)
    return var



def add_channels(name, mean=0, std=0.01, add_coef=2, max_out_channels=512):
    layer=eval("model."+name)
    
    if layer.out_channels*add_coef>max_out_channels:
        print("",name," out_channels reach max_out_channels:",max_out_channels)
        return
    shape=layer.weight.shape
    ori_weight=layer.weight.clone()

    # Add output channels to conv
    exec("{}=torch.nn.utils.skip_init(torch.nn.Conv2d,in_channels=layer.in_channels, out_channels=int(layer.out_channels*add_coef), kernel_size=layer.kernel_size, stride=layer.stride, padding=layer.padding, dilation=layer.dilation, groups=layer.groups, bias=layer.bias, padding_mode=layer.padding_mode, device=device)".format("model."+name)) 
    add_layer_shape=list(ori_weight.shape)
    add_layer_shape[0]=int(add_layer_shape[0]*(add_coef-1))
    # Load original weight and add new weight
    exec("{}.weight=torch.nn.Parameter(torch.cat([ori_weight,torch.normal(mean, std, size=add_layer_shape).to(device)], dim=0))".format("model."+name))
    # print(torch.cat([ori_weight,torch.normal(mean, std, size=add_layer_shape).to(device)], dim=0).shape)
    print("add {} out_channels to layer:".format((add_coef-1)*ori_weight.shape[0]),name,"out_channels:",layer.out_channels,"->",layer.out_channels*add_coef)

    next = conv_list.index(name) + 1

    # Add input channels to next conv layer
    if next < len(conv_list):
        name_next = conv_list[next]
        layer = eval("model."+name_next)
        weight = layer.weight.clone()
        exec("{}=torch.nn.utils.skip_init(torch.nn.Conv2d,in_channels=int(layer.in_channels*add_coef), out_channels=layer.out_channels, kernel_size=layer.kernel_size, stride=layer.stride, padding=layer.padding, dilation=layer.dilation, groups=layer.groups, bias=layer.bias, padding_mode=layer.padding_mode, device=device)".format("model."+name_next)) 
        add_layer_shape=list(weight.shape)
        add_layer_shape[1]=int(add_layer_shape[1]*(add_coef-1))
        # print(torch.cat((weight,torch.normal(mean,std,size=add_layer_shape).to(device)), dim=1).shape)
        exec("{}.weight=torch.nn.Parameter(torch.cat((weight,torch.normal(mean,std,size=add_layer_shape).to(device)), dim=1))".format("model."+name_next))
    else:
        weight = model.fc.weight.clone()
        model.fc.in_features = int(model.fc.in_features * add_coef)
        add_layer_shape=list(weight.shape)
        add_layer_shape[1]=int(add_layer_shape[1]*(add_coef-1))
        model.fc=nn.Linear(model.fc.in_features, model.fc.out_features,device=device)
        model.fc.weight=torch.nn.Parameter(torch.cat((weight,torch.normal(mean,std,size=add_layer_shape).to(device)), dim=1))


    # Add input channels to bn
    if(bn_layers[name] != None):
        bn=eval("model."+bn_layers[name])
        bn_weight=bn.weight.clone()
        add_layer_shape=list(bn_weight.shape)
        add_layer_shape[0]=int(add_layer_shape[0]*(add_coef-1))
        exec("{}=torch.nn.BatchNorm2d(num_features=int(bn.num_features*add_coef), eps=bn.eps, momentum=bn.momentum, affine=bn.affine, track_running_stats=bn.track_running_stats, device=device)".format("model."+bn_layers[name]))
        # Load original weight and add new weight
        exec("{}.weight=torch.nn.Parameter(torch.cat((bn_weight,torch.normal(mean,std,size=add_layer_shape).to(device)), dim=0))".format("model."+bn_layers[name]))
    


# Modify the layers(If variance > 0.5, then add the channels of this layer)
def modify_layers(layers, var_threshold=0.2, mean=0, std=0.1, add_coef=2, max_out_channels=512):
    for keys, output in layers.items():
        var= get_variance(output)
        #print(var)
        if var>var_threshold:
            flag=True
            add_channels(keys, mean, std, add_coef, max_out_channels)
    layers.clear()
# model= create_model(0)
# model= model.to(device)

# hook, conv_list, bn_layers = get_hook(model)

# model.train()
# random_batch = torch.rand(1, 3, 224,224).to(device)
# model(random_batch)

# modify_layers(conv_dict, var_threshold=0.2)
# for h in hook:
#     h.remove()
# #print(model)
# model(random_batch)
    

In [27]:
# load cifar10
#load the CIFAR-10 dataset, and resize the data into 224x224
myTransforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

cifar10_data = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=myTransforms)
cifar10_data_test = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=myTransforms)

#load the data
train_loader=DataLoader(cifar10_data,batch_size=64,shuffle=True, num_workers=0)
test_loader=DataLoader(cifar10_data_test,batch_size=64,shuffle=True)

Files already downloaded and verified
Files already downloaded and verified


In [28]:
def test_model(model):
    total=0
    correct=0
    validation_accuracy=0
    with torch.no_grad():
        for data,target in test_loader:
            data,target=data.to(device),target.to(device)
            output=model.forward(data)
            _,predicted=torch.max(output.data,1)
            total+=target.size(0)
            correct+=(predicted==target).sum().item()
        validation_accuracy=100*correct/total
        print("Accuracy of the network on the 10000 test images: {}%".format(validation_accuracy),'\n')
    return validation_accuracy

In [29]:
def normal_train(model, optimizer, loss_f, epochs, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    model.train()
    training_loss=0.0
    for epoch in range(epochs):
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model.forward(data)
            loss = loss_f(output, target)
            loss.backward()
            optimizer.step()
            training_loss+=loss.item()
            if batch_idx % 10 == 0 :
                print('[iteration - %3d] training loss: %.3f' % (epoch*len(train_loader) + batch_idx, training_loss/10))
                training_loss = 0.0
                print()
        del data, target, output
        torch.cuda.empty_cache()
                


In [22]:
def Adnn_train(model, optimizer, loss_f, epochs, duration=100, var_threshold=0.3, mean=0, std=0.01, add_coef=2, max_out_channels=512, validation=True, patience=5 , device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')):
    global conv_list, bn_layers, cnt
    best_acc=0
    best_epoch=0
    for epoch in range(epochs):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            output = model.forward(data)
            loss = loss_f(output, target)
            loss.backward()
            optimizer.step()
            print('\r[Eposh - %3d][iteration - %3d / %3d ] training loss: %.3f' % (epoch+1, batch_idx, len(train_loader), loss.item()), end="")
            if batch_idx%duration==duration-1:
                print()
                cnt=0
                # Get a batch from the training set
                hook, conv_list, bn_layers = get_hook(model)
                model.forward(data)
                # Modify the layers(If variance > 0.5, then add the channels of this layer)
                modify_layers(conv_dict, var_threshold=var_threshold, mean=mean, std=std, add_coef=add_coef, max_out_channels=max_out_channels)
                del optimizer
                optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
                for h in hook:
                    h.remove()
                del hook, conv_list, bn_layers
                torch.cuda.empty_cache()
        print()
        if validation:
                model.eval()
                acc=test_model(model)
                if acc>best_acc:
                    best_acc=acc
                    best_epoch=epoch
                model.train()
                if epoch-best_epoch>patience:
                    print("Early stopping")
                    return

In [23]:
#Adnn train
model=create_model(0,32).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
loss_f = nn.CrossEntropyLoss()
Epochs=50
Adnn_train(model, optimizer, loss_f, Epochs, var_threshold=0.15, max_out_channels=1024, add_coef=1.5)
test_model(model)
summary(model,input_size=(3,224,224))

[Eposh -   1][iteration -  99 / 782 ] training loss: 2.269
tensor(0.0023, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0011, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0012, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0012, device='cuda:0', grad_fn=<MeanBackward0>)
[Eposh -   1][iteration - 135 / 782 ] training loss: 2.267

KeyboardInterrupt: 

In [None]:
model=create_model(0,256).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
loss_f = nn.CrossEntropyLoss()
Epochs=10
normal_train(model, optimizer, loss_f, Epochs)
test_model(model)


[iteration -   0] training loss: 0.230

[iteration -  10] training loss: 2.297

[iteration -  20] training loss: 2.286

[iteration -  30] training loss: 2.261



In [None]:
test_model(model)
summary(model,input_size=(3,224,224))
torch.cuda.empty_cache()


Accuracy of the network on the 10000 test images: 78.24% 

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            37,632
├─BatchNorm2d: 1-2                       512
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Conv2d: 1-5                            589,824
├─BatchNorm2d: 1-6                       512
├─ReLU: 1-7                              --
├─MaxPool2d: 1-8                         --
├─Conv2d: 1-9                            589,824
├─BatchNorm2d: 1-10                      512
├─ReLU: 1-11                             --
├─MaxPool2d: 1-12                        --
├─Conv2d: 1-13                           589,824
├─BatchNorm2d: 1-14                      512
├─ReLU: 1-15                             --
├─MaxPool2d: 1-16                        --
├─Conv2d: 1-17                           589,824
├─BatchNorm2d: 1-18                      512
├─ReLU: 1-19                             --
├─MaxPool2d: 1-20          

In [None]:
model=create_model(0,32).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9)
loss_f = nn.CrossEntropyLoss()
Epochs=10
normal_train(model, optimizer, loss_f, Epochs)
test_model(model)

[iteration -   0] training loss: 0.229

[iteration -  10] training loss: 2.302

[iteration -  20] training loss: 2.300

[iteration -  30] training loss: 2.299

[iteration -  40] training loss: 2.294

[iteration -  50] training loss: 2.292

[iteration -  60] training loss: 2.287

[iteration -  70] training loss: 2.282

[iteration -  80] training loss: 2.276

[iteration -  90] training loss: 2.277

[iteration - 100] training loss: 2.277

[iteration - 110] training loss: 2.278

[iteration - 120] training loss: 2.260

[iteration - 130] training loss: 2.260

[iteration - 140] training loss: 2.258

[iteration - 150] training loss: 2.257

[iteration - 160] training loss: 2.247

[iteration - 170] training loss: 2.248

[iteration - 180] training loss: 2.236

[iteration - 190] training loss: 2.241

[iteration - 200] training loss: 2.225

[iteration - 210] training loss: 2.230

[iteration - 220] training loss: 2.236

[iteration - 230] training loss: 2.208

[iteration - 240] training loss: 2.210



In [None]:
test_model(model)

Accuracy of the network on the 10000 test images: 65.16% 



In [None]:
summary(model, input_size=(3,224,224))

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            4,704
├─BatchNorm2d: 1-2                       64
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Conv2d: 1-5                            9,216
├─BatchNorm2d: 1-6                       64
├─ReLU: 1-7                              --
├─MaxPool2d: 1-8                         --
├─Conv2d: 1-9                            9,216
├─BatchNorm2d: 1-10                      64
├─ReLU: 1-11                             --
├─MaxPool2d: 1-12                        --
├─Conv2d: 1-13                           9,216
├─BatchNorm2d: 1-14                      64
├─ReLU: 1-15                             --
├─MaxPool2d: 1-16                        --
├─Conv2d: 1-17                           9,216
├─BatchNorm2d: 1-18                      64
├─ReLU: 1-19                             --
├─MaxPool2d: 1-20                        --
├─Conv2d: 1-21                           9,216
├─BatchNo

Layer (type:depth-idx)                   Param #
├─Conv2d: 1-1                            4,704
├─BatchNorm2d: 1-2                       64
├─ReLU: 1-3                              --
├─MaxPool2d: 1-4                         --
├─Conv2d: 1-5                            9,216
├─BatchNorm2d: 1-6                       64
├─ReLU: 1-7                              --
├─MaxPool2d: 1-8                         --
├─Conv2d: 1-9                            9,216
├─BatchNorm2d: 1-10                      64
├─ReLU: 1-11                             --
├─MaxPool2d: 1-12                        --
├─Conv2d: 1-13                           9,216
├─BatchNorm2d: 1-14                      64
├─ReLU: 1-15                             --
├─MaxPool2d: 1-16                        --
├─Conv2d: 1-17                           9,216
├─BatchNorm2d: 1-18                      64
├─ReLU: 1-19                             --
├─MaxPool2d: 1-20                        --
├─Conv2d: 1-21                           9,216
├─BatchNo