In [5]:
from __future__ import print_function, division

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy

plt.ion()   # interactive mode

<matplotlib.pyplot._IonContext at 0x7f45741392e0>

In [6]:
# function for training model
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` and evaluate it after every epoch, keeping the best weights.

    Every epoch runs a 'training' phase followed by a 'testing' phase. The
    weights that reach the highest 'testing' accuracy are restored into
    ``model`` before returning.

    Args:
        model: ``nn.Module`` to optimise. Batches are moved to the device of
            its first parameter (CPU if the model has no parameters).
        dataloaders: mapping with the keys 'training' and 'testing'. Each value
            is iterated for ``(inputs, labels)`` batches and must expose a
            ``.dataset`` attribute supporting ``len()``.
        criterion: loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: optimizer holding the parameters to update.
        num_epochs: number of epochs to run.
        is_inception: when True, the model is expected to return
            ``(outputs, aux_outputs)`` while training, and the loss becomes
            ``loss(outputs) + 0.4 * loss(aux_outputs)``.

    Returns:
        ``(model, val_acc_history, tra_acc_history, best_acc)``. The histories
        hold one accuracy per epoch (0-dim double tensors); ``best_acc`` is the
        best 'testing' accuracy, or the float ``0.0`` if it never improved.
    """
    since = time.time()

    # Use the device the model already lives on rather than depending on a
    # notebook-level global named `device`.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    val_acc_history = []
    tra_acc_history = []
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['training', 'testing']:
            is_training = phase == 'training'
            if is_training:
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; autograd history is only tracked while training
                with torch.set_grad_enabled(is_training):
                    if is_inception and is_training:
                        # Inception returns an auxiliary output in training mode; both
                        # outputs contribute to the loss, only the final one is scored.
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase.
                    # A single forward/backward pass per batch: the previous
                    # `optimizer.step(closure)` re-ran the model inside the closure,
                    # doubling the work and stepping on gradients from a pass whose
                    # loss/predictions were never the ones being logged.
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics (loss is weighted by the batch size so the epoch
                # average below is per-sample)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if is_training:
                tra_acc_history.append(epoch_acc)
            else:
                val_acc_history.append(epoch_acc)
                # deep copy the model whenever the test accuracy improves
                if epoch_acc > best_acc:
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, tra_acc_history, best_acc


In [7]:
# helper deciding which parameters of a model stay trainable
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    With a falsy flag the model is left untouched, so all of its parameters
    keep whatever ``requires_grad`` setting they already had (fine-tuning).
    Layers attached to the model after this call are not affected.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [8]:
## Customized pre-trained DenseNet: experiment configuration and data loading
# Best test accuracy of every run is collected in this list.
Experiment_acc_history = []
# Settings shared by all runs
batch_size = 128  # 64
dropout_ratio = 0.6
order_idx = 2
learning_ratio = 0.01
input_size = 224  # 256
num_epochs = 150  # 300
for experiment_idx in range(1, 11):
    print('Pre_trained_First 4 Dense Blocks with residual block with_BN_relu_in_conv second order no size constrained')
    print("RUN: " + str(experiment_idx))
    dataset_name = "DTD-80-40-with-label"
    architecture_name = "dense161"

    # All output files (model, figure, accuracy log) share one descriptive stem;
    # only the model and the figure carry the run index.
    run_stem = ("Pre_trained_" + architecture_name
                + "0.2_residual_block_in224_2208_tune1_relu2_SIGMOID_SIGMOID_epoch_" + str(num_epochs)
                + '_' + str(learning_ratio)
                + "batch_size_" + str(batch_size)
                + "drop_out_" + str(dropout_ratio)
                + "_" + dataset_name)
    run_suffix = "_run_" + str(experiment_idx)
    path_model = run_stem + run_suffix + ".pkl"
    path_save_fig = run_stem + run_suffix + ".png"
    path_save_txt = run_stem + "+.txt"

    # Per-channel normalisation constants applied in both phases
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]
    data_transforms = {
        # random crop + flip while training
        'training': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(norm_mean, norm_std),
        ]),
        # deterministic resize + centre crop while testing
        'testing': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(norm_mean, norm_std),
        ]),
    }
    should_data_augmentation = True
    experiment_idx_str = str(experiment_idx)
    print("The datatset using is")
    print(dataset_name)
    # Each run reads its own split from ./data/<dataset>/<run index>/{training,testing}
    data_dir = r"./data/" + dataset_name + "/" + experiment_idx_str
    print(data_dir)

    phases = ['training', 'testing']
    image_datasets = {
        phase: datasets.ImageFolder(os.path.join(data_dir, phase),
                                    transform=data_transforms[phase])
        for phase in phases
    }
    class_names = image_datasets['training'].classes
    dataset_sizes = {phase: len(image_datasets[phase]) for phase in phases}
    print(dataset_sizes)
    dataloaders = {
        phase: torch.utils.data.DataLoader(image_datasets[phase],
                                           batch_size=batch_size,
                                           shuffle=True,
                                           num_workers=4)
        for phase in phases
    }

    print(len(class_names))
    print(class_names)
    # GPU index is hard-coded for the machine this notebook was run on
    device = torch.device("cuda:7")
    print(device)
    num_classes = len(class_names)
    print(num_classes)
    feature_extract = True
    ### Model of Deep residual pooling network ###
    class myModel(nn.Module):
        """DenseNet-161 backbone followed by two residual-pooling heads.

        ``features`` is the pretrained DenseNet-161 feature stack without its
        last child and ``features1`` the same stack without its last two
        children. Each branch applies a 1x1 convolution block, subtracts the
        sigmoid of its own input, aggregates with ReLU / BatchNorm / ReLU and
        global average pooling, and feeds a linear classifier. The final
        logits are ``classifier(main) + 0.2 * classifier_aux(aux)``.

        All layers used by ``forward`` are created here, with the channel
        counts the notebook configures for this backbone (2208 and 1056), so
        the model is usable without attaching attributes afterwards. Replacing
        the attributes after construction still works as before.

        Args:
            n_classes: number of output classes; when None, the notebook-level
                ``num_classes`` is used.
            dropout_p: drop probability of the ``Dropout2d`` inside both 1x1
                convolution blocks.
        """

        def __init__(self, n_classes=None, dropout_p=0.5):
            super(myModel,self).__init__()
            if n_classes is None:
                n_classes = num_classes  # notebook-level value derived from the dataset
            main_dim = 2208  # channels produced by `features`
            aux_dim = 1056   # channels produced by `features1`

            model_dense=models.densenet161(pretrained=True)
            backbone_layers = list(model_dense.features.children())
            # NOTE: both Sequentials wrap the same layer objects; features1 is a prefix of features.
            self.features=nn.Sequential(*backbone_layers[:-1])
            self.features1=nn.Sequential(*backbone_layers[:-2])

            # --- residual pooling layer 1 (main branch) ---
            self.conv1= nn.Sequential(nn.Conv2d(in_channels=main_dim,
                                                out_channels=main_dim,
                                                kernel_size=1,
                                                stride=1,
                                                padding=0),
                                      nn.Dropout2d(p=dropout_p),
                                      nn.BatchNorm2d(main_dim))
            self.sigmoid = nn.Sigmoid()
            # Attribute names are kept for compatibility; the activation the
            # notebook installs after the 1x1 conv block is a sigmoid.
            self.relu1 = nn.Sigmoid()
            self.relu2 = nn.ReLU(inplace=True)
            self.norm1 = nn.BatchNorm2d(main_dim)
            self.relu3 = nn.ReLU(inplace=True)

            # --- residual pooling layer 2 (auxiliary branch) ---
            self.conv2= nn.Sequential(nn.Conv2d(in_channels=aux_dim,
                                                out_channels=aux_dim,
                                                kernel_size=1,
                                                stride=1,
                                                padding=0),
                                      nn.Dropout2d(p=dropout_p),
                                      nn.BatchNorm2d(aux_dim))
            self.sigmoid1 = nn.Sigmoid()
            self.relu4 = nn.Sigmoid()
            self.relu5 = nn.ReLU(inplace=True)
            self.norm2 = nn.BatchNorm2d(aux_dim)
            self.relu6 = nn.ReLU(inplace=True)

            # --- classifiers ---
            self.classifier=nn.Linear(main_dim, n_classes)
            self.classifier_aux=nn.Linear(aux_dim, n_classes)

        def forward(self,x):
            """Return the combined logits ``classifier(out) + 0.2 * classifier_aux(out2)``."""
            out = self.features(x)
            identity=out
            ## Residual pooling layer 1 ##
            ## 1. Residual encoding module ##
            identity = self.sigmoid(identity)
            out = self.conv1(out)
            out = self.relu1(out)
            out = out - identity
            ## 2. Aggregation module ##
            out = self.relu2(out)
            out = self.norm1(out)
            out = self.relu3(out)
            # global average pooling, then flatten to (batch, channels)
            out = nn.functional.adaptive_avg_pool2d(out,(1,1)).view(out.size(0), -1)
            #----------------------------------------------------------------------
            # The shallower stack is evaluated separately from `features`,
            # so the shared early layers run twice per call.
            out2 = self.features1(x)
            identity1=out2
            ## Residual pooling layer 2 ##
            ## 1. Residual encoding module ##
            identity1 = self.sigmoid1(identity1)
            out2 = self.conv2(out2)
            out2 = self.relu4(out2)
            out2 = out2 - identity1
            ## 2. Aggregation module ##
            out2 = self.relu5(out2)
            out2 = self.norm2(out2)
            out2 = self.relu6(out2)
            out2 = nn.functional.adaptive_avg_pool2d(out2,(1,1)).view(out2.size(0), -1)
            #----------------------------------------------------------------------
            x=self.classifier(out)
            x_aux = self.classifier_aux(out2)
            # auxiliary logits contribute with a fixed weight of 0.2
            x = x + 0.2*x_aux
            return x
    net=myModel()
    feature_extract=True#False#True
    set_parameter_requires_grad(net, feature_extract)
    #net.features.10
#--------------------------------------------------------------------------------
    dense_feature_dim = 2208
    net.conv1= nn.Sequential(nn.Conv2d(in_channels=dense_feature_dim,
                                            out_channels=dense_feature_dim,
                                             kernel_size=1,
                                            stride=1,
                                            padding=0),
                                nn.Dropout2d(p=dropout_ratio),
                                  nn.BatchNorm2d(dense_feature_dim))#0.3 FOR DTD#nn.BatchNorm2d(2208)
    net.sigmoid=nn.Sigmoid()
    net.relu1 = nn.Sigmoid()#ReLU(inplace = False)#Sigmoid()#(inplace=False)
    print("order index:"+str(order_idx))
    net.relu2 = nn.ReLU(inplace=True);
    net.norm1 = nn.BatchNorm2d(dense_feature_dim);
    net.relu3 = nn.ReLU(inplace=True);
#--------------------------------------------------------------------------------
    dense_feature_dim2 = 1056
    net.conv2= nn.Sequential(nn.Conv2d(in_channels=dense_feature_dim2,
                                            out_channels=dense_feature_dim2,
                                             kernel_size=1,
                                            stride=1,
                                            padding=0),
                                nn.Dropout2d(p=dropout_ratio),
                                  nn.BatchNorm2d(dense_feature_dim2))#0.3 FOR DTD#nn.BatchNorm2d(2208)
    net.sigmoid1=nn.Sigmoid()
    net.relu4 = nn.Sigmoid()#ReLU(inplace = False)#Sigmoid()#(inplace=False)
    print("order index:"+str(order_idx))
    net.relu5 = nn.ReLU(inplace=True);
    net.norm2 = nn.BatchNorm2d(dense_feature_dim2);
    net.relu6 = nn.ReLU(inplace=True);        
#--------------------------------------------------------------------------------
    net.classifier_aux=nn.Linear((dense_feature_dim2),num_classes)    
    net.classifier=nn.Linear((dense_feature_dim),num_classes) 
    criterion = nn.CrossEntropyLoss()
    model_ft = net.to(device)
    params_to_update = model_ft.parameters()
    print("Params to learn:")
    #  feature_extract is true: dense block part no need to optimized
    if feature_extract:
        params_to_update = []
        for name,param in model_ft.named_parameters():
            #print(name)
            if param.requires_grad == True:
                params_to_update.append(param)
                print("\t",name)
    else:
        for name,param in model_ft.named_parameters():
            if param.requires_grad == True:
                print("\t",name)
    #Observe that all parameters are being optimized
    classifier_params = list(map(id, model_ft.classifier.parameters()))
    base_params = filter(lambda p: id(p) not in classifier_params,
                         params_to_update)

    optimizer_ft = optim.Adam(params_to_update,lr=0.0001)#, momentum=0.9)# pre: 0.01
    # training part
    #num_epochs=200
    model_ft, hist_val,hist_train,best_acc = train_model(model_ft, dataloaders, criterion, optimizer_ft, num_epochs=num_epochs, is_inception=False)

    # draw learning curve and save the results (accuracy and corresponding model)
    tra_hist = []
    tes_hist=[]
    tra_hist = [h.cpu().numpy() for h in hist_train]
    tes_hist = [h.cpu().numpy() for h in hist_val]

    plt.title("Accuracy vs. Number of Training Epochs")
    plt.xlabel("Training Epochs")
    plt.ylabel("Accuracy")
    plt.plot(range(1,num_epochs+1),tra_hist,label="Training")
    plt.plot(range(1,num_epochs+1),tes_hist,label="testing")
    plt.ylim((0,1.))
    plt.xticks(np.arange(1, num_epochs+1, 1.0))
    plt.legend()
    plt.savefig(path_save_fig)                                                                     
    plt.show()
    Experiment_acc_history.append(best_acc)
    print('First 4 Dense Blocks with residual block with_BN_relu_in_conv RBF')

    torch.save(model_ft, path_model)  #

# In[6]:


# Summary over all runs: mean and standard deviation of the best test
# accuracies, followed by the raw values, which are also written to a text file.
# Entries are coerced to floats so tensors and plain numbers are both accepted.
Experiment_acc_history_np = torch.FloatTensor([float(acc) for acc in Experiment_acc_history])
Experiment_acc_history_np_1 = Experiment_acc_history_np.numpy()

# `experiment_idx` still holds the index of the last run, i.e. the run count.
print("Average accuracy in "+str(experiment_idx)+"runs is")
print(str(torch.mean(Experiment_acc_history_np)))

print('First 4 Dense Blocks with residual block with_BN_relu_in_conv second order only-ending')
print("STD OF Average accuracy in "+str(experiment_idx)+"runs is")
# The label announces a standard deviation, so report np.std (population
# standard deviation, ddof=0) instead of the variance.
print(np.std(Experiment_acc_history_np_1))
print(Experiment_acc_history_np)
print(Experiment_acc_history_np_1)
np.savetxt(path_save_txt,Experiment_acc_history_np_1)

Pre_trained_First 4 Dense Blocks with residual block with_BN_relu_in_conv second order no size constrained
RUN: 1
The datatset using is
DTD-80-40-with-label
./data/DTD-80-40-with-label/1
{'training': 3760, 'testing': 1880}
47
['banded', 'blotchy', 'braided', 'bubbly', 'bumpy', 'chequered', 'cobwebbed', 'cracked', 'crosshatched', 'crystalline', 'dotted', 'fibrous', 'flecked', 'freckled', 'frilly', 'gauzy', 'grid', 'grooved', 'honeycombed', 'interlaced', 'knitted', 'lacelike', 'lined', 'marbled', 'matted', 'meshed', 'paisley', 'perforated', 'pitted', 'pleated', 'polka-dotted', 'porous', 'potholed', 'scaly', 'smeared', 'spiralled', 'sprinkled', 'stained', 'stratified', 'striped', 'studded', 'swirly', 'veined', 'waffled', 'woven', 'wrinkled', 'zigzagged']
cuda:7
47
