In [1]:
import os
import urllib
import torch
import torch.nn as nn
import torch.nn.functional as F
#import torch.utils.model_zoo as model_zoo
from torchvision import models

In [2]:
#https://raw.githubusercontent.com/delta-onera/delta_tb/master/deltatb/networks/net_segnet_bn_relu.py
class SegNet_BN_ReLU(nn.Module):
    """SegNet-style encoder/decoder network for dense, per-pixel prediction.

    The encoder consists of five blocks of 3x3 convolutions (2, 2, 3, 3 and 3
    layers; 64 up to 512 channels). Each block ends with a 2x2 max-pool that
    also returns the argmax indices. The decoder mirrors the encoder and
    restores resolution with ``nn.MaxUnpool2d`` driven by those indices.
    Every convolution except the very last one is applied as
    conv -> ReLU -> BatchNorm.

    ``forward`` additionally average-pools the input by a factor of 4 before
    the encoder and bilinearly resizes the result back to the input's spatial
    size at the end.

    Args:
        in_channels: number of channels of the input tensor.
        out_channels: number of channels of the returned map.
    """

    @staticmethod
    def weight_init(m):
        """Kaiming-normal initialise the weight of ``nn.Linear`` modules.

        Every other module type is left untouched. This class defines no
        ``nn.Linear`` layer, so ``self.apply(self.weight_init)`` in
        ``__init__`` leaves PyTorch's default initialisation in place for all
        of its layers.
        """
        if isinstance(m, nn.Linear):
            # In-place variant; the un-suffixed ``kaiming_normal`` is deprecated.
            nn.init.kaiming_normal_(m.weight)

    def __init__(self, in_channels, out_channels):
        super(SegNet_BN_ReLU, self).__init__()

        self.in_channels = in_channels
        self.out_channels = out_channels

        # Shared pooling / unpooling operators; the pool returns indices so
        # the decoder can place values back at the argmax positions.
        self.pool = nn.MaxPool2d(2, return_indices=True)
        self.unpool = nn.MaxUnpool2d(2)

        # NOTE: the declaration order below determines the order of
        # ``state_dict()`` keys, which ``load_pretrained_weights`` relies on.

        # ---- Encoder ----
        self.conv1_1 = nn.Conv2d(in_channels, 64, 3, padding=1)
        self.conv1_1_bn = nn.BatchNorm2d(64)
        self.conv1_2 = nn.Conv2d(64, 64, 3, padding=1)
        self.conv1_2_bn = nn.BatchNorm2d(64)

        self.conv2_1 = nn.Conv2d(64, 128, 3, padding=1)
        self.conv2_1_bn = nn.BatchNorm2d(128)
        self.conv2_2 = nn.Conv2d(128, 128, 3, padding=1)
        self.conv2_2_bn = nn.BatchNorm2d(128)

        self.conv3_1 = nn.Conv2d(128, 256, 3, padding=1)
        self.conv3_1_bn = nn.BatchNorm2d(256)
        self.conv3_2 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3_2_bn = nn.BatchNorm2d(256)
        self.conv3_3 = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3_3_bn = nn.BatchNorm2d(256)

        self.conv4_1 = nn.Conv2d(256, 512, 3, padding=1)
        self.conv4_1_bn = nn.BatchNorm2d(512)
        self.conv4_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv4_2_bn = nn.BatchNorm2d(512)
        self.conv4_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv4_3_bn = nn.BatchNorm2d(512)

        self.conv5_1 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_1_bn = nn.BatchNorm2d(512)
        self.conv5_2 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_2_bn = nn.BatchNorm2d(512)
        self.conv5_3 = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_3_bn = nn.BatchNorm2d(512)

        # ---- Decoder (mirror of the encoder, suffix ``_D``) ----
        self.conv5_3_D = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_3_D_bn = nn.BatchNorm2d(512)
        self.conv5_2_D = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_2_D_bn = nn.BatchNorm2d(512)
        self.conv5_1_D = nn.Conv2d(512, 512, 3, padding=1)
        self.conv5_1_D_bn = nn.BatchNorm2d(512)

        self.conv4_3_D = nn.Conv2d(512, 512, 3, padding=1)
        self.conv4_3_D_bn = nn.BatchNorm2d(512)
        self.conv4_2_D = nn.Conv2d(512, 512, 3, padding=1)
        self.conv4_2_D_bn = nn.BatchNorm2d(512)
        self.conv4_1_D = nn.Conv2d(512, 256, 3, padding=1)
        self.conv4_1_D_bn = nn.BatchNorm2d(256)

        self.conv3_3_D = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3_3_D_bn = nn.BatchNorm2d(256)
        self.conv3_2_D = nn.Conv2d(256, 256, 3, padding=1)
        self.conv3_2_D_bn = nn.BatchNorm2d(256)
        self.conv3_1_D = nn.Conv2d(256, 128, 3, padding=1)
        self.conv3_1_D_bn = nn.BatchNorm2d(128)

        self.conv2_2_D = nn.Conv2d(128, 128, 3, padding=1)
        self.conv2_2_D_bn = nn.BatchNorm2d(128)
        self.conv2_1_D = nn.Conv2d(128, 64, 3, padding=1)
        self.conv2_1_D_bn = nn.BatchNorm2d(64)

        self.conv1_2_D = nn.Conv2d(64, 64, 3, padding=1)
        self.conv1_2_D_bn = nn.BatchNorm2d(64)
        # Final projection to the requested number of output channels
        # (no ReLU / BatchNorm after it, see ``forward``).
        self.conv1_1_D = nn.Conv2d(64, out_channels, 3, padding=1)

        self.apply(self.weight_init)

    def forward(self, x):
        """Run the network on a 4-D ``(N, in_channels, H, W)`` tensor.

        The input is average-pooled by 4, passed through the encoder/decoder,
        and the result is bilinearly resized to ``(H, W)``, so the returned
        tensor has shape ``(N, out_channels, H, W)`` even when ``H`` or ``W``
        is not a multiple of 4.

        After the initial 4x average pooling the feature map goes through
        five 2x2 max-pools, so ``H`` and ``W`` must be large enough for every
        pooled map to stay non-empty (at least 128 pixels per side).
        """
        # Remember the caller's resolution so the output can be resized to
        # exactly this size (a fixed scale factor would lose up to 3 pixels
        # per side when H or W is not divisible by 4).
        input_hw = x.shape[-2:]

        # Work at quarter resolution.
        x = F.avg_pool2d(x, 4)

        # Encoder block 1
        x = self.conv1_1_bn(F.relu(self.conv1_1(x)))
        x1 = self.conv1_2_bn(F.relu(self.conv1_2(x)))
        size1 = x1.size()  # pre-pool size, needed by the matching unpool
        x, mask1 = self.pool(x1)

        # Encoder block 2
        x = self.conv2_1_bn(F.relu(self.conv2_1(x)))
        x2 = self.conv2_2_bn(F.relu(self.conv2_2(x)))
        size2 = x2.size()
        x, mask2 = self.pool(x2)

        # Encoder block 3
        x = self.conv3_1_bn(F.relu(self.conv3_1(x)))
        x = self.conv3_2_bn(F.relu(self.conv3_2(x)))
        x3 = self.conv3_3_bn(F.relu(self.conv3_3(x)))
        size3 = x3.size()
        x, mask3 = self.pool(x3)

        # Encoder block 4
        x = self.conv4_1_bn(F.relu(self.conv4_1(x)))
        x = self.conv4_2_bn(F.relu(self.conv4_2(x)))
        x4 = self.conv4_3_bn(F.relu(self.conv4_3(x)))
        size4 = x4.size()
        x, mask4 = self.pool(x4)

        # Encoder block 5
        x = self.conv5_1_bn(F.relu(self.conv5_1(x)))
        x = self.conv5_2_bn(F.relu(self.conv5_2(x)))
        x = self.conv5_3_bn(F.relu(self.conv5_3(x)))
        size5 = x.size()
        x, mask5 = self.pool(x)

        # Decoder block 5
        x = self.unpool(x, mask5, output_size=size5)
        x = self.conv5_3_D_bn(F.relu(self.conv5_3_D(x)))
        x = self.conv5_2_D_bn(F.relu(self.conv5_2_D(x)))
        x = self.conv5_1_D_bn(F.relu(self.conv5_1_D(x)))

        # Decoder block 4
        x = self.unpool(x, mask4, output_size=size4)
        x = self.conv4_3_D_bn(F.relu(self.conv4_3_D(x)))
        x = self.conv4_2_D_bn(F.relu(self.conv4_2_D(x)))
        x = self.conv4_1_D_bn(F.relu(self.conv4_1_D(x)))

        # Decoder block 3
        x = self.unpool(x, mask3, output_size=size3)
        x = self.conv3_3_D_bn(F.relu(self.conv3_3_D(x)))
        x = self.conv3_2_D_bn(F.relu(self.conv3_2_D(x)))
        x = self.conv3_1_D_bn(F.relu(self.conv3_1_D(x)))

        # Decoder block 2
        x = self.unpool(x, mask2, output_size=size2)
        x = self.conv2_2_D_bn(F.relu(self.conv2_2_D(x)))
        x = self.conv2_1_D_bn(F.relu(self.conv2_1_D(x)))

        # Decoder block 1
        x = self.unpool(x, mask1, output_size=size1)
        x = self.conv1_2_D_bn(F.relu(self.conv1_2_D(x)))
        x = self.conv1_1_D(x)

        # Undo the initial 4x average pooling. ``align_corners=False`` is the
        # behaviour the implicit default already had; passing it explicitly
        # only silences the warning.
        return F.interpolate(x, size=input_hw, mode='bilinear', align_corners=False)

    def load_pretrained_weights(self):
        """Copy torchvision's pretrained VGG16-BN feature weights into the encoder.

        VGG keys are walked in order and each one is paired with the next key
        of this model whose name contains the same final component
        (``weight``, ``bias``, ``running_mean``, ...). The walk stops at the
        first VGG ``classifier`` key. When ``in_channels != 3`` the entries
        of ``conv1_1`` are skipped and keep their current values.

        Raises:
            Whatever ``load_state_dict`` raises when the mapped weights do
            not fit; the error is reported on stdout and re-raised.
        """
        vgg16_weights = models.vgg16_bn(True).state_dict()
        count_vgg = 0
        count_this = 0

        vggkeys = list(vgg16_weights.keys())
        thiskeys = list(self.state_dict().keys())

        # Pairs of [vgg_key, own_key], matched positionally.
        corresp_map = []

        while True:
            vggkey = vggkeys[count_vgg]
            thiskey = thiskeys[count_this]

            if "classifier" in vggkey:
                break

            # Advance through this model's keys until the parameter kind
            # (last dotted component of the VGG key) matches.
            while vggkey.split(".")[-1] not in thiskey:
                count_this += 1
                thiskey = thiskeys[count_this]

            corresp_map.append([vggkey, thiskey])
            count_vgg += 1
            count_this += 1

        mapped_weights = self.state_dict()
        for k_vgg, k_segnet in corresp_map:
            if "features" not in k_vgg:
                continue
            # The first convolution only fits when the input has 3 channels.
            if self.in_channels != 3 and "conv1_1." in k_segnet:
                continue
            mapped_weights[k_segnet] = vgg16_weights[k_vgg]

        try:
            self.load_state_dict(mapped_weights)
            print("Loaded VGG-16 weights in Segnet !")
        except Exception:
            print("Error VGG-16 weights in Segnet !")
            raise

    def load_from_filename(self, model_path):
        """Load a ``state_dict`` saved with ``torch.save`` from ``model_path``.

        The file is mapped to CPU first so that weights saved on a GPU can be
        loaded on a machine without one; ``load_state_dict`` then copies the
        values into this module's existing parameters.
        """
        # NOTE(review): ``torch.load`` unpickles the file - only load
        # checkpoints from a trusted source.
        th = torch.load(model_path, map_location="cpu")  # load the weights
        self.load_state_dict(th)
 
 
def segnet_bn_relu(in_channels, out_channels, pretrained=False, **kwargs):
    """Build a ``SegNet_BN_ReLU`` model.

    Args:
        in_channels: forwarded to ``SegNet_BN_ReLU``.
        out_channels: forwarded to ``SegNet_BN_ReLU``.
        pretrained (bool): if True, ``load_pretrained_weights()`` is called
            on the new model before it is returned.
        **kwargs: accepted but not used.

    Returns:
        The constructed model.
    """
    net = SegNet_BN_ReLU(in_channels, out_channels)
    if not pretrained:
        return net
    net.load_pretrained_weights()
    return net

In [3]:
# Smoke test: build an untrained 3-channel -> 4-channel network, show its
# layers, and check the output shape for a 512x512 input.
net = segnet_bn_relu(3, 4, False)
print(net)
x = torch.rand((1, 3, 512, 512))
# Call the module itself (not ``.forward``) so ``nn.Module.__call__`` runs,
# and skip autograd bookkeeping since only the shape is of interest.
with torch.no_grad():
    print(net(x).shape)

SegNet_BN_ReLU(
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (unpool): MaxUnpool2d(kernel_size=(2, 2), stride=(2, 2), padding=(0, 0))
  (conv1_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_1_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv1_2_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_1): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_1_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv2_2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2_2_bn): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (conv3_1): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv3_1_bn): BatchNorm2d(256, eps=1e

In [4]:
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from farmdataset import FarmDataset
from segnet import segnet_bn_relu as Unet
import time
from PIL import Image

In [5]:
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Each batch is moved to ``device``, the model output is turned into
    log-probabilities over dim 1 and optimised with a class-weighted
    negative log-likelihood loss. Progress is printed every 100 batches and
    the elapsed time / current learning rate once at the end.

    On even epochs the arg-max prediction and the target of the first sample
    of the last batch are written to ``./tmp/predict{epoch}.bmp`` and
    ``./tmp/real{epoch}.bmp``. Both use the same grey-level scaling (class
    ids spread over 0..255) so the two images are directly comparable.

    Args:
        model: module mapping a batch to ``(N, C, H, W)`` scores.
        device: device the batch and the loss weights are placed on.
        train_loader: iterable of ``(data, target)`` batches that also
            provides ``len()`` and a ``dataset`` attribute (used for logging).
        optimizer: optimiser stepping ``model``'s parameters.
        epoch: epoch number, used for logging and file names.
    """
    model.train()
    start_time = time.time()

    # Per-class loss weights; created once, on the same device as the data
    # (previously rebuilt every batch and pinned to 'cuda').
    class_weights = torch.tensor([0.1, 0.5, 0.5, 0.2], device=device)

    # Stay defined even if the loader yields no batch.
    output = target = None
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()

        output = F.log_softmax(model(data), dim=1)
        # ``F.nll_loss`` handles (N, C, H, W) inputs; it is the drop-in
        # replacement for the deprecated ``nn.NLLLoss2d``.
        loss = F.nll_loss(output, target, weight=class_weights)
        loss.backward()

        optimizer.step()

        if batch_idx % 100 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    end_time = time.time()
    curr_lr = optimizer.param_groups[0]['lr']
    print('Training Time: {}   Learning Rate: {}'.format(end_time - start_time, curr_lr))

    if epoch % 2 == 0 and output is not None:
        os.makedirs('./tmp', exist_ok=True)
        # Spread class ids 0..C-1 over 0..255. Multiplying uint8 ids by 255
        # (as before) wraps around for every id above 1.
        num_classes = output.shape[1]
        gray_step = max(1, 255 // max(num_classes - 1, 1))
        pred_ids = torch.argmax(output.detach()[0, :, :, :].cpu(), 0).byte()
        real_ids = target.detach()[0, :, :].cpu().byte()
        imgx = Image.fromarray((pred_ids * gray_step).numpy()).convert('L')
        imgxx = Image.fromarray((real_ids * gray_step).numpy()).convert('L')
        imgx.save("./tmp/predict{}.bmp".format(epoch))
        imgxx.save('./tmp/real{}.bmp'.format(epoch))
 
def test(model, device, testdataset, issave=False):
    model.eval()
    test_loss = 0
    correct = 0
    evalid = [i+7 for i in range(0,2100,15)]
    maxbatch = len(evalid)
    with torch.no_grad():
        for idx in evalid:
            data, target = testdataset[idx]
            data, target = data.unsqueeze(0).to(device), target.unsqueeze(0).to(device)
            target = target[:,:1472,:1472]
            output = model(data[:,:,:1472,:1472])
            output = F.log_softmax(output, dim=1)
            loss = nn.NLLLoss2d().to('cuda')(output,target)
            test_loss += loss
             
            r = torch.argmax(output[0],0).byte()
  
            tg = target.byte().squeeze(0)
            tmp = 0
            count = 0
            for i in range(1,4):
                mp = r == i
                tr = tg == i
                tp = mp*tr == 1
                t = (mp+tr-tp).sum().item()
                if t == 0:
                    continue
                else:
                    tmp += tp.sum().item()/t
                    count += 1
            if count>0:
                correct += tmp/count
            
             
            if issave:
                Image.fromarray(r.cpu().numpy()).save('predict.png')
                Image.fromarray(tg.cpu().numpy()).save('target.png')
                input()
                 
    print('Test Loss is {:.6f}, mean precision is: {:.4f}%'.format(test_loss/maxbatch,correct))
 
 
def main(batch_size=5, epochs=22, lr=0.001, momentum=0.9, startepoch=0, seed=1):
    """Train the segmentation network and evaluate / checkpoint periodically.

    Trains epochs ``startepoch .. epochs`` (inclusive) with SGD. The learning
    rate is multiplied by 0.1 at epochs 10 and 16. After every even epoch the
    model is evaluated with ``test`` and the whole model object is saved to
    ``./tmp/model{epoch}``.

    Args:
        batch_size: training batch size.
        epochs: index of the last epoch to train (inclusive).
        lr: initial SGD learning rate.
        momentum: SGD momentum.
        startepoch: if non-zero, resume from ``./tmp/model{startepoch}``
            instead of building a fresh network; learning-rate decays whose
            milestone lies before ``startepoch`` are applied up front.
        seed: value passed to ``torch.manual_seed``.
    """
    use_cuda = torch.cuda.is_available()

    torch.manual_seed(seed)

    device = torch.device("cuda" if use_cuda else "cpu")
    print('my device is :',device)

    # Make sure the checkpoint directory exists before spending a full epoch
    # of training that ends in ``torch.save``.
    os.makedirs('./tmp', exist_ok=True)

    kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}
    train_loader = torch.utils.data.DataLoader(FarmDataset(istrain=True), batch_size=batch_size, shuffle=True, drop_last=True, **kwargs)

    if startepoch:
        # NOTE(review): this unpickles a complete model object - only resume
        # from trusted checkpoint files.
        model = torch.load('./tmp/model{}'.format(startepoch), map_location=device).to(device)
    else:
        model = Unet(3, 4).to(device)

    lr_milestones = (10, 16)
    # When resuming past a milestone the loop below never reaches it, so
    # apply those decays to the starting learning rate.
    for milestone in lr_milestones:
        if milestone < startepoch:
            lr *= 0.1
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    for epoch in range(startepoch, epochs + 1):
        if epoch in lr_milestones:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.1
        train(model, device, train_loader, optimizer, epoch)
        if epoch % 2 == 0:
            print(epoch)
            # NOTE(review): evaluation uses the ``istrain=True`` dataset
            # (without augmentation) - confirm this is intended.
            test(model, device, FarmDataset(istrain = True, isaug = False), issave = False)
            torch.save(model,'./tmp/model{}'.format(epoch))

In [6]:
# Run the training loop defined in main(); on every even epoch it evaluates
# the model and saves it to ./tmp/model{epoch}.
main()

my device is : cuda




Training Time: 1516.672904253006   Learning Rate: 0.001
0
Test Loss is 0.864739, mean precision is: 14.0379%
Training Time: 1522.7208468914032   Learning Rate: 0.001
Training Time: 1525.958727836609   Learning Rate: 0.001
2
Test Loss is 0.620138, mean precision is: 23.2863%
Training Time: 1524.572214603424   Learning Rate: 0.001


Training Time: 1526.9287221431732   Learning Rate: 0.001
4
Test Loss is 0.608575, mean precision is: 26.9152%
Training Time: 1525.8644642829895   Learning Rate: 0.001
Training Time: 1528.8506762981415   Learning Rate: 0.001
6
Test Loss is 0.605966, mean precision is: 22.9709%
Training Time: 1522.240835428238   Learning Rate: 0.001


Training Time: 1523.8585550785065   Learning Rate: 0.001
8
Test Loss is 0.565870, mean precision is: 28.0222%
Training Time: 1525.0593979358673   Learning Rate: 0.001
Training Time: 1526.8990213871002   Learning Rate: 0.0001
10
Test Loss is 0.498930, mean precision is: 31.1165%


Training Time: 1519.815928220749   Learning Rate: 0.0001
Training Time: 1524.6429884433746   Learning Rate: 0.0001
12
Test Loss is 0.540621, mean precision is: 28.9639%
Training Time: 1527.222767829895   Learning Rate: 0.0001
Training Time: 1526.5151770114899   Learning Rate: 0.0001
14
Test Loss is 0.499923, mean precision is: 30.2207%


Training Time: 1526.9862623214722   Learning Rate: 0.0001
Training Time: 1522.5578649044037   Learning Rate: 1e-05
16
Test Loss is 0.503989, mean precision is: 28.8139%
Training Time: 1514.267472743988   Learning Rate: 1e-05
Training Time: 1520.6407804489136   Learning Rate: 1e-05
18
Test Loss is 0.474929, mean precision is: 29.6358%


Training Time: 1519.6876022815704   Learning Rate: 1e-05
Training Time: 1521.0031623840332   Learning Rate: 1e-05
20
Test Loss is 0.483909, mean precision is: 30.0569%
Training Time: 1519.031631231308   Learning Rate: 1e-05
Training Time: 1518.4284217357635   Learning Rate: 1e-05
22
Test Loss is 0.492249, mean precision is: 29.6335%


In [9]:
# Evaluate saved checkpoints ./tmp/model24, model27, ..., model66.
# NOTE(review): main() as written only saves even epochs up to its final
# epoch, so these files presumably come from a different run - confirm.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")  # same for every checkpoint
for epoch in range(24, 69, 3):
    print(epoch)
    # map_location lets GPU-saved checkpoints load on whatever device is
    # available. torch.load unpickles the file: trusted checkpoints only.
    model = torch.load('./tmp/model{}'.format(epoch), map_location=device)
    test(model, device, FarmDataset(istrain = True, isaug = False), issave = False)

24




Test Loss is 1.629143, mean precision is: 0.8994%
27
Test Loss is 1.519135, mean precision is: 2.3174%
30
Test Loss is 1.602930, mean precision is: 1.3016%
33
Test Loss is 1.439184, mean precision is: 1.9290%
36
Test Loss is 1.616317, mean precision is: 1.4456%
39
Test Loss is 1.692963, mean precision is: 1.5148%
42
Test Loss is 1.623588, mean precision is: 2.0390%
45


TypeError: can't convert np.ndarray of type numpy.object_. The only supported types are: float64, float32, float16, int64, int32, int16, int8, and uint8.