In [1]:
import os
import sys
import glob
import math
import matplotlib.pyplot as plt
import argparse
import numpy as np
from PIL import Image
from utils.data_augumentation import Compose, Scale, RandomRotation, RandomMirror, Resize, Normalize_Tensor
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, datasets, models
from FusionNet import * 
from UNet import *
from resNet_uNet import *

In [33]:
anno_class_img = Image.open('labels/DJI_0004_2_2.png').convert('RGB')
print(np.array(anno_class_img).shape)

(100, 100, 3)


In [2]:
# inputファイルとlabelsに存在しているファイルを取得
input_dir, label_dir = 'input','labels'
in_files = glob.glob(input_dir+'/*.jpg')
label_files = glob.glob(label_dir+'/*.png')
in_f = [f.split('/')[-1].split('.')[-2] for f in in_files ]
lb_f = [f.split('/')[-1].split('.')[-2] for f in label_files ]
f_name = set(in_f) & set(lb_f)
print('deta count : ',len(list(f_name)))

input_files, annotation_files = [], []
for i in list(f_name):
    input_files.append(input_dir+'/'+i+'.jpg')
    annotation_files.append(label_dir+'/'+i+'.png')
datafiles = list(zip(input_files, annotation_files))
print(datafiles)

deta count :  284
[('input/DJI_0004_2_2.jpg', 'labels/DJI_0004_2_2.png'), ('input/DJI_0006_7_3.jpg', 'labels/DJI_0006_7_3.png'), ('input/DJI_0016_2_1.jpg', 'labels/DJI_0016_2_1.png'), ('input/DJI_0005_8_4.jpg', 'labels/DJI_0005_8_4.png'), ('input/DJI_0018_1_2.jpg', 'labels/DJI_0018_1_2.png'), ('input/DJI_0012_1_1.jpg', 'labels/DJI_0012_1_1.png'), ('input/DJI_0015_8_1.jpg', 'labels/DJI_0015_8_1.png'), ('input/DJI_0016_1_1.jpg', 'labels/DJI_0016_1_1.png'), ('input/DJI_0015_8_2.jpg', 'labels/DJI_0015_8_2.png'), ('input/DJI_0017_1_1.jpg', 'labels/DJI_0017_1_1.png'), ('input/DJI_0023_3_4.jpg', 'labels/DJI_0023_3_4.png'), ('input/DJI_0010_1_1.jpg', 'labels/DJI_0010_1_1.png'), ('input/DJI_0019_1_2.jpg', 'labels/DJI_0019_1_2.png'), ('input/DJI_0020_7_2.jpg', 'labels/DJI_0020_7_2.png'), ('input/DJI_0012_6_1.jpg', 'labels/DJI_0012_6_1.png'), ('input/DJI_0019_4_3.jpg', 'labels/DJI_0019_4_3.png'), ('input/DJI_0016_4_1.jpg', 'labels/DJI_0016_4_1.png'), ('input/DJI_0011_2_1.jpg', 'labels/DJI_0011_2_

In [3]:
class DataTransform():
    def __init__(self, input_size, color_mean, color_std):
        self.data_transform = {
            'train': Compose([
                Scale(scale=[0.5, 1.5]),  # 画像の拡大
                RandomRotation(angle=[-10, 10]),  # 回転
                RandomMirror(),  # ランダムミラー
                Resize(input_size),  # リサイズ(input_size)
                Normalize_Tensor(color_mean, color_std)  # 色情報の標準化とテンソル化
            ]),
            'val': Compose([
                Resize(input_size),  # リサイズ(input_size)
                Normalize_Tensor(color_mean, color_std)  # 色情報の標準化とテンソル化
            ])
        }

    def __call__(self, phase, img, anno_class_img):
        return self.data_transform[phase](img, anno_class_img)

In [73]:
class VOCDataset(Dataset):
    def __init__(self, img_list, anno_list, phase, transform):
        self.img_list = img_list
        self.anno_list = anno_list
        self.phase = phase
        self.transform = transform

    def __len__(self):
        return len(self.img_list)

    def __getitem__(self, index):
        img, anno_class_img = self.pull_item(index)
        return img, anno_class_img

    def pull_item(self, index):
        image_file_path = self.img_list[index]
        img = Image.open(image_file_path)   # [高さ][幅][色RGB]
        anno_file_path = self.anno_list[index]
        anno_class_img = Image.open(anno_file_path)   # [高さ][幅]
        #anno_class_img = Image.open(anno_file_path).convert('RGB')
        #anno_np = np.array(anno_class_img)
        #anno_value = np.max(anno_np) + 1
        #anno_class_img_ = np.eye(anno_value)[anno_np]
        print(anno_class_img)
        img, anno_class_img = self.transform(self.phase, img, anno_class_img)
        return img, anno_class_img

In [74]:
# 動作確認
num_train = math.floor(len(datafiles)*0.8)
num_test = len(datafiles)-num_train
num_all = num_train + num_test

def split_train_test(data):
    id_all   = np.random.choice(num_all, num_all, replace=False)
    id_test  = id_all[0:num_test]
    id_train = id_all[num_test:num_all]
    test_data  = data[id_test]
    train_data = data[id_train]
    return train_data, test_data
    
train_list, test_list = split_train_test(np.asarray(datafiles))
input_train, annotation_train = train_list[:,0], train_list[:,1]
input_val, annotation_val = test_list[:,0], test_list[:,1]

print('input   :: train: %d , test: %d'%(len(input_train), len(input_val)))
print('annotation  :: train: %d , test: %d'%(len(annotation_train), len(annotation_val)))

# (RGB)の色の平均値と標準偏差
color_mean = (0.485, 0.456, 0.406)
color_std = (0.229, 0.224, 0.225)

# データセット作成
train_dataset = VOCDataset(input_train, annotation_train, phase="train", transform=DataTransform(
    input_size=224, color_mean=color_mean, color_std=color_std))

val_dataset = VOCDataset(input_val, annotation_val, phase="val", transform=DataTransform(
    input_size=224, color_mean=color_mean, color_std=color_std))

# データの取り出し例
print(val_dataset.__getitem__(0)[0].shape)
print(val_dataset.__getitem__(0)[1].shape)
print(val_dataset.__getitem__(0))

input   :: train: 227 , test: 57
annotation  :: train: 227 , test: 57
<PIL.PngImagePlugin.PngImageFile image mode=P size=100x100 at 0x7F5C31C5AF60>
torch.Size([3, 224, 224])
<PIL.PngImagePlugin.PngImageFile image mode=P size=100x100 at 0x7F5C31C5AE10>
torch.Size([224, 224])
<PIL.PngImagePlugin.PngImageFile image mode=P size=100x100 at 0x7F5C31C5AF98>
(tensor([[[-0.8507, -0.8507, -0.7993,  ..., -1.0562, -0.9534, -0.9192],
         [-0.7308, -0.7308, -0.6794,  ..., -1.0562, -0.9534, -0.9192],
         [-0.4739, -0.4739, -0.4226,  ..., -1.0219, -0.9192, -0.8849],
         ...,
         [-0.7993, -0.7993, -0.7822,  ..., -1.4500, -1.4158, -1.4158],
         [-0.7993, -0.7993, -0.7822,  ..., -1.4500, -1.4158, -1.4158],
         [-0.7993, -0.7993, -0.7822,  ..., -1.4500, -1.4158, -1.4158]],

        [[-0.1800, -0.1800, -0.1450,  ..., -0.3725, -0.2675, -0.2325],
         [-0.0574, -0.0574, -0.0224,  ..., -0.3725, -0.2675, -0.2325],
         [ 0.2052,  0.2052,  0.2402,  ..., -0.3550, -0.2500, -

In [59]:
# データローダーの作成

batch_size = 8

train_dataloader = DataLoader(
    train_dataset, batch_size=batch_size, shuffle=True)

val_dataloader = DataLoader(
    val_dataset, batch_size=batch_size, shuffle=False)

# 辞書オブジェクトにまとめる
dataloaders_dict = {"train": train_dataloader, "val": val_dataloader}

# 動作の確認
batch_iterator = iter(dataloaders_dict["val"])  # イタレータに変換
imges, anno_class_imges = next(batch_iterator)  # 1番目の要素を取り出す
print(imges.size())  # torch.Size([8, 3, 475, 475])
print(anno_class_imges.size())  # torch.Size([8, 3, 475, 475])

torch.Size([8, 3, 224, 224])
torch.Size([8, 224, 224])


In [60]:
try:
    generator = torch.load('./model/{}.pkl'.format(args.network))
    print("\n--------model restored--------\n")
except:
    print("\n--------model not restored--------\n")
    pass


--------model not restored--------



In [61]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNetUNet(n_class=2)
model = model.to(device)

# check keras-like model summary using torchsummary
from torchsummary import summary
summary(model, input_size=(3, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
            Conv2d-3         [-1, 64, 224, 224]          36,928
              ReLU-4         [-1, 64, 224, 224]               0
            Conv2d-5         [-1, 64, 112, 112]           9,408
            Conv2d-6         [-1, 64, 112, 112]           9,408
       BatchNorm2d-7         [-1, 64, 112, 112]             128
       BatchNorm2d-8         [-1, 64, 112, 112]             128
              ReLU-9         [-1, 64, 112, 112]               0
             ReLU-10         [-1, 64, 112, 112]               0
        MaxPool2d-11           [-1, 64, 56, 56]               0
        MaxPool2d-12           [-1, 64, 56, 56]               0
           Conv2d-13           [-1, 64, 56, 56]          36,864
           Conv2d-14           [-1, 64,

(tensor(28976386), tensor(28976386))

In [62]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

cuda:0


In [63]:
def dice_loss(pred, target, smooth = 1.):
    pred = pred.contiguous()
    target = target.contiguous()

    intersection = (pred * target).sum(dim=2).sum(dim=2)
    
    loss = (1 - ((2. * intersection + smooth) / (pred.sum(dim=2).sum(dim=2) + target.sum(dim=2).sum(dim=2) + smooth)))
    
    return loss.mean()

In [64]:
from collections import defaultdict
import torch.nn.functional as F

def calc_loss(pred, target, metrics, bce_weight=0.5):
    bce = F.binary_cross_entropy_with_logits(pred, target)

    pred = F.sigmoid(pred)
    dice = dice_loss(pred, target)

    loss = bce * bce_weight + dice * (1 - bce_weight)

    metrics['bce'] += bce.data.to(device).numpy() * target.size(0)
    metrics['dice'] += dice.data.to(device).numpy() * target.size(0)
    metrics['loss'] += loss.data.to(device).numpy() * target.size(0)

    return loss

def print_metrics(metrics, epoch_samples, phase):
    outputs = []
    for k in metrics.keys():
        outputs.append("{}: {:4f}".format(k, metrics[k] / epoch_samples))

    print("{}: {}".format(phase, ", ".join(outputs)))

def train_model(model, optimizer, scheduler, num_epochs=25):
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 1e10

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        since = time.time()

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                scheduler.step()
                for param_group in optimizer.param_groups:
                    print("LR", param_group['lr'])

                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            metrics = defaultdict(float)
            epoch_samples = 0

            for inputs, labels in dataloaders_dict[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    loss = calc_loss(outputs, labels, metrics)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                epoch_samples += inputs.size(0)

            print_metrics(metrics, epoch_samples, phase)
            epoch_loss = metrics['loss'] / epoch_samples

            # deep copy the model
            if phase == 'val' and epoch_loss < best_loss:
                print("saving best model")
                best_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())

        time_elapsed = time.time() - since
        print('{:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))

    print('Best val loss: {:4f}'.format(best_loss))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [65]:
import torch
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import copy

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

num_class = 2
model = ResNetUNet(num_class).to(device)

# freeze backbone layers
#for l in model.base_layers:
#    for param in l.parameters():
#        param.requires_grad = False

optimizer_ft = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=1e-4)

exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=30, gamma=0.1)

model = train_model(model, optimizer_ft, exp_lr_scheduler, num_epochs=100)

cuda:0
Epoch 0/59
----------
LR 0.0001


ValueError: Target size (torch.Size([8, 224, 224])) must be the same as input size (torch.Size([8, 2, 224, 224]))

In [67]:
#Sizes of tensors must match except in dimension 1. Got 119 and 120 in dimension 2
#Target size (torch.Size([8, 224, 224])) must be the same as input size (torch.Size([8, 2, 224, 224]))