## インポートライブラリ

In [20]:
import numpy as np 
import pandas as pd

from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torchvision import transforms as T
import torchvision
import torch.nn.functional as F
from torch.autograd import Variable

import statistics

from PIL import Image
import cv2
import albumentations as A

import time
import os
from tqdm.notebook import tqdm

!pip install -q segmentation-models-pytorch
!pip install -q torchsummary

from torchsummary import summary
import segmentation_models_pytorch as smp

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Google driveマウント

In [21]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# パス設定

## 画像・モデルのパス指定


In [22]:
IMAGE_PATH = '/content/drive/MyDrive/Drone_TA/Fuehuki_Dataset/JPEGImages/'
MASK_PATH = '/content/drive/MyDrive/Drone_TA/Fuehuki_Dataset/SegmentationClass/'

In [23]:
MODEL_PATH = '/content/drive/MyDrive/Drone_TA/PreTrain_Model/drone_Trained.pth'

## 出力画像の保存先指定

In [24]:
SAVE_PATH = '/content/drive/MyDrive/Drone_TA/PreTrain_Model_Result/'

# データ設定

## データセットの分割

In [25]:
def create_df():
    name = []
    for dirname, _, filenames in os.walk(IMAGE_PATH):
        for filename in filenames:
            name.append(filename.split('.')[0])
    
    return pd.DataFrame({'id': name}, index = np.arange(0, len(name)))

df = create_df()
print('全画像数: ', len(df))

全画像数:  102


In [26]:
X_trainval, X_test = train_test_split(df['id'].values, test_size=0.1, random_state=19)#全データの10%をテストデータとしてランダムに分離
X_train, X_val = train_test_split(X_trainval, test_size=0.15, random_state=19)#全データの13.5%を検証データ,残りを訓練データとしてランダムに分離

print('Train Size   : ', len(X_train))
print('Val Size     : ', len(X_val))
print('Test Size    : ', len(X_test))

Train Size   :  77
Val Size     :  14
Test Size    :  11


## アノテーションの色の定義

In [27]:
mapping = {(150, 143, 9): 1,
           (128, 64, 128): 0,
           (0, 76, 130): 0,
           (0, 102, 0): 0,
           (87, 103, 112): 0,
           (168, 42, 28): 0,
           (30, 41, 48): 0,
           (89, 50, 0): 0,
           (35, 142, 107): 0,
           (70, 70, 70): 0,
           (156, 102, 102): 0,
           (12, 228, 254): 0,
           (12, 148, 254): 0,
           (153, 153, 190): 0,
           (153, 153, 153): 0,
           (96, 22, 255): 0,
           (0, 51, 102): 0,
           (32, 11, 119): 0,
           (0, 51, 51): 0,
           (190, 250, 190): 0,
           (146, 150, 112): 0,
           (115, 135, 2): 0,
           (0, 0, 255): 0,
           (0, 0, 0): 0,
            }        

## mappingのkeyとvalueの反転(visualize()で使用)

In [28]:
reverse_mapping = {1: (150, 143, 9),
                   2: (0, 0, 0),
                   3: (0, 0, 0),
                   4: (0, 0, 0),
                   5: (0, 0, 0),
                   6: (0, 0, 0),
                   7: (0, 0, 0),
                   8: (0, 0, 0),
                   9: (0, 0, 0),
                   10: (0, 0, 0),
                   11: (0, 0, 0),
                   12: (0, 0, 0),
                   13: (0, 0, 0),
                   14: (0, 0, 0),
                   15: (0, 0, 0),
                   16: (0, 0, 0),
                   17: (0, 0, 0),
                   18: (0, 0, 0),
                   19: (0, 0, 0),
                   20: (0, 0, 0),
                   21: (0, 0, 0),
                   22: (0, 0, 0),
                   23: (0, 0, 0),
                   0: (0, 0, 0),
            }      

## 3次元アノテーションデータを2次元データに変換(mappingに定義されたRGB値からvalue値に変換)

In [29]:
def onehot_encoding(target):
  h = target.shape[0]
  w = target.shape[1]
  target = target.permute(2,0,1).contiguous()
  mask = torch.empty(h, w, dtype=torch.long)
  
  for k in mapping:
    idx = (target==torch.tensor(k, dtype=torch.uint8).unsqueeze(1).unsqueeze(2))
    validx = (idx.sum(0) == 3)  
    mask[validx] = torch.tensor(mapping[k], dtype=torch.long)
  
  return mask

## 2次元の出力を3次元のRGB画像に変換(value値からmappingに定義されたRGB値に変換)

In [30]:
def visualize(temp):
    r = temp.copy()
    g = temp.copy()
    b = temp.copy()
    for l in range(0,len(reverse_mapping)):
        r[temp==l]=reverse_mapping[l][0]
        g[temp==l]=reverse_mapping[l][1]
        b[temp==l]=reverse_mapping[l][2]

    rgb = np.zeros((temp.shape[1], temp.shape[2],3))

    rgb[:,:,0] = (r)
    rgb[:,:,1] = (g)
    rgb[:,:,2] = (b)
    return rgb

## テスト時のデータセットの定義


In [31]:
class DroneTestDataset(Dataset):
    
    def __init__(self, img_path, mask_path, X, transform=None):
        self.img_path = img_path
        self.mask_path = mask_path
        self.X = X
        self.transform = transform
      
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        img = cv2.imread(self.img_path + self.X[idx] + '.jpg')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        mask = cv2.imread(self.mask_path + self.X[idx] + '.png')
        filename = str(self.X[idx])
        
        if self.transform is not None:
            aug = self.transform(image=img, mask=mask)
            img = Image.fromarray(aug['image'])
            mask = aug['mask']
        
        if self.transform is None:
            img = Image.fromarray(img)
        
        mask = torch.from_numpy(mask).long()

        mask = onehot_encoding(mask)
        
        return img, mask, filename

## データセットの変形(Data Augmentation)


In [32]:
t_test = A.Resize(768, 1152, interpolation=cv2.INTER_NEAREST)
test_set = DroneTestDataset(IMAGE_PATH, MASK_PATH, X_test, transform=t_test)

# モデルのロード＆事前学習済みのパラメータのロード

In [33]:
model = smp.Unet('mobilenet_v2', encoder_weights='imagenet', classes=24, activation=None, encoder_depth=5, decoder_channels=[256, 128, 64, 32, 16])
model.load_state_dict(torch.load(MODEL_PATH))

<All keys matched successfully>

# テスト設定

## 検証時の精度評価関数の定義

In [34]:
def pixel_accuracy(output, mask):
    with torch.no_grad():
        output = torch.argmax(F.softmax(output, dim=1), dim=1)
        correct = torch.eq(output, mask).int()
        accuracy = float(correct.sum()) / float(correct.numel())
    return accuracy

def mIoU(pred_mask, mask, smooth=1e-10, n_classes=23):
    with torch.no_grad():
        pred_mask = F.softmax(pred_mask, dim=1)
        pred_mask = torch.argmax(pred_mask, dim=1)
        pred_mask = pred_mask.contiguous().view(-1)
        mask = mask.contiguous().view(-1)

        iou_per_class = []
        for clas in range(0, n_classes): #loop per pixel class
            true_class = pred_mask == clas
            true_label = mask == clas

            if true_label.long().sum().item() == 0: #no exist label in this loop
                iou_per_class.append(np.nan)
            else:
                intersect = torch.logical_and(true_class, true_label).sum().float().item()
                union = torch.logical_or(true_class, true_label).sum().float().item()

                iou = (intersect + smooth) / (union +smooth)
                iou_per_class.append(iou)
        return np.nanmean(iou_per_class)

In [35]:
def predict_image_mask_miou(model, image, mask, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    model.eval()
    t = T.Compose([T.ToTensor(), T.Normalize(mean, std)])
    image = t(image)
    model.to(device); image=image.to(device)
    mask = mask.to(device)
    with torch.no_grad():
        
        image = image.unsqueeze(0)
        mask = mask.unsqueeze(0)
        
        output = model(image)
        score = mIoU(output, mask)
        masked = torch.argmax(output, dim=1)
        masked = masked.cpu().squeeze(0)
    return masked, score

def predict_image_mask_pixel(model, image, mask, mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]):
    model.eval()
    t = T.Compose([T.ToTensor(), T.Normalize(mean, std)])
    image = t(image)
    model.to(device); image=image.to(device)
    mask = mask.to(device)
    with torch.no_grad():
        
        image = image.unsqueeze(0)
        mask = mask.unsqueeze(0)
        
        output = model(image)
        acc = pixel_accuracy(output, mask)
        masked = torch.argmax(output, dim=1)
        masked = masked.cpu().squeeze(0)
    return masked, acc

In [36]:
def miou_score(model, test_set):
    score_iou = []
    for i in tqdm(range(len(test_set))):
        img, mask ,filename= test_set[i]
        pred_mask, score = predict_image_mask_miou(model, img, mask)
        pred_mask = pred_mask.cpu().numpy().copy()
        pred_mask = pred_mask.reshape(1,pred_mask.shape[0],pred_mask.shape[1])
        cv2.imwrite(os.path.join(SAVE_PATH,filename + ".png"), visualize(pred_mask),[cv2.IMWRITE_PNG_COMPRESSION,9])#ここで画像を保存
        score_iou.append(score)
    mean_iou = statistics.mean(score_iou)
    return mean_iou

def pixel_acc(model, test_set):
    accuracy = []
    for i in tqdm(range(len(test_set))):
        img, mask,filename = test_set[i]
        pred_mask, acc = predict_image_mask_pixel(model, img, mask)
        accuracy.append(acc)
    mean_acc = statistics.mean(accuracy)
    return mean_acc


## Testの実行

In [37]:
mob_miou = miou_score(model, test_set)
print("miou: " + str(mob_miou))
mob_acc = pixel_acc(model, test_set)
print("acc: " + str(mob_acc))

  0%|          | 0/11 [00:00<?, ?it/s]

miou: 1.335274512685649e-14


  0%|          | 0/11 [00:00<?, ?it/s]

acc: 0.0
