In [1]:
from keras import backend as K
from keras.models import load_model
from keras.preprocessing import image
from keras.optimizers import Adam
from imageio import imread
import numpy as np
from matplotlib import pyplot as plt
import cv2
import time

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

%matplotlib inline

# Spatial size of the images fed to the network.
img_height, img_width = 300, 300

K.clear_session()

# Aspect-ratio sets reused below; each layer receives its own copy of the list.
three_ratios = [1.0, 2.0, 0.5]
five_ratios = [1.0, 2.0, 0.5, 3.0, 1.0/3.0]

# All arguments for the SSD300 constructor, gathered in one place.
ssd_config = dict(
    image_size=(img_height, img_width, 3),
    n_classes=1,
    mode='inference',
    l2_regularization=0.0005,
    scales=[0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
    aspect_ratios_per_layer=[
        list(three_ratios),
        list(five_ratios),
        list(five_ratios),
        list(five_ratios),
        list(three_ratios),
        list(three_ratios),
    ],
    two_boxes_for_ar1=True,
    steps=[8, 16, 32, 64, 100, 300],
    offsets=[0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
    clip_boxes=False,
    variances=[0.1, 0.1, 0.2, 0.2],
    normalize_coords=True,
    subtract_mean=[123, 117, 104],
    swap_channels=[2, 1, 0],
    confidence_thresh=0.5,
    iou_threshold=0.45,
    top_k=200,
    nms_max_output_size=400,
)
model = ssd_300(**ssd_config)

# Path of the trained weights file.
weights_path = 'ssd300_pascal_07+12_epoch-04_loss-3.0387_val_loss-3.5244_weight.h5'
model.load_weights(weights_path, by_name=True)

# Compile the model with the SSD loss and an Adam optimizer.
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model.compile(optimizer=adam, loss=ssd_loss.compute_loss)

Using TensorFlow backend.


In [15]:
# Capture frames from the video, run the SSD model on every sampled frame and collect
# the detected boxes (rescaled to the original frame size) in saving_bounding_boxes.
# TODO (from the original notes): push captured frames straight into input_images
# while capturing, to avoid the unnecessary image write/read round trip.
video_path = 'C:\\Users\\user\\Desktop\\video.mp4'
# Sampled frames are written to, and then read back from, this location.
frame_path_template = 'C:\\Users\\user\\Desktop\\frames\\frame%d.jpg'
frame_stride = 5        # keep every 5th frame of the video
max_frame_index = 320   # frames at or beyond this index are saved but not analysed

vidcap = cv2.VideoCapture(video_path)
if not vidcap.isOpened():
    vidcap.release()
    raise IOError('Could not open video: %s' % video_path)

count = 0
try:
    success, imagefile = vidcap.read()
    while success:
        if count % frame_stride == 0:
            cv2.imwrite(frame_path_template % count, imagefile)
        success, imagefile = vidcap.read()
        count += 1
finally:
    # Always give the capture handle back, even if reading or writing a frame fails.
    vidcap.release()

orig_images = []
input_images = []

# Load only frames that were actually captured (at most the first max_frame_index
# frames), so a video shorter than max_frame_index no longer asks for missing files.
for i in range(0, min(max_frame_index, count), frame_stride):
    img_path = frame_path_template % i
    orig_images.append(imread(img_path))
    img = image.load_img(img_path, target_size=(img_height, img_width))
    img = image.img_to_array(img)
    input_images.append(img)

input_images = np.array(input_images)
orig_images = np.array(orig_images)

num_of_frames = 16          # number of frames passed to model.predict per call
confidence_threshold = 0.4
counting = 0                # video frame number of the frame currently being reported
saving_bounding_boxes = []
np.set_printoptions(precision=2, suppress=True, linewidth=90)

print("Predicted boxes:\n")
print('   class   conf xmin   ymin   xmax   ymax')

# Walk over the loaded frames batch by batch; the number of batches follows from the
# number of loaded frames, and the last batch may be shorter than num_of_frames.
for batch_start in range(0, len(input_images), num_of_frames):
    y_pred = model.predict(input_images[batch_start:batch_start + num_of_frames])

    # Keep only predictions whose confidence (column 1) exceeds the threshold.
    y_pred_thresh = [y_pred[k][y_pred[k,:,1] > confidence_threshold] for k in range(y_pred.shape[0])]

    for j in range(len(y_pred_thresh)):
        print('frame :',counting)
        frame_height, frame_width = orig_images[batch_start + j].shape[:2]
        for box in y_pred_thresh[j]:
            # Transform the predicted bounding boxes for the 300x300 image to the original image dimensions.
            xmin = box[2] * frame_width / img_width
            ymin = box[3] * frame_height / img_height
            xmax = box[4] * frame_width / img_width
            ymax = box[5] * frame_height / img_height
            print('xmin : ',xmin, '  ymin : ',ymin, '  xmax : ',xmax, '  ymax : ',ymax)
            # Frames with a detected crack and their bounding-box info are stored here.
            saving_bounding_boxes.append([counting, xmin,ymin,xmax,ymax])
        counting += frame_stride
            
'''
3. (여기서부터 핵심) skeleton line상의 각 픽셀들에 어떻게 접근할 것인가?
4. 각 픽셀들에 접근했다면, 몇 픽셀 간격으로 이동할 것이며, 몇 픽셀 떨어진 픽셀을 방향 설정의 기준으로 잡을 것인가?
5. 방향 설정까지 했다면, 그 방향에서 edge가 몇 픽셀 떨어져 있는지를 재고, 각 skeleton line상의 pixel에 그 정보를 저장하자!
6. 거기서 max값을 return하자
'''
'''
1. 어떻게 skeleton의 direction을 추출할 것인가 -> 가장 정확한 방법을 찾아야 함 -> 잘못 세면 overestimation 되기 십상
2. direction만 잘 잡는다면 width를 계산하는 것은 오히려 어렵지 않다
3. skeleton을 추출한 이미지와 edge를 추출한 이미지에 함께 접근해가며 픽셀마다 width를 매기면 됨
4. 여기서 수 많은 width들 중 최종 width는 어떻게 정할 것인지가 또 다른 문제가 됨
'''

Predicted boxes:

   class   conf xmin   ymin   xmax   ymax
frame : 0
frame : 5
frame : 10
frame : 15
frame : 20
frame : 25
frame : 30
frame : 35
frame : 40
frame : 45
frame : 50
frame : 55
frame : 60
frame : 65
frame : 70
frame : 75
frame : 80
frame : 85
frame : 90
frame : 95
frame : 100
xmin :  -3.7950758934020996   ymin :  11.739411900838217   xmax :  841.7351481119791   ymax :  216.10314778645832
frame : 105
frame : 110
frame : 115
xmin :  -9.302658081054688   ymin :  10.951525688171387   xmax :  834.99740234375   ymax :  211.03209197998046
frame : 120
xmin :  -7.218521931966146   ymin :  14.727643133799235   xmax :  831.1311800130209   ymax :  208.32061381022135
frame : 125
frame : 130
xmin :  0.18059635162353516   ymin :  14.443070087432861   xmax :  834.390283203125   ymax :  197.7364427693685
frame : 135
frame : 140
frame : 145
xmin :  -0.3447403907775879   ymin :  18.943992462158203   xmax :  834.871630859375   ymax :  201.02261678059895
frame : 150
xmin :  3.0437536239624023 

'\n1. 여기서 리포트 된 픽셀정보를 바탕으로 각 frame들을 Crop\n2. 각 cropped image들에 대해 sauvola, skeletonize, canny edge 적용\n3. (여기서부터 핵심) skeleton line상의 각 픽셀들에 어떻게 접근할 것인가?\n4. 각 픽셀들에 접근했다면, 몇 픽셀 간격으로 이동할 것이며, 몇 픽셀 떨어진 픽셀을 방향 설정의 기준으로 잡을 것인가?\n5. 방향 설정까지 했다면, 그 방향에서 edge가 몇 픽셀 떨어져 있는지를 재고, 각 skeleton line상의 pixel에 그 정보를 저장하자!\n6. 거기서 max값을 return하자\n'

In [18]:
from skimage import io

# Cut every detected box out of its original frame. The crops are kept in
# cropped_frames and each one is also written to disk.
cropped_frames = []

for i in range(len(saving_bounding_boxes)):
    # Detections are tagged with the video frame number; frames were sampled every
    # 5th frame, so dividing by 5 gives the index into orig_images.
    frame_count = saving_bounding_boxes[i][0]//5
    frame = orig_images[frame_count]
    frame_height, frame_width = frame.shape[:2]

    # The original cell wrote the clamped xmin back into the list; that is preserved
    # in case other cells read it.
    if saving_bounding_boxes[i][1] < 0:
        saving_bounding_boxes[i][1] = 0

    # Clamp all four coordinates to the frame. A negative coordinate would otherwise
    # be interpreted as "count from the end" by the slice below and yield a wrong or
    # empty crop.
    xmin = min(max(int(saving_bounding_boxes[i][1]), 0), frame_width)
    ymin = min(max(int(saving_bounding_boxes[i][2]), 0), frame_height)
    xmax = min(max(int(saving_bounding_boxes[i][3]), 0), frame_width)
    ymax = min(max(int(saving_bounding_boxes[i][4]), 0), frame_height)
    print(xmin,ymin,xmax,ymax)

    cropped_frame = frame[ymin:ymax, xmin:xmax, :]
    cropped_frame = cropped_frame.astype('uint8')

    # NOTE(review): the file name depends only on the frame index, so several
    # detections in the same frame overwrite each other on disk (cropped_frames
    # still keeps all of them).
    img_path = '../../Desktop/test/%d.jpg'%frame_count
    print(img_path)
    cropped_frames.append(cropped_frame)
    io.imsave(img_path, cropped_frame)

0 11 841 216
../../Desktop/test/20.jpg
0 10 834 211
../../Desktop/test/23.jpg
0 14 831 208
../../Desktop/test/24.jpg
0 14 834 197
../../Desktop/test/26.jpg
0 18 834 201
../../Desktop/test/29.jpg
3 21 840 204
../../Desktop/test/30.jpg
2 21 838 205
../../Desktop/test/31.jpg
12 23 842 208
../../Desktop/test/32.jpg
8 16 839 198
../../Desktop/test/33.jpg
0 21 832 201
../../Desktop/test/34.jpg
0 19 839 206
../../Desktop/test/35.jpg
0 22 836 208
../../Desktop/test/36.jpg
0 16 827 195
../../Desktop/test/37.jpg
19 26 831 209
../../Desktop/test/39.jpg
0 27 838 218
../../Desktop/test/41.jpg
6 21 845 218
../../Desktop/test/42.jpg
9 22 848 215
../../Desktop/test/43.jpg
7 22 846 214
../../Desktop/test/44.jpg
4 21 847 221
../../Desktop/test/45.jpg
1 21 843 207
../../Desktop/test/46.jpg
7 20 846 220
../../Desktop/test/47.jpg
11 23 845 199
../../Desktop/test/48.jpg
10 26 841 208
../../Desktop/test/49.jpg
50 34 861 779
../../Desktop/test/58.jpg
42 35 842 787
../../Desktop/test/59.jpg
24 42 804 791
../..

In [28]:
# 1. Image binarization(Sauvola's method) using Pw and Pl, respectively
# 오래 걸리는 문제가 있음(150초 소요)

import time
import matplotlib
import matplotlib.pyplot as plt
import cv2
from skimage import io
from skimage import data
from skimage.color import rgb2gray
from skimage.data import page
from skimage.filters import (threshold_sauvola)
from PIL import Image

# Binarise every cropped frame with Sauvola thresholding, once per parameter set
# (Pw and Pl), keeping both the boolean masks and 0/255 uint8 images.
start_time = time.time()
matplotlib.rcParams['font.size'] = 9

# The paper used 70 and 180; per the original note only odd window sizes are accepted here.
window_size_Pw = 71
window_size_Pl = 181

sauvola_frames_Pw_bw = []
sauvola_frames_Pl_bw = []
sauvola_frames_Pw = []
sauvola_frames_Pl = []

for i, img in enumerate(cropped_frames):
    img_gray = rgb2gray(img)

    thresh_sauvola_Pw = threshold_sauvola(img_gray, window_size=window_size_Pw, k=0.42)
    thresh_sauvola_Pl = threshold_sauvola(img_gray, window_size=window_size_Pl, k=0.18)

    # Boolean masks: True where the pixel is brighter than its local threshold.
    binary_sauvola_Pw = img_gray > thresh_sauvola_Pw
    binary_sauvola_Pl = img_gray > thresh_sauvola_Pl

    # "bw" (black/white) copies hold 0 or 255 as uint8. An explicit astype() replaces
    # the previous in-place reassignment of the array's dtype attribute, and the
    # comparison is no longer computed twice.
    binary_sauvola_Pw_bw = binary_sauvola_Pw.astype('uint8') * 255
    binary_sauvola_Pl_bw = binary_sauvola_Pl.astype('uint8') * 255

    sauvola_frames_Pw_bw.append(binary_sauvola_Pw_bw)
    sauvola_frames_Pl_bw.append(binary_sauvola_Pl_bw)
    sauvola_frames_Pw.append(binary_sauvola_Pw)
    sauvola_frames_Pl.append(binary_sauvola_Pl)

    img_path_Pw = '../../Desktop/Sauvola/Sauvola_Pw_%d.jpg'%i
    img_path_Pl = '../../Desktop/Sauvola/Sauvola_Pl_%d.jpg'%i

    io.imsave(img_path_Pw, binary_sauvola_Pw_bw)
    io.imsave(img_path_Pl, binary_sauvola_Pl_bw)

print("start_time", start_time)
print("--- %s seconds ---" %(time.time() - start_time))

  cropped = ar[slices]


start_time 1537522386.5903513
--- 149.27273297309875 seconds ---


"\n    plt.figure(figsize=(8, 7))\n    plt.subplot(2, 2, 1)\n    plt.imshow(img_gray, cmap=plt.cm.gray)\n    plt.title('Original')\n    plt.axis('off')\n\n    plt.subplot(2, 2, 4)\n    plt.imshow(binary_sauvola_Pw_bw, cmap=plt.cm.gray)\n    plt.title('Sauvola Threshold for Pw')\n    plt.axis('off')\n\n    plt.show()\n\n    plt.subplot(2, 2, 4)\n    plt.imshow(binary_sauvola_Pl_bw, cmap=plt.cm.gray)\n    plt.title('Sauvola Threshold for Pl')\n    plt.axis('off')\n\n    plt.show()\n"

In [30]:
# 2. Extract the skeletons of each images

from skimage.morphology import skeletonize
from skimage.util import invert

# Skeletonise the inverted Sauvola masks of every frame and keep the results as
# 0/255 uint8 images, which are also written to disk.
skeleton_frames_Pw = []
skeleton_frames_Pl = []

# Iterate over the masks themselves (instead of indexing by len(cropped_frames)) so
# the loop bound always matches the lists that are actually read.
for i, (mask_Pw, mask_Pl) in enumerate(zip(sauvola_frames_Pw, sauvola_frames_Pl)):
    # Invert the binarized images
    img_Pw = invert(mask_Pw)
    img_Pl = invert(mask_Pl)

    # Skeletonize, then convert to 0/255 uint8 so the result can be saved as an image.
    # astype() replaces the previous in-place reassignment of the dtype attribute.
    skeleton_Pw = skeletonize(img_Pw).astype('uint8') * 255
    skeleton_Pl = skeletonize(img_Pl).astype('uint8') * 255

    skeleton_frames_Pw.append(skeleton_Pw)
    skeleton_frames_Pl.append(skeleton_Pl)

    img_path_Pw = "../../Desktop/Skeleton/skeleton_Pw_%d.jpg"%i
    img_path_Pl = "../../Desktop/Skeleton/skeleton_Pl_%d.jpg"%i
    io.imsave(img_path_Pw, skeleton_Pw)
    io.imsave(img_path_Pl, skeleton_Pl)
    

  warn('%s is a low contrast image' % fname)
  warn('%s is a low contrast image' % fname)
  warn('%s is a low contrast image' % fname)
  warn('%s is a low contrast image' % fname)
  warn('%s is a low contrast image' % fname)


In [36]:
# 3. Detect the edges of each images
### edge detection 할 때, 좋은 parameter를 찾아야 한다. 지금은 edge가 너무 두꺼움 (overestimation됨) ###
import numpy as np
from scipy import ndimage as ndi
from skimage import feature

# Run the Canny edge detector on both Sauvola masks of every frame and keep the
# results as 0/255 uint8 images, which are also written to disk.

# Signature: canny(image, sigma=1.0, low_threshold=None, high_threshold=None, mask=None, use_quantiles=False)
# Sigma was lowered from the default 1 so the detected edges nearly coincide with
# the real crack edges (the original note says 0.1; the value in use is 0.09).
# If accuracy becomes a problem, either (1) change how the skeleton direction is
# determined, or (2) nudge this sigma up or down and re-test.
canny_sigma = 0.09

edges_frames_Pw = []
edges_frames_Pl = []

# Iterate over the masks themselves (instead of indexing by len(cropped_frames)) so
# the loop bound always matches the lists that are actually read.
for i, (mask_Pw, mask_Pl) in enumerate(zip(sauvola_frames_Pw, sauvola_frames_Pl)):
    # Same sigma for both masks; convert the result to 0/255 uint8 with astype()
    # rather than reassigning the array's dtype attribute in place.
    edges_Pw = feature.canny(mask_Pw, sigma=canny_sigma).astype('uint8') * 255
    edges_Pl = feature.canny(mask_Pl, sigma=canny_sigma).astype('uint8') * 255

    edges_frames_Pw.append(edges_Pw)
    edges_frames_Pl.append(edges_Pl)

    img_path_Pw = "../../Desktop/edges/edges_Pw_%d.jpg"%i
    img_path_Pl = "../../Desktop/edges/edges_Pl_%d.jpg"%i

    io.imsave(img_path_Pw, edges_Pw)
    io.imsave(img_path_Pl, edges_Pl)


In [40]:
import sys

# Disable summarisation so arrays are printed in full. Current NumPy rejects NaN as
# a threshold; sys.maxsize is the documented value for an untruncated representation.
np.set_printoptions(threshold=sys.maxsize)
#print(edges_frames_Pw[0])
#edges_frames_Pw[0]

[[  0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0   0   0   0   0   0   0   0   0   0   0   0 

    0]]
