In [1]:
import json
import os

import cv2
import imageio
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import scipy
import scipy.misc
import scipy.optimize
import scipy.signal
import scipy.sparse
import scipy.sparse.linalg
from darkflow.net.build import TFNet

In [2]:
# Master switch for the optional contrast-enhancement step.
# True : images in data/ are first enhanced into processed_imgs/ and the
#        image-inference cell reads the enhanced copies.
# False: the enhancement cell is skipped and inference reads data/ directly.
ACTIVATE_IMAGE_ENHANCEMENT = False

In [3]:
def boxing(original_img, predictions):
    # return a new image with a bounding box over the original image using
    # coordinates in predictions
    newImage = np.copy(original_img)

    for result in predictions:
        top_x = result['topleft']['x']
        top_y = result['topleft']['y']

        btm_x = result['bottomright']['x']
        btm_y = result['bottomright']['y']

        confidence = result['confidence']
        label = result['label'] + " " + str(round(confidence, 3))

        if confidence > 0.3:
            newImage = cv2.rectangle(
                newImage, (top_x, top_y), (btm_x, btm_y), (255, 0, 0), 3)
            newImage = cv2.putText(
                newImage,
                label,
                (top_x, top_y - 5),
                cv2.FONT_HERSHEY_COMPLEX_SMALL,
                0.8,
                (0, 230, 0),
                1,
                cv2.LINE_AA)

    return newImage

In [4]:
# Darkflow configuration for the YOLO detector used throughout the notebook.
# NOTE(review): "gpu" is presumably the fraction of GPU to use (0.0 = CPU
# only) -- confirm against the darkflow documentation.
# NOTE(review): the recorded output of this cell reports that COCO labels are
# loaded because of the model name, so "labels.txt" may not take effect.
options = {
    "model": "cfg/yolo.cfg",
    "load": "bin/yolo.weights",
    "threshold": 0.3,
    "gpu": 0.0,
    "labels": "labels.txt",
}

# Builds the network and loads the weights; this is the slow step of the notebook.
tfnet = TFNet(options)

Parsing ./cfg/yolo.cfg
Parsing cfg/yolo.cfg
Loading bin/yolo.weights ...
Successfully identified 203934260 bytes
Finished in 1.2303831577301025s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
Instructions for updating:
Colocations handled automatically by placer.
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep

# Image Pre-processing

In [5]:
"""
CAIP 2017 - 17th International Conference on Computer Analysis of Images and Patterns

A New Image Contrast Enhancement Algorithm Using Exposure Fusion Framework

Ying, Zhenqiang & Li, Ge & Ren, Yurui & Wang, Ronggang & Wang, Wenmin (2017), pp. 36-46. DOI: 10.1007/978-3-319-64698-5_4
"""
def computeTextureWeights(fin, sigma, sharpness):
    """Compute horizontal and vertical smoothing weights from image gradients.

    Args:
        fin: 2-D array.
        sigma: Length of the box window the gradients are summed over. Floats
            are accepted and truncated to an integer (tsmooth defaults to 3.0,
            which np.ones would otherwise reject).
        sharpness: Positive constant added to the denominator; it bounds the
            weights at 1 / sharpness in flat regions.

    Returns:
        Tuple (W_h, W_v) of arrays with the same shape as fin.
    """
    window = int(sigma)

    # Forward differences with wrap-around: the last row/column is the
    # difference between the first and the last row/column.
    dt0_v = np.vstack((np.diff(fin, n=1, axis=0), fin[0, :] - fin[-1, :]))
    dt0_h = np.hstack((np.diff(fin, n=1, axis=1),
                       (fin[:, 0] - fin[:, -1])[:, None]))

    # Sum of the gradients over a 1-D box window along each direction.
    gauker_h = scipy.signal.convolve2d(
        dt0_h, np.ones((1, window)), mode='same')
    gauker_v = scipy.signal.convolve2d(
        dt0_v, np.ones((window, 1)), mode='same')

    W_h = 1 / (np.abs(gauker_h) * np.abs(dt0_h) + sharpness)
    W_v = 1 / (np.abs(gauker_v) * np.abs(dt0_v) + sharpness)

    return W_h, W_v


def solveLinearEquation(IN, wx, wy, lamda):
    """Solve the sparse, weighted smoothing system for one 2-D image.

    The system matrix has 1 plus lamda times the four incident edge weights
    on its diagonal and -lamda times the edge weight on the entries linking
    horizontally / vertically adjacent pixels (with wrap-around at the image
    borders). Pixels are ordered column-major.

    Args:
        IN: 2-D array of shape (r, c); the right-hand side.
        wx: Horizontal weights, same shape as IN. Not modified.
        wy: Vertical weights, same shape as IN. Not modified.
        lamda: Smoothing strength; 0 returns IN unchanged.

    Returns:
        2-D array of shape (r, c) holding the solution.
    """
    r, c = IN.shape
    k = r * c

    def scaled(weights):
        # Column-major vectorisation, scaled to an off-diagonal contribution.
        return -lamda * weights.flatten('F')

    # Contributions of each pixel's right/lower edge and, via the circular
    # shift, of its left/upper edge.
    dx = scaled(wx)
    dy = scaled(wy)
    dxa = scaled(np.roll(wx, 1, axis=1))
    dya = scaled(np.roll(wy, 1, axis=0))

    # Wrap-around edges: last column <-> first column, last row <-> first row.
    wrap_x = np.concatenate((wx[:, -1][:, None], np.zeros((r, c - 1))), axis=1)
    wrap_y = np.concatenate((wy[-1, :][None, :], np.zeros((r - 1, c))), axis=0)
    dxd1 = scaled(wrap_x)
    dyd1 = scaled(wrap_y)

    # Interior edges only. Zero the border weights on copies so the caller's
    # wx / wy arrays are left untouched.
    inner_x = np.copy(wx)
    inner_x[:, -1] = 0
    inner_y = np.copy(wy)
    inner_y[-1, :] = 0
    dxd2 = scaled(inner_x)
    dyd2 = scaled(inner_y)

    Ax = scipy.sparse.spdiags(
        np.vstack((dxd1, dxd2)), np.array([-k + r, -r]), k, k)
    Ay = scipy.sparse.spdiags(
        np.vstack((dyd1, dyd2)), np.array([-r + 1, -1]), k, k)
    D = 1 - (dx + dy + dxa + dya)

    # Ax + Ay holds the lower triangle; adding its transpose makes the matrix
    # symmetric. CSC is the format the direct solver works with.
    lower = Ax + Ay
    A = (lower + lower.conj().T + scipy.sparse.spdiags(D, 0, k, k)).tocsc()

    tout = scipy.sparse.linalg.spsolve(A, IN.flatten('F'))
    return np.reshape(tout, (r, c), order='F')


def tsmooth(img, lamda=0.01, sigma=3.0, sharpness=0.001):
    """Smooth an image with the weighted linear solver defined above.

    The input is min-max normalised to [0, 1] as float64, smoothing weights
    are derived from a copy of it, and the linear system is then solved with
    those weights.

    Args:
        img: Input array; converted with astype('float64').
        lamda: Smoothing strength forwarded to solveLinearEquation.
        sigma: Window size forwarded to computeTextureWeights.
        sharpness: Regulariser forwarded to computeTextureWeights.

    Returns:
        The array returned by solveLinearEquation.
    """
    normalized = cv2.normalize(
        img.astype('float64'), None, 0.0, 1.0, cv2.NORM_MINMAX)
    weights_h, weights_v = computeTextureWeights(
        np.copy(normalized), sigma, sharpness)
    return solveLinearEquation(normalized, weights_h, weights_v, lamda)


def rgb2gm(I):
    """Collapse a 3-channel image to the geometric mean of its channels.

    Args:
        I: Image array.

    Returns:
        For an array of shape (h, w, 3): the (h, w) cube root of the product
        of the three channels, computed after min-max normalising the image
        to [0, 1]. Any other input (including a 2-D single-channel image) is
        returned unchanged.
    """
    # Check ndim first: a 2-D array has no shape[2], which used to raise
    # IndexError instead of taking the pass-through path.
    if I.ndim == 3 and I.shape[2] == 3:
        I = cv2.normalize(I.astype('float64'), None, 0.0, 1.0, cv2.NORM_MINMAX)
        I = (I[:, :, 0] * I[:, :, 1] * I[:, :, 2]) ** (1 / 3)

    return I


def applyK(I, k, a=-0.3293, b=1.1258):
    """Apply the exposure mapping J = beta * I**gamma for exposure ratio k.

    gamma = k**a and beta = exp((1 - k**a) * b), so k = 1 leaves I unchanged.

    Args:
        I: Image values (array or scalar).
        k: Exposure ratio.
        a: Exponent parameter of the mapping.
        b: Scale parameter of the mapping.

    Returns:
        The mapped values, same shape as I.
    """
    gamma = k**a
    beta = np.exp((1 - gamma) * b)
    return (I**gamma) * beta


def entropy(X):
    """Shannon entropy (in bits) of the 8-bit histogram of X.

    X is scaled by 255, clipped to [0, 255] and truncated to uint8; the
    entropy is computed over the grey levels that actually occur.

    Args:
        X: Array of intensities, nominally in [0, 1].

    Returns:
        The entropy as a scalar.
    """
    levels = np.clip(X * 255, 0, 255).astype(np.uint8)
    counts = np.unique(levels, return_counts=True)[1]
    pk = counts / counts.sum()
    return -np.sum(pk * np.log2(pk))


def maxEntropyEnhance(I, isBad, a=-0.3293, b=1.1258):
    """Re-expose I with the exposure ratio that maximises entropy in bad regions.

    Args:
        I: Image array; the code treats it as float intensities.
        isBad: Mask (bool or 0/1) over the image plane marking the pixels
            used to estimate the exposure ratio.
        a: Exponent parameter forwarded to applyK.
        b: Scale parameter forwarded to applyK.

    Returns:
        applyK(I, k_opt, a, b) - 0.01, where k_opt is found by a bounded
        search over [1, 7]; I itself when the down-sampled mask selects no
        pixel.
    """
    # Estimate k on a 50x50 thumbnail so the 1-D search stays cheap.
    thumb = cv2.resize(I, (50, 50), interpolation=cv2.INTER_AREA)
    thumb = np.maximum(thumb, 0)
    Y = rgb2gm(thumb)

    # scipy.misc.imresize was removed in SciPy 1.3; cv2.resize with bicubic
    # interpolation on a float32 mask is used instead. The result is
    # re-binarised at 0.5.
    mask = cv2.resize(
        np.asarray(isBad, dtype=np.float32), (50, 50),
        interpolation=cv2.INTER_CUBIC)
    Y = Y[mask >= 0.5]

    if Y.size == 0:
        return I

    def negative_entropy(k):
        # Use the caller's a / b so the estimate matches the mapping applied
        # below (the defaults used to be used here regardless).
        return -entropy(applyK(Y, k, a, b))

    opt_k = scipy.optimize.fminbound(negative_entropy, 1, 7)

    # Apply k
    return applyK(I, opt_k, a, b) - 0.01


def Ying_2017_CAIP(img, mu=0.5, a=-0.3293, b=1.1258):
    """Enhance image contrast by fusing the input with a re-exposed copy.

    Args:
        img: Image array with a channel axis (shape (h, w, channels)).
        mu: Exponent applied to the smoothed illumination map to obtain the
            fusion weights.
        a: Exponent parameter of the exposure mapping.
        b: Scale parameter of the exposure mapping.

    Returns:
        uint8 array with the same shape as img.
    """
    lamda = 0.5
    sigma = 5
    I = cv2.normalize(img.astype('float64'), None, 0.0, 1.0, cv2.NORM_MINMAX)

    # Weight matrix estimation: per-pixel channel maximum, smoothed at half
    # resolution and scaled back to full size.
    t_b = np.max(I, axis=2)
    # scipy.misc.imresize was removed in SciPy 1.3; cv2.resize with bicubic
    # interpolation is used for the half-size copy instead.
    t_half = cv2.resize(
        t_b, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC)
    t_our = cv2.resize(
        tsmooth(t_half, lamda, sigma),
        (t_b.shape[1], t_b.shape[0]),
        interpolation=cv2.INTER_AREA)

    # Apply camera model with k (exposure ratio), estimated on the dark
    # pixels. a and b are forwarded so non-default values take effect.
    isBad = t_our < 0.5
    J = maxEntropyEnhance(I, isBad, a, b)

    # W: weight matrix, broadcast over the channel axis.
    W = t_our[:, :, np.newaxis] ** mu

    # Bright regions keep the input, dark regions take the re-exposed image.
    result = I * W + J * (1 - W)
    return np.clip(result * 255, 0, 255).astype(np.uint8)

In [6]:
# Image enhancement using the CAIP 2017 algorithm defined above.
# Runs only when ACTIVATE_IMAGE_ENHANCEMENT is True.
if ACTIVATE_IMAGE_ENHANCEMENT:
    DATA_DIR = 'data/'
    RESULTS_DIR = 'processed_imgs/'

    # plt.imsave does not create missing directories.
    os.makedirs(RESULTS_DIR, exist_ok=True)

    # Sorted so the processing order is reproducible across runs.
    for filename in sorted(os.listdir(DATA_DIR)):
        src = os.path.join(DATA_DIR, filename)
        if not os.path.isfile(src):
            # Skip sub-directories such as notebook checkpoint folders.
            continue

        print("Processing ", filename)
        dest = os.path.join(RESULTS_DIR, 'processed_' + filename)
        img = imageio.imread(src)
        result = Ying_2017_CAIP(img)
        plt.imsave(dest, result)

# Inference on Images

In [7]:
# Read the enhanced images when enhancement is enabled, the raw ones otherwise.
DATA_DIR = 'processed_imgs/' if ACTIVATE_IMAGE_ENHANCEMENT else 'data/'
RESULTS_DIR = 'results/'

# Neither open() nor plt.imsave creates missing directories.
os.makedirs(RESULTS_DIR, exist_ok=True)

# Sorted so the processing order is reproducible across runs.
for filename in sorted(os.listdir(DATA_DIR)):
    print(filename)
    src = os.path.join(DATA_DIR, filename)
    dest = os.path.join(RESULTS_DIR, 'boxed_image_' + filename)

    # cv2.imread returns None (no exception) for anything it cannot decode.
    img_bgr = cv2.imread(src)
    if img_bgr is None:
        print("Skipping (not a readable image):", filename)
        continue

    # Predict on the BGR array exactly as cv2.imread returns it. This matches
    # the video cell of this notebook and darkflow's documented usage;
    # previously the image was converted to RGB before prediction.
    results = tfnet.return_predict(img_bgr)

    # Convert to RGB only for drawing and saving, since plt.imsave expects RGB.
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)
    boxed_image = boxing(img, results)

    # splitext handles any extension, not just '.png'.
    stem = os.path.splitext(filename)[0]
    with open(os.path.join(RESULTS_DIR, 'result_' + stem + '.txt'), 'w') as f:
        # NOTE(review): this writes the Python repr of the list as one JSON
        # string, not a JSON structure. Kept as is so existing readers of
        # these files keep working.
        json.dump(str(results), f)

    plt.imsave(dest, boxed_image)

8.png
9.png
14.png
15.png
17.png
16.png
12.png
13.png
11.png
10.png
21.png
20.png
22.png
23.png
18.png
24.png
25.png
19.png
4.png
5.png
7.png
6.png
2.png
3.png
1.png
0.png


# Inference on Video

In [None]:
# Inference on video: run the detector on every frame of the input clip and
# write the annotated frames to a new video file.
cap = cv2.VideoCapture('./video_data/test_video.mp4')
if not cap.isOpened():
    # Without this check a missing or undecodable file silently yields no output.
    raise RuntimeError("Could not open ./video_data/test_video.mp4")

width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)

# Keep the source frame rate so the result plays at the original speed; fall
# back to the previous fixed 20 fps when the container does not report one.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 20.0

# cv2.VideoWriter does not create missing directories.
os.makedirs('./video_results', exist_ok=True)

fourcc = cv2.VideoWriter_fourcc(*'DIVX')
out = cv2.VideoWriter('./video_results/output_test_video.avi',
                      fourcc, fps, (int(width), int(height)))

try:
    while True:
        # Capture frame-by-frame; ret is False once the stream is exhausted.
        ret, frame = cap.read()
        if not ret:
            break

        results = tfnet.return_predict(frame)
        new_frame = boxing(frame, results)

        # Append the annotated frame to the output video.
        out.write(new_frame)

        # Uncomment to preview the frames while processing. The 'q' shortcut
        # below can only receive key presses once a window is shown.
        # cv2.imshow('frame',new_frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release even when prediction fails, so the output file is finalised.
    cap.release()
    out.release()
    cv2.destroyAllWindows()