# Set up environment

**Import libraries**

In [1]:
from tensorflow.keras import layers
from tensorflow.keras import Sequential
from tensorflow.data import Dataset
from tensorflow.keras import callbacks
import numpy as np
import cv2
import time

In [2]:
def image_pooling(image, new_width, new_height, cvt_color):
    img =  cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_AREA)
    if cvt_color:
        return cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
    else:
      return img

def computeRollingVariance(square_sum, sum, num_elements):
    return (square_sum/num_elements-(sum/num_elements)**2)

In [3]:
def getVariances(video_path, frame_width=128, frame_height=128, target_frame_width=28, target_frame_height=28):
    """
    video_path is the file path to the input video
    Uses functions in the github

    Returns:
    (diff_variances, input_variances)
    """
    num_frame = 5
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(5)
    total_frames = int(cap.get(7))-num_frame+1
    print('Video loaded with', total_frames,'frames')

    diff_variances = np.zeros((total_frames,target_frame_height,target_frame_width),dtype=np.float64)
    input_variances = np.zeros((total_frames,target_frame_height,target_frame_width),dtype=np.float64)
    #Constants below
    input_frames = np.zeros((num_frame, frame_height, frame_width), dtype=np.float32)
    derivative1 = np.zeros((2, frame_height, frame_width), dtype=np.float32)
    cur_derivative2_corrected = np.zeros((frame_height, frame_width), dtype=np.float32)
    differences = np.zeros((num_frame, frame_height, frame_width), dtype=np.float32)
    start_frame = 0
    rotation_frequency = 1
    frequency_const = rotation_frequency*2*np.pi
    difference_sum = np.sum(differences,axis=0)
    difference_square_sum = np.sum(np.square(differences),axis=0)
    input_sum = np.sum(input_frames,axis=0)
    input_square_sum = np.sum(np.square(input_frames),axis=0)
    frame_num=0
    start_time = time.time()
    delta_time = 1
    started = False
    # Next part loops through each frame in video
    while True:
        ret, frame = cap.read()
        if ret:
            input_sum -= input_frames[start_frame]
            input_square_sum -= np.square(input_frames[start_frame])
            difference_sum -= differences[start_frame-1]
            difference_square_sum -= np.square(differences[start_frame-1])
            # read in image
            input_frames[start_frame] = image_pooling(
                frame, frame_width, frame_height, True)/255

            # compute first derivative
            derivative1[start_frame % 2] = (input_frames[start_frame]-input_frames[(start_frame-1)])/delta_time

            # compute second derivative and correct its coefficient
            cur_derivative2_corrected = (derivative1[start_frame % 2]-derivative1[(start_frame-1) % 2])/delta_time
            cur_derivative2_corrected /= frequency_const**2

            # compute difference between image and its second derivative. It's actually a +
            # because of the negative sign from differentiation
            differences[start_frame-1] = cur_derivative2_corrected + \
                input_frames[(start_frame-1)]

            # add in new variance of the newly read in image and newly computed difference
            input_sum += input_frames[start_frame]
            input_square_sum += input_frames[start_frame]**2
            difference_sum += differences[start_frame-1]
            difference_square_sum += differences[start_frame-1]**2

            # recompute variances
            input_variance = computeRollingVariance(input_square_sum,input_sum,num_frame)
            variances = computeRollingVariance(difference_square_sum,difference_sum,num_frame)
            # note this is only an estimation of variance, not the actual variance, which may be difficult
            # to evaluate on a rolling basis

            # scale down variance to ensure connectiveness
            diff_variances[frame_num] = image_pooling(
                variances, target_frame_width, target_frame_height, False)
            input_variances[frame_num] = image_pooling(
                input_variance, target_frame_width, target_frame_height, False)
            start_frame = (start_frame+1) % num_frame
        else:
            break
        if start_frame == 0 :
            started = True
        if started :
            frame_num += 1
        delta_time = time.time()-start_time
        start_time = time.time()
    cap.release()
    return diff_variances, input_variances

In [4]:
def variancesToDataset(diff_variance,input_variance,window_location, batch_size=16):
    """
    diff_variance and input_variance are three dimensional numpy arrays, with third dimension being frame number
    Before inputting into this function, stack every frame from all images to diff_variance and input_variance
    window_location is binary for whether or not there is a window at a pixel in a certain frame

    Returns:
    Dataset
    """
    features = np.transpose(np.array((diff_variance.flatten(),input_variance.flatten())))
    data = (features,window_location.flatten())
    ds = Dataset.from_tensor_slices(data)
    return ds.batch(batch_size).prefetch(1)

In [5]:
def get2D(diff_variances, input_variances):
    """
    diff_variance and input_variance are three dimensional numpy arrays, with third dimension being frame number
    Before inputting into this function, stack every frame from all images to diff_variance and input_variance
    
    Returns:
    2d numpy array with [variance, variance] as each row
    """
    return np.transpose(np.array((diff_variances.flatten(),input_variances.flatten())))

Preprocessing

In [7]:
diff1, input1 = getVariances('outdoor_window_Trim.mp4')
diff2, input2 = getVariances('indoor.avi')

Video loaded with 383 frames
Video loaded with 1990 frames


  if __name__ == '__main__':


In [18]:
features1 = get2D(diff1,input1)
features2 = get2D(diff2,input2)
features2 = features2[:features1.shape[0]]
features = np.concatenate((features1, features2))
features

array([[       nan, 0.00078811],
       [       nan, 0.00202113],
       [       nan, 0.00206045],
       ...,
       [       nan, 0.01316918],
       [       nan, 0.00713564],
       [       nan, 0.00045757]])

In [22]:
labels1 = np.ones(features1.shape)
labels2 = np.zeros(features2.shape)
labels = np.concatenate((labels1,labels2))

array([[1., 1.],
       [1., 1.],
       [1., 1.],
       ...,
       [1., 1.],
       [1., 1.],
       [1., 1.]])

In [19]:
model = Sequential([
    #tf.keras.layers.Dense(2, activation='relu', input_shape=(2,)),
    layers.Dense(1, activation='sigmoid',input_shape=(2,))
])

model.compile(optimizer='adam',loss='binary_crossentropy',metrics=["accuracy"])

In [20]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1)                 3         
Total params: 3
Trainable params: 3
Non-trainable params: 0
_________________________________________________________________


In [20]:
stopper = callbacks.EarlyStopping(monitor='val_accuracy',patience=5)
model.fit(
    features,
    np.zeros(features.shape[0]),
    batch_size=64, 
    epochs=100, 
    validation_split=0.2,
    callbacks=[stopper],
    verbose=2
)

Epoch 1/100
2107/2107 - 1s - loss: 0.1546 - accuracy: 1.0000 - val_loss: 0.0567 - val_accuracy: 1.0000
Epoch 2/100
2107/2107 - 1s - loss: 0.0327 - accuracy: 1.0000 - val_loss: 0.0174 - val_accuracy: 1.0000
Epoch 3/100
2107/2107 - 1s - loss: 0.0111 - accuracy: 1.0000 - val_loss: 0.0064 - val_accuracy: 1.0000
Epoch 4/100
2107/2107 - 1s - loss: 0.0042 - accuracy: 1.0000 - val_loss: 0.0025 - val_accuracy: 1.0000
Epoch 5/100
2107/2107 - 1s - loss: 0.0017 - accuracy: 1.0000 - val_loss: 9.9711e-04 - val_accuracy: 1.0000
Epoch 6/100
2107/2107 - 1s - loss: 6.7642e-04 - accuracy: 1.0000 - val_loss: 4.0092e-04 - val_accuracy: 1.0000


<tensorflow.python.keras.callbacks.History at 0x7f8edd249210>

In [21]:
model.save('simpleLogistic')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: simpleLogistic/assets


In [22]:
output = model.predict(features[:10])
output.flatten()

array([0.00146401, 0.00052735, 0.00065494, 0.00053045, 0.00186622,
       0.00169724, 0.00168318, 0.00253487, 0.00287482, 0.00096923],
      dtype=float32)