### Manage Imports

In [3]:
import keras
%matplotlib inline
from matplotlib import pyplot as plt
import os
import cv2
import re
import numpy as np
from sklearn import svm, metrics
from keras.preprocessing.image import img_to_array, load_img
from keras.applications import VGG16
from keras import layers, models, optimizers
keras.__version__

Using TensorFlow backend.


'2.2.2'

### Globals

In [4]:
folder = "data" 
users = ["alex", "ben", "miao", "natasha", "nick", "sarah", "sean", "spencer", "tim", "yijun"]
mats = ["cloth", "concrete", "door", "drywall", "laminant", "whiteboard"]
pressures = ["hard", "soft"]
frames_dirs = ["frames", "swipe_frames"]

AUG_FACTOR = 8
IMAGE_SIZE = 128
FRAMES_PER_VID = 8
EPOCHS = 30

# Single Frame Classification

### Load Data

In [None]:
cnn_results = np.zeros((len(frames_dirs), len(mats), 2))
for user in ["alex"]:
    for fd in range(len(frames_dirs)):
        for m in range(len(mats)):
    
            # training data consists of swipes from all other users (for mats of interest)
            files = []
            training_users = [u for u in users if u != user]
            for train_user in training_users:
                for pres in pressures:
                    folder = "data/"+train_user+"/"+frames_dirs[fd]+"/"+mats[m]+"/"+pres+"/"
                    files.extend([folder+f for f in os.listdir(folder)])
            files.sort() # only keeps video frames in order if < 10 frames per video

            # create single arrays to store data (use 224x224 for VGGNet)
            X_train = np.ndarray(shape=(len(files)*AUG_FACTOR, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)
            y_train = np.ndarray(shape=(len(files)*AUG_FACTOR, 1), dtype=np.float32)

            # load in training data from images into normalized array
            n = 0
            for f in files:
                # read in, normalize, resize for VGGNet
                x = cv2.resize(cv2.imread(f, cv2.IMREAD_GRAYSCALE), (IMAGE_SIZE, IMAGE_SIZE))
                X_train[n] = x / 255.0

                # perform data augmentation
                X_train[n+1] = np.fliplr(X_train[n])
                X_train[n+2] = np.rot90(X_train[n])
                X_train[n+3] = np.fliplr(X_train[n+2])
                X_train[n+4] = np.rot90(X_train[n+2])
                X_train[n+5] = np.fliplr(X_train[n+4])
                X_train[n+6] = np.rot90(X_train[n+4])
                X_train[n+7] = np.fliplr(X_train[n+6])

                # assign labels
                if "/hard/" in f:
                    y_train[n:n+AUG_FACTOR] = np.ones((AUG_FACTOR,1))
                else:
                    y_train[n:n+AUG_FACTOR] = np.zeros((AUG_FACTOR,1))
                n += AUG_FACTOR

            # find all test data
            test_files = []
            for pres in pressures:
                folder = "data/"+user+"/"+frames_dirs[fd]+"/"+mats[m]+"/"+pres+"/"
                test_files.extend([folder+f for f in os.listdir(folder)])
            test_files.sort() # only keeps video frames in order if < 10 frames per video

            # load in test data
            X_test = np.ndarray(shape=(len(test_files), IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)
            y_test = np.ndarray(shape=(len(test_files), 1), dtype=np.float32)
            n2 = 0
            for f in test_files:
                x = cv2.resize(cv2.imread(f, cv2.IMREAD_GRAYSCALE), (IMAGE_SIZE, IMAGE_SIZE))
                X_test[n2] = x / 255.0
                y_test[n2] = float("/hard/" in f)
                n2 += 1

            
            
            ########################
            # SHALLOW SINGLE FRAME #
            ########################
            
            model = models.Sequential()
            model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same', input_shape=(IMAGE_SIZE,IMAGE_SIZE,1)))
            model.add(layers.MaxPooling2D((2,2)))
            model.add(layers.Dropout(0.25))
            model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same'))
            #model.add(layers.MaxPooling2D((2,2)))
            #model.add(layers.Dropout(0.25))
            #model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same'))
            model.add(layers.MaxPooling2D((2,2)))
            model.add(layers.Flatten())
            model.add(layers.Dense(128, activation='relu'))
            model.add(layers.Dropout(0.5))
            model.add(layers.Dense(1, activation='sigmoid'))
            
            model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

            X_train = np.reshape(X_train, (X_train.shape[0],IMAGE_SIZE,IMAGE_SIZE,1))
            history = model.fit(X_train, y_train,
                      batch_size=100,
                      epochs=EPOCHS,
                      verbose=0,
                      )
            X_test = np.reshape(X_test, (X_test.shape[0],IMAGE_SIZE,IMAGE_SIZE,1))
            test_loss, test_acc = model.evaluate(X_test, y_test)
            print 'Shallow Single Frame Accuracy: [', mats[m], frames_dirs[fd], "]", test_acc
            cnn_results[fd, m, 0] = test_acc
            
            
            
            #######################
            # SHALLOW MULTI-FRAME #
            #######################
            # create feature arrays from earlier per-frame predictions
            y_train_frame_pred = model.predict(X_train)
            X_train_svm = np.reshape(np.rollaxis(np.reshape(
                y_train_frame_pred, (len(files)//FRAMES_PER_VID,FRAMES_PER_VID,AUG_FACTOR)),1,3), (len(files),FRAMES_PER_VID))
            y_train_svm = y_train[::FRAMES_PER_VID]

            # same for test set
            y_test_frame_pred = model.predict(X_test)
            X_test_svm = np.reshape(y_test_frame_pred, (len(test_files)//FRAMES_PER_VID, FRAMES_PER_VID))
            y_test_svm = y_test[::FRAMES_PER_VID]
            
            # take average prediction, round at end
            simple_y_pred = np.round(np.sum(X_test_svm, axis=1)/FRAMES_PER_VID)
            avg_acc = metrics.accuracy_score(y_test_svm, simple_y_pred)

            print('Per-Video AVG Accuracy:', avg_acc)
            cnn_results[fd, m, 1] = avg_acc
            

In [None]:
cnn_results

## Single-Frame CNN pre-training on VGG

In [None]:
IMAGE_SIZE = 224

In [None]:
vgg_results = np.zeros((len(frames_dirs), len(mats), 2))
for user in ["alex"]:
    for fd in range(len(frames_dirs)):
        for m in range(len(mats)):
    
            # training data consists of swipes from all other users (for mats of interest)
            print "\tFinding training images..."
            files = []
            training_users = [u for u in users if u != user]
            for train_user in training_users:
                for pres in pressures:
                    folder = "data/"+train_user+"/"+frames_dirs[fd]+"/"+mats[m]+"/"+pres+"/"
                    files.extend([folder+f for f in os.listdir(folder)])
            files.sort() # only keeps video frames in order if < 10 frames per video
            print "\tTraining images found:", len(files)

            # create single arrays to store data (use 224x224 for VGGNet)
            X_train = np.ndarray(shape=(len(files)*AUG_FACTOR, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)
            y_train = np.ndarray(shape=(len(files)*AUG_FACTOR, 1), dtype=np.float32)

            # load in training data from images into normalized array
            print "\tLoading and augmenting training images..."
            n = 0
            for f in files:
                # read in, normalize, resize for VGGNet
                x = cv2.resize(cv2.imread(f, cv2.IMREAD_GRAYSCALE), (IMAGE_SIZE, IMAGE_SIZE))
                X_train[n] = x / 255.0

                # perform data augmentation
                X_train[n+1] = np.fliplr(X_train[n])
                X_train[n+2] = np.rot90(X_train[n])
                X_train[n+3] = np.fliplr(X_train[n+2])
                X_train[n+4] = np.rot90(X_train[n+2])
                X_train[n+5] = np.fliplr(X_train[n+4])
                X_train[n+6] = np.rot90(X_train[n+4])
                X_train[n+7] = np.fliplr(X_train[n+6])

                # assign labels
                if "/hard/" in f:
                    y_train[n:n+AUG_FACTOR] = np.ones((AUG_FACTOR,1))
                else:
                    y_train[n:n+AUG_FACTOR] = np.zeros((AUG_FACTOR,1))
                n += AUG_FACTOR
            print "\tAll", n, "training images and labels stored successfully!"

            # find all test data
            print "\tFinding test images..."
            test_files = []
            for pres in pressures:
                folder = "data/"+user+"/"+frames_dirs[fd]+"/"+mats[m]+"/"+pres+"/"
                test_files.extend([folder+f for f in os.listdir(folder)])
            test_files.sort() # only keeps video frames in order if < 10 frames per video
            print "\tTest images found:", len(test_files)

            # load in test data
            X_test = np.ndarray(shape=(len(test_files), IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)
            y_test = np.ndarray(shape=(len(test_files), 1), dtype=np.float32)
            print "\tLoading test images..."
            n2 = 0
            for f in test_files:
                x = cv2.resize(cv2.imread(f, cv2.IMREAD_GRAYSCALE), (IMAGE_SIZE, IMAGE_SIZE))
                X_test[n2] = x / 255.0
                y_test[n2] = float("/hard/" in f)
                n2 += 1
            print "\tAll", n2, "test images and labels stored successfully!"

            
            
            ######################
            # VGGNET PRETRAINING #
            ######################
            
            vgg = VGG16(weights='imagenet',
                       include_top=False,
                       input_shape=(224,224,3))

            # feed each image through VGG and obtain 7*7*512 features
            X_train_feat = np.zeros(shape=(X_train.shape[0],7,7,512))
            print 'Total training images:', X_train.shape[0]
            for i in range(X_train.shape[0]):
                # subtract ImageNet means before passing to ImageNet classifier
                X_trainR = X_train[i]*255.0-123.68
                X_trainG = X_train[i]*255.0-116.78
                X_trainB = X_train[i]*255.0-103.94
                X_train_feat[i] = vgg.predict(np.stack((X_trainR,X_trainG,X_trainB), axis=-1).reshape((1,224,224,3)))
            X_train_feat = np.reshape(X_train_feat, (X_train.shape[0], 7*7*512))
            
            # fully connected
            model = models.Sequential()
            model.add(layers.Dense(256, activation='relu', input_dim=7*7*512))
            model.add(layers.Dropout(0.9))
            model.add(layers.Dense(1, activation='sigmoid'))

            sgd = optimizers.SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
            model.compile(loss='binary_crossentropy',
                          optimizer=sgd,
                          metrics=['accuracy'])

            history = model.fit(X_train_feat, y_train,
                      batch_size=100,
                      epochs=EPOCHS,
                      shuffle=True
                      )
            
            # calculate features for test data
            X_test_feat = np.zeros(shape=(X_test.shape[0],7,7,512))
            for i in range(X_test.shape[0]):
                X_testR = X_test[i]*255.0-123.68
                X_testG = X_test[i]*255.0-116.78
                X_testB = X_test[i]*255.0-103.94
                X_test_feat[i] = vgg.predict(np.stack((X_testR,X_testG,X_testB), axis=-1).reshape(1,224,224,3))
            X_test_feat = np.reshape(X_test_feat, (X_test.shape[0], 7*7*512))

            test_loss, test_acc = model.evaluate(X_test_feat, y_test)
            print 'VGGnet Single Frame Accuracy: [', mats[m], frames_dirs[fd], "]", test_acc
            vgg_results[fd, m, 0] = test_acc
            
            
            
            ################
            # VGGnet VIDEO #
            ################
            
            # take average prediction, round at end
            frame_pred = np.reshape(model.predict(X_test_feat), (len(test_files)//FRAMES_PER_VID, FRAMES_PER_VID))
            simple_y_pred = np.round(np.sum(frame_pred, axis=1)/FRAMES_PER_VID)
            avg_acc = metrics.accuracy_score(y_test[::FRAMES_PER_VID], simple_y_pred)
            print('Per-Video AVG Accuracy:', avg_acc)
            vgg_results[fd,m,1] = avg_acc

In [None]:
vgg_results

## Single Frame Classification: All Materials

In [3]:
files = []
user = "alex"
for fd in range(len(frames_dirs)):
    for m in range(len(mats)):
        training_users = [u for u in users if u != user]
        for train_user in training_users:
            for pres in pressures:
                folder = "data/"+train_user+"/"+frames_dirs[fd]+"/"+mats[m]+"/"+pres+"/"
                files.extend([folder+f for f in os.listdir(folder)])
        files.sort() # only keeps video frames in order if < 10 frames per video

    # create single arrays to store data (use 224x224 for VGGNet)
    X_train = np.ndarray(shape=(len(files)*AUG_FACTOR, IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)
    y_train = np.ndarray(shape=(len(files)*AUG_FACTOR, 1), dtype=np.float32)

    # load in training data from images into normalized array
    n = 0
    for f in files:
        # read in, normalize, resize for VGGNet
        x = cv2.resize(cv2.imread(f, cv2.IMREAD_GRAYSCALE), (IMAGE_SIZE, IMAGE_SIZE))
        X_train[n] = x / 255.0

        # perform data augmentation
        X_train[n+1] = np.fliplr(X_train[n])
        X_train[n+2] = np.rot90(X_train[n])
        X_train[n+3] = np.fliplr(X_train[n+2])
        X_train[n+4] = np.rot90(X_train[n+2])
        X_train[n+5] = np.fliplr(X_train[n+4])
        X_train[n+6] = np.rot90(X_train[n+4])
        X_train[n+7] = np.fliplr(X_train[n+6])

        # assign labels
        if "/hard/" in f:
            y_train[n:n+AUG_FACTOR] = np.ones((AUG_FACTOR,1))
        else:
            y_train[n:n+AUG_FACTOR] = np.zeros((AUG_FACTOR,1))
        n += AUG_FACTOR

    # find all test data
    test_files = []
    for pres in pressures:
        folder = "data/"+user+"/"+frames_dirs[fd]+"/"+mats[m]+"/"+pres+"/"
        test_files.extend([folder+f for f in os.listdir(folder)])
    test_files.sort() # only keeps video frames in order if < 10 frames per video

    # load in test data
    X_test = np.ndarray(shape=(len(test_files), IMAGE_SIZE, IMAGE_SIZE), dtype=np.float32)
    y_test = np.ndarray(shape=(len(test_files), 1), dtype=np.float32)
    n2 = 0
    for f in test_files:
        x = cv2.resize(cv2.imread(f, cv2.IMREAD_GRAYSCALE), (IMAGE_SIZE, IMAGE_SIZE))
        X_test[n2] = x / 255.0
        y_test[n2] = float("/hard/" in f)
        n2 += 1



    ########################
    # SHALLOW SINGLE FRAME #
    ########################

    model = models.Sequential()
    model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same', input_shape=(IMAGE_SIZE,IMAGE_SIZE,1)))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Dropout(0.25))
    model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same'))
    #model.add(layers.MaxPooling2D((2,2)))
    #model.add(layers.Dropout(0.25))
    #model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2,2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
      optimizer='sgd',
      metrics=['accuracy'])

    X_train = np.reshape(X_train, (X_train.shape[0],IMAGE_SIZE,IMAGE_SIZE,1))
    history = model.fit(X_train, y_train,
              batch_size=100,
              epochs=EPOCHS,
              verbose=0,
              )
    X_test = np.reshape(X_test, (X_test.shape[0],IMAGE_SIZE,IMAGE_SIZE,1))
    test_loss, test_acc = model.evaluate(X_test, y_test)
    print 'Shallow Single Frame Accuracy: [', frames_dirs[fd], "]", test_acc



    #######################
    # SHALLOW MULTI-FRAME #
    #######################
    # create feature arrays from earlier per-frame predictions
    y_train_frame_pred = model.predict(X_train)
    X_train_svm = np.reshape(np.rollaxis(np.reshape(
        y_train_frame_pred, (len(files)//FRAMES_PER_VID,FRAMES_PER_VID,AUG_FACTOR)),1,3), (len(files),FRAMES_PER_VID))
    y_train_svm = y_train[::FRAMES_PER_VID]

    # same for test set
    y_test_frame_pred = model.predict(X_test)
    X_test_svm = np.reshape(y_test_frame_pred, (len(test_files)//FRAMES_PER_VID, FRAMES_PER_VID))
    y_test_svm = y_test[::FRAMES_PER_VID]

    # take average prediction, round at end
    simple_y_pred = np.round(np.sum(X_test_svm, axis=1)/FRAMES_PER_VID)
    avg_acc = metrics.accuracy_score(y_test_svm, simple_y_pred)

    print('Per-Video AVG Accuracy:', avg_acc)

Shallow Single Frame Accuracy: [ frames ] 0.6911764705882353
('Per-Video AVG Accuracy:', 0.6666666666666666)
Shallow Single Frame Accuracy: [ swipe_frames ] 0.6862745109726401
('Per-Video AVG Accuracy:', 0.7450980392156863)


## 3D Convolution

In [None]:
user = "alex"
    
# training data consists of swipes from all other users (for mats of interest)
print "\tFinding training videos..."
files = []
training_users = [u for u in users if u != user]
for train_user in training_users:
    for mat in mats:
        for pres in pressures:
            folder = "data/"+train_user+"/"+frames+"/"+mat+"/"+pres+"/"
            files.extend([folder+f for f in os.listdir(folder)])
files.sort() # only keeps video frames in order if < 10 frames per video
print "\tTraining videos found:", len(files)//FRAMES_PER_VID

# create single arrays to store data (use 224x224 for VGGNet)
X_train = np.ndarray(shape=(len(files)*AUG_FACTOR/FRAMES_PER_VID, FRAMES_PER_VID, IMAGE_SIZE, IMAGE_SIZE, 1), dtype=np.float32)
y_train = np.ndarray(shape=(len(files)*AUG_FACTOR/FRAMES_PER_VID, 1), dtype=np.float32)

# load in training data from images into normalized array
print "\tLoading and augmenting training videos..."
n = 0
for i, filename in enumerate(files):
    # only act once for each video (filenames were sorted earlier)
    if i % FRAMES_PER_VID != 0:
        continue

    # extract section of filename which determines swipe
    match = re.match("([a-z/_]+[0-9]+_)([0-9]+)(.jpg)", filename, re.I)
    if match:
        swipe = match.groups()[0]
        frame = int(match.groups()[1])
        ext = match.groups()[2]

        # read in all other video frames from that swipe
        for f in range(FRAMES_PER_VID):
            x = cv2.resize(cv2.imread(swipe+str(f+1)+ext, cv2.IMREAD_GRAYSCALE), (IMAGE_SIZE,IMAGE_SIZE))
            X_train[i//FRAMES_PER_VID,f,:,:,0] = x / 255.0

            # perform data augmentation
            X_train[n+1,f,:,:,0] = np.fliplr(X_train[n,f,:,:,0])
            X_train[n+2,f,:,:,0] = np.rot90(X_train[n,f,:,:,0])
            X_train[n+3,f,:,:,0] = np.fliplr(X_train[n+2,f,:,:,0])
            X_train[n+4,f,:,:,0] = np.rot90(X_train[n+2,f,:,:,0])
            X_train[n+5,f,:,:,0] = np.fliplr(X_train[n+4,f,:,:,0])
            X_train[n+6,f,:,:,0] = np.rot90(X_train[n+4,f,:,:,0])
            X_train[n+7,f,:,:,0] = np.fliplr(X_train[n+6,f,:,:,0])

        # assign labels
        if "/hard/" in swipe:
            y_train[n:n+AUG_FACTOR] = np.ones((AUG_FACTOR,1))
        else:
            y_train[n:n+AUG_FACTOR] = np.zeros((AUG_FACTOR,1))
        n += AUG_FACTOR
print "\tAll", n//FRAMES_PER_VID, "training videos and labels stored successfully!"

# find all test data
print "\tFinding test videos..."
test_files = []
for mat in mats:
    for pres in pressures:
        folder = "data/"+user+"/"+frames+"/"+mat+"/"+pres+"/"
        test_files.extend([folder+f for f in os.listdir(folder)])
test_files.sort() # only keeps video frames in order if < 10 frames per video
print "\tTest videos found:", len(test_files)//FRAMES_PER_VID

# create arrays to store test data
X_test = np.ndarray(shape=(len(test_files)*AUG_FACTOR/FRAMES_PER_VID, FRAMES_PER_VID, IMAGE_SIZE, IMAGE_SIZE, 1), dtype=np.float32)
y_test = np.ndarray(shape=(len(test_files)*AUG_FACTOR/FRAMES_PER_VID, 1), dtype=np.float32)

print "\tLoading test videos..."
for i, filename in enumerate(test_files):
    # only act once for each video (filenames were sorted earlier)
    if i % FRAMES_PER_VID != 0:
        continue

    # extract section of filename which determines swipe
    match = re.match("([a-z/_]+[0-9]+_)([0-9]+)(.jpg)", filename, re.I)
    if match:
        swipe = match.groups()[0]
        frame = int(match.groups()[1])
        ext = match.groups()[2]

        # read in all other video frames from that swipe
        for f in range(FRAMES_PER_VID):
            x = cv2.resize(cv2.imread(swipe+str(f+1)+ext, cv2.IMREAD_GRAYSCALE), (IMAGE_SIZE,IMAGE_SIZE))
            X_test[i//FRAMES_PER_VID,f,:,:,0] = x / 255.0
        y_test[i//FRAMES_PER_VID] = float("/hard/" in swipe)
print "\tAll", i//FRAMES_PER_VID, "test images and labels stored successfully!"

In [None]:
model = models.Sequential()
# 0th layer group
model.add(layers.Conv3D(32, (3,3,3), activation='relu', padding='same', 
                        input_shape=(8,IMAGE_SIZE,IMAGE_SIZE,1)))
model.add(layers.MaxPooling3D(pool_size=(2,2,2), strides=(1,2,2), ))
# 1st layer group
model.add(layers.Conv3D(32, (3,3,3), activation='relu', padding='same'))
model.add(layers.MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))
# 2nd layer group
model.add(layers.Conv3D(32, (3,3,3), activation='relu', padding='same'))
model.add(layers.MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))
# 3rd layer group
#model.add(layers.Conv3D(32, (3,3,3), activation='relu', padding='same'))
#model.add(layers.Conv3D(32, (3,3,3), activation='relu', padding='same'))
#model.add(layers.MaxPooling3D(pool_size=(1,2,2), strides=(1,2,2)))
# 4th layer group
#model.add(layers.Conv3D(64, (3,3,3), activation='relu', padding='same'))
#model.add(layers.Conv3D(64, (3,3,3), activation='relu', padding='same'))
#model.add(layers.MaxPooling3D(pool_size=(2,2,2), strides=(2,2,2)))
# 5th layer group
#model.add(layers.Conv3D(64, (3,3,3), activation='relu', padding='same'))
#model.add(layers.Conv3D(64, (3,3,3), activation='relu', padding='same'))
#model.add(layers.MaxPooling3D(pool_size=(1,2,2), strides=(1,2,2)))

# fully connected
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy'])

history = model.fit(X_train, y_train,
          validation_split=0.2,
          batch_size=100,
          epochs=EPOCHS,
          verbose=1
          )
test_loss, test_acc = model.evaluate(X_test, y_test)
print('Test accuracy:', test_acc)

In [None]:
# create feature arrays from earlier per-frame predictions
y_train_frame_pred = model.predict(X_train)
X_train_svm = np.reshape(np.rollaxis(np.reshape(
    y_train_frame_pred, (len(files)//FRAMES_PER_VID,FRAMES_PER_VID,AUG_FACTOR)),1,3), (len(files),FRAMES_PER_VID))
y_train_svm = y_train[::FRAMES_PER_VID]

# same for test set
y_test_frame_pred = model.predict(X_test)
X_test_svm = np.reshape(y_test_frame_pred, (len(test_files)//FRAMES_PER_VID, FRAMES_PER_VID))
y_test_svm = y_test[::FRAMES_PER_VID]

# take average prediction, round at end
simple_y_pred = np.round(np.sum(X_test_svm, axis=1)/FRAMES_PER_VID)
avg_acc = metrics.accuracy_score(y_test_svm, simple_y_pred)
print('Per-Video AVG Accuracy:', avg_acc)

## Summary Image Classification

In [27]:
import glob
user = "alex"
training_users = [u for u in users if u != user]
files = []
for train_user in training_users:
    fldr = "data/"+train_user+"/segments/"
    files.extend([fldr+f for f in os.listdir(fldr)])
files.sort() # only keeps video frames in order if < 10 frames per video

ntrain = len(files)
ntest = glob.glob("data/alex/segments/*")#len(os.listdir("data/"+user+"/segments/"))

In [28]:
ntest

[]

In [5]:
import os                                                                       
import imageio                                                                  
import scipy.misc                                                               
import shutil                                                                   
import cv2                                                                      
import re                                                                       
import numpy as np                                                              
                                                                                
 
                                                                                                                           
SWIPE_THRESHOLD = 20                                                            
BLACK = 0                                                                       
WHITE = 255                                                                     
                                                                                
def make_re(pressures, mats):                                                   
    """ Generate a regular expression for swipe file names,                     
    given list of pressures and materials being considered. """                 
                                                                                
    # start pressure group                                                      
    re_string = r"("                                                            
    for p in pressures:                                                         
        re_string += p + "|"                                                    
                                                                                
    # remove last |, finish group, expect _, start mat group                    
    re_string = re_string[:-1] + ")_("                                          
    for m in mats:                                                              
        re_string += m+"|"                                                      
                                                                                
    # remove last |, finish group, num group, file ext                          
    re_string = re_string[:-1]+")([0-9]+).mov"                                  
    return re_string                                                            
                                                                                                                                                                     
                                                                                
# determine which frames to use for further analysis                            
print('Calculating summary image for each swipe video...')                      

X_train = np.zeros((ntrain,IMAGE_SIZE,IMAGE_SIZE,1))
y_train = np.zeros((ntrain,1))
X_test = np.zeros((ntest,IMAGE_SIZE,IMAGE_SIZE,1))
y_test = np.zeros((ntest,1))
train_idx = 0
test_idx = 0
for u in users:                                                                 
    print('\tCalculating summary for user', u)                                  
    for f in os.listdir(folder+"/"+u+"/segments/"):                             
                                                                                
        # extract mat, pressure, and id from filename                           
        match = re.match(make_re(pressures, mats), f, re.I)                     
        if match:                                                               
            p = match.groups()[0]                                               
            m = match.groups()[1]                                               
            n = match.groups()[2]                           
            
            # set up image reader                                               
            filename = folder + "/" + u + "/segments/" + f                      
            reader = imageio.get_reader(filename, 'ffmpeg')                     
                                                                                
            # calculate total frames and average frame intensity                
            summary = np.zeros((256,256))                                       
            for i, image in enumerate(reader):                                  
                                                                                
                orig_image = np.array(image).astype(np.uint8)[:,:,0]            
                                                                                
                # define kernel sizes                                           
                kernel1 = np.ones((3,3), np.uint8)                              
                kernel2 = np.ones((25,1), np.uint8)                             
                kernel3 = np.ones((11,11), np.uint8)                            
                kernel4 = np.ones((1,25), np.uint8)                             
                kernel5 = np.ones((7,7), np.uint8)                              
                                                                                
                # threshold image                                               
                image = orig_image.copy()                                       
                swipe = image[:,:] > SWIPE_THRESHOLD                            
                back = image[:,:] <= SWIPE_THRESHOLD                            
                image[swipe] = WHITE                                            
                image[back] = BLACK                                             
                                                                                
                # remove salt-and-pepper noise                                  
                image = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel1)        
                image = cv2.morphologyEx(image, cv2.MORPH_CLOSE, kernel1)       
                                                                                
                # dilate image so that when swipe is removed, we err on the side        
                # of removing section of table (therefore considered part of swipe)        
                # better to accidentally include near-zero region than part of hand        
                image = cv2.dilate(image, kernel3, iterations=1)                
                                                                                
                # remove thin horizontal or vertical lines                              
                no_swipe = cv2.morphologyEx(image, cv2.MORPH_OPEN, kernel2)     
                no_swipe = cv2.morphologyEx(no_swipe, cv2.MORPH_OPEN, kernel4)  
                                                                                
                # subtract frame w/o swipe from original frame to get swipe             
                swipe = image - no_swipe                                        
                                                                                
                # remove gradient around hand                                           
                swipe = cv2.morphologyEx(swipe, cv2.MORPH_OPEN, kernel5)        
                                                                                
                # use swipe region as mask over original video                          
                inv_mask = swipe[:,:] == BLACK                                  
                image = orig_image.copy()                                       
                image[inv_mask] = BLACK                                         
                                                                                
                # update summary                                                
                summary += image
                
        if u == user:
            X_test[test_idx,:,:,0] = summary
            y_test[test_idx,0] = (p == "hard")
            test_idx += 1 
        else:
            X_train[train_idx,:,:,0] = summary
            y_train[train_idx,0] = (p == "hard")
            train_idx += 1      

ImportError: No module named imageio

In [None]:
########################
# SHALLOW SINGLE FRAME #
########################

model = models.Sequential()
model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same', input_shape=(IMAGE_SIZE,IMAGE_SIZE,1)))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Dropout(0.25))
model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same'))
#model.add(layers.MaxPooling2D((2,2)))
#model.add(layers.Dropout(0.25))
#model.add(layers.Conv2D(32, (5,5), activation='relu', padding='same'))
model.add(layers.MaxPooling2D((2,2)))
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dropout(0.5))
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
  optimizer='sgd',
  metrics=['accuracy'])

X_train = np.reshape(X_train, (X_train.shape[0],IMAGE_SIZE,IMAGE_SIZE,1))
history = model.fit(X_train, y_train,
          batch_size=100,
          epochs=EPOCHS,
          verbose=0,
          )
X_test = np.reshape(X_test, (X_test.shape[0],IMAGE_SIZE,IMAGE_SIZE,1))
test_loss, test_acc = model.evaluate(X_test, y_test)
print 'Shallow Single Frame Accuracy: [', frames_dirs[fd], "]", test_acc



#######################
# SHALLOW MULTI-FRAME #
#######################
# create feature arrays from earlier per-frame predictions
y_train_frame_pred = model.predict(X_train)
X_train_svm = np.reshape(np.rollaxis(np.reshape(
    y_train_frame_pred, (len(files)//FRAMES_PER_VID,FRAMES_PER_VID,AUG_FACTOR)),1,3), (len(files),FRAMES_PER_VID))
y_train_svm = y_train[::FRAMES_PER_VID]

# same for test set
y_test_frame_pred = model.predict(X_test)
X_test_svm = np.reshape(y_test_frame_pred, (len(test_files)//FRAMES_PER_VID, FRAMES_PER_VID))
y_test_svm = y_test[::FRAMES_PER_VID]

# take average prediction, round at end
simple_y_pred = np.round(np.sum(X_test_svm, axis=1)/FRAMES_PER_VID)
avg_acc = metrics.accuracy_score(y_test_svm, simple_y_pred)

print('Per-Video AVG Accuracy:', avg_acc)