In [41]:
%matplotlib inline
import matplotlib.pyplot as plt
import PIL
import tensorflow as tf
import numpy as np
import os
import time
from keras.utils.vis_utils import plot_model
from sklearn.utils.class_weight import compute_class_weight

from tensorflow.python.keras.models import Model, Sequential
from tensorflow.python.keras.layers import Dense, Flatten, Dropout
from tensorflow.python.keras.applications import VGG16
from tensorflow.python.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.optimizers import Adam, RMSprop
from tensorflow.python.keras.models import model_from_yaml
# from keras import backend as K

# Expose only CUDA device 0 to this process.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [42]:

### Helper-function for joining a directory and list of filenames.
def path_join(dirname, filenames):
    """Return a new list with `dirname` joined in front of every filename.

    Args:
        dirname: Directory path used as the prefix.
        filenames: Iterable of file names (or relative paths).

    Returns:
        List of `os.path.join(dirname, filename)` results, in input order.
    """
    joined = []
    for name in filenames:
        joined.append(os.path.join(dirname, name))
    return joined

### Helper-function for plotting images
def plot_images(images, cls_true, cls_pred=None, smooth=True):
    """Draw up to three images in a single column, captioned with class names.

    Args:
        images: Sequence of images in a form `Axes.imshow` accepts.
        cls_true: True class index of every image; must have the same
            length as `images`.
        cls_pred: Optional predicted class index of every image. When
            given, each caption also shows the predicted class name.
        smooth: Use 'spline16' interpolation when True, 'nearest' otherwise.

    Class indices are turned into names by indexing the notebook-level
    `class_names`. Only the first three images are drawn because the figure
    has three sub-plots.
    """
    assert len(images) == len(cls_true)

    # Three sub-plots stacked in one column.
    fig, axes = plt.subplots(3, 1)

    # Captions that include a prediction span two lines, so give them more room.
    fig.subplots_adjust(hspace=0.6 if cls_pred is not None else 0.3,
                        wspace=0.3)

    interpolation = 'spline16' if smooth else 'nearest'

    for idx, ax in enumerate(axes.flat):
        # Sub-plots beyond the number of available images stay empty.
        if idx < len(images):
            ax.imshow(images[idx], interpolation=interpolation)

            true_name = class_names[cls_true[idx]]
            if cls_pred is not None:
                pred_name = class_names[cls_pred[idx]]
                xlabel = "True: {0}\nPred: {1}".format(true_name, pred_name)
            else:
                xlabel = "True: {0}".format(true_name)

            # The caption is shown as the x-axis label.
            ax.set_xlabel(xlabel)

        # No tick marks on any sub-plot, used or not.
        ax.set_xticks([])
        ax.set_yticks([])

    # Render now so several figures can be produced from one notebook cell.
    plt.show()
    

### Helper-function for printing confusion matrix
# Import a function from sklearn to calculate the confusion-matrix.
from sklearn.metrics import confusion_matrix

def print_confusion_matrix(cls_pred):
    """Print and save the confusion matrix of `cls_pred` against the test labels.

    Args:
        cls_pred: Predicted class index for every image of the test-set,
            compared against the notebook-level `cls_test`.

    The matrix is printed, written to 'Confusion_matrix_20cls.txt' with
    `np.savetxt`, and followed by an "(index) name" legend built from the
    notebook-level `class_names`.
    """
    matrix = confusion_matrix(y_true=cls_test, y_pred=cls_pred)

    print("Confusion matrix:")
    print(matrix)
    np.savetxt('Confusion_matrix_20cls.txt', matrix)

    # Legend that maps the row/column numbers back to class names.
    legend = ["({0}) {1}".format(index, name)
              for index, name in enumerate(class_names)]
    for line in legend:
        print(line)

### Helper-function for plotting example errors
def plot_example_errors(cls_pred):
    """Plot the first three mis-classified test images with true/predicted labels.

    Args:
        cls_pred: Array of predicted class indices for the whole test-set,
            in the same order as the notebook-level `cls_test` and
            `image_paths_test`.
    """
    # Boolean mask of the test images whose prediction is wrong.
    wrong = (cls_pred != cls_test)

    # Restrict paths, predictions and ground truth to the wrong ones.
    wrong_paths = np.array(image_paths_test)[wrong]
    wrong_pred = cls_pred[wrong]
    wrong_true = cls_test[wrong]

    # Only the first three errors are loaded and shown.
    first_three = slice(0, 3)
    plot_images(images=load_images(image_paths=wrong_paths[first_three]),
                cls_true=wrong_true[first_three],
                cls_pred=wrong_pred[first_three])

def example_errors():
    """Predict the whole test-set, then show example errors and the confusion matrix.

    Uses the notebook-level `generator_test` and `new_model`. The class
    predictions are handed to `plot_example_errors` and
    `print_confusion_matrix`, both of which compare them element-wise with
    `cls_test`, so exactly one prediction per test image is required.
    """
    # Start from the first test image so the predictions line up with the
    # file order recorded in `generator_test.filenames` / `cls_test`.
    generator_test.reset()

    # Whole number of batches that covers every test image exactly once
    # (rounded up to include a final partial batch). Keras documents `steps`
    # as an integer; deriving it here avoids relying on a float global.
    n_batches = int(np.ceil(generator_test.n / float(generator_test.batch_size)))

    # Class probabilities for all images in the test-set.
    y_pred = new_model.predict_generator(generator_test,
                                         steps=n_batches)

    # Most probable class index per image.
    cls_pred = np.argmax(y_pred, axis=1)

    # Plot examples of mis-classified images.
    plot_example_errors(cls_pred)

    # Print the confusion matrix.
    print_confusion_matrix(cls_pred)

### Helper-function for loading images
def load_images(image_paths):
    """Read every file in `image_paths` with `plt.imread` and return one array.

    Args:
        image_paths: Iterable of image file paths.

    Returns:
        `np.asarray` of the list of loaded images, in input order.
    """
    loaded = []
    for image_path in image_paths:
        loaded.append(plt.imread(image_path))
    return np.asarray(loaded)

### Helper-function for plotting training history
def plot_training_history(history):
    """Plot accuracy and loss curves for training and test, then save the figure.

    Args:
        history: Object whose `.history` dict holds the per-epoch lists
            'categorical_accuracy', 'loss', 'val_categorical_accuracy'
            and 'val_loss' (as returned by the fit call in this notebook).

    The figure is displayed and then written to 'errors_20cls.png' and
    'errors_20cls.eps'.
    """
    record = history.history

    # Training-set curves.
    train_acc = record['categorical_accuracy']
    train_loss = record['loss']

    # Validation curves (the test-set doubles as validation data here).
    test_acc = record['val_categorical_accuracy']
    test_loss = record['val_loss']

    fig = plt.figure()

    # Blue: training-set; lines for accuracy, dots for loss.
    plt.plot(train_acc, linestyle='-', color='b', label='Training Acc.')
    plt.plot(train_loss, 'o', color='b', label='Training Loss')

    # Red: test-set.
    plt.plot(test_acc, linestyle='--', color='r', label='Test Acc.')
    plt.plot(test_loss, 'o', color='r', label='Test Loss')

    plt.title('Training and Test Accuracy')
    plt.legend()

    # Display first, then export the same figure object in both formats.
    plt.show()
    for target in ('errors_20cls.png', 'errors_20cls.eps'):
        fig.savefig(target)

## Example Predictions: 
# We need a helper-function for loading and resizing an image 
# so it can be input to the VGG16 model,
# as well as doing the actual prediction and showing the result.
def predict(image_path):
    """Classify one image with the ImageNet VGG16 `model` and print its top guesses.

    The image is loaded, converted to RGB, resized to the notebook-level
    `input_shape`, displayed, run through Keras' VGG16 `preprocess_input`
    and finally through the notebook-level `model`. The decoded ImageNet
    predictions are printed as "score : name" lines.

    Args:
        image_path: Path of the image file to classify.
    """
    # `import PIL` alone does not guarantee the `PIL.Image` submodule is loaded.
    from PIL import Image

    # Force three channels: grayscale, palette or RGBA files would otherwise
    # produce an array that does not match the model's input shape.
    img = Image.open(image_path).convert('RGB')

    # `input_shape` is (height, width) while PIL expects (width, height).
    target_size = (int(input_shape[1]), int(input_shape[0]))
    img_resized = img.resize(target_size, Image.LANCZOS)

    # Plot the image.
    plt.imshow(img_resized)
    plt.show()

    # Add the batch dimension; float dtype so preprocessing can subtract means.
    img_array = np.expand_dims(np.array(img_resized, dtype=np.float32), axis=0)

    # The ImageNet weights were trained on inputs prepared by this function;
    # feeding raw 0-255 RGB pixels degrades the predictions.
    img_array = preprocess_input(img_array)

    # One score per ImageNet class.
    pred = model.predict(img_array)

    # Decode the output of the VGG16 model (first and only image of the batch).
    pred_decoded = decode_predictions(pred)[0]

    # Print the predictions.
    for _code, name, score in pred_decoded:
        print("{0:>6.2%} : {1}".format(score, name))

## Helper-function for printing whether a layer in the VGG16 model should be trained.
def print_layer_trainable():
    """Print "<trainable flag>:<tab><layer name>" for every layer of `conv_model`."""
    report = ["{0}:\t{1}".format(layer.trainable, layer.name)
              for layer in conv_model.layers]
    for line in report:
        print(line)

In [None]:
# Image folders: `train_dir` feeds the training generator and `test_dir`
# (the "valid" folder) feeds the test generator.
train_dir, test_dir = './Train_Valid/train/', './Train_Valid/valid/'
print('train_dir:', train_dir)

In [None]:
## Pre-Trained Model: VGG16
model = VGG16(include_top=True, weights='imagenet')

# Spatial input size (height, width) expected by the network.
# `Model.input_shape` is a plain tuple; `layers[0].output_shape` can be a
# list in some Keras versions, which would make this slice come out empty.
input_shape = model.input_shape[1:3]
print('input_shape:',input_shape)

# Augmentation for the training images only; the test images are just rescaled.
# Disabled options kept for reference: rotation_range=180, zoom_range=[0.9, 1.5]
datagen_train = ImageDataGenerator(
      rescale=1./255,
      width_shift_range=0.1,
      height_shift_range=0.05,
      shear_range=0.1,
      horizontal_flip=False,
      vertical_flip=False,
      fill_mode='nearest')
datagen_test = ImageDataGenerator(rescale=1./255)

batch_size = 20

# Set to True to save the randomly transformed training images, so they can
# be inspected for excessive distortion and the parameters above adjusted.
SAVE_AUGMENTED_IMAGES = False
save_to_dir = './augmented_images/' if SAVE_AUGMENTED_IMAGES else None

print(save_to_dir)
print('training set:')
generator_train = datagen_train.flow_from_directory(directory=train_dir,
                                                    target_size=input_shape,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    save_to_dir=save_to_dir)

print('test set:')
# shuffle=False keeps the batch order aligned with `generator_test.filenames`.
generator_test = datagen_test.flow_from_directory(directory=test_dir,
                                                  target_size=input_shape,
                                                  batch_size=batch_size,
                                                  shuffle=False)

# Whole number of batches that covers every test image exactly once.
# Keras documents step counts as integers, so round up to include the
# final partial batch instead of passing a float.
steps_test = int(np.ceil(generator_test.n / float(batch_size)))
print('steps_test:', steps_test)

image_paths_train = path_join(train_dir, generator_train.filenames)
image_paths_test = path_join(test_dir, generator_test.filenames)

cls_train = generator_train.classes
cls_test = generator_test.classes

# Class names ordered by their integer index, so class_names[i] is class i.
class_names = [name for name, _ in sorted(generator_train.class_indices.items(),
                                          key=lambda item: item[1])]

num_classes = generator_train.num_classes
print('num_classes:', num_classes)

# Balanced class weights. sklearn returns one weight per entry of `classes`;
# Keras documents `class_weight` as a {class_index: weight} dict, so the
# array is converted here.
classes_train = np.unique(cls_train)
balanced_weights = compute_class_weight(class_weight='balanced',
                                        classes=classes_train,
                                        y=cls_train)
class_weight = {int(cls): float(weight)
                for cls, weight in zip(classes_train, balanced_weights)}

print('class_weight:', class_weight)
print('class_names:\n',class_names)

print('Model summary:')
# summary() prints by itself and returns None, so it is not wrapped in print().
model.summary()





## Building a new model with the pretrained VGG16 base

In [None]:
# Cut the pre-trained network after its last pooling layer and reuse
# everything up to that point as a convolutional base.
transfer_layer = model.get_layer('block5_pool')
print('Transfer_layer output:\n',transfer_layer.output)
dataset= ['UW_IOM'][0]
conv_model = Model(inputs=model.input,
                   outputs=transfer_layer.output)

# Freeze the convolutional base: on the model as a whole and on each layer.
conv_model.trainable = False
for layer in conv_model.layers:
    layer.trainable = False
print_layer_trainable()

# New classifier: frozen base -> flatten -> 1024-unit ReLU layer -> dropout
# -> softmax over the classes found in the training folder.
new_model = Sequential([
    conv_model,
    Flatten(),
    Dense(1024, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
])

optimizer = Adam(lr=1e-7)
loss = 'categorical_crossentropy'
metrics = ['categorical_accuracy']

# Compile so that the trainable flags set above take effect.
new_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

print('Model summary:')
print(new_model.summary())

# Write a diagram of the architecture, including tensor shapes.
plot_model(new_model, to_file='VGG_model.png', show_shapes=True)

In [None]:
epochs = 1
steps_per_epoch = 10

# Keras documents `class_weight` as a {class_index: weight} dict. Accept the
# array form returned by sklearn as well and convert it; this assumes the
# array is ordered by class index 0..K-1 -- TODO confirm if classes can be
# missing from the training folder.
if isinstance(class_weight, dict):
    fit_class_weight = class_weight
else:
    fit_class_weight = {index: float(weight)
                        for index, weight in enumerate(class_weight)}

# Step counts must be whole numbers of batches; rounding up keeps the final
# partial batch of the test-set in the validation pass.
validation_steps = int(np.ceil(steps_test))

history = new_model.fit_generator(generator=generator_train,
                                  epochs=epochs,
                                  steps_per_epoch=steps_per_epoch,
                                  class_weight=fit_class_weight,
                                  validation_data=generator_test,
                                  validation_steps=validation_steps)

In [None]:
# Show the accuracy/loss curves of the training run above and save them
# as 'errors_20cls.png' / 'errors_20cls.eps'.
plot_training_history(history)

## Save the model

In [None]:
# serialize model to YAML
from os import path

models_dir = './Models'
if not path.isdir(models_dir):
    # Let a real failure raise here with the OS error message, instead of
    # printing a note and then failing at open() below. exist_ok covers the
    # case where the directory appears between the check and the call.
    os.makedirs(models_dir, exist_ok=True)
    print ("Successfully created the directory %s " % models_dir)

# Architecture goes to YAML, weights to HDF5; both are needed to reload.
model_yaml = new_model.to_yaml()
with open("./Models/VGG16model.yaml", "w") as yaml_file:
    yaml_file.write(model_yaml)
# serialize weights to HDF5
new_model.save_weights("./Models/VGG16model.h5")
print("Saved model to disk")

## Evaluate the model on a few examples and save the confusion matrix

In [None]:
# Step counts must be whole numbers of batches; rounding up makes the
# evaluation cover the final partial batch of the test-set as well.
result = new_model.evaluate_generator(generator_test,
                                      steps=int(np.ceil(steps_test)))
# result[0] is the loss, result[1] the categorical accuracy.
print("Test-set classification accuracy: {0:.2%}".format(result[1]))
example_errors()

# Feature extraction

In [None]:
def ExtractFeatures(model_, dataset, featureType, rescale=1. / 255,
                    frames_folder_path='./Frames/', features_root='./Features/'):
    """Extract per-frame features for every video folder and save them as `.npy`.

    Each sub-folder of `frames_folder_path` is treated as one video whose
    frames are image files named by their frame number (e.g. `12.jpg`).
    Frames are processed in numeric order and the resulting array of shape
    (n_frames, feature_dim) is written to
    `<features_root>/<dataset>/<featureType>/<folder>.npy`.

    Args:
        model_: Trained Keras model; features are read from `model_.layers[2]`.
        dataset: Dataset name, used as a path component of the output.
        featureType: Feature-set name, used as a path component of the output.
        rescale: Factor applied to the pixel values before prediction. The
            default matches the `rescale=1./255` of the image generators the
            model is trained with in this notebook. Pass None to feed raw
            0-255 pixel values.
        frames_folder_path: Directory holding one sub-folder per video.
        features_root: Root directory for the saved features.

    Returns:
        None. Results are written to disk.
    """
    # `import PIL` alone does not guarantee the `PIL.Image` submodule is loaded.
    from PIL import Image

    # Build the output directory with os.path.join so the files end up
    # inside it rather than next to it with a concatenated name.
    saving_path = os.path.join(features_root, str(dataset), str(featureType))
    os.makedirs(saving_path, exist_ok=True)

    # layers[2] is the first fully-connected layer when the model is the
    # Sequential stack built in this notebook (conv base, Flatten, Dense, ...).
    feature_extractor = Model(inputs=model_.input,
                              outputs=model_.layers[2].output)
    # Feature length is read from the model instead of hard-coding 1024.
    feature_dim = int(feature_extractor.output_shape[-1])

    height, width = [int(dim) for dim in feature_extractor.input_shape[1:3]]
    print(frames_folder_path)
    print('input_shape:', (height, width))

    for folder in sorted(os.listdir(frames_folder_path)):
        frame_path = os.path.join(frames_folder_path, folder)
        if not os.path.isdir(frame_path):
            # Stray files next to the video folders are not videos.
            continue
        print('video', folder)

        # Keep only JPEG files whose name is a frame number, and order them
        # numerically (so that 10 comes after 9, not after 1).
        frames = []
        for file_name in os.listdir(frame_path):
            stem, extension = os.path.splitext(file_name)
            if stem.isdigit() and extension.lower() in ('.jpg', '.jpeg'):
                frames.append((int(stem), file_name))
        frames.sort()

        # One row per frame, one column per feature.
        feature_vector = np.zeros([len(frames), feature_dim])
        for row, (_, file_name) in enumerate(frames):
            # Force three channels so every frame matches the model input.
            img = Image.open(os.path.join(frame_path, file_name)).convert('RGB')
            # PIL expects (width, height); the model shape is (height, width).
            img_resized = img.resize((width, height), Image.LANCZOS)

            # Add the batch dimension and apply the training-time rescaling.
            img_array = np.expand_dims(
                np.asarray(img_resized, dtype=np.float32), axis=0)
            if rescale is not None:
                img_array = img_array * rescale

            features = feature_extractor.predict(img_array)
            feature_vector[row, :] = np.squeeze(features)

        # To replicate features (e.g. np.tile / np.repeat), transform
        # `feature_vector` here before it is saved.
        print(np.shape(feature_vector))
        np.save(os.path.join(saving_path, folder), feature_vector)

    return


### Load the saved model and verify that it was restored correctly

In [None]:
# load YAML and create model
model_name ='VGG16model'
featureType=['PreTrained','FineTuned45'][0]
# \Data\UW_dataset\Saved-models\Amazon_VGG16model_1024_UW_500_300

# NOTE(review): the YAML text is parsed by `model_from_yaml`; only load
# model files that were written by this notebook or another trusted source.
with open('./Models/'+model_name+'.yaml', 'r') as yaml_file:
    loaded_model_yaml = yaml_file.read()
loaded_model = model_from_yaml(loaded_model_yaml)
# load weights into new model
loaded_model.load_weights('./Models/'+model_name+'.h5')
print("Loaded model from disk")

# evaluate loaded model on test data
optimizer = Adam(lr=1e-5)
loss = 'categorical_crossentropy'
metrics = ['categorical_accuracy']

# The reloaded model has to be compiled before it can be evaluated.
loaded_model.compile(loss=loss,optimizer=optimizer, metrics=metrics)

print('Model summary:')
# summary() prints by itself and returns None, so it is not wrapped in print().
loaded_model.summary()

## Define the test generator
train_dir = './Train_Valid/train/'
test_dir = './Train_Valid/valid/'

# `Model.input_shape` is a tuple of plain ints (plus None for the batch),
# which is what `target_size` below needs.
input_shape = loaded_model.input_shape[1:3]
print('input_shape:',input_shape)

# Disabled options kept for reference: rotation_range=180, zoom_range=[0.9, 1.5]
datagen_train = ImageDataGenerator(
      rescale=1./255,
      width_shift_range=0.1,
      height_shift_range=0.1,
      shear_range=0.1,
      horizontal_flip=False,
      vertical_flip=False,
      fill_mode='nearest')
datagen_test = ImageDataGenerator(rescale=1./255)

batch_size = 20

# Set to True to save the randomly transformed training images, so they can
# be inspected for excessive distortion and the parameters above adjusted.
SAVE_AUGMENTED_IMAGES = False
save_to_dir = './augmented_images/' if SAVE_AUGMENTED_IMAGES else None

print(save_to_dir)
print('training set:')
generator_train = datagen_train.flow_from_directory(directory=train_dir,
                                                    target_size=input_shape,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    save_to_dir=save_to_dir)

print('test set:')
# shuffle=False keeps the batch order aligned with `generator_test.filenames`.
generator_test = datagen_test.flow_from_directory(directory=test_dir,
                                                  target_size=input_shape,
                                                  batch_size=batch_size,
                                                  shuffle=False)

# Whole number of batches that covers every test image exactly once.
# Keras documents step counts as integers, so round up to include the
# final partial batch instead of passing a float.
steps_test = int(np.ceil(generator_test.n / float(batch_size)))
print('steps_test:', steps_test)

result = loaded_model.evaluate_generator(generator_test, steps=steps_test)
# result[0] is the loss, result[1] the categorical accuracy.
print("Test-set classification accuracy: {0:.2%}".format(result[1]))

## Extract features

In [None]:
# Run the feature extraction with the reloaded model and report how long
# it took, in seconds of wall-clock time.
t = time.time()
ExtractFeatures(model_=loaded_model,
                dataset=dataset,
                featureType=featureType)
elapsed = time.time() - t
print('Time elapsed: ', elapsed)