In [None]:
import tensorflow as tf
import numpy      as np

import pathlib
import config
import data

from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.preprocessing             import image
from tensorflow.keras.models                    import load_model, Sequential
from tqdm                                       import tqdm

"""
Documentation:
- numpy
    1. expand_dims()
        - https://numpy.org/doc/stable/reference/generated/numpy.expand_dims.html?highlight=expand_dims#numpy.expand_dims
    2. save()
        - https://numpy.org/doc/stable/reference/generated/numpy.save.html?highlight=save#numpy.save
- pathlib
    1. Path(), /, glob(), .name, .stem
        - https://docs.python.org/3/library/pathlib.html
- tensorflow
    - data.Dataset
        1. predict()
            - https://www.tensorflow.org/versions/r2.1/api_docs/python/tf/data/Dataset
    - keras
        - applications.inception_v3
            1. preprocess_input()
                - https://www.tensorflow.org/versions/r2.1/api_docs/python/tf/keras/applications/inception_v3
        - models
            1. load_model()
                - https://www.tensorflow.org/versions/r2.1/api_docs/python/tf/keras/models/load_model
            2. Sequential()
                1. predict()
                    - https://www.tensorflow.org/versions/r2.1/api_docs/python/tf/keras/Sequential
        - preprocessing.image
            1. img_to_array(), load_img()
                - https://www.tensorflow.org/versions/r2.1/api_docs/python/tf/keras/preprocessing/image
- tqdm
    1. tqdm()
        - https://tqdm.github.io/
"""

In [None]:
"""
Function Name: getBestSavedModel
Number of parameters: 1
List of parameters:
    1. modelCheckpointPath | pathlib.Path | Path to the CNN model checkpoints.
Pre-condition:
    1. modelCheckpointPath exists.
    2. The name of each checkpoint ends with its loss, separated from the rest of the name
       by an underscore and optionally followed by a file extension (e.g. 'model_03_0.25.h5').
Post-condition:
    1. Returns the path (as a str) to the best model in the 'modelCheckpointPath' directory.
       The best model is the one that has the smallest loss. If several models share the
       smallest loss, the one whose path sorts last is returned.
    2. Entries whose name does not end with a parsable loss (e.g. '.DS_Store') are ignored.
    3. Returns an empty string if no checkpoint is found.
"""
def getBestSavedModel(modelCheckpointPath):
    minLoss   = float('inf')
    bestModel = ""
    for dirPath in sorted(modelCheckpointPath.glob("*")):
        # The loss is the last '_'-separated token of the name without its extension.
        lossToken = dirPath.stem.rsplit("_", 1)[-1]
        try:
            loss = float(lossToken)
        except ValueError:
            # glob("*") also returns hidden/system entries that are not checkpoints; skip them
            # instead of aborting the whole search.
            continue
        # '<=' (not '<') so that, among equal losses, the last path in sorted order wins.
        if loss <= minLoss:
            bestModel = dirPath
            minLoss   = loss
    return str(bestModel)

In [None]:
"""
Function Name: getPartialModel
Number of parameters: 1
List of parameters:
    1. savedModelPath | str | Path to the saved (CNN) model.
Pre-condition:
    1. savedModelPath exists and is a .h5 file.
Post-condition:
    1. Returns a Sequential model that contains some or all of the layers in the model saved in the
       savedModelPath file. If a GlobalAveragePooling2D layer exists, then the returned model will
       consist of all layers before and including the last such layer. Otherwise, the returned
       model consists of all the layers of the saved model.
"""
def getPartialModel(savedModelPath):
    loadedModel = load_model(savedModelPath)
    layers      = loadedModel.layers

    # Keep every layer by default. This covers both a GlobalAveragePooling2D layer in the last
    # position and a model that has no such layer at all (which would otherwise yield an empty model).
    numLayersToKeep = len(layers)
    # Search from the back of the list so that the last GlobalAveragePooling2D layer is the one found.
    for index in range(len(layers) - 1, -1, -1):
        # isinstance() checks the class itself rather than the text of its internal module path,
        # so the check does not rely on a hard-coded 'tensorflow.python.keras...' string.
        if isinstance(layers[index], tf.keras.layers.GlobalAveragePooling2D):
            numLayersToKeep = index + 1
            break

    return Sequential(layers[:numLayersToKeep])

In [None]:
"""
Function Name: saveFeatureSequences
Number of parameters: 3
List of parameters:
    1. model         | tf.keras.models | Model that'll be used to make predictions.
    2. dataObj       | data.Data       | Used to iterate through the rows in data.csv.
    3. sequencesPath | pathlib.Path    | Path to the Sequences directory.
Pre-condition:
    1. 'sequencesPath' exists.
Post-condition:
    1. For every row in 'dataObj.data', a Numpy file is saved at
       sequencesPath/<row[0]>/<row[1]>/<row[2]>_featureSequence (np.save() appends '.npy').
       Each file contains a list of arrays where each array contains the features of a frame.
    2. Missing sub-directories below 'sequencesPath' are created.
    3. Nothing is returned.
"""
def saveFeatureSequences(model, dataObj, sequencesPath):
    def extractFeatures(model, framePath):
        frame = image.load_img(framePath,
                               target_size   = (299,299),
                               interpolation = "lanczos") # shape: (299, 299, 3): # of dim: 3
        frame_arr = image.img_to_array(frame)             # pixel values in range [0,255]
        frame_arr = preprocess_input(frame_arr)           # pixel values in range [-1, 1]
        frame_arr = np.expand_dims(frame_arr, axis = 0)   # expands shape to: (1, 299, 299, 3): # of dim: 4
        features  = model.predict(frame_arr)              # one row of features per input frame, i.e. a single row here
        features  = features[0]                           # drop the batch dimension: keep that single row
        return features

    for dataRow in tqdm(dataObj.data[:]):
        sequencePath = sequencesPath/dataRow[0]/dataRow[1]/(dataRow[2] + "_featureSequence")
        # np.save() does not create directories, and only 'sequencesPath' itself is guaranteed
        # to exist, so make sure the nested target directory is there before saving.
        sequencePath.parent.mkdir(parents = True, exist_ok = True)

        # get the list of paths of the frames of the video referenced in dataRow
        framePaths = dataObj.getFramesForVideo(dataRow)

        # one feature vector per frame, in the order the frames were returned
        featureSequence = [extractFeatures(model, framePath) for framePath in framePaths]
        np.save(sequencePath, featureSequence)

In [None]:
"""
Function Name: main
Number of parameters: 0
List of parameters: n/a
Pre-condition: n/a
Post-condition:
    1. Selects the best saved CNN checkpoint, builds the partial model from it, and saves the
       feature sequences extracted from the frames under the 'Sequences' directory.
"""
def main():
    dataObj    = data.Data()
    numClasses = dataObj.numClasses
    projectCfg = config.Config()
    baseDir    = pathlib.Path(projectCfg.rootPath)

    # Checkpoints are stored per number of classes: <root>/Callbacks/CNN/<numClasses>/ModelCheckpoint
    checkpointDir = baseDir.joinpath('Callbacks', 'CNN', f'{numClasses}', 'ModelCheckpoint')
    outputDir     = baseDir.joinpath('Sequences')

    featureExtractor = getPartialModel(getBestSavedModel(checkpointDir))
    saveFeatureSequences(featureExtractor, dataObj, outputDir)

In [None]:
# Run the pipeline only when this code is executed directly (as a script or in a notebook cell),
# not as a side effect of being imported.
if __name__ == "__main__":
    main()