In [8]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import numpy as np
import cv2
import pandas as pd

import os

from tqdm.notebook import tqdm, tnrange
import matplotlib.pyplot as plt

# Show the TensorFlow version in use: this notebook calls tf.Session(), a TF 1.x API.
print(tf.__version__)

1.14.0


# Test de la décision forte
##### Ou comment prendre une décision quand ça ne va pas tout droit

Après avoir entrainé le modèle, nous testons dans ce notebook ses prédictions avec une vidéo.

Cf partie 6 section 4 du rapport

### Segmentation du code

 1. Définition des constantes
 2. Création de la chaine de pré-traitement
 3. Création du pipeline passant de la frame à la prédiction 
 4. Chargement du modèle
 5. Application à une vidéo

# Définition des constantes

In [2]:
# Directory the input video is read from.
FOLDER_PATH = "../data/videos/"
# Directory the annotated video is written to.
OUTPUT_PATH = "../data/outputs/"
# Weights file loaded into the network by build_model().
WEIGHTS_PATH = "../data/weights/weights_tf_last.h5"

# Frame size (pixels) handed to the video writer and used to map
# predictions onto frame coordinates.
WIDTH = 456
HEIGHT = 228

# A single session, registered as the Keras backend session.
SESSION = tf.Session()
keras.backend.set_session(SESSION)

# Création de la chaine de pré-traitement

In [3]:
class Resize:
    """
    Shrink an image to half of its width and half of its height
    """
    def __call__(self, sample):
        """
        Run the down-scaling step

        @param sample: a OpenCV image
        @return: the image scaled by 0.5 along both axes
        """
        # A (0, 0) target size makes OpenCV derive the output size from fx / fy.
        half_size = cv2.resize(
            sample,
            (0, 0),
            fx=0.5,
            fy=0.5,
            interpolation=cv2.INTER_AREA,
        )
        return half_size
    
    
class Crop:
    """
    Remove pixels on the top and the right
    """
    def __init__(self, top=45, right=5):
        """
        Configure the crop margins

        @param top: number of rows dropped from the top of the image
        @param right: number of columns dropped from the right of the image
        @raise ValueError: if a margin is negative
        """
        if top < 0 or right < 0:
            raise ValueError("crop margins must be non-negative")
        self.top = top
        self.right = right

    def __call__(self, sample):
        """
        Apply the transformation

        @param sample: a OpenCV image (Numpy array with at least 2 dimensions)
        @return: the cropped image; empty along an axis when the margin
                 is larger than the image
        """
        width = sample.shape[1]
        # Clamp at 0: a negative stop index would wrap around and keep
        # columns instead of removing them on very narrow images.
        last_column = max(width - self.right, 0)
        return sample[self.top:, :last_column]

    
class Normalize:
    """
    Min-max rescale the image to the range 0..1 and give it the shape (69, 223, channels)
    """
    def __call__(self, sample):
        """
        Run the normalization step

        @param sample: a OpenCV image holding 69 x 223 pixels
        @return: the rescaled image reshaped to (69, 223, -1)
        """
        # NOTE(review): the result dtype is chosen by cv2.normalize; for an
        # integer input the 0..1 output may be quantized - confirm.
        destination = np.zeros(sample.shape)
        rescaled = cv2.normalize(sample, destination, 0, 1, cv2.NORM_MINMAX)

        # The last axis absorbs whatever channel count the input carries.
        return rescaled.reshape(69, 223, -1)
    

class DataFormatting:
    """
    Change the data type of a Numpy array to float32
    """
    def __call__(self, sample):
        """
        Run the conversion step

        @param sample: a Numpy image
        @return: a float32 copy of the image
        """
        return sample.astype(np.float32)
    
    
class ToTensor:
    """
    Prepend the batch dimension (axis 0) and cast to float32
    """
    def __call__(self, sample):
        """
        Run the batching step

        @param sample: a Numpy array of dimension (69, 223, 1)
        @return: a float32 Numpy array of dimension (1, 69, 223, 1)
        """
        # -1 lets Numpy work out the batch size from the number of elements.
        batched = sample.reshape(-1, 69, 223, 1)
        return batched.astype(np.float32)
    
    
class ProcessChain:
    """
    Preprocessing pipeline applied to a frame before it reaches the CNN.
    Every step must be callable, and the output of one step must be
    a valid input for the step that follows it.
    """
    def __init__(self):
        """
        Build the ordered list of steps, stored in "line"
        """
        # NOTE(review): no step converts to grayscale, yet ToTensor reshapes
        # to a single channel; a 3-channel frame would come out as a batch
        # of 3 - confirm what the input frames look like.
        steps = [Resize(), Crop(), Normalize(), ToTensor()]
        self.line = steps

    def transform(self, image):
        """
        Feed the image through every step of "line", in order

        @param image: a OpenCV image of dimension (456, 228, 3)
        @return: the output of the last step,
                 a Numpy array of dimension (1, 69, 223, 1)
        """
        result = image
        for step in self.line:
            result = step(result)

        return result

In [4]:
# Preprocessing steps ending with a float32 cast instead of ToTensor.
# NOTE(review): nothing in the visible part of the notebook reads this list - confirm it is still needed.
transformations = [Resize(), Crop(), Normalize(), DataFormatting()]

# Création du pipeline passant de la frame à la prédiction 

In [5]:
class Image2Prediction:
    """
    Pipeline going from a video frame to the
    predicted direction and speed
    """
    def __init__(self, model):
        """
        Set up the pipeline

        @param model: the pretrained keras model
        """
        self.model = model
        self.process = ProcessChain()

    def analyze(self, frame):
        """
        Preprocess the frame and run the model on it

        @param frame: a Numpy image
        @return: the raw direction and speed prediction
        """
        with SESSION.as_default():
            batch = self.process.transform(frame)
            # Only the first row of the model output is used.
            first_row = self.model.predict(batch)[0]
            direction, speed = first_row

            return direction, speed

# Chargement du modèle

In [6]:
def build_model():
    """
    Create the CNN and load its weights from WEIGHTS_PATH

    The network is three (3x3 convolution + 2x2 max-pooling) stages,
    a flatten layer, then dense layers of 50, 8 and 2 units.

    @return: the keras model with the weights loaded
    """
    stack = []

    # Layers are created in the same order as they appear in the network.
    for _ in range(3):
        stack.append(layers.Conv2D(3, (3, 3), padding="valid", activation="relu"))
        stack.append(layers.MaxPooling2D(pool_size=(2, 2)))

    stack.append(layers.Flatten())

    stack.append(layers.Dense(50, activation="relu"))
    stack.append(layers.Dense(8, activation="relu"))
    # No activation on the 2-unit output layer.
    stack.append(layers.Dense(2, activation=None))

    network = keras.Sequential(stack)

    # The variables must exist before the weights can be loaded.
    network.build((1, 69, 223, 1))
    network.load_weights(WEIGHTS_PATH)

    return network

# Application à une vidéo

In [7]:
# Read the input video frame by frame, draw the predicted (direction, speed)
# point on each frame and write the annotated frames to the output video.
cap = cv2.VideoCapture(FOLDER_PATH + 'move_by_hand1.mp4')
out = cv2.VideoWriter(
    OUTPUT_PATH + 'deep_prediciton.mp4',
    cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'),
    20, (WIDTH, HEIGHT)
)

if not cap.isOpened():
    print("Error opening video stream or file")

# Report a writer that could not be opened instead of staying silent.
if not out.isOpened():
    print("Error opening output video file")

# Progress bar
length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
pbar = tqdm(total=length, desc="Frames")

# Not read anywhere in this cell; kept in case other cells rely on them.
current_dir = 0
current_speed = 0

try:
    model = Image2Prediction(build_model())

    while cap.isOpened():
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret:
            # No more frames to read: end of the process
            break

        dir_prediction, speed_prediction = model.analyze(frame)

        # Map the predictions to pixel coordinates centred on the frame.
        # NOTE(review): presumably both predictions lie in [-1, 1] - confirm.
        val_dir = int(dir_prediction * WIDTH / 2 + WIDTH / 2)
        val_speed = int(speed_prediction * HEIGHT / 2 + HEIGHT / 2)

        # show prediction in the frame
        # NOTE(review): the writer was created for (WIDTH, HEIGHT) frames;
        # confirm the input video has that size.
        cv2.circle(frame, (val_dir, val_speed), 10, (255, 0, 0), -1)
        out.write(frame)
        pbar.update()
finally:
    # Release everything even when the model build or a prediction fails.
    pbar.close()
    out.release()
    cap.release()

Frames:   0%|          | 0/3603 [00:00<?, ?it/s]

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
