In [1]:
from math import log
import os

import cv2
import matplotlib.pyplot as plt
import numpy as np

import torch
from torchvision import transforms
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader

from torch.utils.data import random_split
from torch.optim import AdamW

from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

# Test de la décision forte - PyTorch Edition

Après avoir entraîné le modèle, nous testons dans ce notebook ses prédictions sur une vidéo. On y trouve également la première définition d'une fonction d'inertie, destinée à limiter l'impact des valeurs aberrantes.

Cf section XXX du rapport

### Segmentation du code

 1. Définition des constantes
 2. Création du pipeline de preprocessing
 3. Création du même réseau que lors de l'entraînement
 4. Création de la fonction d'inertie
 5. Chargement du modèle et application à une vidéo

# Définition des constantes

In [2]:
# Directory prefix of the input videos; the video file name is appended to it
# by plain string concatenation, so the trailing slash is required.
FILE_PATH = "../../data/prelabeling_videos/"
# Directory prefix of the annotated output video (same trailing-slash convention).
OUTPUT_PATH = "../../data/outputs/"

# Création du pipeline de preprocessing

In [3]:
class ToTensor:
    """Turn a channel-last ndarray image into a batched, channel-first tensor."""

    def __call__(self, sample):
        """Return ``sample`` (H x W x C ndarray) as a 1 x C x H x W tensor.

        The tensor is created with ``torch.from_numpy`` from a view of
        ``sample``, so no pixel data is copied here.
        """
        # numpy images are laid out H x W x C, torch expects C x H x W.
        channel_first = sample.transpose((2, 0, 1))
        # Prepend a batch axis of length 1.
        batched = channel_first[np.newaxis, ...]
        return torch.from_numpy(batched)


class Normalize:
    """Min-max rescale an image to the 0..255 range and add a trailing channel axis."""

    def __call__(self, sample):
        """Return the normalised image with shape ``(height, width, -1)``.

        Args:
            sample: ndarray image whose first two axes are height and width.

        Returns:
            The min-max normalised image. A 2-D (single-channel) input gains a
            trailing axis of length 1; a 3-D input keeps its shape.
        """
        # Passing None lets OpenCV allocate the destination itself; the
        # result is the returned array.
        norm_img = cv2.normalize(sample, None, 0, 255, cv2.NORM_MINMAX)
        # The output shape used to be hard-coded to 69 x 223; taking it from
        # the input gives the same result for that size and also supports
        # any other frame size.
        height, width = sample.shape[:2]
        return norm_img.reshape(height, width, -1)
    
class Crop:
    """Remove rows from the top and columns from the right of an image."""

    def __init__(self, top=45, right=5):
        """Configure the crop margins.

        Args:
            top: number of rows dropped from the top of the image.
            right: number of columns dropped from the right of the image.

        The defaults reproduce the margins that were previously hard-coded,
        so ``Crop()`` behaves exactly as before.
        """
        self.top = top
        self.right = right

    def __call__(self, sample):
        """Return the cropped view of ``sample`` (rows first, columns second)."""
        width = sample.shape[1]
        # Slice relative to the actual width so right=0 keeps every column.
        return sample[self.top:, :width - self.right]
    

class Resize:
    """Convert a colour frame to grayscale and halve both of its dimensions."""

    def __call__(self, sample):
        """Return the grayscale version of ``sample`` scaled by 0.5 on each axis."""
        # NOTE(review): frames read through cv2.VideoCapture are normally BGR,
        # yet the conversion code is RGB2GRAY. Confirm this matches the
        # preprocessing used at training time before changing it.
        gray = cv2.cvtColor(sample, cv2.COLOR_RGB2GRAY)
        # dsize=(0, 0) makes OpenCV derive the output size from fx / fy.
        return cv2.resize(gray, (0, 0), fx=0.5, fy=0.5, interpolation=cv2.INTER_AREA)

# Création du même réseau que lors de l'entraînement

In [4]:
class Network(nn.Module):
    """Small CNN: three conv/pool/ReLU stages followed by a three-layer MLP head.

    The module names and their positions inside each ``nn.Sequential`` define
    the ``state_dict`` keys, so they must stay as they are for saved weights
    to load.
    """

    def __init__(self):
        super().__init__()

        # (in_channels, out_channels) of each convolutional stage, in order.
        stage_channels = [(1, 3), (3, 3), (3, 3)]
        conv_stack = []
        for c_in, c_out in stage_channels:
            conv_stack.extend([
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3),
                nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)),
                nn.ReLU(),
            ])
        self.cnn_layers = nn.Sequential(*conv_stack)

        self.flatten = nn.Flatten()

        # The head expects 468 flattened features and produces 2 outputs.
        self.linear_layers = nn.Sequential(
            nn.Linear(468, 50),
            nn.ReLU(),
            nn.Linear(50, 8),
            nn.ReLU(),
            nn.Linear(8, 2),
        )

    def forward(self, x):
        """Run ``x`` through the conv stack, flatten it, and apply the MLP head."""
        features = self.cnn_layers(x)
        flat = self.flatten(features)
        return self.linear_layers(flat)
        

# Création de la fonction d'inertie

In [5]:
def compute(target, current, inertia):
    """Move ``current`` one damped step towards ``target`` without overshooting.

    The step length is ``log(1.1 + |target - current|) * (1 - inertia)``, so it
    grows with the distance to the target and shrinks as ``inertia`` grows.

    Args:
        target: value to move towards.
        current: value held so far.
        inertia: damping factor applied as ``1 - inertia``.

    Returns:
        ``current`` shifted towards ``target`` by the step, or ``target``
        itself when the step would pass it or when the two already compare
        as neither greater nor smaller.
    """
    step = log(1.1 + abs(target - current)) * (1 - inertia)

    if current > target:
        # Moving down: clamp so we never drop below the target.
        lowered = current - step
        return target if lowered < target else lowered

    if current < target:
        # Moving up: clamp so we never rise above the target.
        raised = current + step
        return target if raised > target else raised

    return target

# Chargement du modèle et application à une vidéo

In [6]:
# Run the trained network on every frame of a video, smooth its two outputs
# with compute(), and write the frames (with a marker drawn at the smoothed
# prediction) to a new video file.

# Size passed to the writer; the marker mapping below uses half of each side
# so that a smoothed value of 0 lands at the centre of a frame of this size.
frame_width, frame_height = 456, 228
half_width, half_height = frame_width // 2, frame_height // 2

cap = cv2.VideoCapture(FILE_PATH + 'vid_lataaatqmqcrndd.avi')
# NOTE(review): an MJPG fourcc is paired with a .mp4 file name; confirm the
# local OpenCV backend accepts this combination.
out = cv2.VideoWriter(
    OUTPUT_PATH + 'deep_prediction_pytorch.mp4',
    cv2.VideoWriter_fourcc('M', 'J', 'P', 'G'), 20, (frame_width, frame_height)
)

# try/finally guarantees both handles are released even if loading the
# weights or processing a frame raises.
try:
    if not cap.isOpened():
        print("Error opening video stream or file")

    tsfrm = transforms.Compose([Resize(), Crop(), Normalize(), ToTensor()])

    nn_model = Network().cpu()
    # map_location="cpu" lets a checkpoint saved from a GPU be loaded on a
    # CPU-only machine; the model itself is kept on the CPU.
    nn_model.load_state_dict(
        torch.load("../../data/weights/weights_torch_v3_19.weights", map_location="cpu")
    )
    nn_model.eval()

    # Smoothed outputs carried from one frame to the next.
    current_dir = 0
    current_speed = 0

    with torch.no_grad():
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                # No more frames (or a read error): stop processing.
                break

            image = tsfrm(frame).float()
            # Call the module rather than .forward() so hooks are honoured.
            prediction = nn_model(image)

            # Convert to plain floats so the smoothing state never holds tensors.
            current_dir = compute(float(prediction[0, 0]), current_dir, 0.6)
            current_speed = compute(float(prediction[0, 1]), current_speed, 0.8)

            # Map the smoothed values to pixel coordinates around the centre.
            # Presumably the outputs lie in [-1, 1]; verify against training.
            val_dir = int(current_dir * half_width + half_width)
            val_speed = int(current_speed * half_height + half_height)

            cv2.circle(frame, (val_dir, val_speed), 10, (255, 0, 0), -1)
            out.write(frame)
finally:
    out.release()
    cap.release()