In [1]:
# dependencies`
from facenet_pytorch import MTCNN
import torch
import numpy as np
import itertools as it
import mmcv, cv2
import matplotlib.pyplot as plt

from scipy.linalg import block_diag
from scipy.optimize import linear_sum_assignment
from PIL import Image, ImageDraw
from IPython import display
from pathlib import Path
from time import time
from JAVER.track_tools import FrameHandler, FaceTracker

import logging
for name in logging.root.manager.loggerDict:
    logging.getLogger(name).setLevel("CRITICAL") 
    
from JAVER.logger import create_logger
logger = create_logger(level='DEBUG')
logger.setLevel('DEBUG')

In [2]:
# setup face tracker
p = Path("../../tests/data/Elon Musk/")
ref_paths = list(p.glob("inliers/*"))

face_tracker = FaceTracker(
    image_size=160, 
    ref_paths=ref_paths, 
    batch_size=8, 
    step_large=150, 
    step_small=10
)

In [3]:
# Single face video
video_path = '../../tests/data/Elon Musk/sample_short.mp4'
faces_dict_short = face_tracker.detect(video_path)

[2020-04-19 13:35:34,632] [32mINFO @ line 374: Rollback to 0[0m
[2020-04-19 13:35:39,712] [32mINFO @ line 398: Rollforward to 410 (Current tail @ 480)[0m
[2020-04-19 13:35:39,960] [32mINFO @ line 374: Rollback to 710[0m
[2020-04-19 13:35:44,506] [32mINFO @ line 402: Finished Tracking[0m


In [4]:
display.Video(video_path, width=640)

In [5]:
# Multi face video
video_path = '../../tests/data/Elon Musk/multi_person_jre.mp4'
faces_dict_long = face_tracker.detect(video_path)

In [6]:
display.Video(video_path, width=640)

## Data Association 

In [194]:
%load_ext autoreload
%autoreload 1  

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [195]:
from scipy.spatial.distance import mahalanobis

In [196]:
class Track:
    def __init__(self, face):
        # Kalman stuff
        x1, y1, x2, y2 = face.bbox
        x = np.mean((x1, x2))
        y = np.mean((y1, y2))
        s = np.linalg.norm((x1 - x2, y1 - y2))

        self.dt = dt = 0.1

        self.state_x = np.array([x, y, s, 0, 0, 0])
        self.state_prev_x = self.state_x

        self.state_cov = P = np.diag(np.ones(self.state_x.shape))

        self.H = np.asarray([
            [1, 0, 0, dt, 0, 0],
            [0, 1, 0, 0, dt, 0],
            [0, 0, 1, 0, 0, dt],
            [0, 0, 0, 1, 0, 0],
            [0, 0, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 1],
        ])

        # confidence probably needs tuning
        conf = 1
        self.R = np.diag(np.ones(len(self.state_x))) * conf

        # Deep sort stuff
        self.gallery = []
        self.none_count = 0
        pass

    def predict_state(self):
        x_now = self.state_x
        P_now = self.state_cov
        H = self.H

        x_pred = H @ x_now
        P_pred = H @ P_now @ H.T

        return x_pred, P_pred

    def update(self, face):
        if face is None:
            self.none_count += 1
        else:
            self.update_gallery(face)
            self.update_state(face)
            self.none_count = 0
        return

    def update_state(self, face):
        z = self.format_measurement(face)
        
        x_now = self.state_x
        P_now = self.state_cov

        H = self.H
        R = self.R

        K = P_now @ H.T @ np.linalg.inv(H @ P_now @ H.T + R)

        x_next = x_now + K @ (z - H @ x_now)
        P_next = P_now - K @ H @ P_now

        self.state_prev_x = self.state_x
        self.state_x = x_next
        self.state_cov = P_next / np.linalg.norm(P_next)

        return
    
    def format_measurement(self, face):
        x1, y1, x2, y2 = face.bbox
        _, _, _, x_prev, y_prev, s_prev = self.state_prev_x

        x = np.mean((x1, x2))
        y = np.mean((y1, y2))
        s = np.linalg.norm((x1 - x2, y1 - y2))
        xv = x - x_prev
        yv = y - y_prev
        sv = s - s_prev
        z = np.array([x, y, s, xv, yv, sv])
        return z


    def update_gallery(self, face):
        self.gallery.append(face)
        pass


In [197]:
from JAVER.track_tools import Face


In [198]:
def _d_1(face, track):
    """Motion Descriptor"""
    assert type(face) == Face
    assert type(track) == Track

    z = track.format_measurement(face)  # new measurement vector

    y, S = track.predict_state()  # should be next (x,y) predict by kalman
    
    # normalize each array
    z = z / np.linalg.norm(z)
    y = y / np.linalg.norm(y)
    S = S / np.linalg.norm(S)
    
    dist = mahalanobis(z, y, S)
    return dist


def _d_2(face, track):
    """Appearance Descriptor"""
    assert type(face) == Face
    assert type(track) == Track

    dist = 1
    r_j = face.embedding
    for _face in track.gallery:
        r_i = _face.embedding
        r_j_dist = r_j.T @ r_i
        assert 0 <= r_j_dist <= 1
        dist = min(dist, r_j_dist)
    return 1 - dist


def association_cost_matrix(faces, tracks, lam=0.1):
    n_faces = len(faces)
    n_tracks = len(tracks)
    C = np.zeros((n_faces, n_tracks))
    for i, j in it.product(range(n_faces), range(n_tracks)):
        C[i, j] = lam * _d_1(faces[i], tracks[j]) + (1 - lam) * _d_2(faces[i], tracks[j])
    return C


def gate_matrix(faces, tracks, _thresh_1=50, _thresh_2=1):
    n_faces = len(faces)
    n_tracks = len(tracks)
    
    G = np.zeros((n_faces, n_tracks))
    for i, j in it.product(range(n_faces), range(n_tracks)):
        G[i, j] = (_d_1(faces[i], tracks[j]) <= _thresh_1) * (_d_2(faces[i], tracks[j]) <= _thresh_2)
    G = G.astype(int)

    return G

In [199]:
%time
# from JAVER.track_tools import association_cost_matrix, gate_matrix, Track

salt = 1 / 10 ** 9
pepper = 10 ** 6

tracks_alive = []
tracks_dead = []

lam = 0.1
thresh_1, thresh_2 = 0.6, 0.3
kill_thresh = 2

for _, (frame, faces) in enumerate(faces_dict_short.items()):
    if _: # except first case
    
        C = association_cost_matrix(faces, tracks_alive, lam)
        G = gate_matrix(faces, tracks_alive, thresh_1, thresh_2) + salt
        gated_cost_matrix = C/G
        assert C.shape == G.shape, f'{C.shape}, {G.shape}'
        row_idxs, col_idxs = linear_sum_assignment(gated_cost_matrix)

        # row_idxs[i] is assigned to col_idxs[j]
        # (faces[ri] is assigned to tracks_alive[ci])
        for ri, ci in zip(row_idxs, col_idxs):
            if gated_cost_matrix[ri, ci] < pepper:
                logger.debug(f'Frame: {frame}. Add face {ri} to track {ci}')
                tracks_alive[ci].update(faces[ri])
                del faces[ri]
            else:
                tracks_alive[ci].update(None)
                logger.debug(f'Frame: {frame}. Track {ci} was not associated to any face. none_count={tracks_alive[ci].none_count}')
                
        
        for i, track in enumerate(tracks_alive):
            if i not in col_idxs:
                tracks_alive[i].update(None)
                logger.debug(f'Frame: {frame}. Track {ci} was not associated to any face. none_count={tracks_alive[ci].none_count}')
            
            # Terminate track with continous null updates
            if track.none_count >= kill_thresh:
                tracks_dead.append(track)
                del tracks_alive[i]
                logger.debug(f'Frame: {frame}. Track {i} killed.')
        
    # Generate new tracks if required
    for face in faces:
        new_track = Track(face)
        tracks_alive.append(new_track)
        
for track in tracks_alive:
    tracks_dead.append(track)

[2020-04-16 11:28:31,691] [36mDEBUG @ line 27: Frame: 10. Add face 0 to track 0[0m
[2020-04-16 11:28:31,693] [36mDEBUG @ line 27: Frame: 20. Add face 0 to track 0[0m
[2020-04-16 11:28:31,695] [36mDEBUG @ line 27: Frame: 30. Add face 0 to track 0[0m
[2020-04-16 11:28:31,697] [36mDEBUG @ line 27: Frame: 40. Add face 0 to track 0[0m
[2020-04-16 11:28:31,699] [36mDEBUG @ line 27: Frame: 50. Add face 0 to track 0[0m
[2020-04-16 11:28:31,701] [36mDEBUG @ line 27: Frame: 60. Add face 0 to track 0[0m
[2020-04-16 11:28:31,703] [36mDEBUG @ line 27: Frame: 70. Add face 0 to track 0[0m
[2020-04-16 11:28:31,705] [36mDEBUG @ line 27: Frame: 80. Add face 0 to track 0[0m
[2020-04-16 11:28:31,710] [36mDEBUG @ line 27: Frame: 90. Add face 0 to track 0[0m
[2020-04-16 11:28:31,712] [36mDEBUG @ line 27: Frame: 100. Add face 0 to track 0[0m
[2020-04-16 11:28:31,714] [36mDEBUG @ line 27: Frame: 110. Add face 0 to track 0[0m
[2020-04-16 11:28:31,716] [36mDEBUG @ line 27: Frame: 120. Add

CPU times: user 0 ns, sys: 0 ns, total: 0 ns
Wall time: 14.3 µs


[2020-04-16 11:28:31,791] [36mDEBUG @ line 27: Frame: 710. Add face 0 to track 0[0m
[2020-04-16 11:28:31,795] [36mDEBUG @ line 27: Frame: 720. Add face 0 to track 0[0m
[2020-04-16 11:28:31,798] [36mDEBUG @ line 27: Frame: 730. Add face 0 to track 0[0m
[2020-04-16 11:28:31,801] [36mDEBUG @ line 27: Frame: 740. Add face 0 to track 0[0m
[2020-04-16 11:28:31,804] [36mDEBUG @ line 27: Frame: 750. Add face 0 to track 0[0m
[2020-04-16 11:28:31,807] [36mDEBUG @ line 27: Frame: 760. Add face 0 to track 0[0m
[2020-04-16 11:28:31,811] [36mDEBUG @ line 27: Frame: 770. Add face 0 to track 0[0m
[2020-04-16 11:28:31,822] [36mDEBUG @ line 27: Frame: 780. Add face 0 to track 0[0m
[2020-04-16 11:28:31,825] [36mDEBUG @ line 27: Frame: 790. Add face 0 to track 0[0m
[2020-04-16 11:28:31,828] [36mDEBUG @ line 27: Frame: 800. Add face 0 to track 0[0m
[2020-04-16 11:28:31,832] [36mDEBUG @ line 32: Frame: 810. Track 0 was not associated to any face. none_count=1[0m
[2020-04-16 11:28:31,83

In [202]:
for i, track in enumerate(tracks_dead):
    cnt = len(track.gallery)
    print(f'Track {i} has {cnt} face images')

Track 0 has 48 face images
Track 1 has 25 face images


In [207]:
frame_handler

NameError: name 'frame_handler' is not defined

In [219]:
arr = np.random.random(size=(300,1000,1000))
ts = list(range(50, 200))
xs = np.random.randint(0, 900, size=1000)
ys = np.random.randint(0, 900, size=1000)

for i in range(100):
    arr[ts, xs:xs+100, ys:ys+100]

TypeError: only integer scalar arrays can be converted to a scalar index

In [233]:
arr[[ts, 10, 4]].shape

  arr[[ts, 10, 4]].shape


(150,)

In [239]:
import pickle

from moviepy.editor import *
from moviepy.video.tools.tracking import manual_tracking, to_fxfy

# LOAD THE CLIP (subclip 6'51 - 7'01 of a chaplin movie)
clip = VideoFileClip("../../videos/chaplin.mp4").subclip((6,51.7),(7,01.3))

# MANUAL TRACKING OF THE HEAD

# the three next lines are for the manual tracking and its saving
# to a file, it must be commented once the tracking has been done
# (after the first run of the script for instance).
# Note that we save the list (ti,xi,yi), not the functions fx and fy
# (that we will need) because they have dependencies.

#txy, (fx,fy) = manual_tracking(clip, fps=6)
#with open("../../chaplin_txy.dat",'w+') as f:
#    pickle.dump(txy)



# IF THE MANUAL TRACKING HAS BEEN PREVIOUSLY DONE,
# LOAD THE TRACKING DATA AND CONVERT IT TO FUNCTIONS x(t),fy(t)

with open("../../chaplin_txy.dat",'r') as f:
    fx,fy = to_fxfy( pickle.load(f) )


# BLUR CHAPLIN'S HEAD IN THE CLIP

clip_blurred = clip.fx( vfx.headblur, fx, fy, 25)


# Generate the text, put in on a grey background

txt = TextClip("Hey you ! \n You're blurry!", color='grey70',
               size = clip.size, bg_color='grey20',
               font = "Century-Schoolbook-Italic", fontsize=40)
               
               
# Concatenate the Chaplin clip with the text clip, add audio

final = concatenate_videoclips([clip_blurred,txt.set_duration(3)]).\
          set_audio(clip.audio)

# We write the result to a file. Here we raise the bitrate so that
# the final video is not too ugly.

final.write_videofile('../../blurredChaplin.avi', bitrate="3000k")


ModuleNotFoundError: No module named 'pygame'