In [1]:
from PIL import Image
import os
import numpy as np
import matplotlib.pyplot as plt
import cv2
import pandas as pd 

In [47]:
from numpy.random import choice
import random

def tile_img(image, rows = 5, cols = 5):
    image = Image.fromarray(image)
    imgwidth, imgheight = image.size
    height = imgheight // rows
    width = imgwidth // cols
    tiles = [ ]
    for i in range(0, cols):
        for j in range(0, rows):
            box = (j * width, i * height, (j + 1) * width, (i + 1) * height)
            a = image.crop(box)
            tiles.append(np.array(a))
    return tiles 

def get_tiled_adj_lists(rows = 5, cols = 5):
    neighbor_sets = []
    for row in range(rows):
        for col in range(cols):
            neighbors = []
            for (x,y) in [(-1,-1),(-1,0),(-1,1),(0,-1),(0,1),(1,-1),(1,0),(1,1)]: # (0,0) exlcuded
                if (0 <= (row + x) < rows) and (0 <= (col + y) < cols):
                    idx  = ((row + x) * cols) + col + y
                    neighbors.append(idx)
            neighbor_sets.append(neighbors)
    return neighbor_sets

def localize_corr_matrix(corr, num_images, rows, cols, include_self = False):
    adj_lists = get_tiled_adj_lists(rows, cols)
    nodes_per_img = rows * cols 

    localized_corr = np.ones_like(corr)

    for image in range(num_images):
        for block_idx, adj_list in zip(range(nodes_per_img), adj_lists):
            true_block_idx = block_idx + (nodes_per_img * image)
            true_adj_list = list(map(lambda x: x + (nodes_per_img * image), adj_list))
            localized_corr[true_block_idx] = np.mean(corr[true_adj_list], axis = 0)
    return localized_corr

def average_across_diagonal(a):
    return (a + a.T) / 2
def softmax(x):
    y = np.exp(x - np.max(x))
    f_x = y / np.sum(np.exp(x))
    return f_x
def weighted_random_walk(transition_matrix, start_node, iterations = 1000, walks = 100):
    transition_matrix = np.array(transition_matrix)
    visit_count = np.zeros(transition_matrix.shape[0])
    for _ in range(walks):
        curr = start_node # np.random.randint(transition_matrix.shape[0]) # Pick Random Starting Node
        for _ in range(iterations): 
            visit_count[curr] += 1
            curr = random.choices(
                population=range(transition_matrix.shape[0]),
                 weights=transition_matrix[curr],
                 k=1
             )[0]
    return visit_count / visit_count.max()

In [48]:
video_path = lambda x: f"/Users/walkenz1/Datasets/SALSA/train/cam_{int(x)}/cam_{int(x)}.mp4"
flow_path = lambda x: f"/Users/walkenz1/Datasets/SALSA/train/cam_{int(x)}/cam_{int(x)}.flow.mp4"
coords_path = lambda x: f"/Users/walkenz1/Datasets/SALSA/train/cam_{int(x)}/coords_fib_cam_{int(x)}.csv"

# APL Smart Campus Multi-Camera Tracking Update A
#### By: Nile Walker on 12/11/21
Weighted Correlation Aggregation for establishing regions of overlap across static multi camera systems

## Objective Recap:
* Given footage from multiple views throughout a scene, identify individuals traveling through the scene and maintain that identity across all available perspectives in which they appear.

## Assumptions:
* The footage is from static cameras.
* The real world overlap, positionings and calibrations of the cameras are not provided.
* The footage is synchronized such that frame n from any camera will represent roughly the same real world time as frame n from any other camera.
* Ground Truth multi camera track-lets are extremely limited and may not be available at all.

## Current Problem to Solve:
### Where did I come from?
As an instance-level recognition problem, person re-ID faces two major challenges. First, the intra-class (instance/identity) variations are typically big due to the changes of camera viewing conditions. For instance, the view change across cameras (front to back) brings large appearance changes, making matching the same person difficult. Second, there are also small inter-class variations – people in public space often wear similar clothing; from a distance as typically in surveillance videos,they can look incredibly similar.

Given this it would be useful if we could leverage the fact that we're observing a realtime surveillance system inorder to place additional constraints on which identities can be linked. And our assumptions provide several...

* Identities observed at the same time in the same camera cannot be the same person.
* Under most circumstances the likelihood that two seperate images contain the same person is inversely proportional to the amount of time that has passed since the most recent image was captured.
* Security cameras are often placed along passageways and common areas in such a way that the particular sequence of cameras that an individual might pass through is predictable.

While the first two are fairly simple to apply the last one requires that we either have some information on the real world placement or relationships of each camera which according to our assumptions we don't. And without enough multi camera tracks to train from we can't just observe which cameras people tend to reappear in. So in-order to get around this we need some unsupervised method to quantify which camera B, identities from camera A are likely to appear in simultaneously or after an allowed gap time. 

## Current Solutions in Code:
So what I choose to do is leverage the fact that the footage is synchronized, and the relationship between motion of objects in overlapping cameras will usually remain consistent if they stay in the same position and are observing the same real world object. So by looking at the correlated motion over time, we can build some idea of how likely an object in camera A is related to an object in camera B. Then in order to better apply these associations to new detections we break the image into patches so that if only certain areas of the image overlap we don't build unwarranted correlations. 

At this stage every patch of a camera is treated as an individual sensor in which it provides a magnitude and a direction. I grabbed every 12th frame from a camera and then build out an array so that I can compute a correlation coefficients between each of the "sensors". At the same time I also compute corelations between the occupancy of each patch which at the moment is represented as the presence of any motion in the block.

<!--
  import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
demo_ids = [0,2]
cameras_flow = [cv2.VideoCapture(flow_path(cam_id)) for cam_id in demo_ids]
cameras = [cv2.VideoCapture(video_path(cam_id)) for cam_id in demo_ids]

trows, tcols = 3,3

figure, axes = plt.subplots(nrows=len(demo_ids), ncols=trows * tcols)
figure.set_size_inches(18.5, 10.5, forward=True)
for row, (feed, flow_feed) in zip(axes,zip(cameras,cameras_flow)):
    ret,img = [feed.read() for i in range(24)][-1]
    ret,img_flow = [flow_feed.read() for i in range(24)][-1]
    
    tiles = tile_img(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), trows, tcols)
    flows = tile_img(img_flow, trows, tcols)
    
    for col, (img, flow) in zip(row, zip(tiles, flows)):
        flow[flow==0] = 125
        added_image = cv2.addWeighted(img,0.4,flow,0.7,0)
        col.axis('off')
        col.imshow(added_image)

figure.tight_layout(h_pad = 0)

_ = [camera.release() for camera in cameras]
_ = [camera.release() for camera in cameras_flow]
-->
<img src = "images/sample_flow.png"><img/>

In [11]:
cam_ids = [2, 3]

rows, cols = 7, 7

cameras = [cv2.VideoCapture(flow_path(cam_id)) for cam_id in cam_ids]

X_components = []
Y_components = []
Z_components = [] 

interval = 12
samples = 250 
running = True
while running and samples > 0: 
    samples -= 1
    X_components.append([])
    Y_components.append([])
    Z_components.append([])

    for i, feed in enumerate(cameras):
        ret, img = [feed.read() for _ in range(interval)][-1]
        if not ret or not running:
            running = False
            break

        tiles = tile_img(img, rows, cols)
        for tile in tiles:
            tile = tile.astype('float32')
            mag, ang = cv2.cartToPolar(tile[...,0], tile[...,1])
            tile[...,0], tile[...,1] = mag, ang
            avg_val_per_row = np.average(tile, axis=0)
            avg_val = np.average(avg_val_per_row, axis=0)
            X_components[-1].append(avg_val[0])
            Y_components[-1].append(avg_val[1])
            # Using the presence of any motion as a naive substitute for occupancy 
            Z_components[-1].append(np.any(tile[tile > 0]))

# Generate occupancy measurs
X = np.array(X_components[:-1])
Y = np.array(Y_components[:-1])
Z = np.array(Z_components[:-1])

In [15]:
# Pandas Dataframes are better than numpy arrays at not producing nan values in the correlation matrix 
corr_X = np.nan_to_num(pd.DataFrame(X).corr().to_numpy())
corr_Y = np.nan_to_num(pd.DataFrame(Y).corr().to_numpy())
corr_Z = np.nan_to_num(pd.DataFrame(Z).corr().to_numpy())


corr = (abs(corr_X) * .25) + (abs(corr_Y)* .25) + (abs(corr_Z)* .5) 
for _ in range(0):
    corr = localize_corr_matrix(corr, len(cam_ids), rows, cols, include_self = False)
    
# Convert Image Patch Graph
corr = average_across_diagonal(corr)

In [20]:
corr

array([[1.        , 0.23023648, 0.06525851, ..., 0.05799291, 0.03781158,
        0.08143438],
       [0.23023648, 1.        , 0.24400587, ..., 0.1206636 , 0.13356318,
        0.08027522],
       [0.06525851, 0.24400587, 1.        , ..., 0.15764894, 0.23964499,
        0.15039676],
       ...,
       [0.05799291, 0.1206636 , 0.15764894, ..., 1.        , 0.35407363,
        0.08576147],
       [0.03781158, 0.13356318, 0.23964499, ..., 0.35407363, 1.        ,
        0.2886947 ],
       [0.08143438, 0.08027522, 0.15039676, ..., 0.08576147, 0.2886947 ,
        1.        ]])

In [None]:
overlapped_indexes = [0,1] 

In [46]:
weighted_random_walk(corr,0)

array([0.64395604, 0.73919414, 0.7970696 , 0.69084249, 0.85128205,
       0.41904762, 0.55311355, 0.72014652, 0.36043956, 0.36630037,
       0.34725275, 0.57362637, 0.67912088, 0.62930403, 0.74505495,
       0.63589744, 0.64029304, 0.60952381, 0.31062271, 0.59267399,
       0.79120879, 0.7992674 , 0.78241758, 0.66080586, 0.66520147,
       0.68644689, 0.83296703, 0.75311355, 0.68864469, 0.81465201,
       0.69230769, 0.86153846, 0.93113553, 0.8029304 , 0.74358974,
       0.62124542, 0.83150183, 0.89304029, 0.9970696 , 0.91282051,
       0.8043956 , 0.52234432, 0.62051282, 0.88937729, 0.80879121,
       0.83736264, 0.84249084, 0.75531136, 0.53479853, 0.66813187,
       0.71062271, 0.71868132, 0.72820513, 0.76483516, 0.70622711,
       0.41318681, 0.66666667, 0.6996337 , 0.7970696 , 0.81684982,
       0.79413919, 0.77875458, 0.56336996, 0.65128205, 0.94578755,
       0.7970696 , 0.77655678, 0.80879121, 0.84395604, 0.73626374,
       0.69304029, 0.93186813, 0.87252747, 0.84249084, 0.86520

In [13]:
import scipy

[1] Ergys Ristani and Carlo Tomasi. Features for multi-target
multi-camera tracking and re-identification. In Proceedings of the IEEE conference on computer vision and pattern
[]