In [1]:
""" A test detector for movement detection
in the sealhits data. """

from __future__ import annotations

import numpy as np
from sealhits.db.db import DB
from sealhits.bbox import XYBox
from sealhits.image import get_group_images
from sealhits.track import get_bounding_boxes
from sealhits.video import gen_video
from scipy import signal
from tqdm.notebook import trange, tqdm

db = DB("sealhits", "sealhits_ro", "9T0J^MpjMrE!", "juve.st-andrews.ac.uk")
groups = ["believe-entire-night-back", "remember-past-red-level", "mean-legal-hour-month", "go-meet-common-company", "say-cultural-private-job", "stand-recent-result-night", "happen-public-full-team", "ask-know-general-kid"]
groups = ["may-general-democratic-word"]

In [2]:
def get_group_data(db, group_names):
    """ Get the data for all groups together, generate the initial images."""
    gdata = []
    og_bbs = []

    for group_name in group_names:
        group = db.get_group_huid(group_name)
        gimg_data, gimages = get_group_images(db, "/home/oni/work/fits", group_name, sonarid=854, fan_transform=False)
        bbs = get_bounding_boxes(db, group_name, gimages, (gimg_data.shape[2], gimg_data.shape[1]), fan_distort=False)
        gid = group.gid
        huid = group.huid
        time_start = group.timestart
        time_end = group.timeend
        gcode = group.code
        comment = group.comment
        video_path = gen_video(gimg_data, bbs, gid, huid, time_start, time_end, gcode, comment, "/home/oni/tmp", "mp4", group_name)
        gdata.append(gimg_data)
        og_bbs.append(bbs)

    return gdata, og_bbs

gdata, og_bbs = get_group_data(db, groups)

[rawvideo @ 0x564ef8497d40] Stream #0: not enough frames to estimate rate; consider increasing probesize
Input #0, rawvideo, from 'fd:':
  Duration: N/A, start: 0.000000, bitrate: 81444 kb/s
  Stream #0:0: Video: rawvideo (RGB[24] / 0x18424752), rgb24, 512x1657, 81444 kb/s, 4 tbr, 4 tbn
Stream mapping:
  Stream #0:0 -> #0:0 (rawvideo (native) -> h264 (libx264))
[libx264 @ 0x564ef84ab4c0] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x564ef84ab4c0] profile High 4:4:4 Predictive, level 3.1, 4:4:4, 8-bit
[libx264 @ 0x564ef84ab4c0] 264 - core 164 r3108 31e19f9 - H.264/MPEG-4 AVC codec - Copyleft 2003-2023 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=4 threads=36 lookahead_threads=6 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bfr

In [3]:
from PIL import Image
from sealhits.bbox import XYZBox, bb_expand, combine_xyzbox


def resize_frames(img_data, img_size):
    """ Resize the frames - img_size is width height."""
    new_frames = np.zeros((img_data.shape[0], img_size[1], img_size[0]))
    
    for fidx in range(len(img_data)):
        pil_img = Image.fromarray(img_data[fidx].astype("uint8"))
        pil_img = pil_img.resize(img_size, Image.Resampling.BICUBIC) # BICUBIC beats NEAREST quite a bit!
        new_np = np.array(pil_img)
        new_frames[fidx] = new_np
 
    return np.array(new_frames)

def background_sub(img_data, background_frames):
    new_frames = []
    start_gap = len(img_data) - len(background_frames)

    for fidx, frame in enumerate(img_data):
        bi = fidx - start_gap
        
        if bi < 0:
            bi = 0
            
        background = background_frames[bi]  * 2.0 # This multiplication is really necessary and it's annoying! Not sure why we need it :S
        tframe = frame.astype(float)
        subbed = (tframe - background) * 2.0 # As is this one but then the sub does reduce the top end.
        subbed = np.maximum(subbed, 0) # Clipping seems to work better, but only on the stuff that's already down to 0
        subbed  = subbed / np.max(subbed) * 255.0
        new_frames.append(subbed)
        
    new_frames = np.array(new_frames)
    return new_frames

def filter_for_background(np_img_data, btype="lowpass", freq=0.1):
    num_back_frames = 16 # More seems better :/
    background_frames = []
    bi = 0
    
    while bi + num_back_frames < len(np_img_data) -1:
        low_frames = np_img_data[bi:bi+num_back_frames]
        low_frames = low_frames.astype(float)
        b, a = signal.butter(5, freq, btype=btype)
        # zi = signal.lfilter_zi(b, a)
        z = signal.lfilter(b, a, low_frames, axis=0)
        background_frames.append(np.clip(z[-1], 0.0, 255.0))
        bi+=1

    return np.array(background_frames)


def otsu_bbox(np_frames):
    from skimage.filters import threshold_otsu
    from skimage.measure import label, regionprops
    from skimage.morphology import closing, cube

    # Do closing over time and space
    # Performing Otsu over the entire volume might be a problem with occasional
    # bright frames. Local otsu over tiles/voxels might be better too.
    binary = []
    frame_size = (np_frames.shape[2], np_frames.shape[1], np_frames.shape[0])
    thresh = threshold_otsu(np_frames)
    binary = np_frames > thresh

    binary = np.array(binary, dtype=np.uint8) # Convert from bool
    binary = closing(binary, cube(5)) # Resolution dependent?

    # Now label each volume with label and max connectivity
    new_bbs = []
    labelled = label(np_frames, connectivity=3)
    regions = regionprops(labelled)

    for region in regions:
        bbox = region['bbox']
        nbb = XYZBox(bbox[2], bbox[1], bbox[0], bbox[5], bbox[4], bbox[3])
        nbb = bb_expand(nbb, frame_size, (8, 8, 1)) # TODO - How to choose expansion sizes? Maybe something smarter? Biggest gaps perhaps?
        new_bbs.append(nbb)

    # We don't keep small boxes as the combine step will take forever otherwise.
    # TODO - 5 x 5 x 5 equiv. We need to set this statistically. This is easier in fan images
    # as spatially, things are more correct in the fans than in the RAW images.
    # Keep the top nFrames * 2 boxes. Should be set statistically somehow
    # TODO - could also add a cull here on the really small ones
    def _volume(b):
        return b.volume()

    new_bbs.sort(key=_volume, reverse=True)
    new_bbs = new_bbs[:len(np_frames) * 2]

    return (binary, new_bbs)

for gi, gname in enumerate(groups):
    g = gdata[gi]
    resized = resize_frames(g, (int(g.shape[2] / 2), int(g.shape[1] / 2))) # Work on a smaller image - faster but we may loose detail.
    #resized = g
    backs_low = filter_for_background(resized, btype="bandpass", freq=[0.3, 0.4]) # Movement is in this range I think. so it seems. But there is still some high freq
    _ = gen_video(backs_low, None, "", "", "", "", "", "", "/home/oni/tmp", "mp4", gname + "_q_1")

    label_vol, new_bbs = otsu_bbox(backs_low)

    print("Len frames / backs / label_vol", len(g), len(backs_low), len(label_vol))
    print("Num bboxes", len(new_bbs))

    final_bbs = combine_xyzbox(new_bbs)
    #final_bbs = new_bbs
    img_size = (resized.shape[2], resized.shape[1])

    bbs_2D = []
    print("Num. Final bbs", len(final_bbs))

    for bb in final_bbs:
        if bb.z_max - bb.z_min > 4: # minimum of one second once combined
            
            for z in range(bb.z_min, bb.z_max + 1):
                xybox = XYBox(bb.x_min, bb.y_min, bb.x_max, bb.y_max)
                bbs_2D.append((z, xybox, "#00ff00"))

    _ = gen_video(label_vol, bbs_2D, "", "", "", "", "", "", "/home/oni/tmp", "mp4", gname + "_q_2")


Input #0, rawvideo, from 'fd:':
  Duration: N/A, start: 0.000000, bitrate: 20348 kb/s
  Stream #0:0: Video: rawvideo (RGB[24] / 0x18424752), rgb24, 256x828, 20348 kb/s, 4 fps, 4 tbr, 4 tbn
Stream mapping:
  Stream #0:0 -> #0:0 (rawvideo (native) -> h264 (libx264))
[libx264 @ 0x55a3b4d76480] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x55a3b4d76480] profile High 4:4:4 Predictive, level 2.2, 4:4:4, 8-bit
[libx264 @ 0x55a3b4d76480] 264 - core 164 r3108 31e19f9 - H.264/MPEG-4 AVC codec - Copyleft 2003-2023 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=4 threads=26 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min

Len frames / backs / label_vol 59 42 42
Num bboxes 84
Num. Final bbs 13


Input #0, rawvideo, from 'fd:':
  Duration: N/A, start: 0.000000, bitrate: 20348 kb/s
  Stream #0:0: Video: rawvideo (RGB[24] / 0x18424752), rgb24, 256x828, 20348 kb/s, 4 fps, 4 tbr, 4 tbn
Stream mapping:
  Stream #0:0 -> #0:0 (rawvideo (native) -> h264 (libx264))
[libx264 @ 0x561ed738f480] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2
[libx264 @ 0x561ed738f480] profile High 4:4:4 Predictive, level 2.2, 4:4:4, 8-bit
[libx264 @ 0x561ed738f480] 264 - core 164 r3108 31e19f9 - H.264/MPEG-4 AVC codec - Copyleft 2003-2023 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=4 threads=26 lookahead_threads=4 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min