In [None]:
"""
One more time: this time our approach is to create histograms of relevant IMU data, extract the values at the peaks,
find the images that were taken while those values were observed, and inspect for similarities.
"""

import os
import xarray as xr
import tabular.subset_by_images
import matplotlib.pyplot as plt
import numpy as np

# Averaging window around each image timestamp: IMU samples from
# (timestamp - narrow_time_before) through (timestamp + narrow_time_after) are averaged together.
narrow_time_before = np.timedelta64(int(0.0 * 1000), "ms")  # how far back before the timestamp the window reaches
narrow_time_after = np.timedelta64(int(0.25 * 1000), "ms")  # how far forward after the timestamp the window reaches

input_dir = os.path.abspath("../arctic_images")
wide_time = np.timedelta64(10, "s")  # half-width of the IMU slice kept alongside each image
relevant_variables = ["ROLL", "PITCH", "ROLL_WING", "PITCH_WING", "WING_ANGLE"]


def image_to_boat(fname):
    """Return the second '-'-separated piece of the first '_'-separated field of the image basename."""
    leading_field = os.path.basename(fname).split('_')[0]
    return leading_field.split('-')[1]


def image_to_datetime(fname):
    """Return the image timestamp, as parsed by tabular.subset_by_images, converted to np.datetime64."""
    parsed = tabular.subset_by_images.image_to_datetime(fname)
    return np.datetime64(parsed)


def image_to_camera(fname):
    """Return the second '-'-separated piece of the third '_'-separated field of the image basename (extension removed)."""
    stem, _extension = os.path.splitext(os.path.basename(fname))
    return stem.split('_')[2].split('-')[1]


def excerpt_to_boat(fname):
    """Return the second '_'-separated field of the excerpt basename (extension removed)."""
    stem, _extension = os.path.splitext(os.path.basename(fname))
    return stem.split('_')[1]


In [None]:
# Every regular, non-hidden file anywhere under input_dir, in sorted order
all_filenames = sorted(
    path
    for dirpath, _dirnames, basenames in os.walk(input_dir)
    for path in (os.path.join(dirpath, basename) for basename in basenames)
    if os.path.isfile(path) and not os.path.basename(path).startswith('.')
)

# One loaded dataset per non-hidden excerpt file, keyed by the boat id parsed from its filename
excerpt_paths = sorted(
    entry.path
    for entry in os.scandir("../tabular/excerpts")
    if entry.is_file() and not os.path.basename(entry.path).startswith('.')
)
excerpts = {}
for excerpt_path in excerpt_paths:
    excerpt_boat = excerpt_to_boat(excerpt_path)
    excerpts[excerpt_boat] = xr.load_dataset(excerpt_path)

# One record per image that has both a parseable timestamp and tabular data for its boat
all_images = []
for fname in all_filenames:
    try:
        taken_at = image_to_datetime(fname)
    except IndexError:
        # Some of our input filenames don't have dates in them; this is a quick and dirty way to skip them
        continue
    boat = image_to_boat(fname)
    if boat not in excerpts:
        # Some of our boats don't have tabular data; skip them too
        continue
    all_images.append({
        "boat": boat,
        "datetime": taken_at,
        "camera": image_to_camera(fname),
        "path": fname,
    })
print(f"Found {len(all_images)} images")


In [None]:
# Attach to every image record the relevant IMU columns within +/- wide_time of its timestamp
for image_record in all_images:
    taken_at = image_record["datetime"]
    nearby = excerpts[image_record["boat"]].sel(time=slice(taken_at - wide_time, taken_at + wide_time))
    image_record["dataframe"] = nearby.to_dataframe()[relevant_variables]

In [None]:
# Sanity check: show the PITCH_WING samples inside the averaging window of the first image
row = all_images[0]
window_start = row["datetime"] - narrow_time_before
window_end = row["datetime"] + narrow_time_after
print(row["dataframe"]["PITCH_WING"][window_start:window_end])

In [None]:
# Boats that have both tabular excerpts and at least one image, in sorted order
boats_with_images = {image["boat"] for image in all_images}
full_boats = sorted(boats_with_images.intersection(excerpts.keys()))

# Image records grouped by boat, each group keeping the order of all_images
images_for_boat = {
    boat: [image for image in all_images if image["boat"] == boat]
    for boat in full_boats
}

def do_histogram(iv, xrange, nbins):
    """Plot overlaid per-boat histograms of the narrow-window mean of one IMU variable.

    For every image of every boat in ``full_boats``, the variable is averaged over
    the samples between ``datetime - narrow_time_before`` and
    ``datetime + narrow_time_after``. Means outside ``xrange`` are dropped; NaN
    means fail the range comparison and are dropped as well.

    Parameters
    ----------
    iv : str
        Column name in each image's "dataframe" entry (e.g. "PITCH_WING").
    xrange : tuple
        (low, high) bounds; a mean ``v`` is kept when ``low <= v < high``.
    nbins : int
        Number of equal-width bins spanning ``xrange``.

    Notes
    -----
    The figure size is passed to ``plt.subplots``; this function does not modify
    ``plt.rcParams``.
    """
    # Size the figure when it is created: rcParams["figure.figsize"] only applies to
    # figures created after it is assigned, so setting it post-plot leaves the
    # current figure at the default size.
    _, ax = plt.subplots(figsize=(20, 6))

    # Boats with the most images are drawn first (presumably so smaller histograms end up on top)
    boats_by_image_count = sorted(full_boats, key=lambda boat: len(images_for_boat[boat]), reverse=True)
    for boat in boats_by_image_count:
        means = [
            image["dataframe"][iv][slice(image["datetime"] - narrow_time_before, image["datetime"] + narrow_time_after)].mean()
            for image in images_for_boat[boat]
        ]
        means = [v for v in means if xrange[0] <= v < xrange[1]]  # Crop to the most useful range
        # range=xrange pins the bin edges to the requested interval, so every boat shares
        # the same bins and the bin edges coincide with the tick spacing computed below.
        ax.hist(means, bins=nbins, range=xrange, alpha=0.75, label=boat)

    ax.legend()
    ax.set_xlabel(iv)

    # One major tick every 5 bins for fine histograms, every 2 bins otherwise
    bin_width = (xrange[1] - xrange[0]) / nbins
    bins_per_tick = 5 if nbins > 50 else 2
    ax.xaxis.set_major_locator(plt.MultipleLocator(bin_width * bins_per_tick))
    plt.show()

# Draw both wing variables at a fine (100-bin) and then a coarse (50-bin) resolution
for bin_count in (100, 50):
    do_histogram("PITCH_WING", (-2, 2), bin_count)
    do_histogram("ROLL_WING", (-4, 4), bin_count)

In [None]:
import shutil

def get_relevant_images(boat, camera, iv, xrange, output_dir=os.path.abspath("../representatives/histogram")):
    """Copy the images of one boat/camera whose narrow-window mean of ``iv`` lies in ``xrange``.

    The images are copied into a subdirectory of ``output_dir`` named after the
    boat, camera, variable and range. If that subdirectory already exists, nothing
    is copied and the function returns immediately.

    Parameters
    ----------
    boat : str
        Key into ``images_for_boat``.
    camera : str
        Camera id compared against each image's "camera" entry.
    iv : str
        Column name in each image's "dataframe" entry (e.g. "PITCH_WING").
    xrange : tuple
        (low, high) bounds; an image is selected when ``low <= mean < high``.
    output_dir : str
        Directory under which the subdirectory is created; missing parent
        directories are created as needed.

    Returns
    -------
    None
    """
    print(boat)
    print(iv)
    print(xrange)
    subdir = os.path.join(output_dir, f"boat_{boat}-camera_{camera}-var_{iv}-range_{xrange[0]}_{xrange[1]}")
    if os.path.exists(subdir):
        print("Directory already exists. Stopping.")
        return

    def is_relevant(image):
        # Check the camera first so the window mean is only computed for candidate images
        if image["camera"] != camera:
            return False
        window = slice(image["datetime"] - narrow_time_before, image["datetime"] + narrow_time_after)
        return xrange[0] <= image["dataframe"][iv][window].mean() < xrange[1]

    relevant_images = [image for image in images_for_boat[boat] if is_relevant(image)]
    print(f"{len(relevant_images)} relevant images")
    print(f"Saving images to {subdir}")
    # makedirs (not mkdir) so a missing output_dir does not raise FileNotFoundError
    os.makedirs(subdir)
    for image in relevant_images:
        shutil.copy2(image["path"], os.path.join(subdir, os.path.basename(image["path"])))

# Value ranges to export for camera "2" of the second boat in full_boats
# (presumably the peaks read off the histograms)
target_boat = full_boats[1]
for variable, value_range in (
    ("PITCH_WING", (-0.4, -0.35)),
    ("PITCH_WING", (0.6, 0.7)),
    ("PITCH_WING", (-1.9, -1.8)),
    ("PITCH_WING", (1.8, 1.9)),
    ("ROLL_WING", (-3.1, -3.0)),
    ("ROLL_WING", (1.4, 1.5)),
):
    get_relevant_images(target_boat, "2", variable, value_range)