## SUMMARY: Will correct the h5 prediction files for all of the single instance videos

# get single vids

In [3]:
import os
import random
import h5py
import numpy as np
import time

from utils import find_node_velocity, get_stats, fill_missing, graph_vels, nan_vals

In [4]:
defaultdir = '/gpfs/radev/pi/saxena/aj764'
rootdir = f'{defaultdir}/PairedTestingSessions/'

Compiles a list of all of the sessions in PairedTestingSessions that have a Videos folder in them

In [5]:
vid_subdirs = []
for subdir, dirs, files in os.walk(rootdir):
    if subdir.endswith("Videos"):
        vid_subdirs.append(subdir)
vid_subdirs = sorted(vid_subdirs)

Seperates all of the videos into single instance videos and multi instance videos (and takes out videos from before April).

In [6]:
single_vids = {}
multi_vids = {}
for vids in vid_subdirs:
    files = os.listdir(vids)
    cut_vids = vids[28:]
    single_vids[cut_vids] = []
    multi_vids[cut_vids] = []
    for file in files:
        if file.endswith('.mp4') and int(file[:2]) >= 4:
            KL_count = file.count('KL')
            EB_count = file.count('EB')
            HF_count = file.count('HF')
            if KL_count + EB_count + HF_count == 2:
                multi_vids[cut_vids].append(file)
            else:
                single_vids[cut_vids].append(file)

In [7]:
single_len_tot = 0
for key, value in single_vids.items():
    single_len_tot += len(value)
print(f'There are {single_len_tot} single instance videos')

There are 1106 single instance videos


In [8]:
multi_len_tot = 0
for key, value in multi_vids.items():
    multi_len_tot += len(value)
print(f'There are {multi_len_tot} multi instance videos')

There are 905 multi instance videos


Fills in the NaN values for all of the single instance files

# fills in missing vals

In [9]:
CHECK = False
actually_fill = False # will also have to change file open to "r+"

In [None]:
start_time = time.time()

total_intial_nan = 0
total_after_out_nan = 0
total_final_nan = 0
total_after_check_nan = 0

all_errors = np.empty((single_len_tot, 4))
all_vids = []

count = 0
for i, session in enumerate(single_vids.keys()): 
    video_list = single_vids[session]
    analysis_path = defaultdir + '/' + session[:-6] + 'Tracking/h5/'
    
    for video in video_list:
        # open analysis file
        analysis_file = analysis_path + video[:-3] + 'predictions.h5'
        with h5py.File(analysis_file,'r') as f:
            locations = f["tracks"][:].T 

            # find nan values
            intial = nan_vals(locations)

            # just to check you haven't already done this vid or it isn't empty
            if intial != 0:
                # take out positional outliers
                all_vels = {}
                for node in range(locations.shape[1]):
                    # find the velocities
                    all_vels[node] = find_node_velocity(locations[:, node, :])
                
                    # get values need to find outliers
                    mean, std, low, high = get_stats(all_vels[node])
                
                    # if you want to check that these values looks good
                    graph_vels(all_vels[node], CHECK)
                
                    # replace outliers in locations with nan
                    nan_index = [i for i in range(len(all_vels[node])) if (all_vels[node][i] > high or all_vels[node][i] < low)]
                    for index in nan_index:
                        locations[index + 1, node, 0], locations[index + 1, node, 0] = np.nan, np.nan
                
                    # if you want to check that new locations look good
                    test_vels = find_node_velocity(locations[:, node, :])
                    graph_vels(test_vels, check=CHECK, old_low=low, old_high=high)
    
                # find nan values again
                after_out = nan_vals(locations)
    
                # fill in missing locations
                print(f'video name: {video}')
                new_locations = fill_missing(locations)
                if actually_fill:
                    f["tracks"][:] = new_locations.T

                # finds nan values after filling
                after_fill = nan_vals(new_locations)

                # take out positional outliers again (to make sure we aren't filling in stupid stuff...?
                all_vels = {}
                for node in range(locations.shape[1]):
                    # find the velocities
                    all_vels[node] = find_node_velocity(new_locations[:, node, :])
                
                    # get values need to find outliers
                    mean, std, low, high = get_stats(all_vels[node])
                
                    # if you want to check that these values looks good
                    graph_vels(all_vels[node], CHECK)
                
                    # replace outliers in locations with nan
                    nan_index = [i for i in range(len(all_vels[node])) if (all_vels[node][i] > high or all_vels[node][i] < low)]
                    for index in nan_index:
                        new_locations[index + 1, node, 0], new_locations[index + 1, node, 0] = np.nan, np.nan
                
                    # if you want to check that new locations look good
                    test_vels = find_node_velocity(new_locations[:, node, :])
                    graph_vels(test_vels, check=CHECK, old_low=low, old_high=high)
                
                after_check = nan_vals(new_locations)
                
                total_intial_nan += intial
                total_after_out_nan += after_out
                total_final_nan += after_fill
                total_after_check_nan += after_check
                all_errors[count, :] = [intial, after_out, after_fill, after_check]
                all_vids.append(video)
                count += 1
    
                # if you want to check the nan/fill values for a each video
                if True:
                    # print(f'video name: {video}')
                    print(f'intial nan: {round(intial, 2)} %, after out nan: {round(after_out, 2)} %, final nan: {round(after_fill, 2)} %, after check nan: {round(after_check, 2)} %')
        
print('totals:')
print(f'intial nan: {round(total_intial_nan / single_len_tot, 2)} %, after out nan: {round(total_after_out_nan / single_len_tot, 2)} %, final nan: {round(total_final_nan / single_len_tot, 2)} %, after check nan: {round(total_after_check_nan / single_len_tot, 2)} %')
print(f'time elapse: {time.time() - start_time}')

video name: 041624_Cam4_TrNum13_IS_KL001Y.mp4
intial nan: 1.86 %, after out nan: 2.57 %, final nan: 0.0 %, after check nan: 0.51 %
video name: 041624_Cam4_TrNum1_IS_KL001B.mp4
intial nan: 3.05 %, after out nan: 3.99 %, final nan: 0.0 %, after check nan: 0.93 %
video name: 041624_Cam4_TrNum4_IS_KL001Y.mp4
intial nan: 2.81 %, after out nan: 3.95 %, final nan: 0.0 %, after check nan: 1.24 %
video name: 041624_Cam4_TrNum16_IS_KL001B.mp4
intial nan: 2.37 %, after out nan: 2.96 %, final nan: 0.0 %, after check nan: 0.46 %
video name: 041624_Cam4_TrNum14_PV_KL001G.mp4
intial nan: 1.95 %, after out nan: 2.55 %, final nan: 0.0 %, after check nan: 0.41 %
video name: 041624_Cam4_TrNum15_PV_KL001R.mp4
intial nan: 2.47 %, after out nan: 3.24 %, final nan: 0.0 %, after check nan: 0.85 %
video name: 041624_Cam4_TrNum3_PV_KL001R.mp4
intial nan: 3.56 %, after out nan: 4.42 %, final nan: 0.0 %, after check nan: 0.7 %
video name: 041624_Cam4_TrNum2_PV_KL001G.mp4
intial nan: 1.72 %, after out nan: 2.66 %,

# check our work... (abt 18 vids that I WOULDN'T trust!!)

In [50]:
file = open('single_corrections_output.txt','r')
output = file.readlines()

intial = []
after_out = []
after_fill = []
vid_name = []

stupid_flag = True
for line in output:
    if "video name" in line:
        vid_name.append(line[12:-1])
        stupid_flag = True
    if "could" in line:
        print("this is one of vids I couldn't do anything abt")
        print(vid_name[-1])
        vid_name.pop()
        stupid_flag = False

    if "intial" in line and stupid_flag:
        parsed = line.split('%')
        
        init_temp = float(parsed[0][12:])
        out_temp = float(parsed[1][16:])
        fill_temp = float(parsed[2][12:])
        intial.append(init_temp)
        after_out.append(out_temp)
        after_fill.append(fill_temp)
        

this is one of vids I couldn't do anything abt
091924_Cam2_TrNum15_IS_KL005Y.mp4
this is one of vids I couldn't do anything abt
102124_Cam1_TrNum8_IS_HF004Y.mp4


In [55]:
print("average over all videos:")
print(f'intial nan: {round(np.mean(intial), 2)} %, after out nan: {round(np.mean(after_out), 2)} %, final nan: {round(np.mean(after_fill), 2)} %')

average over all videos:
intial nan: 4.64 %, after out nan: 5.5 %, final nan: 0.0 %


In [57]:
# so for some reason these predictions started out atrocious... YIKES, maybe wouldn't trust the correct files
for i, init in enumerate(intial):
    if init > 20:
        print(init, vid_name[i])

26.17 101824_Cam1_TrNum2_IS_KL004Y.mp4
21.69 101824_Cam1_TrNum1_IS_KL003B.mp4
22.73 101824_Cam2_TrNum1_IS_KL006G.mp4
54.21 102124_Cam1_TrNum2_IS_HF006G.mp4
47.02 102124_Cam1_TrNum1_IS_HF003B.mp4
54.67 102124_Cam1_TrNum3_IS_HF004Y.mp4
36.01 102124_Cam2_TrNum1_IS_HF006G.mp4
32.88 102124_Cam2_TrNum3_IS_HF008Y.mp4
43.66 102124_Cam2_TrNum8_IS_HF008Y.mp4
30.4 102124_Cam2_TrNum2_IS_HF001R.mp4
29.07 102224_Cam2_TrNum3_IS_HF003B.mp4
32.05 102224_Cam2_TrNum1_IS_HF004Y.mp4
31.84 102224_Cam2_TrNum2_IS_HF001R.mp4
51.08 102224_Cam1_TrNum2_IS_HF008Y.mp4
50.73 102224_Cam1_TrNum1_IS_HF001R.mp4
55.0 102224_Cam1_TrNum3_IS_HF006G.mp4
