## SUMMARY: Will correct the h5 prediction files for all of the training cooperation videos for a specified color pair

# get color vids

In [3]:
import os
import random
import h5py
import numpy as np
import time

from utils import find_node_velocity, get_stats, fill_missing, graph_vels, nan_vals

In [4]:
defaultdir = '/gpfs/radev/pi/saxena/aj764'
rootdir = f'{defaultdir}/Training_COOPERATION/'

Compiles a list of all of the sessions in PairedTestingSessions that have a Videos folder in them

In [5]:
vid_subdirs = []
for subdir, dirs, files in os.walk(rootdir):
    vid_subdirs.append(subdir)
vid_subdirs = sorted(vid_subdirs)

Seperates all of the videos into single instance videos and multi instance videos (and takes out videos from before April).

In [6]:
multi_vids = {}
for vids in vid_subdirs:
    files = os.listdir(vids)
    cut_vids = vids[28:]
    multi_vids[cut_vids] = []
    for file in files:
        if file.endswith('.mp4'): # and int(file[:2]) >= 4:
            multi_vids[cut_vids].append(file)
            

In [7]:
multi_len_tot = 0
for key, value in multi_vids.items():
    multi_len_tot += len(value)
print(f'There are {multi_len_tot} multi instance videos')

There are 360 multi instance videos


In [8]:
# split the multi-instance videos into their respective color pairs...
color_vids = {}
for key, value in multi_vids.items():
    for vid in value:
        parsed = vid.split('-')
        
        trial_color = [parsed[0][-1], parsed[1][5]]
        trial_key = ''
        if 'R' in trial_color:
            trial_key += 'R'
        if 'G' in trial_color:
            trial_key += 'G'
        if 'Y' in trial_color:
            trial_key += 'Y'
        if 'B' in trial_color:
            trial_key += 'B'
        if trial_key not in color_vids.keys():
            color_vids[trial_key] = []
        color_vids[trial_key].append(vid)

In [9]:
len_tot = 0
for key, value in color_vids.items():
    print(f'There are {len(value)} videos from {key} color pair')
    len_tot += len(value)
print('\n')
print(f'There are {len_tot} multi instance videos')

There are 132 videos from YB color pair
There are 18 videos from GB color pair
There are 103 videos from RG color pair
There are 19 videos from GY color pair
There are 32 videos from RB color pair
There are 46 videos from RY color pair
There are 10 videos from B color pair


There are 360 multi instance videos


# fills in missing vals

In [20]:
CHECK = False
actually_fill = False # will also have to change file open to "r+"
color_pair = 'GY'
max_rat = 70

In [21]:
start_time = time.time()

all_errors = np.empty((len(color_vids[color_pair]), 5))
all_vids = []

count = 0
for i, session in enumerate(multi_vids.keys()): 
    video_list = multi_vids[session]
    analysis_path = defaultdir + '/' + session + '/Tracking/h5/'
    
    for video in video_list:
        if not (int(session[-6:-4]) > 7 or int(session[-2:]) > 24): # to not get collar vids!!
            if video in color_vids[color_pair]:
                # open analysis file
                analysis_file = analysis_path + video[:-3] + 'predictions.h5'
                with h5py.File(analysis_file,'r') as f:
                    locations = f["tracks"][:].T 
        
                    # find FIRST nan values
                    intial = nan_vals(locations)
                    all_errors[count][0] = intial
        
                    # just to check you haven't already done this vid or it isn't empty
                    if intial != 0:
                        # take out positional outliers
                        for rat in range(locations.shape[-1]): # for each rat (not actually necessary, the dims work out without this loop but I don't feel like thinking abt that)
                            all_vels = {}
                            for node in range(locations.shape[1]): # for each node
                                # find the velocities
                                all_vels[node] = find_node_velocity(locations[:, node, :,  rat:rat+1])
                            
                                # replace outliers in locations with nan
                                nan_index = [i for i in range(len(all_vels[node])) if (all_vels[node][i] > max_rat)]
                                for index in nan_index:
                                    locations[index + 1, node, 0, rat], locations[index + 1, node, 0, rat] = np.nan, np.nan
            
                        # find SECOND nan values
                        all_errors[count][1] = nan_vals(locations)
                        
            
                        # fill in missing locations
                        # print(f'video name: {video}')
                        new_locations = fill_missing(locations)
                        if actually_fill:
                            f["tracks"][:] = new_locations.T
        
                        # finds THIRD nan values
                        all_errors[count][2] = nan_vals(new_locations)
    
                        # take out positional outliers again (to make sure we aren't filling in stupid stuff...?
                        all_vels = {}
                        for node in range(locations.shape[1]):
                            # find the velocities
                            all_vels[node] = find_node_velocity(new_locations[:, node, :])
                        
                            # replace outliers in locations with nan
                            nan_index = [i for i in range(len(all_vels[node])) if (all_vels[node][i] > max_rat)]
                            for index in nan_index:
                                new_locations[index + 1, node, 0], new_locations[index + 1, node, 0] = np.nan, np.nan
                       
                        # finds FOURTH nan values
                        all_errors[count][3] = nan_vals(new_locations)
                        all_vids.append(video)
                        count += 1
            
                        # if you want to check the nan/fill values for a each video
                        if False:
                            # print(f'video name: {video}')
                            print(f'intial nan: {round(intial, 2)} %, after out nan: {round(after_out, 2)} %, final nan: {round(after_fill, 2)} %, after check nan: {round(after_check, 2)} %')
print(f'time elapse: {time.time() - start_time}')

time elapse: 125.38346433639526


In [22]:
for i in range(4):
    x = all_errors[:, i]
    print('mean: ' + str(np.mean(x)) + '; median: ' + str(np.median(x)) + '; std: ' + str(np.std(x)))

mean: 39.61304251035633; median: 43.187177073669524; std: 6.356250125247971
mean: 41.49972094306441; median: 45.32077630981967; std: 6.386508025424418
mean: 0.0; median: 0.0; std: 0.0
mean: 5.534987673515754; median: 5.4626244275387; std: 1.1127932581568585


In [23]:
len(all_vids)

19

In [24]:
np.save(f'errors/{color_pair}/training_vids.npy', all_vids)
np.save(f'errors/{color_pair}/training_errors.npy', all_errors)


In [25]:
all_vids

['032924_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '033024_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '033124_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '040124_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '040224_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '040324_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '040424_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '040524_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '040824_COOPTRAIN_LARGEARENA_KL007G-KL007Y_Camera3.mp4',
 '070224_COOPTRAIN_LARGEARENA_HF006G-HF008Y_Camera1.mp4',
 '070324_COOPTRAIN_LARGEARENA_HF006G-HF008Y_Camera1.mp4',
 '070524_COOPTRAIN_LARGEARENA_HF006G-HF008Y_Camera1.mp4',
 '070824_COOPTRAIN_LARGEARENA_HF006G-HF008Y_Camera1.mp4',
 '070924_COOPTRAIN_LARGEARENA_HF006G-HF008Y_Camera3.mp4',
 '071024_COOPTRAIN_LARGEARENA_HF006G-HF008Y.txt_Camera3.mp4',
 '071124_COOPTRAIN_LARGEARENA_HF006G-HF008Y.txt_Camera3.mp4',
 '071224_COOPTRAIN_LARGEARENA_HF006G-HF008Y_Camera4.mp4',
 '0715

In [26]:
all_errors

array([[3.83183883e+001, 4.00854854e+001, 0.00000000e+000,
        5.55485812e+000, 1.13333704e-310],
       [3.07547475e+001, 3.21647708e+001, 0.00000000e+000,
        4.15582314e+000, 4.66433987e-310],
       [2.94110523e+001, 3.07021620e+001, 0.00000000e+000,
        4.02834843e+000, 1.13333704e-310],
       [3.34524053e+001, 3.51129405e+001, 0.00000000e+000,
        5.46262443e+000, 1.13333704e-310],
       [3.10691504e+001, 3.28946797e+001, 0.00000000e+000,
        5.82400975e+000, 1.13333704e-310],
       [3.38647953e+001, 3.62458644e+001, 0.00000000e+000,
        7.39959092e+000, 1.13333704e-310],
       [3.45574708e+001, 3.64400721e+001, 0.00000000e+000,
        5.59001840e+000, 1.13333704e-310],
       [3.28378145e+001, 3.48468118e+001, 0.00000000e+000,
        5.94856573e+000, 1.13333704e-310],
       [3.48077821e+001, 3.76995720e+001, 0.00000000e+000,
        8.87307940e+000, 1.13333704e-310],
       [4.51323593e+001, 4.70417705e+001, 0.00000000e+000,
        5.43145681e+000

# check our work... (abt 28 vids that I WOULDN'T trust!!)

In [15]:
x = 69 # len(color_vids[color_pair])

In [16]:
print(f'intial nan: {round(total_intial_nan / x, 2)} %, after out nan: {round(total_after_out_nan / x, 2)} %, final nan: {round(total_final_nan / x, 2)} %')


intial nan: 23.99 %, after out nan: 24.44 %, final nan: 0.0 %


In [13]:
print(f'percent of videos intitially have over 1/5 of values nan: {round(100 * len(bad_vids) / len(color_vids[color_pair]) ,2)}% ')

percent of videos intitially have over 1/5 of values nan: 21.21% 


In [18]:
# the videos in question :(
bad_vids

['032824_COOPTRAIN_LARGEARENA_KL002B-KL002Y_Camera2.mp4',
 '032824_COOPTRAIN_LARGEARENA_KL001B-KL001Y_Camera1.mp4',
 '040124_COOPTRAIN_LARGEARENA_KL001B-KL001Y_Camera1.mp4',
 '040324_COOPTRAIN_LARGEARENA_KL001B-KL001Y_Camera1.mp4',
 '040824_COOPTRAIN_LARGEARENA_KL005B-KL005Y_Camera1.mp4',
 '041024_COOPTRAIN_LARGEARENA_EB031B-EB033Y_Camera3.mp4',
 '041224_COOPTRAIN_LARGEARENA_EB009B-EB019Y_Camera4.mp4',
 '041324_COOPTRAIN_LARGEARENA_EB031B-EB033Y_Camera4.mp4',
 '041324_COOPTRAIN_LARGEARENA_EB009B-EB019Y_Camera3.mp4',
 '041424_COOPTRAIN_LARGEARENA_EB009B-EB019Y_Camera2.mp4',
 '041524_COOPTRAIN_LARGEARENA_EB009B-EB019Y_Camera2.mp4',
 '041624_COOPTRAIN_LARGEARENA_EB009B-EB019Y_Camera2.mp4',
 '042424_COOPTRAIN_LARGEARENA_EB003B-EB019Y_Camera2.mp4',
 '042524_COOPTRAIN_LARGEARENA_EB003B-EB019Y_Camera2.mp4',
 '061224_COOPTRAIN_LARGEARENA_HF003B-HF004Y_Camera2.mp4',
 '061824_COOPTRAIN_LARGEARENA_HF003B-HF004Y_Camera2.mp4',
 '062024_COOPTRAIN_LARGEARENA_HF003B-HF004Y_Camera2.mp4',
 '062424_COOPT