SUMMARY: Finds all of the multi-instance videos in the PairedTestingSessions directory that don't have a corresponding predictions file, then creates a job txt file and saves it to the misha jobs folder.

In [1]:
# Despite its name, this is a string label: it is used as the name of the
# sub-folder (inside the job directory) that the generated job file is written to.
NUM_BATCH_JOBS = 'multi_sleap_track' 
# Throughput used for the time estimates below: megabytes of video per second of compute,
# measured with partition=gpu, cpus-per-gpu=8, mem-per-cpu=10G, gpus=a40:1
mb_per_sec = 0.20447582936507938 # with partition=gpu, cpus-per-gpu=8, mem-per-cpu=10G, gpus=a40:1

In [2]:
import os
import random

In [3]:
# Base directory; session paths and the generated job commands are expressed relative to it.
defaultdir = '/gpfs/radev/pi/saxena/aj764'
# Root folder that is walked to find every session's "Videos" directory.
rootdir = f'{defaultdir}/PairedTestingSessions/'

Compiles a list of all of the sessions in PairedTestingSessions that have a Videos folder in them

In [4]:
# Every directory under `rootdir` whose path ends in "Videos", in sorted order.
vid_subdirs = sorted(
    walked_dir
    for walked_dir, _subfolders, _filenames in os.walk(rootdir)
    if walked_dir.endswith("Videos")
)

Separates all of the videos into single-instance videos and multi-instance videos (and takes out videos from before April).

In [5]:
# Split each session's .mp4 files into single- and multi-instance videos.
# Both dicts map a session's "Videos" path (relative to `defaultdir`) to a list of file names.
single_vids = {}
multi_vids = {}
# Length of the `defaultdir + '/'` prefix to strip from each absolute path; derived
# from `defaultdir` so the slice stays correct if the base directory ever changes.
prefix_len = len(defaultdir) + 1
# Animal-ID tags; a file name containing exactly two of them in total is a multi-instance video.
id_tags = ('KL', 'EB', 'HF')
for vids in vid_subdirs:
    files = os.listdir(vids)
    cut_vids = vids[prefix_len:]
    single_vids[cut_vids] = []
    multi_vids[cut_vids] = []
    for file in files:
        if not file.endswith('.mp4'):
            continue
        try:
            month = int(file[:2])
        except ValueError:
            # File name does not start with a two-digit number, so it cannot be date-filtered; skip it
            # instead of aborting the whole scan.
            continue
        # Keep only videos whose leading two digits are >= 4.
        # NOTE(review): this looks at the month only, not the year - confirm that is intended.
        if month < 4:
            continue
        tag_count = sum(file.count(tag) for tag in id_tags)
        if tag_count == 2:
            multi_vids[cut_vids].append(file)
        else:
            single_vids[cut_vids].append(file)

In [6]:
# Total number of single-instance videos across every session.
single_total = sum(len(session_videos) for session_videos in single_vids.values())
print(f'There are {single_total} single instance videos')

There are 1106 single instance videos


In [7]:
# Total number of multi-instance videos across every session.
multi_total = sum(len(session_videos) for session_videos in multi_vids.values())
print(f'There are {multi_total} multi instance videos')

There are 905 multi instance videos


In [8]:
# split the multi-instance videos into their respective color pairs...
color_vids = {}
for key, value in multi_vids.items():
    for vid in value:
        trial_color = [vid[-12], vid[-5]]
        trial_key = ''
        if 'R' in trial_color:
            trial_key += 'R'
        if 'G' in trial_color:
            trial_key += 'G'
        if 'Y' in trial_color:
            trial_key += 'Y'
        if 'B' in trial_color:
            trial_key += 'B'
        if trial_key not in color_vids.keys():
            color_vids[trial_key] = []
        color_vids[trial_key].append(vid)

In [9]:
# Report how many multi-instance videos belong to each color pair, then the grand total.
pair_counts = {pair: len(pair_videos) for pair, pair_videos in color_vids.items()}
for pair, count in pair_counts.items():
    print(f'There are {count} videos from {pair} color pair')
print('\n')
print(f'There are {sum(pair_counts.values())} multi instance videos')

There are 128 videos from RY color pair
There are 169 videos from YB color pair
There are 168 videos from GB color pair
There are 176 videos from GY color pair
There are 99 videos from RB color pair
There are 165 videos from RG color pair


There are 905 multi instance videos


Creates a string composed of all of the necessary commands to run inference on all multi instance videos for a particular color that don't have a corresponding predictions file.

In [10]:
# start_command = f'module load miniconda; conda activate sleap; cd {defaultdir};'
# centroid_model = 'Tracking/DLC_Pairs/YellowBlue-V1-2024-07-23/SLEAP/models/250202_164327.centroid'
# topdown_model = 'Tracking/DLC_Pairs/YellowBlue-V1-2024-07-23/SLEAP/models/250202_164327.multi_class_topdown'
# color_pair = 'YB' # choose one of these pairs ['RY', 'YB', 'GB', 'GY', 'RB', 'RG']

In [11]:
# start_command = f'module load miniconda; conda activate sleap; cd {defaultdir};'
# centroid_model = 'Nina_Model_Testing/Red-Yellow/models/250225_122400.centroid'
# topdown_model = 'Nina_Model_Testing/Red-Yellow/models/250225_122400.multi_class_topdown'
# color_pair = 'RY' # choose one of these pairs ['RY', 'YB', 'GB', 'GY', 'RB', 'RG']

In [10]:
# start_command = f'module load miniconda; conda activate sleap; cd {defaultdir};'
# centroid_model = 'Nina_Model_Testing/Red-Green/models/250220_172128.centroid'
# topdown_model = 'Nina_Model_Testing/Red-Green/models/250220_172128.multi_class_topdown'
# color_pair = 'RG' # choose one of these pairs ['RY', 'YB', 'GB', 'GY', 'RB', 'RG']

In [14]:
# start_command = f'module load miniconda; conda activate sleap; cd {defaultdir};'
# centroid_model = 'Nina_Model_Testing/Red-Blue/models/250309_231434.centroid'
# topdown_model = 'Nina_Model_Testing/Red-Blue/models/250309_231434.multi_class_topdown'
# color_pair = 'RB' # choose one of these pairs ['RY', 'YB', 'GB', 'GY', 'RB', 'RG']

In [18]:
# Active configuration: the models and color pair the job file is generated for.
# Shell prelude put at the start of every job line: loads miniconda, activates the
# `sleap` environment and changes into `defaultdir`.
start_command = f'module load miniconda; conda activate sleap; cd {defaultdir};'
# Model paths passed to `sleap-track -m`; they are relative, so they resolve against
# `defaultdir` because of the `cd` in `start_command`.
centroid_model = 'Nina_Model_Testing/Green-Yellow/models/250309_231459.centroid'
topdown_model = 'Nina_Model_Testing/Green-Yellow/models/250309_231459.multi_class_topdown'
color_pair = 'GY' # choose one of these pairs ['RY', 'YB', 'GB', 'GY', 'RB', 'RG']

In [19]:
# Build `command_lines`: one shell command line per video of the selected color pair
# that does not yet have a .slp predictions file. Each line runs `sleap-track` and then
# `sleap-convert` (to an analysis .h5) on the result.
command_list = []
# Set for constant-time membership tests instead of re-scanning the list for every video.
wanted_videos = set(color_vids[color_pair])
for session, video_list in multi_vids.items():
    # `session` ends in 'Videos'; tracking output goes in the sibling 'Tracking' folder.
    output_path = defaultdir + '/' + session[:-len('Videos')] + 'Tracking'

    # makes directories for tracking output if not already made
    # (exist_ok avoids the check-then-create race and also creates missing parents)
    for subfolder in ('slp', 'h5'):
        os.makedirs(f'{output_path}/{subfolder}', exist_ok=True)

    # adds command for every video that doesn't have a corresponding prediction
    for video in video_list:
        if video not in wanted_videos:
            continue
        # Relative to `defaultdir`; valid because every job line starts with `cd {defaultdir}`.
        video_path = f'{session}/{video}'
        # 'name.mp4' -> 'name.predictions.' (the extension is appended below)
        output_file = video[:-3] + 'predictions.'
        slp_file = f'{output_path}/slp/{output_file}slp'
        h5_file = f'{output_path}/h5/{output_file}h5'
        if os.path.isfile(slp_file):
            continue
        # Paths are precomputed so the f-strings do not nest same-type quotes,
        # which is a SyntaxError on Python versions before 3.12.
        track_command = f'sleap-track "{video_path}" --first-gpu -o "{slp_file}" -m "{centroid_model}" -m "{topdown_model}"'
        convert_command = f'; sleap-convert --format analysis -o "{h5_file}" "{slp_file}"'
        command_list.append(start_command + track_command + convert_command)
# Every command is terminated by a newline, matching the one-job-per-line file format.
command_lines = ''.join(command + '\n' for command in command_list)

Saves the commands to a txt file in the job directory.

In [20]:
# Directory that holds the job folders; the trailing '/' is required because the
# batch folder name is appended to it by plain string concatenation.
jobdir = '/gpfs/radev/project/saxena/aj764/ood/projects/default/'

In [21]:
# The job file goes into a sub-folder of `jobdir` named after the batch label.
commanddir = f'{jobdir}{NUM_BATCH_JOBS}'
if not os.path.isdir(commanddir):
    os.mkdir(commanddir)
# One job file per color pair; an existing file is overwritten.
job_file_path = f"{commanddir}/{color_pair}_vids_job.txt"
with open(job_file_path, "w") as job_file:
    job_file.write(command_lines)

Keeps track of which videos have predictions and which don't

In [11]:
# Sort every multi-instance video into "done" (both the .slp and the .h5 output exist)
# or "not done", recording its path relative to `defaultdir` and its size in MB.
done_vids, done_vids_size = [], []
not_done_vids, not_done_vids_size = [], []
for session, video_list in multi_vids.items():
    output_path = defaultdir + '/' + session[:-6] + 'Tracking'

    for video in video_list:
        video_path = f'{session}/{video}'
        output_file = video[:-3] + 'predictions.'
        has_slp = os.path.isfile(f'{output_path}/slp/{output_file}slp')
        has_both = has_slp and os.path.isfile(f'{output_path}/h5/{output_file}h5')
        if has_both:
            target_names, target_sizes = done_vids, done_vids_size
        else:
            target_names, target_sizes = not_done_vids, not_done_vids_size
        target_names.append(video_path)
        # bytes -> megabytes
        target_sizes.append(os.path.getsize(f'{defaultdir}/{video_path}') / 10 ** 6)
num_done = len(done_vids)
num_not_done = len(not_done_vids)
# Estimated compute time in seconds: total megabytes divided by the measured MB-per-second rate.
time_done = sum(done_vids_size) / mb_per_sec
time_not_done = sum(not_done_vids_size) / mb_per_sec

In [12]:
def seconds(secs):
    """Format a duration in seconds as a human-readable string.

    The duration is rounded to the nearest whole second and broken down into
    days, hours, minutes and seconds. Units whose count is zero are omitted and
    each unit is pluralised unless its count is exactly 1.

    Args:
        secs: Non-negative duration in seconds (int or float).

    Returns:
        A string such as "1 day, 2 hours, 5 seconds". A duration that rounds to
        zero yields "0 seconds" rather than an empty string.
    """
    # Round once up front so float input does not leak into the output
    # (e.g. "11.0 days" or "11.116955124191009 seconds").
    total = int(round(secs))
    days, remainder = divmod(total, 86400)
    hours, remainder = divmod(remainder, 3600)
    minutes, secs_left = divmod(remainder, 60)
    parts = [
        f'{count} {unit}{"" if count == 1 else "s"}'
        for count, unit in (
            (days, 'day'),
            (hours, 'hour'),
            (minutes, 'minute'),
            (secs_left, 'second'),
        )
        if count
    ]
    return ', '.join(parts) if parts else '0 seconds'

In [13]:
# Progress summary: share of videos already tracked plus estimated compute times.
tracked_count = len(done_vids)
untracked_count = len(not_done_vids)
percent_tracked = round((tracked_count / (untracked_count + tracked_count)) * 100, 2)
print(f'{percent_tracked}% of videos have been tracked ({tracked_count} tracked videos, {untracked_count} untracked videos)')
compute_done = seconds(time_done)
compute_not_done = seconds(time_not_done)
print(f'compute time of tracked videos: {compute_done}')
print(f'compute time of untracked videos: {compute_not_done}')
# "realish" time is the compute time divided by 4.
# NOTE(review): presumably 4 jobs run in parallel - confirm.
realish_done = seconds(time_done / 4)
realish_not_done = seconds(time_not_done / 4)
print(f'realish time of tracked videos: {realish_done}')
print(f'realish time of untracked videos: {realish_not_done}')

81.22% of videos have been tracked (735 tracked videos, 170 untracked videos)
compute time of tracked videos: 11.0 days, 22.0 hours, 50.0 minutes, 11.116955124191009 seconds
compute time of untracked videos: 2.0 days, 13.0 hours, 23.0 minutes, 56.786740715557244 seconds
realish time of tracked videos: 2.0 days, 23.0 hours, 42.0 minutes, 32.77923878104775 seconds
realish time of untracked videos: 15.0 hours, 20.0 minutes, 59.19668517888931 seconds


In [14]:
# 737 is the number of videos in the five color pairs reported further down
# (128 + 169 + 176 + 99 + 165); the result is how many of them are still untracked.
737 - len(done_vids)

2

In [15]:
# File names only (text after the last '/') of every tracked video.
new_done_vids = [tracked_path.rsplit('/', 1)[-1] for tracked_path in done_vids]

In [16]:
# For the listed color pairs, print the video count followed by every video of that
# pair that is not among the tracked videos, then the combined total.
reported_pairs = ['YB', 'RY', 'RG', 'GY', 'RB']
reported_total = 0
for pair, pair_videos in color_vids.items():
    if pair not in reported_pairs:
        continue
    print(f'There are {len(pair_videos)} videos from {pair} color pair')
    for untracked in (vid for vid in pair_videos if vid not in new_done_vids):
        print(untracked)
    reported_total += len(pair_videos)
print('\n')
print(f'There are {reported_total} multi instance videos')
# Note on the recorded run: the file names printed above are completely empty videos,
# i.e. there are no rats in them at all.

There are 128 videos from RY color pair
There are 169 videos from YB color pair
There are 176 videos from GY color pair
091924_Cam2_TrNum13_Ineq_KL005Y-KL005G.mp4
There are 99 videos from RB color pair
There are 165 videos from RG color pair
052324_Cam1_TrNum8_Ineq_EB001R-EB002G.mp4


There are 737 multi instance videos


In [181]:
# Sum of the five per-pair video counts, and that sum as a percentage of all 905 multi-instance videos.
128 + 169 + 176 + 99 + 165, (128 + 169 + 176 + 99 + 165) / 905 * 100 # this is what we will get to when everything is done running today...

(737, 81.43646408839778)