SUMMARY: Will find all of the single videos in the PairedTestingSessions directory that don't have a corresponding predictions file and then creates a job txt file that will save to the misha jobs folder

In [5]:
NUM_BATCH_JOBS = 142 # choose number on larger than last folder in home/project/ood/projects/default/
mb_per_sec = 0.20447582936507938 # with partition=gpu, cpus-per-gpu=8, mem-per-cpu=10G, gpus=a40:1

In [6]:
import os
import random

In [7]:
defaultdir = '/gpfs/radev/pi/saxena/aj764'
rootdir = f'{defaultdir}/PairedTestingSessions/'

Compiles a list of all of the sessions in PairedTestingSessions that have a Videos folder in them

In [8]:
vid_subdirs = []
for subdir, dirs, files in os.walk(rootdir):
    if subdir.endswith("Videos"):
        vid_subdirs.append(subdir)
vid_subdirs = sorted(vid_subdirs)

Seperates all of the videos into single instance videos and multi instance videos (and takes out videos from before April).

In [9]:
single_vids = {}
multi_vids = {}
for vids in vid_subdirs:
    files = os.listdir(vids)
    cut_vids = vids[28:]
    single_vids[cut_vids] = []
    multi_vids[cut_vids] = []
    for file in files:
        if file.endswith('.mp4') and int(file[:2]) >= 4:
            KL_count = file.count('KL')
            EB_count = file.count('EB')
            HF_count = file.count('HF')
            if KL_count + EB_count + HF_count == 2:
                multi_vids[cut_vids].append(file)
            else:
                single_vids[cut_vids].append(file)

In [10]:
len_tot = 0
for key, value in single_vids.items():
    len_tot += len(value)
print(f'There are {len_tot} single instance videos')

There are 1106 single instance videos


In [11]:
len_tot = 0
for key, value in multi_vids.items():
    len_tot += len(value)
print(f'There are {len_tot} multi instance videos')

There are 905 multi instance videos


Creates a string composed of all of the necessary commands to run inference on all single instance videos that don't have a corresponding predictions file.

In [9]:
start_command = f'module load miniconda; conda activate sleap; cd {defaultdir};'
model = 'Tracking/SLEAP/models/240808_075503.single_instance.n=720'

In [13]:
command_lines = ''
for i, session in enumerate(single_vids.keys()): 
    video_list = single_vids[session]
    output_path = defaultdir + '/' + session[:-6] + 'Tracking'

    # makes directory for tracking output if not already made
    if not os.path.isdir(output_path):
        os.mkdir(output_path)
    if not os.path.isdir(output_path + '/slp'):
        os.mkdir(output_path + '/slp')
    if not os.path.isdir(output_path + '/h5'):
        os.mkdir(output_path + '/h5')

    # adds command for every video that doesn't have a corresponding prediction
    for video in video_list:
        video_path = f'{session}/{video}'
        output_file = video[:-3] + 'predictions.'
        if not os.path.isfile(output_path + '/slp/' + output_file + 'slp'):
            track_command = f'sleap-track "{video_path}" --first-gpu -o "{output_path + '/slp/' + output_file + 'slp'}" -m "{model}"/'
            convert_command = f'; sleap-convert --format analysis -o "{output_path + '/h5/' + output_file + 'h5'}" "{output_path + '/slp/' + output_file + 'slp'}"'
            command_lines += (start_command + track_command + convert_command + '\n')

Saves the commands to a txt file in the job directory.

In [14]:
jobdir = '/gpfs/radev/project/saxena/aj764/ood/projects/default/'

In [15]:
commanddir = jobdir + str(NUM_BATCH_JOBS)
os.mkdir(commanddir)
with open(f"{commanddir}/all_vids_job.txt", "w") as file:
    file.write(command_lines) 

Keeps track of which videos have predictions and which don't

In [12]:
done_vids = []
done_vids_size = []
not_done_vids = []
not_done_vids_size = []
for i, session in enumerate(single_vids.keys()):
    video_list = single_vids[session]
    output_path = defaultdir + '/' + session[:-6] + 'Tracking'
        
    for video in video_list:
        video_path = f'{session}/{video}'
        output_file = video[:-3] + 'predictions.'
        if os.path.isfile(output_path + '/slp/' + output_file + 'slp') and os.path.isfile(output_path + '/h5/' + output_file + 'h5'):
            done_vids.append(session + '/' + video)
            done_vids_size.append((os.path.getsize(defaultdir + '/' + session + '/' + video)) / 10 ** 6)
        else:
            not_done_vids.append(session + '/' + video)
            not_done_vids_size.append((os.path.getsize(defaultdir + '/' + session + '/' + video)) / 10 ** 6)
num_done = len(done_vids)
num_not_done = len(not_done_vids)
time_done = sum(done_vids_size) / mb_per_sec
time_not_done = sum(not_done_vids_size) / mb_per_sec

In [13]:
def seconds(secs):
    days = secs//86400
    hours = (secs - days*86400)//3600
    minutes = (secs - days*86400 - hours*3600)//60
    seconds = secs - days*86400 - hours*3600 - minutes*60
    result = ("{0} day{1}, ".format(days, "s" if days!=1 else "") if days else "") + \
    ("{0} hour{1}, ".format(hours, "s" if hours!=1 else "") if hours else "") + \
    ("{0} minute{1}, ".format(minutes, "s" if minutes!=1 else "") if minutes else "") + \
    ("{0} second{1}, ".format(seconds, "s" if seconds!=1 else "") if seconds else "")
    return result[:-2]

In [16]:
print(f'{round((len(done_vids) / (len(not_done_vids)+1)) * 100, 2)}% of videos have been tracked ({len(done_vids)} tracked videos, {len(not_done_vids)} untracked videos)')   
print(f'compute time of tracked videos: {seconds(time_done)}')
print(f'compute time of untracked videos: {seconds(time_not_done)}')
print(f'realish time of tracked videos: {seconds(time_done / 4)}')
print(f'realish time of untracked videos: {seconds(time_not_done / 4)}')

110600.0% of videos have been tracked (1106 tracked videos, 0 untracked videos)
compute time of tracked videos: 7.0 days, 5.0 hours, 57.0 minutes, 57.87173983710818 seconds
compute time of untracked videos: 
realish time of tracked videos: 1.0 day, 19.0 hours, 29.0 minutes, 29.467934959277045 seconds
realish time of untracked videos: 


In [17]:
random.choice(done_vids)

'PairedTestingSessions/091724_FiberPho_KL004G-KL002_TimeOut/Videos/091724_Cam2_TrNum7_IS_KL004G.mp4'