In [1]:
import os
# NOTE(review): this is a relative chdir, so re-running the cell without restarting the kernel
# moves the working directory again. Presumably it is needed so the `lib.*` imports that follow
# resolve against the AiATrack checkout — confirm.
os.chdir('../AiATrack')

from lib.test.evaluation import *
from collections import OrderedDict
import importlib
import cv2 as cv
import glob
import torch
import time
from tqdm import tqdm
from multiprocessing import Pool
!nvidia-smi

Thu Aug 29 08:42:21 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce GTX TITAN X     Off | 00000000:01:00.0 Off |                  N/A |
| 22%   44C    P8              13W / 250W |     16MiB / 12288MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce GTX TITAN X     Off | 00000000:02:00.0 Off |  

In [2]:
# Ask PyTorch's CUDA caching allocator to release memory it holds but is not using,
# before the tracker is created in the following cells.
torch.cuda.empty_cache()

In [3]:
# Resolve the AiATrack parameter and tracker modules by dotted name at runtime.
param_module = importlib.import_module('lib.test.parameter.aiatrack')
# 'baseline' is the name handed to parameters() — presumably it selects an experiment config; confirm.
params = param_module.parameters('baseline')
tracker_module = importlib.import_module('lib.test.tracker.aiatrack')
# Only the class is fetched here; it is instantiated once the dataset name is chosen.
tracker_class = tracker_module.get_tracker_class()

In [4]:
# Dataset name passed to the tracker constructor and reused in the sequence/output names below.
dataset = 'lasot'
# Experiments with different datasets (each dataset has its own config, leading to different results).
tracker = tracker_class(params, dataset, debug = True)

In [5]:
def _read_image(image_file: str):
    if isinstance(image_file, str):
        im = cv.imread(image_file)
        return cv.cvtColor(im, cv.COLOR_BGR2RGB)
    else:
        raise ValueError('ERROR: type of image_file should be str')

In [6]:
# TODO: all boxes are offset inside the BBox() class — look into this.

# Values passed to get_search_crop and embedded in the sequence / output names used by later cells.
threshold_val = 20
FOV = 90
# Sequence to process. NOTE(review): seq_num and the init_bbox_* picked in init_info at the bottom
# of this cell are chosen by hand; nothing checks that they refer to the same sequence.
seq_num = '0117'
# Frame paths for the sequence, sorted by filename.
frames = sorted(glob.glob(f'/mnt/data_f_500/aarsh/data/{seq_num}/image/*.jpg'))
# Initial boxes, one variable per sequence id (init_bbox_<seq_num>).
# Presumably [x, y, width, height] in pixels — TODO confirm against the tracker.
# An empty list means no initial box has been recorded for that sequence.
init_bbox_0115 = [2354.8, 1157.0, 26, 46]
init_bbox_0016 = [3157.2672413793102, 464.0, 603, 447]
init_bbox_0117 = [848.655887230514, 1145.0,  887, 754]
init_bbox_0018 = [1980.09375, 966.0, 75, 48]
init_bbox_0027 = []


init_bbox_0006 = [1013.52, 939.0, 179, 21]
init_bbox_0035 = [3485.2474226804125, 786.0, 673, 860]
init_bbox_0007 = []
init_bbox_0076 = []
init_bbox_0088 = [2662.573529411765, 784.0, 101, 300] # complete/partial occlusion
init_bbox_0089 = [642.5692307692307, 986, 63, 106] # partial occlusion
init_bbox_0040 = []

init_bbox_0028 = []
init_bbox_0038 = [3478.679012345679, 1743.5, 411, 169]
init_bbox_0074 = [3135.246031746032, 1226.5, 207, 325] # hyperparam case (refer_cap)
init_bbox_0075 = [1559.2916666666667,  922.5, 28, 75] # hyperparam case (cache_siz)
init_bbox_0106 = [1623.0758293838862, 1049.0, 224, 405] # complete occlusion
init_bbox_0044 = [1341.179487179487,  960.0,  335, 204] # hyperparam case both
init_bbox_0019 = [2009.1060606060605, 1044.0, 160, 112]
init_bbox_0081 = [48.284615384615336, 981, 121, 181] # large distortion, hyperparam case
init_bbox_0094 = []

init_bbox_0050 = [1401.0285714285715, 1093.0, 151, 143]
# Initialization info handed to the tracker; must use the box that matches seq_num above.
init_info = {
    'init_bbox': init_bbox_0117
}

In [7]:
def get_search_crop(frames, init_info, seq_name = None, threshold = 20, FOV = 90, kernel_size = 500, apply_method = False):
    """Run the module-level ``tracker`` over a frame sequence and collect its outputs.

    The first frame initializes the tracker; every following frame is passed to
    ``tracker.track``. Results are accumulated key by key into lists.

    Args:
        frames: Ordered sequence of image file paths; must contain at least one path.
        init_info: Dict passed to ``tracker.initialize``. Its ``'init_bbox'`` entry is
            recorded as the first ``'target_bbox'`` when the tracker does not return one.
        seq_name: Forwarded unchanged to ``tracker.initialize`` and ``tracker.track``.
        threshold: Not read by this function; kept so existing callers keep working.
        FOV: Not read by this function; kept so existing callers keep working.
        kernel_size: Not read by this function; kept so existing callers keep working.
        apply_method: Not read by this function; kept so existing callers keep working.

    Returns:
        Dict with ``'target_bbox'`` and ``'time'`` lists, plus ``'all_boxes'`` and
        ``'all_scores'`` lists when ``tracker.params.save_all_boxes`` is truthy.

    Raises:
        ValueError: If ``frames`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on frames[0].
    if not frames:
        raise ValueError('ERROR: frames is empty, at least one frame is required for initialization')

    save_all_boxes = tracker.params.save_all_boxes

    output = {'target_bbox': [],
              'time': []}
    if save_all_boxes:
        output['all_boxes'] = list()
        output['all_scores'] = list()

    def _store_outputs(tracker_out: dict, defaults=None):
        """Append the tracker's value (or the supplied default) for every tracked key."""
        defaults = dict() if defaults is None else defaults
        for key in output.keys():
            val = tracker_out.get(key, defaults.get(key, None))
            # An explicit None returned by the tracker is stored; a key that is simply
            # absent and has no default is skipped.
            if key in tracker_out or val is not None:
                output[key].append(val)

    # Initialize on the first frame
    image = _read_image(frames[0])

    start_time = time.time()
    out = tracker.initialize(image, init_info, seq_name = seq_name)
    if out is None:
        out = dict()

    init_default = {'target_bbox': init_info.get('init_bbox'),
                    'time': time.time() - start_time}

    if save_all_boxes:
        # Direct indexing on purpose: with save_all_boxes enabled, initialize() has to
        # provide both keys, otherwise this raises KeyError.
        init_default['all_boxes'] = out['all_boxes']
        init_default['all_scores'] = out['all_scores']

    _store_outputs(out, init_default)

    # Track through the remaining frames; the elapsed time covers tracker.track only.
    for frame_path in tqdm(frames[1:]):
        image = _read_image(frame_path)

        start_time = time.time()
        out = tracker.track(image, seq_name = seq_name)
        _store_outputs(out, {'time': time.time() - start_time})

    return output

In [8]:
# Run the tracker over the whole sequence. The third argument is the seq_name forwarded to the
# tracker; the video-building cell reads its frames from an outputs folder with this same name.
# NOTE(review): threshold, FOV and apply_method are accepted by get_search_crop but not read in its body.
output_dict = get_search_crop(frames, init_info, f'{seq_num}_{dataset}_tangent_{threshold_val}_{FOV}', threshold = threshold_val, FOV = FOV, apply_method = False)

  0%|          | 0/663 [00:00<?, ?it/s]

Using /home/aarsh/.cache/torch_extensions as PyTorch extensions root...
Detected CUDA files, patching ldflags
Emitting ninja build file /home/aarsh/.cache/torch_extensions/_prroi_pooling/build.ninja...
Building extension module _prroi_pooling...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module _prroi_pooling...


100%|██████████| 663/663 [03:52<00:00,  2.86it/s]


In [9]:
def read_image(image_path):
    """Decode the image at ``image_path`` with ``cv.imread`` and return the result unchanged.

    Kept as a plain module-level function so it can be handed to ``Pool.imap``.
    """
    decoded = cv.imread(image_path)
    return decoded

def create_video_from_images(image_folder, output_video_path, fps):
    """Encode the ``.png``/``.jpg`` images of a folder into one ``mp4v`` video.

    Frames are written in sorted filename order. The first image determines the
    frame size given to the writer.

    Args:
        image_folder: Directory holding the frame images.
        output_video_path: Path of the video file to create.
        fps: Frame rate passed to ``cv.VideoWriter``.

    Raises:
        ValueError: If the folder contains no ``.png``/``.jpg`` file, or the first
            image cannot be read.
    """
    images = sorted(img for img in os.listdir(image_folder) if img.endswith((".png", ".jpg")))
    if not images:
        raise ValueError(f'No .png or .jpg images found in {image_folder}')

    image_paths = [os.path.join(image_folder, img) for img in images]

    # Only the dimensions are needed here, so the image is not colour-converted.
    first_frame = cv.imread(image_paths[0])
    if first_frame is None:
        raise ValueError(f'Could not read first image: {image_paths[0]}')
    height, width = first_frame.shape[:2]

    video = cv.VideoWriter(output_video_path, cv.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    try:
        # Use multiprocessing to read images in parallel; imap yields them in input order.
        with Pool() as pool:
            for image in tqdm(pool.imap(read_image, image_paths), total=len(images)):
                video.write(image)
    finally:
        # Release the writer even when reading or writing a frame fails.
        video.release()

# Folder named after the seq_name used for the tracking run — presumably where the tracker's
# debug mode saved its per-frame images; confirm.
image_folder = f'/mnt/data_f_500/aarsh/outputs/{seq_num}_{dataset}_tangent_{threshold_val}_{FOV}'
# Destination of the encoded video for this sequence / threshold / FOV combination.
output_video_path = f'/mnt/data_f_500/aarsh/output_vids/{seq_num}_{dataset}_tangent_thresh_{threshold_val}_{FOV}.mp4'
fps = 15  # Frames per second

create_video_from_images(image_folder, output_video_path, fps)


100%|██████████| 663/663 [00:43<00:00, 15.32it/s]


In [10]:
# Dump the tracked boxes to ../<seq_num>.txt: one box per line, values separated by a single
# space, and no newline after the last box.
with open(f'../{seq_num}.txt', 'w') as file:
    rows = (' '.join(str(value) for value in sublist) for sublist in output_dict['target_bbox'])
    file.write('\n'.join(rows))

