# Sample example

In [1]:
from absl import flags
import sys

import time # 프레임 당 시간 계산하기위해서 필요함
import numpy as np
import cv2
import matplotlib.pyplot as plt

import tensorflow as tf
from yolov3_tf2.models import YoloV3
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import convert_boxes

from deep_sort import preprocessing # NMS
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from deep_sort.tracking_utils import match_detections_with_tracks
from tools import generate_detections as gdet # feature generation


class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()] # [car, person, ... ]
yolo = YoloV3(classes=len(class_names))
yolo.load_weights('./weights/yolov3.tf')

max_cosine_distance = 0.5 # 0.5보다 클 경우 유사하다는 의미
nn_budget = None
nms_max_overlap = 0.8

# class 선언
model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
tracker = Tracker(metric)

# frame 단위로 video capture
#vid = cv2.VideoCapture('./data/video/test.mp4')

codec = cv2.VideoWriter_fourcc(*'XVID')
vid_fps = int(vid.get(cv2.CAP_PROP_FPS)) # CAP_PROP_FPS는 float을 반환해서 int로 바꿔줌 
vid_width, vid_height = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
out = cv2.VideoWriter('./data/video/results_deepsort_sample_data.avi', codec, vid_fps, (vid_width, vid_height))

from _collections import deque
pts = [deque(maxlen=30) for _ in range(1000)]

counter = []

while True:
    _, img = vid.read() # img : ndarray (height, width,channel) 한장씩 받아옴
    if img is None:
        print('Completed')
        break

    img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_in = tf.expand_dims(img_in, 0) # detector에 넣어주기 위해서 (1,height, width, channel)로 만들어줌 >> tf.Tensor로 바뀜
    img_in = transform_images(img_in, 416)  # 1. image resize >> 416으로, 2. 픽셀값 255로 나눠줌. 

    t1 = time.time()
    
    '''
    numpy array로 받음
    bounding box 갯수 : 100개
    boxes, 3D shape (1, 100, 4) ; 4 : l,t,r,b
    scores, 2D shape (1, 100) : confidence score
    classes, 2D shape(1, 100) : box에 있는 물체의 class 번호
    nums, 1D shape(1,) : 감지된 물체의 총 수
    '''
    boxes, scores, classes, nums = yolo.predict(img_in)
    
    classes = classes[0]
    names = []
    for i in range(len(classes)):
        names.append(class_names[int(classes[i])])
    names = np.array(names)
    converted_boxes = convert_boxes(img, boxes[0]) # box shape : x_min, y_min, w, h로 바뀜
    features = encoder(img, converted_boxes)

    # 박스정보 : left, top, width, height
    detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
                  zip(converted_boxes, scores[0], names, features)]

    boxs = np.array([d.tlwh for d in detections])
    scores = np.array([d.confidence for d in detections])
    classes = np.array([d.class_name for d in detections])
    indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
    detections = [detections[i] for i in indices]

    tracker.predict() # 칼만 필터로 예측
    tracker.update(detections)

    cmap = plt.get_cmap('tab20b') # 숫자를 색상에 매핑
    colors = [cmap(i)[:3] for i in np.linspace(0,1,20)] # 20개 색상 생성

    current_count = int(0)

    for track in tracker.tracks:
        # update하지 않음
        if not track.is_confirmed() or track.time_since_update > 1:
            continue

        bbox = track.to_tlbr() # cv2 출력에 사용됨 min_x, min_y, max_x, max_y
        class_name = track.get_class()
        color = colors[int(track.track_id) % len(colors)]
        color = [i*255 for i in color]

        cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]),int(bbox[3])), color, 2) # lt, rb
        cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)
                    +len(str(track.track_id)))*17,int(bbox[1])), color, -1) # id box
        cv2.putText(img, class_name+"-"+str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75,
                    (255,255,255), 2)
        
        '''
        center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2)) # x,y center 좌표
        pts[track.track_id].append(center)

        # 중심점 진행방향 line 그리기
        for j in range(1, len(pts[track.track_id])):
            if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
                continue
            thickness = int(np.sqrt(64/float(j+1))*2)
            cv2.line(img, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)
            
        height, width, _ = img.shape
        cv2.line(img, (0, int(3*height/6)), (width, int(3*height/6)), (0, 255, 0), thickness=2)
        #cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
        #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)

        center_y = int(((bbox[1])+(bbox[3]))/2)

        if center_y <= int(3*height/6+height/30) and center_y >= int(3*height/6-height/30):
            if class_name == 'car' or class_name == 'truck':
                counter.append(int(track.track_id))
                current_count += 1

    total_count = len(set(counter))

    cv2.putText(img, 'Total Vehicle Count: ' + str(total_count), (0,130), 0, 1, (0,0,255), 2)
    cv2.putText(img, 'Current Vehicle Count: ' + str(current_count), (0,80), 0, 1, (0,0,255), 2)
    '''
    
    fps  = 1./(time.time()-t1)
    cv2.putText(img, "FPS: {:.2f}".format(fps), (0,30), 0, 1, (0,0,255), 2)
    cv2.namedWindow("output", cv2.WINDOW_NORMAL)
    cv2.resizeWindow('output', 1024, 768)
    cv2.imshow('output', img)
    out.write(img)

    if cv2.waitKey(1) == ord('q'): # 휴식
        break
vid.release()
out.release()
cv2.destroyAllWindows()




Completed


## VisDrone

In [1]:
def boxes_minmax_to_xywh(boxs):
    returned_boxes = []
    for box in boxs:
        box[2] = (box[2]-box[0])
        box[3] = (box[3]-box[1])
        box = box.astype('int').tolist()
        if box != [0,0,0,0]:
            returned_boxes.append(box)
    return returned_boxes

In [2]:
def transform_images(x_train, size):
    x_train = tf.image.resize(x_train, (size, size))
    x_train = x_train / 255
    return x_train

In [1]:
from absl import flags
import sys

import time # 프레임 당 시간 계산하기위해서 필요함
import numpy as np
import cv2
import matplotlib.pyplot as plt
import glob
import pandas as pd
import os
import pickle

import tensorflow as tf
from yolov3_tf2.models import YoloV3
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import convert_boxes

from deep_sort import preprocessing # NMS
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from deep_sort.tracking_utils import match_detections_with_tracks
from tools import generate_detections as gdet # feature generation

In [None]:
# 이미지 파일 사용 시
from absl import flags
import sys

import time # 프레임 당 시간 계산하기위해서 필요함
import numpy as np
import cv2
import matplotlib.pyplot as plt
import glob
import pandas as pd
import os
import pickle

import tensorflow as tf
from yolov3_tf2.models import YoloV3
from yolov3_tf2.dataset import transform_images
from yolov3_tf2.utils import convert_boxes

from deep_sort import preprocessing # NMS
from deep_sort import nn_matching
from deep_sort.detection import Detection
from deep_sort.tracker import Tracker
from deep_sort.tracking_utils import match_detections_with_tracks
from tools import generate_detections as gdet # feature generation

#class_names = [c.strip() for c in open('./data/labels/coco.names').readlines()] # [car, person, ... ]
class_names = {0 : 'people',
               1 : 'motor',
               2 : 'car'
              }
#yolo = YoloV3(classes=len(class_names))
#yolo.load_weights('./weights/yolov3.tf')

max_cosine_distance = 0.5 
nn_budget = None
nms_max_overlap = 0.8

# class 선언
model_filename = 'model_data/mars-small128.pb'
encoder = gdet.create_box_encoder(model_filename, batch_size=1)
metric = nn_matching.NearestNeighborDistanceMetric('cosine', max_cosine_distance, nn_budget)
tracker = Tracker(metric)

data_path='C:/Users/Son/Documents/aiffel/SIA-MOT/VisDrone2019-MOT-test/'
test_dir_names = glob.glob(data_path+"sequences/*")
model_name = 'baseline_yolov3_33'

#model_names = ['baseline_yolov4_33','yolo4_33_aug','yolo4_all','yolo4_all2','yolov4_33_608','yolov4_33_anchor150','yolov4_33_focalx']
model_names = ['yolov3_all','yolov3_all2','yolov4_33_ciou']
for model_name in model_names:  
    # metric input 저장소
    save_dict_path = './metric_input/'+model_name+'/'

    if os.path.isdir(save_dict_path):
            pass
    else:
        os.makedirs(save_dict_path)

    for seq_full_name in test_dir_names:
        seq_name = seq_full_name.split('\\')[-1]

        images_filepaths = sorted(glob.glob(seq_full_name+"/*"))

        pred_detection = pd.read_csv(data_path+model_name+'_pred/'+
                                     seq_name+'_pred.txt',
                                 sep = ",",
                                names = ['image_index', 'xmin','ymin', 'xmax','ymax', 'conf','class'])

        a = cv2.imread(images_filepaths[1])
        a_height, a_width = a.shape[0], a.shape[1]
        codec = cv2.VideoWriter_fourcc(*'XVID')

        if os.path.isdir('./data/video/'+model_name+'_result'):
            pass
        else:
            os.makedirs('./data/video/'+model_name+'_result')

        out = cv2.VideoWriter('./data/video/'+model_name+'_result/'+seq_name+'.mp4', codec, 30, (a_width, a_height))

        seq_time = time.time()
        detections_dict = {}
        for frame_id, img_path in enumerate(images_filepaths):
            if img_path is None:
                print('Completed')
                break
            img = cv2.imread(img_path)
            img_in = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_in = tf.expand_dims(img_in, 0) # detector에 넣어주기 위해서 (1,height, width, channel)로 만들어줌 >> tf.Tensor로 바뀜
            img_in = transform_images(img_in, 416)  # 1. image resize >> 416으로, 2. 픽셀값 255로 나눠줌. 

            t1 = time.time()

            '''
            numpy array로 받음
            bounding box 갯수 : 100개
            boxes, 3D shape (1, 100, 4) ; 4 : l,t,r,b
            scores, 2D shape (1, 100) : confidence score
            classes, 2D shape(1, 100) : box에 있는 물체의 class 번호
            nums, 1D shape(1,) : 감지된 물체의 총 수
            '''
            ''' model 사용시
            boxes, scores, classes, nums = yolo.predict(img_in)
            classes = classes[0]
            converted_boxes = convert_boxes(img, boxes[0]) # box shape : x_min, y_min, w, h로 바뀜
            '''
            yolo_pred = pred_detection[pred_detection['image_index']==frame_id]
            boxes = yolo_pred.iloc[:,1:5].to_numpy()
            scores = yolo_pred['conf'].to_numpy()
            classes = yolo_pred['class'].to_numpy()
            nums=len(yolo_pred['image_index']== frame_id)

            converted_boxes = boxes_minmax_to_xywh(boxes)

            names = []
            for i in range(len(classes)):
                names.append(class_names[int(classes[i])])
            names = np.array(names)
            features = encoder(img, converted_boxes)

            # 박스정보 : left, top, width, height
            detections = [Detection(bbox, score, class_name, feature) for bbox, score, class_name, feature in
                          zip(converted_boxes, scores, names, features)]

            boxs = np.array([d.tlwh for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])
            indices = preprocessing.non_max_suppression(boxs, classes, nms_max_overlap, scores)
            detections = [detections[i] for i in indices]

            boxs = np.array([d.to_xyxy() for d in detections])
            scores = np.array([d.confidence for d in detections])
            classes = np.array([d.class_name for d in detections])


            tracker.predict() # 칼만 필터로 예측
            tracker.update(detections)

            # 현재 detection box와 현재추적중인 track사이의 IoU matching
            # detections에 track_id 넣어주기 위해 matching 시키는 것으로 추정
            track_id = match_detections_with_tracks(boxs=boxs, tracks=tracker.tracks)
            for i,detection in enumerate(detections):
                detection.add_track_id(track_id[i])

            mask = np.array([detection.track_id is not None for detection in detections], dtype=bool)
            classes = classes[mask]
            detections = list(np.array(detections)[mask])


            detections_dict[frame_id+1]= detections

            cmap = plt.get_cmap('tab20b') # 숫자를 색상에 매핑
            colors = [cmap(i)[:3] for i in np.linspace(0,1,20)] # 20개 색상 생성

            current_count = int(0)

            for track in tracker.tracks:
                if not track.is_confirmed() or track.time_since_update > 1:
                    continue

                bbox = track.to_tlbr() # cv2 출력에 사용됨 min_x, min_y, max_x, max_y
                class_name = track.get_class()
                color = colors[int(track.track_id) % len(colors)]
                color = [i*255 for i in color]

                cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]),int(bbox[3])), color, 2) # lt, rb
                cv2.rectangle(img, (int(bbox[0]), int(bbox[1]-30)), (int(bbox[0])+(len(class_name)
                            +len(str(track.track_id)))*17,int(bbox[1])), color, -1) # id box
                cv2.putText(img, class_name+"-"+str(track.track_id), (int(bbox[0]), int(bbox[1]-10)), 0, 0.75,
                            (255,255,255), 2)

                '''
                center = (int(((bbox[0]) + (bbox[2]))/2), int(((bbox[1])+(bbox[3]))/2)) # x,y center 좌표
                pts[track.track_id].append(center)

                # 중심점 진행방향 line 그리기
                for j in range(1, len(pts[track.track_id])):
                    if pts[track.track_id][j-1] is None or pts[track.track_id][j] is None:
                        continue
                    thickness = int(np.sqrt(64/float(j+1))*2)
                    cv2.line(img, (pts[track.track_id][j-1]), (pts[track.track_id][j]), color, thickness)

                height, width, _ = img.shape
                cv2.line(img, (0, int(3*height/6)), (width, int(3*height/6)), (0, 255, 0), thickness=2)
                #cv2.line(img, (0, int(3*height/6+height/20)), (width, int(3*height/6+height/20)), (0, 255, 0), thickness=2)
                #cv2.line(img, (0, int(3*height/6-height/20)), (width, int(3*height/6-height/20)), (0, 255, 0), thickness=2)

                center_y = int(((bbox[1])+(bbox[3]))/2)

                if center_y <= int(3*height/6+height/30) and center_y >= int(3*height/6-height/30):
                    if class_name == 'car' or class_name == 'truck':
                        counter.append(int(track.track_id))
                        current_count += 1

            total_count = len(set(counter))

            cv2.putText(img, 'Total Vehicle Count: ' + str(total_count), (0,130), 0, 1, (0,0,255), 2)
            cv2.putText(img, 'Current Vehicle Count: ' + str(current_count), (0,80), 0, 1, (0,0,255), 2)
            '''

            fps  = 1./(time.time()-t1)
            cv2.putText(img, "FPS: {:.2f}".format(fps), (0,30), 0, 1, (0,0,255), 2)
            cv2.namedWindow("output", cv2.WINDOW_NORMAL)
            cv2.resizeWindow('output', a_width, a_height)
            cv2.imshow('output', img)
            out.write(img)

            if cv2.waitKey(1) == ord('q'): # 휴식
                break

        out.release()
        cv2.destroyAllWindows()

        with open(save_dict_path+seq_name+'.pickle','wb') as fw:
            pickle.dump(detections_dict, fw)
        tracker.end_sequence()

        time_taken = time.time() - seq_time
        print(seq_name+' is completed\ntime taken : '+str(time_taken))
        print('-----------------\n')
    print(model_name+' is completed\n\n')
    print('----------------')

uav0000009_03358_v is completed
time taken : 300.0659945011139
-----------------

uav0000073_00600_v is completed
time taken : 465.5519106388092
-----------------

uav0000073_04464_v is completed
time taken : 843.111442565918
-----------------

uav0000077_00720_v is completed
time taken : 878.3771045207977
-----------------

uav0000088_00290_v is completed
time taken : 796.1248342990875
-----------------

uav0000119_02301_v is completed
time taken : 179.26998734474182
-----------------

uav0000120_04775_v is completed
time taken : 1973.8451886177063
-----------------

uav0000161_00000_v is completed
time taken : 879.3960831165314
-----------------

uav0000188_00000_v is completed
time taken : 360.7142906188965
-----------------



## Metric

In [13]:
from metric.io import read_results, unzip_objs

import os
import numpy as np
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'

'''
data_root = "C:/Users/Son/Documents/aiffel/SIA-MOT/VisDrone2019-MOT-test/VisDrone2019-MOT-test/annotations"
seq_name = 'uav0000009_03358_v'
data_type = 'mot'

gt_filename = os.path.join(data_root, seq_name + '.txt')

# {frame_num : (tlwh, target_id, score)}
gt_frame_dict = read_results(gt_filename, data_type, is_gt=True)
gt_ignore_frame_dict = read_results(gt_filename, data_type, is_ignore=True)
'''

'\ndata_root = "C:/Users/Son/Documents/aiffel/SIA-MOT/VisDrone2019-MOT-test/VisDrone2019-MOT-test/annotations"\nseq_name = \'uav0000009_03358_v\'\ndata_type = \'mot\'\n\ngt_filename = os.path.join(data_root, seq_name + \'.txt\')\n\n# {frame_num : (tlwh, target_id, score)}\ngt_frame_dict = read_results(gt_filename, data_type, is_gt=True)\ngt_ignore_frame_dict = read_results(gt_filename, data_type, is_ignore=True)\n'

In [20]:
import pandas
import numpy

gt_frame = pd.read_csv("C:/Users/Son/Documents/aiffel/SIA-MOT/VisDrone2019-MOT-test/data/test_dataframe.csv")
def class_change(value):
    '''
    1:0,#"pedestrian"
    2:0,#"people"
    3:1,#"bicycle"
    4:2,#"car"
    5:2,#"van"
    6:2,#"truck"
    7:1,#"tricycle"
    8:1,#"awning-tricycle"
    9:2,# "bus"
    10:1,#"motor"
    '''
    if value == 0 :
        return None
    if value in [1,2]:
        return 0
    if value in [3,7,8,10]:
        return 1
    if value in [4,5,6,9]:
        return 2

gt_frame['class'] = gt_frame['class'].apply(class_change)
gt_frame = gt_frame[gt_frame['class']!= None]
gt_frame['x_max'] = gt_frame['x_max'].values - gt_frame['x_min'].values
gt_frame['y_max'] = gt_frame['y_max'].values - gt_frame['y_min'].values
gt_frame.rename(columns={'x_max': 'w', 
                           'y_max': 'h'}, inplace=True)
#gt_frame['target_id'] = gt_frame['target_id'] + 1
gt_frame

Unnamed: 0,video_name,frame_index,target_id,x_min,y_min,w,h,score,class,truncation,occlusion,height,width
0,uav0000009_03358_v,98,0,808,1,47,22,1,2.0,0,0,765,1360
1,uav0000009_03358_v,99,0,808,55,46,21,1,2.0,0,0,765,1360
2,uav0000009_03358_v,100,0,806,112,47,23,1,2.0,0,0,765,1360
3,uav0000009_03358_v,101,0,802,122,47,22,1,2.0,0,0,765,1360
4,uav0000009_03358_v,102,0,800,122,47,22,1,2.0,0,0,765,1360
...,...,...,...,...,...,...,...,...,...,...,...,...,...
333695,uav0000370_00001_v,1,29,2298,1132,417,291,1,2.0,1,1,1530,2720
333696,uav0000370_00001_v,2,29,2332,1137,384,290,1,2.0,1,1,1530,2720
333697,uav0000370_00001_v,3,29,2366,1142,351,290,1,2.0,1,1,1530,2720
333698,uav0000370_00001_v,4,29,2400,1147,318,290,1,2.0,1,1,1530,2720


In [19]:
gt_frame['target_id'].min()

1

In [21]:
from cython_bbox import bbox_overlaps as bbox_ious

def ious(atlwhs, btlwhs):
    """
    Compute cost based on IoU
    :type atlbrs: list[tlbr] | np.ndarray
    :type atlbrs: list[tlbr] | np.ndarray

    :rtype ious np.ndarray
    """
    atlbrs = np.concatenate([atlwhs[:,:2], atlwhs[:,2:] + atlwhs[:,:2]], axis=1)
    btlbrs = np.concatenate([btlwhs[:,:2], btlwhs[:,2:] + btlwhs[:,:2]], axis=1)
    
    ious = np.zeros([atlbrs.shape[0], btlbrs.shape[0]], dtype=np.float)
    if ious.size == 0:
        return ious
    
    trk_tlwhs_list = [i for i in trk_tlwhs]
    gt_tlwhs_list = [i for i in gt_tlwhs]
    
    ious = bbox_ious(
        np.ascontiguousarray(atlbrs, dtype=np.float),
        np.ascontiguousarray(btlbrs, dtype=np.float)
    )

    return ious

In [23]:
def get_summary(accs, names, metrics=('mota', 'num_switches', 'idp', 'idr', 'idf1', 'precision', 'recall')):
    names = copy.deepcopy(names)
    if metrics is None:
        metrics = mm.metrics.motchallenge_metrics
    metrics = copy.deepcopy(metrics)

    mh = mm.metrics.create()
    summary = mh.compute_many(
            accs,
            metrics=metrics,
            names=names,
            generate_overall=True
        )

    return summary

In [24]:
from deep_sort.matching import ious
from metric.io import read_results, unzip_objs
import copy
import motmetrics as mm
mm.lap.default_solver = 'lap'
import pickle
import os

data_path='C:/Users/Son/Documents/aiffel/SIA-MOT/VisDrone2019-MOT-test/'
test_dir_names = glob.glob(data_path+"sequences/*")
model_names=['baseline_yolov4_33','yolo4_33_aug','yolo4_all','yolo4_all2',
             'yolov4_33_608','yolov4_33_anchor150','yolov4_33_focalx','yolov3_all','yolov3_all2','yolov4_33_ciou']
for model_name in model_names:
    our_metric_input={}
    score_dict={}
    for seq_name in test_dir_names:
        time_1 = time.time()
        seq_name = seq_name.split('\\')[-1]
        seq_path = glob.glob(data_path+ "sequences/"+seq_name+'/*')

        gt_frame_dict = {}
        for i in range(1,len(seq_path)+1):
            g = gt_frame.copy()
            g = g[g['video_name']==seq_name]
            g = g[g['frame_index']==i]
            tlwhs = tuple(map(tuple,g.iloc[:,3:7].values))
            target_ids = g.iloc[:,2].values
            scores = g.iloc[:,7].values

            gt=[]
            for tlwh, target_id, score in zip(tlwhs, target_ids, scores):
                gt.append(tuple([tlwh, target_id, score]))

            gt_frame_dict[i] = gt


        # load data
        try:
            with open(save_dict_path+seq_name+'.pickle', 'rb') as fr:
                detections_dict = pickle.load(fr)
        except:
            pass

        accs=[]
        names=[]
        similarity_scores=[]
        num_tracker_dets=set()
        num_gt_dets=set()
        num_gtrue_ids=set()
        gtrue_ids=[]
        tracker_ids=[]
        for i in range(1, len(gt_frame_dict)+1):

            # results
            trk_objs = detections_dict.get(i,[])
            trk_tlwhs = []
            trk_ids = []
            for a in trk_objs:
                trk_tlwhs.append(a.tlwh)
                trk_ids.append(a.track_id)
            trk_tlwhs = np.asarray(trk_tlwhs)
            trk_ids = np.asarray(trk_ids)

            # gts
            gt_objs = gt_frame_dict.get(i, [])
            gt_tlwhs, gt_ids = unzip_objs(gt_objs)[:2]

            '''
            # ignore boxes
            ignore_objs = gt_ignore_frame_dict.get(i, [])
            ignore_tlwhs = unzip_objs(ignore_objs)[0]

            # remove ignored results
            keep = np.ones(len(trk_tlwhs), dtype=bool)
            iou_distance = mm.distances.iou_matrix(ignore_tlwhs, trk_tlwhs, max_iou=0.5)
            if len(iou_distance) > 0:
                match_is, match_js = mm.lap.linear_sum_assignment(iou_distance)
                match_is, match_js = map(lambda a: np.asarray(a, dtype=int), [match_is, match_js])
                match_ious = iou_distance[match_is, match_js]

                match_js = np.asarray(match_js, dtype=int)
                match_js = match_js[np.logical_not(np.isnan(match_ious))]
                keep[match_js] = False
                trk_tlwhs = trk_tlwhs[keep]
                trk_ids = trk_ids[keep]
            '''

            ious_ = ious(gt_tlwhs, trk_tlwhs)
            iou_distance = mm.distances.iou_matrix(gt_tlwhs, trk_tlwhs, max_iou=0.5)

            acc = mm.MOTAccumulator(auto_id=True)
            acc.update(gt_ids, trk_ids, iou_distance)
            rtn_events=False

            if rtn_events and iou_distance.size > 0 and hasattr(acc, 'last_mot_events'):
                events = acc.last_mot_events  # only supported by https://github.com/longcw/py-motmetrics
            else:
                events = None
            accs.append(acc)
            names.append(i)

            # save
            similarity_scores.append(ious_)
            num_tracker_dets = num_tracker_dets | set(trk_ids)
            num_gt_dets = num_gt_dets | set(gt_ids)
            num_gtrue_ids =  num_gtrue_ids | set(gt_ids)
            gtrue_ids.append(np.array(gt_ids))
            tracker_ids.append(trk_ids)
        our_metric_input[seq_name] = {'similarity_scores':similarity_scores,
                                     'num_tracker_dets' : len(num_tracker_dets),
                                     'num_gt_dets' : len(num_gt_dets),
                                     'num_gtrue_ids':len(num_gtrue_ids),
                                     'gtrue_ids':gtrue_ids,
                                     'tracker_ids':tracker_ids}


        if os.path.exists('data/metric_score'):
            pass
        else:
            os.mkdir('data/metric_score')
        score_dict[seq_name] = get_summary(accs, names)
        print(f'per one sequence : {time.time()-time_1}')


    with open('C:/Users/Son/Documents/aiffel/SIA-MOT/workplace/yolov3_deepsort/data/metric_score/'+model_name+'.pickle','wb') as fw:
        pickle.dump(score_dict, fw)
    with open('C:/Users/Son/Documents/aiffel/SIA-MOT/workplace/yolov3_deepsort/data/metric_score/'+model_name+'_our.pickle','wb') as fw:
        pickle.dump(our_metric_input, fw)

per one sequence : 20.266359567642212
per one sequence : 29.882416248321533
per one sequence : 29.885308980941772
per one sequence : 68.68841409683228
per one sequence : 29.87662100791931
per one sequence : 16.794528484344482
per one sequence : 93.26980876922607
per one sequence : 29.960148334503174
per one sequence : 23.617835521697998
per one sequence : 60.543644189834595
per one sequence : 31.249053239822388
per one sequence : 20.762394428253174
per one sequence : 13.151840925216675
per one sequence : 34.30572247505188
per one sequence : 37.61990165710449
per one sequence : 40.49612736701965
per one sequence : 20.992689609527588
per one sequence : 19.816951274871826
per one sequence : 28.289116859436035
per one sequence : 29.4894540309906
per one sequence : 67.67888259887695
per one sequence : 27.433425664901733
per one sequence : 17.612806797027588
per one sequence : 90.87300038337708
per one sequence : 28.58820629119873
per one sequence : 22.79101061820984
per one sequence : 59.70

In [None]:
with open('C:/Users/Son/Documents/aiffel/SIA-MOT/workplace/yolov3_bytetrack/data/metric_score/'+model_name+'.pickle','rb') as fr:
    score_dict = pickle.load(fr)

In [18]:
mota_mean=[]
for name in test_dir_names:
    s = score_dict[name.split('\\')[-1]]
    mota_mean.append(s['mota'].mean())
mota_mean

[0.38573917876561864,
 -0.06633058326410475,
 -0.004998262077387667,
 0.28470720879190864,
 0.0612094233548637,
 0.14463740862091376,
 -0.00733235776792063,
 0.09107596344337021,
 0.010930567968450404,
 0.09486564695575833,
 -0.06109167999586047,
 -0.03208686550646303,
 0.028491092452955957,
 0.10264020842945841,
 -0.04249997783384979,
 0.03571218552481226,
 0.11262060167724126]

In [19]:
s['mota'].mean()

0.11262060167724126

## our metric

In [None]:
import glob
import pickle

save_dict_path = 'C:/Users/Son/Documents/aiffel/SIA-MOT/workplace/yolov3_deepsort/metric_input/'
test_dir_names = glob.glob("C:/Users/Son/Documents/aiffel/SIA-MOT/VisDrone2019-MOT-test/sequences/*")
model_name = 'baseline_yolov3'

In [None]:
seq_name = test_dir_names[0].split('\\')[-1]

with open('C:/Users/Son/Documents/aiffel/SIA-MOT/workplace/yolov3_bytetrack/data/metric_score/'+model_name+'_our.pickle','rb') as fr:
    our_metric_input = pickle.load(fr)

In [20]:
from metric.MOTA import CLEAR

# 실제 적용 예시코드
# 데이터셋에서 필요한 정보를 추출하여 data 배열을 생성
data = {
    'num_tracker_dets': num_tracker_dets,
    'num_gt_dets': num_gt_dets,
    'gt_ids': gtrue_ids,
    'tracker_ids': tracker_ids,
    'similarity_scores': similarity_scores,
    'num_timesteps' : len(images_filepaths)
}

# CLEAR 객체 생성
clear = CLEAR()

# eval_sequence 메서드를 사용하여 MOTA 계산
mota = clear.eval_sequence(data)['MOTA']


CLEAR Config:
THRESHOLD            : 0.5                           
PRINT_CONFIG         : True                          


In [21]:
mota

-0.6875981161695447