# 单目标追踪 Single Object Tracking （SOT）

在视频初始帧中，框选要追踪的`单个`目标，在后续视频中追踪该目标。

参考文档：https://github.com/open-mmlab/mmtracking/blob/master/docs/en/quick_run.md

MMtracking 预训练模型库 Model Zoo：https://mmtracking.readthedocs.io/en/latest/model_zoo.html

如果报错 `CUDA out of memory.`，通常是显存仍被之前运行的代码文件占用：重启（或关闭）前面几个代码文件的 `kernel` 释放显存后，再重新运行本文件即可。

作者：同济子豪兄 2022-4-21

## 进入 MMTracking 主目录

In [1]:
import os
# Enter the MMTracking repository root. The guard keeps the cell re-runnable:
# once the working directory is already `mmtracking`, a second bare
# os.chdir('mmtracking') would look for a nested directory of that name and fail.
if os.path.basename(os.getcwd()) != 'mmtracking':
    os.chdir('mmtracking')
# Last expression of the cell: lists the directory contents as a sanity check.
os.listdir()

['.git',
 '.circleci',
 '.dev_scripts',
 '.github',
 '.gitignore',
 '.pre-commit-config.yaml',
 '.readthedocs.yml',
 'CITATION.cff',
 'LICENSE',
 'MANIFEST.in',
 'README.md',
 'README_zh-CN.md',
 'configs',
 'demo',
 'docker',
 'docs',
 'mmtrack',
 'model-index.yml',
 'requirements.txt',
 'requirements',
 'resources',
 'setup.cfg',
 'setup.py',
 'tests',
 'tools',
 'mmtrack.egg-info',
 'checkpoints',
 'outputs',
 'data']

## 导入工具包

In [2]:
# opencv-python
import cv2

import numpy as np

# 导入python绘图matplotlib
import matplotlib.pyplot as plt
# 使用ipython的魔法方法，将绘制出的图像直接嵌入在notebook单元格中
%matplotlib inline

# 定义可视化图像函数
def show_img_from_array(img):
    """Display an image array inline with matplotlib.

    OpenCV stores images in BGR channel order while matplotlib renders RGB,
    so the channels are converted from BGR to RGB before plotting.
    """
    plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    plt.show()

## 在本地运行`【D】获取视频第一帧单目标检测框（需在本地运行）.ipynb`，将坐标复制粘贴至`data/gt_box_file.txt`

In [None]:
# Reference coordinates of the initial target boxes, written as x, y, w, h

# bee.mp4
# First bee: 132, 59, 57, 61
# Second bee: 694, 151, 87, 79
# Third bee: 1266, 462, 12, 35

# billiards1.mp4
336, 401, 14, 14

# billiards2.mp4
229, 296, 8, 8

# billiards3.mp4
# White ball on the left: 325, 64, 12, 13
# White ball on the right: 339, 63, 12, 13

## 命令行方式实现

In [4]:
# Command-line route: run MMTracking's demo/demo_sot.py with the SiameseRPN
# config and a checkpoint given as a URL, reading data/bee.mp4 and writing the
# rendered video to outputs/C1_SOT_bee.mp4 with a box line thickness of 2.
# --gt_bbox_file points at the text file holding the first-frame target box
# (presumably in the x, y, w, h form pasted in from the box-selection notebook).
!python ./demo/demo_sot.py \
        ./configs/sot/siamese_rpn/siamese_rpn_r50_20e_lasot.py \
        --checkpoint https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth \
        --input data/bee.mp4 \
        --output outputs/C1_SOT_bee.mp4 \
        --thickness 2 \
        --gt_bbox_file data/gt_box_file.txt


2022-04-21 13:04:36,245 - mmtrack - INFO - initialize SOTResNet with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model'}
2022-04-21 13:04:36,246 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model
2022-04-21 13:04:36,246 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model
Downloading: "https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model" to /home/featurize/.cache/torch/hub/checkpoints/sot_resnet50.model
100%|████████████████████████████████████████| 174M/174M [00:02<00:00, 69.3MB/s]
load checkpoint from http path: https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth
Downloading: "https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siam

## Python API 方式实现

### 单目标追踪

In [6]:
import mmcv
import tempfile
from mmtrack.apis import inference_sot, init_model

# Input video and the path the rendered result is written to
input_video = 'data/bee.mp4'
output = 'outputs/C2_SOT_bee.mp4'

# Config file of the single-object-tracking algorithm
sot_config = './configs/sot/siamese_rpn/siamese_rpn_r50_20e_lasot.py'
# Pretrained weights of the tracking model (given as an http URL)
sot_checkpoint = 'https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth'
# Build the single-object-tracking model on the first GPU
sot_model = init_model(sot_config, sot_checkpoint, device='cuda:0')

# Initial target box as [x, y, w, h]
init_bbox = [132, 59, 57, 61]

# Convert to corner format [x1, y1, x2, y2]
init_bbox = [init_bbox[0], init_bbox[1], init_bbox[0]+init_bbox[2], init_bbox[1]+init_bbox[3]]

# Open the video to run the tracker on
imgs = mmcv.VideoReader(input_video)
prog_bar = mmcv.ProgressBar(len(imgs))
# Rendered frames are staged in a temporary directory. Entering it as a
# context manager removes the directory even if tracking or video encoding
# raises part-way through, instead of leaking the frames on disk.
out_dir = tempfile.TemporaryDirectory()
out_path = out_dir.name
with out_dir:
    # Feed the video to the tracker frame by frame
    for i, img in enumerate(imgs):
        result = inference_sot(sot_model, img, init_bbox, frame_id=i)

        # Let the model draw the tracked box and save the frame as a JPEG
        sot_model.show_result(
                img,
                result,
                wait_time=int(1000. / imgs.fps),
                out_file=f'{out_path}/{i:06d}.jpg')
        prog_bar.update()

    # Stitch the saved frames into the output video
    print(f'\n making the output video at {output} with a FPS of {imgs.fps}')
    mmcv.frames2video(out_path, output, fps=imgs.fps, fourcc='mp4v')

2022-04-21 13:09:26,649 - mmtrack - INFO - initialize SOTResNet with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model'}
2022-04-21 13:09:26,650 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model
2022-04-21 13:09:26,651 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model


load checkpoint from http path: https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>> ] 766/774, 16.7 task/s, elapsed: 46s, ETA:     0s
 making the output video at outputs/C2_SOT_bee.mp4 with a FPS of 30.0
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 766/766, 61.6 task/s, elapsed: 12s, ETA:     0s


### 单目标追踪 + 轨迹绘制

In [7]:
import mmcv
import tempfile
from mmtrack.apis import inference_sot, init_model

# Input video and the path the rendered result is written to
input_video = 'data/bee.mp4'
output = 'outputs/C3_SOT_bee_trace.mp4'

# Config file of the single-object-tracking algorithm
sot_config = './configs/sot/siamese_rpn/siamese_rpn_r50_20e_lasot.py'
# Pretrained weights of the tracking model (given as an http URL)
sot_checkpoint = 'https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth'
# Build the single-object-tracking model on the first GPU
sot_model = init_model(sot_config, sot_checkpoint, device='cuda:0')

# Initial target box as [x, y, w, h]
init_bbox = [132, 59, 57, 61]

# Convert to corner format [x1, y1, x2, y2]
init_bbox = [init_bbox[0], init_bbox[1], init_bbox[0]+init_bbox[2], init_bbox[1]+init_bbox[3]]

# Open the video to run the tracker on
imgs = mmcv.VideoReader(input_video)
prog_bar = mmcv.ProgressBar(len(imgs))
# Rendered frames are staged in a temporary directory. Entering it as a
# context manager removes the directory even if tracking or video encoding
# raises part-way through, instead of leaking the frames on disk.
out_dir = tempfile.TemporaryDirectory()
out_path = out_dir.name
# Centre point of the tracked box for every processed frame, in frame order
circle_coord_list = []
with out_dir:
    # Feed the video to the tracker frame by frame
    for i, img in enumerate(imgs):
        result = inference_sot(sot_model, img, init_bbox, frame_id=i)

        # Integer box coordinates. A signed dtype is used on purpose: a box that
        # sticks out past the left/top image border has negative coordinates,
        # which an unsigned cast would turn into huge bogus values.
        result_int = result['track_bboxes'].astype('int32')
        circle_x = int((result_int[0] + result_int[2]) / 2)
        circle_y = int((result_int[1] + result_int[3]) / 2)
        circle_coord_list.append([circle_x, circle_y])
        # Every frame is a fresh image, so the whole trace so far is redrawn on it
        for each in circle_coord_list:
            # Filled red circle (BGR): centre, radius 5, thickness -1 means filled
            cv2.circle(img,(each[0],each[1]), 5, (0,0,255), -1)

        # Let the model draw the tracked box on top and save the frame as a JPEG
        sot_model.show_result(
                img,
                result,
                wait_time=int(1000. / imgs.fps),
                out_file=f'{out_path}/{i:06d}.jpg')
        prog_bar.update()

    # Stitch the saved frames into the output video
    print(f'\n making the output video at {output} with a FPS of {imgs.fps}')
    mmcv.frames2video(out_path, output, fps=imgs.fps, fourcc='mp4v')

2022-04-21 13:10:32,839 - mmtrack - INFO - initialize SOTResNet with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model'}
2022-04-21 13:10:32,841 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model
2022-04-21 13:10:32,843 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model


load checkpoint from http path: https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth
[>>>>>>>>>>>>>>>>>>>>>>>>>>>> ] 766/774, 16.1 task/s, elapsed: 47s, ETA:     0s
 making the output video at outputs/C3_SOT_bee_trace.mp4 with a FPS of 30.0
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 766/766, 59.8 task/s, elapsed: 13s, ETA:     0s


In [13]:
# circle_coord_list 中记录了每一帧的轨迹点坐标
len(circle_coord_list)

766

In [14]:
circle_coord_list[:10]

[[160, 89],
 [165, 95],
 [168, 100],
 [172, 105],
 [175, 111],
 [178, 117],
 [181, 121],
 [184, 127],
 [188, 133],
 [193, 138]]

### 单目标追踪 + 轨迹绘制（完全OpenCV实现）

In [15]:
import mmcv
import tempfile
from mmtrack.apis import inference_sot, init_model

# Input video and the path the rendered result is written to
input_video = 'data/bee.mp4'
output = 'outputs/C4_SOT_bee_trace.mp4'

# Config file of the single-object-tracking algorithm
sot_config = './configs/sot/siamese_rpn/siamese_rpn_r50_20e_lasot.py'
# Pretrained weights of the tracking model (given as an http URL)
sot_checkpoint = 'https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth'
# Build the single-object-tracking model on the first GPU
sot_model = init_model(sot_config, sot_checkpoint, device='cuda:0')

# Initial target box as [x, y, w, h]
init_bbox_xywh = [132, 59, 57, 61]

# Convert to corner format [x1, y1, x2, y2]
init_bbox_xyxy = [init_bbox_xywh[0], init_bbox_xywh[1], init_bbox_xywh[0]+init_bbox_xywh[2], init_bbox_xywh[1]+init_bbox_xywh[3]]

# Open the video to run the tracker on
imgs = mmcv.VideoReader(input_video)
prog_bar = mmcv.ProgressBar(len(imgs))
# Rendered frames are staged in a temporary directory. Entering it as a
# context manager removes the directory even if tracking or video encoding
# raises part-way through, instead of leaking the frames on disk.
out_dir = tempfile.TemporaryDirectory()
out_path = out_dir.name

# Centre point of the tracked box for every processed frame, in frame order
circle_coord_list = []
print('开始逐帧处理')
with out_dir:
    # Feed the video to the tracker frame by frame
    for i, img in enumerate(imgs):
        # Draw on a copy so the frame handed to the tracker stays untouched
        img_draw = img.copy()

        result = inference_sot(sot_model, img, init_bbox_xyxy, frame_id=i)
        # Tracked box corners [x1, y1, x2, y2] as plain Python ints. The cast is
        # signed on purpose: a box sticking out past the left/top border has
        # negative coordinates, which an unsigned cast would wrap to huge values.
        result_bbox = result['track_bboxes'][:4].astype('int32').tolist()

        # Tracked box: image, top-left corner, bottom-right corner, color, line width
        img_draw = cv2.rectangle(img_draw, (result_bbox[0], result_bbox[1]), (result_bbox[2], result_bbox[3]), (0,255,0), 2)

        # Centre of the box is this frame's trace point
        circle_x = int((result_bbox[0] + result_bbox[2]) / 2)
        circle_y = int((result_bbox[1] + result_bbox[3]) / 2)
        circle_coord_list.append([circle_x, circle_y])
        # Draw the trace from the first frame up to the current one
        for each in circle_coord_list:
            # Filled red circle (BGR): centre, radius 5, thickness -1 means filled
            img_draw = cv2.circle(img_draw, (each[0],each[1]), 5, (0,0,255), -1)

        # Save the rendered frame as an image file
        cv2.imwrite(f'{out_path}/{i:06d}.jpg', img_draw)
        prog_bar.update()

    # Stitch the saved frames into the output video
    print('导出视频，FPS {}'.format(imgs.fps))
    mmcv.frames2video(out_path, output, fps=imgs.fps, fourcc='mp4v')
    print('已成功导出视频 至 {}'.format(output))

2022-04-21 13:14:03,351 - mmtrack - INFO - initialize SOTResNet with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model'}
2022-04-21 13:14:03,353 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model
2022-04-21 13:14:03,354 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model


load checkpoint from http path: https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth
[                                                  ] 0/774, elapsed: 0s, ETA:开始逐帧处理
[>>>>>>>>>>>>>>>>>>>>>>>>>>>> ] 766/774, 16.5 task/s, elapsed: 46s, ETA:     0s导出视频，FPS 30.0
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 766/766, 56.4 task/s, elapsed: 14s, ETA:     0s
已成功导出视频 至 outputs/C4_SOT_bee_trace.mp4


## Python API 方式实现（多个目标轨迹绘制）

In [8]:
import mmcv
import tempfile
from mmtrack.apis import inference_sot, init_model

import seaborn as sns
import random
# 生成调色板
palette = sns.color_palette('hls', 20)

def get_color(seed):
    """Return the color assigned to a tracking ID.

    The same ``seed`` always selects the same entry of the module-level
    ``palette``. A private ``random.Random`` instance is used so that calling
    this function does not reseed the process-wide random generator.

    Args:
        seed: Tracking ID, used as the seed of the color choice.

    Returns:
        A list of ints: the chosen palette entry scaled by 255, with the
        channel order reversed (palette order -> reversed order, i.e.
        RGB -> BGR for an RGB palette).
    """
    chosen = random.Random(seed).choice(palette)
    return [int(255 * c) for c in reversed(chosen)]

In [9]:
# Single-object-tracking algorithm: config file and pretrained tracker weights
sot_config = './configs/sot/siamese_rpn/siamese_rpn_r50_20e_lasot.py'
sot_checkpoint = 'https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth'

# Video to process and the path the rendered result is written to
input_video = 'data/billiards3.mp4'
output = 'outputs/output_C4_SOT_billiards3_trace.mp4'

# Build the single-object-tracking model on the first GPU
sot_model = init_model(sot_config, sot_checkpoint, device='cuda:0')

2022-04-19 21:35:18,281 - mmtrack - INFO - initialize SOTResNet with init_cfg {'type': 'Pretrained', 'checkpoint': 'https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model'}
2022-04-19 21:35:18,283 - mmcv - INFO - load model from: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model
2022-04-19 21:35:18,284 - mmcv - INFO - load checkpoint from http path: https://download.openmmlab.com/mmtracking/pretrained_weights/sot_resnet50.model


load checkpoint from http path: https://download.openmmlab.com/mmtracking/sot/siamese_rpn/siamese_rpn_r50_1x_lasot/siamese_rpn_r50_1x_lasot_20211203_151612-da4b3c66.pth


In [10]:
# Initial boxes of all targets to track, each as [x, y, w, h]
init_bbox_xywh = [[325, 64, 12, 13], [339, 63, 12, 13]]

# Number of targets
ID_num = len(init_bbox_xywh)
print('共有{}个待追踪目标'.format(ID_num))

# Same boxes in corner format [x1, y1, x2, y2]
init_bbox_xyxy = [[x, y, x + w, y + h] for x, y, w, h in init_bbox_xywh]

共有2个待追踪目标


In [11]:
# Open the video to run the tracker on
imgs = mmcv.VideoReader(input_video)
# Temporary directory for the rendered frames. It is filled and cleaned up by
# the visualization step that follows, so it is deliberately left open here.
out_dir = tempfile.TemporaryDirectory()
out_path = out_dir.name

## Collect the per-frame tracking result of every target
# Maps target index -> {'bbox': [...], 'trace': [...]}, one list entry per frame
circle_coord_list = {}
print('开始逐帧处理')

for ID in range(ID_num): # one full pass over the video per target
    print('\n')
    print('追踪第{}个目标'.format(ID+1))
    circle_coord_list[ID] = {'bbox': [], 'trace': []}

    # Fresh progress bar for this target's pass
    prog_bar = mmcv.ProgressBar(len(imgs))

    for i, img in enumerate(imgs): # every frame of the video

        # Run single-object tracking for this target
        result = inference_sot(sot_model, img, init_bbox_xyxy[ID], frame_id=i)
        # Tracked box corners [x1, y1, x2, y2]. The cast is signed on purpose:
        # a box sticking out past the left/top border has negative coordinates,
        # which an unsigned cast would wrap to huge values. astype() already
        # returns a new array, so no extra copy is needed.
        result_bbox = result['track_bboxes'][:4].astype('int32')
        # Remember the box of this frame
        circle_coord_list[ID]['bbox'].append(result_bbox)

        # Centre of the box is this frame's trace point
        circle_x = int((result_bbox[0] + result_bbox[2]) / 2)
        circle_y = int((result_bbox[1] + result_bbox[3]) / 2)
        # Remember the trace point of this frame
        circle_coord_list[ID]['trace'].append(np.array([circle_x, circle_y]))

        prog_bar.update()

开始逐帧处理


追踪第1个目标
[>>>>>>>>>>>>>>>>>>            ] 202/319, 26.0 task/s, elapsed: 8s, ETA:     5s

追踪第2个目标
[>>>>>>>>>>>>>>>>>>            ] 202/319, 25.2 task/s, elapsed: 8s, ETA:     5s

In [12]:
## Visualization

# Progress bar for the rendering pass
prog_bar = mmcv.ProgressBar(len(imgs))

# Each target keeps one fixed color; look it up once instead of on every frame
ID_colors = [get_color(ID) for ID in range(ID_num)]

for i, img in enumerate(imgs): # every frame of the video
    img_draw = img.copy()

    for ID in range(ID_num): # every tracked target
        ID_color = ID_colors[ID]

        # Stored box of this target in this frame, as plain Python ints
        x1, y1, x2, y2 = (int(v) for v in circle_coord_list[ID]['bbox'][i])

        # Tracked box: image, top-left corner, bottom-right corner, color, line width
        img_draw = cv2.rectangle(img_draw, (x1, y1), (x2, y2), ID_color, 2)

        # Trace from the first frame up to AND including the current frame
        # (slice end i + 1; a slice ending at i would drop the current point)
        for each in circle_coord_list[ID]['trace'][:i + 1]:
            # Filled circle in the target's color: centre, radius 2, -1 means filled
            img_draw = cv2.circle(img_draw, (int(each[0]), int(each[1])), 2,  ID_color, -1)

    # Save the rendered frame as an image file
    cv2.imwrite(f'{out_path}/{i:06d}.jpg', img_draw)
    prog_bar.update()

# Stitch the saved frames into the output video
print('导出视频，FPS {}'.format(imgs.fps))
mmcv.frames2video(out_path, output, fps=imgs.fps, fourcc='mp4v')
print('已成功导出视频 至 {}'.format(output))
out_dir.cleanup()

[>>>>>>>>>>>>>>>>>>           ] 202/319, 148.1 task/s, elapsed: 1s, ETA:     1s导出视频，FPS 25.0
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 202/202, 183.0 task/s, elapsed: 1s, ETA:     0s
已成功导出视频 至 outputs/output_C4_SOT_billiards3_trace.mp4
