# Домашнее задание № 7 

In [58]:
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor

import os
import cv2
import skimage
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import time

# Поиск объектов в видеопотоке 

Задача - придумать и реализовать алгоритм поиска (обнаружения без классификации) движущихся объектов.

В качестве исходных данных приведена выборка с видеофайлами и аннотацией для каждого кадра файла. Аннотация задана в виде ограничивающих прямоугольников в формате ```(y1,x1,y2,x2)```, где
- ```(x1,y1)``` - верхний левый угол прямоугольника;
- ```(x2,y2)``` - нижний правый угол прямоугольника.

Ссылка на данные – https://disk.yandex.ru/d/RdjMDoQQO8Ngcw

В качестве обучающей можно брать любые видеофайлы. При этом должны быть отдельно выбраны тестовые данные, которые не будут использованы в создании решения. 

Видеофайл с результатами работы алгоритма должен быть прикреплен вместе с решением. Пример фрагмента видеофайла с результатом поиска объектов приведен ниже.

Исходный код может быть в формате ```.py``` или ```.ipynb```.

![annotation](annot_example.gif "annotation")

## Требования к результату
- поиск должен находить геометрические место объекта на видеоизображении. Геометрическое место задано ограничивающим прямоугольником (bounding box);
- продолжительность решения для любого одного видеофайла не должна превышать 10 минут;
- должна быть приведена оценка точности решения;
- привести демонстрацию результатов требется на одном из тестовых видеофайлов.

In [10]:
def create_video(save_dir, size, img_format='jpg', vido_format='avi'):
    out_name = Path(save_dir).parts[-1]

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(str(Path(save_dir) / Path(f'{out_name}.{vido_format}')),
                          fourcc, 20, tuple(size.astype(int)))

    for fname in tqdm(sorted(map(str, Path(save_dir).glob(f'*.{img_format}')))):
        imag = skimage.io.imread(fname)
        out.write(imag)
    out.release()

In [172]:
def show_video(num=-1,path='./tmp/', sleep=0):
    cap = get_video(num if num >=0 else 0, path=path)
    get_video_details(cap)
    cv2.startWindowThread()
    while (cap.isOpened()):
        is_ok, frame = cap.read()
        if not is_ok:
            break
            
        if sleep > 0:
            time.sleep(sleep)

        cv2.imshow("sparse optical flow", frame)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()

In [175]:
create_video('tmp', size=np.array((1920.0 , 1080.0)), vido_format='mp4')
show_video(sleep=1)

  0%|          | 0/79 [00:00<?, ?it/s]

['tmp.mov', 'tmp.mp4']
./tmp/tmp.mov
79.0 1920.0 1080.0 20.0


In [156]:
def spec_points(image):
    """
    Функция для поиска особых точек и получения их дескрипторов 
    """
    
    hyp_params = dict(
        nfeatures = 50,
        nOctaveLayers = 10,
        contrastThreshold = 0.01,
        edgeThreshold = 6,
        sigma = 2.0)  # hyp params
    detector = cv2.SIFT_create(**hyp_params)

    keypoints, desc = detector.detectAndCompute(image.copy(), None)
    return keypoints, desc

In [198]:
def get_keyp_and_d(first, second):
    FLANN_INDEX_KDTREE = 2
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)

    flann = cv2.FlannBasedMatcher(index_params, search_params)
    
    ratio_thresh = 0.7
    keypoints1, desc1 = spec_points(first)
    keypoints2, desc2 = spec_points(second)
    
    matches = flann.knnMatch(desc1, desc2, k=2)
    
    good_matches = []
    for m, n in matches:
        if m.distance < ratio_thresh * n.distance:
            good_matches.append(m)
    
    dx = np.zeros(len(good_matches))
    dy = np.zeros(len(good_matches))
    for k, good_matche in enumerate(good_matches):
        x1, y1 = keypoints1[good_matche.queryIdx].pt
        x2, y2 = keypoints2[good_matche.trainIdx].pt
                
        dx[k] = x1 - x2
        dy[k] = y1 - y2
    return keypoints1, keypoints2, dx, dy, good_matches

In [205]:
def get_M(first, second):
    h, w = first.shape
    
    keypoints1 = [0]*4
    keypoints2 = [0]*4
    dx = [0]*4
    dy = [0]*4
    l_good_matches = [0]*4
    
    i = 0
    for hmin in [0, h//2]:
        for wmin in [0, h//2]:
            temp_l = get_keyp_and_d(first[hmin:hmin+h//2, wmin:wmin+w//2], second[hmin:hmin+h//2, wmin:wmin+w//2])
            keypoints1[i], keypoints2[i], dx[i], dy[i], l_good_matches[i] = temp_l
            i += 1
            
    av_dx = (dx[0].mean() + dx[1].mean() + dx[2].mean() + dx[3].mean())/4
    av_dy = (dy[0].mean() + dy[1].mean() + dy[2].mean() + dy[3].mean())/4
    
    pts1 = []
    pts2 = []   
    
    for i in range(4):
        ind = np.argmin((dx[i]-av_dx)**2 + (dy[i]-av_dx)**2)
        good_matche = l_good_matches[i][ind]
        pts1.append(keypoints1[i][good_matche.queryIdx].pt)
        pts2.append(keypoints2[i][good_matche.trainIdx].pt)
    
    pts1 = np.array(pts1).astype(np.float32)
    pts2 = np.array(pts2).astype(np.float32)
    
    
    print('pts1', pts1)
    print('pts2', pts2)
    
#     plt.imshow(first, cmap='gray')
#     plt.show()
#     plt.imshow(second, cmap='gray')
#     plt.show()
    M = cv2.getPerspectiveTransform(pts1, pts2)
#     if np.sum(np.abs(M - np.array([[0, 0, 0], [0, 0, 0], [0, 0, 1]]))) < 0.1:
#         M = cv2.getAffineTransform(pts1[:3], pts2[:3])
#     else:
#         return M
#     if np.sum(np.abs(M - np.array([[0, 0, 0], [0, 0, 0]]))) < 0.1:
#         M = cv2.getAffineTransform(pts1[1:], pts2[1:])
    return M
    

In [206]:
def get_trajectory(cap):
    get_video_details(cap)
    
    color = (0, 255, 0)
    feature_params = dict(maxCorners=300, qualityLevel=0.2, minDistance=2, blockSize=7)
    lk_params = dict(winSize=(15, 15), maxLevel=2, criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT, 10, 0.03))
    
    is_ok, first_frame = cap.read()
    if not is_ok:
        print("ERR: Can't read")
        return
#     plt.imshow(first_frame)
#     plt.show()
    
    prev_gray = cv2.cvtColor(first_frame.copy(), cv2.COLOR_BGR2GRAY)
#     prev = cv2.goodFeaturesToTrack(prev_gray, mask=None, **feature_params)
    mask = np.zeros_like(first_frame)
    idx = 0
    while (cap.isOpened()):
        print("idx", idx)
        is_ok, frame = cap.read()
#         if not is_ok or idx == 10:
#             break
        if not is_ok:
            break
        gray = cv2.cvtColor(frame.copy(), cv2.COLOR_BGR2GRAY)
        mask = np.zeros_like(frame)
        
#         print('prob:', np.sum((prev_gray - gray)**2))
        M = get_M(prev_gray.copy(), gray.copy())
#         if np.sum(np.abs(M - np.array([[0, 0, 0], [0, 0, 0], [0, 0, 1]]))) < 0.1:
#             print('bad M')
#             break
        print('M',M)
        if M.shape[0] == 3:
            trans_prev_gray = cv2.warpPerspective(prev_gray.copy(), M, (prev_gray.shape[1], prev_gray.shape[0]))
        else:
            trans_prev_gray = cv2.warpAffine(prev_gray.copy(), M, (prev_gray.shape[1], prev_gray.shape[0]))
        
#         plt.imshow(trans_prev_gray, cmap='gray')
#         plt.show()

        prev = cv2.goodFeaturesToTrack(trans_prev_gray, mask=None, **feature_params)
    
        trans_gray = gray*(trans_prev_gray!=0).astype(np.uint8)
        nextp, status, error = cv2.calcOpticalFlowPyrLK(prev_gray, trans_gray, prev, None, **lk_params)

        good_old = prev[status == 1].astype(int)
        good_new = nextp[status == 1].astype(int)

        # Draws the optical flow tracks
        for i, (new, old) in enumerate(zip(good_new, good_old)):
            a, b = new.ravel()
            c, d = old.ravel()
            mask = cv2.line(mask, (a, b), (c, d), color, 2)
            frame = cv2.circle(frame, (a, b), 3, color, -1)

        output = cv2.add(frame, mask)
        prev_gray = gray.copy()
        # Updates previous good feature points
        prev = good_new.reshape(-1, 1, 2)

        cv2.imwrite(f'tmp/frame_{idx}.jpg', output)
        idx += 1
        
        # Opens a new window and displays the output frame
#         cv2.imshow("sparse optical flow", output)
        
#         idx += 1
#         if cv2.waitKey(1) & 0xFF == ord('q'):
#             break

    # The following frees up resources and closes all windows
    cap.release()

In [207]:
cap = get_video(0)
get_trajectory(cap)
create_video('tmp', size=np.array((1920.0 , 1080.0)), vido_format='mp4')
show_video(sleep=1)

['Clip_1.mov', 'Clip_10.mov', 'Clip_11.mov', 'Clip_2.mov', 'Clip_3.mov', 'Clip_37.mov', 'Clip_4.mov', 'Clip_5.mov', 'Clip_6.mov', 'Clip_7.mov', 'Clip_8.mov', 'Clip_9.mov']
./Videos/Videos/Clip_1.mov
309.0 1920.0 1080.0 29.97
idx 0
pts1 [[177.36787 324.82074]
 [318.4126  211.29285]
 [842.90405 260.1712 ]
 [302.90402 260.1712 ]]
pts2 [[172.25569 322.63614]
 [316.20105 212.21628]
 [839.5775  262.68936]
 [299.5775  262.68936]]
M [[ 1.70310084e+00  5.45010430e-01 -2.04035158e+02]
 [ 5.90864300e-02  1.93115669e+00 -1.22550015e+02]
 [ 2.24928912e-04  1.71473168e-03  1.00000000e+00]]
idx 1
pts1 [[121.73949 275.15222]
 [316.20105 212.21628]
 [756.95245 131.90005]
 [110.3671  220.88058]]
pts2 [[115.043976 271.68903 ]
 [313.5318   213.1451  ]
 [750.141    135.99603 ]
 [101.35839  224.97061 ]]
M [[ 1.33288883e+00  1.41018723e-01 -5.38221587e+01]
 [ 2.65282282e-02  1.31277251e+00 -1.67073229e+01]
 [ 2.34560768e-04  9.13471066e-04  1.00000000e+00]]
idx 2
pts1 [[115.043976 271.68903 ]
 [ 23.648537 20

pts1 [[ 72.285515  89.90598 ]
 [ 17.389482  77.64083 ]
 [485.2948   251.09581 ]
 [ 20.16682  305.9894  ]]
pts2 [[ 66.18864   85.6088  ]
 [ 13.311139  75.50593 ]
 [477.92975  250.9634  ]
 [ 13.688642 305.6539  ]]
M [[ 1.00753106e+00  1.70597118e-03 -3.51441919e+00]
 [-1.24572855e-01  1.34454979e+00 -2.20267939e+01]
 [-4.29802869e-04  8.96794762e-04  1.00000000e+00]]
idx 23
pts1 [[291.5432    15.512081]
 [203.06192   58.834682]
 [947.97833   76.20896 ]
 [407.9783    76.20896 ]]
pts2 [[286.85016   13.096584]
 [199.72684   58.718426]
 [943.49084   76.71781 ]
 [403.4908    76.71781 ]]
M [[ 1.00222864e+00  8.96990889e-02 -6.66201683e+00]
 [-9.41384387e-04  1.06738113e+00 -3.18296769e+00]
 [-1.22707419e-05  2.46849755e-04  1.00000000e+00]]
idx 24
pts1 [[748.7228  325.61487]
 [831.6535  248.7848 ]
 [673.5922  314.8679 ]
 [133.5922  314.8679 ]]
pts2 [[744.507   319.35992]
 [822.8108  247.651  ]
 [670.60864 315.85062]
 [130.60866 315.85062]]
M [[-4.41794811e-01 -3.53315443e+00  1.12015598e+03]
 

pts1 [[ 37.97599   15.823305]
 [660.81024  315.4718  ]
 [171.748     81.52701 ]
 [548.0308    10.79435 ]]
pts2 [[ 23.752064  17.377066]
 [649.2531   316.32602 ]
 [170.27757   81.822655]
 [540.0043    15.109985]]
M [[-4.27602279e+00  1.09066072e+01  1.24064570e+01]
 [-1.41418702e-01  1.25904715e+00  1.98158543e+00]
 [-9.70165036e-03  2.02155483e-02  1.00000000e+00]]
idx 45
pts1 [[277.24612  104.17114 ]
 [736.525    351.563   ]
 [867.9855   123.7481  ]
 [459.00153   43.261135]]
pts2 [[263.38306 104.92885]
 [729.86584 354.11728]
 [859.6864  129.0616 ]
 [451.1747   48.01743]]
M [[ 1.03542576e+00 -8.64147258e-02 -1.55493915e+01]
 [ 1.29147921e-02  9.58836090e-01  1.11998167e+00]
 [ 3.49096537e-05 -1.24495053e-04  1.00000000e+00]]
idx 46
pts1 [[135.89232  111.65869 ]
 [335.66806  169.10649 ]
 [697.0095   231.39691 ]
 [447.60672   42.978115]]
pts2 [[124.42922  112.82019 ]
 [329.28238  169.59996 ]
 [687.9953   237.18987 ]
 [439.68277   48.136696]]
M [[ 1.01027306e+00  7.42750448e-01 -7.7886016

pts1 [[703.00934 391.47134]
 [226.2967  258.6226 ]
 [871.26416 103.10582]
 [ 59.22375 360.5223 ]]
pts2 [[696.9011   389.03366 ]
 [222.11606  259.67828 ]
 [867.5351   102.53625 ]
 [ 54.443615 360.97305 ]]
M [[ 9.83651423e-01 -2.08104253e-02  3.27688331e+00]
 [-7.81661094e-03  9.72955219e-01  7.92015579e+00]
 [-1.01163174e-05 -1.94215376e-05  1.00000000e+00]]
idx 67
pts1 [[ 22.100918  29.483633]
 [177.9873   281.5153  ]
 [867.5351   102.53625 ]
 [429.03107   61.18743 ]]
pts2 [[ 16.175388  26.363214]
 [172.90259  281.69864 ]
 [863.74066  101.95194 ]
 [423.57523   61.20047 ]]
M [[ 9.81235208e-01 -3.88650978e-01  4.95291927e+00]
 [ 5.70359139e-02  3.20285172e-01  1.40376277e+01]
 [ 2.01212559e-04 -2.23741716e-03  1.00000000e+00]]
idx 68
pts1 [[ 16.175388  26.363214]
 [130.79025  409.24374 ]
 [174.28311   86.67077 ]
 [ 38.411514 295.1677  ]]
pts2 [[ 11.982578  21.885433]
 [124.59601  410.2774  ]
 [171.89577   84.71239 ]
 [ 30.407578 296.17047 ]]
M [[ 1.04870331e+00 -1.93184325e-02 -4.4527570

pts1 [[ 70.42173   61.70127 ]
 [713.38226  396.4333  ]
 [182.42336   99.6258  ]
 [806.4594    11.299991]]
pts2 [[ 62.32783   63.892105]
 [706.9832   400.5497  ]
 [181.62811   99.44808 ]
 [798.11334   16.65797 ]]
M [[ 1.28011380e+00 -6.64834700e-01  1.16062379e+01]
 [ 7.69849355e-03  9.18449232e-01  5.04553944e+00]
 [ 3.82220472e-04 -8.50966168e-04  1.00000000e+00]]
idx 89
pts1 [[ 62.32783   63.892105]
 [727.9123   475.13376 ]
 [181.62811   99.44808 ]
 [806.29626   11.678534]]
pts2 [[ 52.883904  64.33859 ]
 [720.2674   478.49762 ]
 [181.91861   99.59072 ]
 [798.3609    15.588155]]
M [[ 1.18072060e+00 -2.83290150e-01 -3.03237569e+00]
 [ 8.96039555e-03  9.96368758e-01 -3.96468933e-01]
 [ 2.33967358e-04 -3.53881594e-04  1.00000000e+00]]
idx 90
pts1 [[ 52.883904  64.33859 ]
 [720.2674   478.49762 ]
 [181.91861   99.59072 ]
 [790.06445   20.925928]]
pts2 [[ 43.396202  63.68403 ]
 [712.955    481.65518 ]
 [181.96936   99.746284]
 [782.44336   24.90123 ]]
M [[ 1.13968976e+00 -1.80171107e-01 -5

pts1 [[230.43094 428.04666]
 [259.455   207.23366]
 [222.34293 491.59802]
 [388.78427  53.58486]]
pts2 [[214.37497  420.3219  ]
 [252.3847   213.39076 ]
 [217.17377  491.72012 ]
 [381.99783   59.240097]]
M [[-1.37710627e+00 -1.72704239e-01  3.89727974e+02]
 [-1.95082680e-01 -2.63239512e-02  5.32301430e+01]
 [-3.55474170e-03 -4.39188243e-04  1.00000000e+00]]
idx 111
pts1 [[386.52255  296.4672  ]
 [288.31107  168.80637 ]
 [179.66028   91.976295]
 [381.99783   59.240097]]
pts2 [[377.00974  299.3683  ]
 [282.00507  175.3539  ]
 [178.97795   91.146225]
 [374.13803   65.822266]]
M [[ 5.97400891e-01  3.74772864e-01  2.71010904e+01]
 [-1.71589307e-02  1.03005568e+00 -5.64372974e+00]
 [-8.33728736e-04  1.01637610e-03  1.00000000e+00]]
idx 112
pts1 [[260.5732   487.70905 ]
 [ 53.06851   60.74468 ]
 [178.97795   91.146225]
 [181.30748  206.43542 ]]
pts2 [[241.51912  468.95926 ]
 [ 47.46473   66.78317 ]
 [177.46274   88.592995]
 [175.94714  210.38382 ]]
M [[ 1.01171685e+00  3.18171541e-02 -7.64673

pts1 [[464.55084  236.79689 ]
 [196.78976  185.43076 ]
 [729.50555  212.30373 ]
 [ 59.225487 146.05437 ]]
pts2 [[454.3027   230.96017 ]
 [190.30495  180.96207 ]
 [720.6488   210.19012 ]
 [ 49.009922 143.70497 ]]
M [[ 8.80871214e-01  1.13116720e+00 -1.59880744e+02]
 [-7.11352897e-02  1.35723603e+00 -2.54144881e+01]
 [-3.86625349e-04  1.34302191e-03  1.00000000e+00]]
idx 134
pts1 [[234.47556 268.4943 ]
 [222.7412  265.9642 ]
 [685.3445  232.43149]
 [ 35.37424 267.83597]]
pts2 [[224.31995  262.79587 ]
 [215.70906  263.9944  ]
 [675.9975   231.08633 ]
 [ 26.243055 265.6132  ]]
M [[-3.59916290e-02 -9.31819436e-01  2.50999039e+02]
 [-4.93995683e-02 -9.57824506e-01  2.59816668e+02]
 [-1.87526247e-04 -3.68736369e-03  1.00000000e+00]]
idx 135
pts1 [[314.47696  309.5395  ]
 [184.76399  178.36508 ]
 [610.6548   217.20663 ]
 [ 26.243055 265.6132  ]]
pts2 [[305.8601   305.39786 ]
 [178.60289  176.8775  ]
 [601.7539   217.02875 ]
 [ 20.026678 264.02216 ]]
M [[ 1.12111090e+00  7.82200995e-02 -2.80232

error: OpenCV(4.6.0) D:\a\opencv-python\opencv-python\opencv\modules\video\src\lkpyramid.cpp:1260: error: (-215:Assertion failed) (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 in function 'cv::`anonymous-namespace'::SparsePyrLKOpticalFlowImpl::calc'


In [None]:
def get_video_details(cap):
    cnt = cap.get(cv2.CAP_PROP_FRAME_COUNT)
    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    fps = cap.get(cv2.CAP_PROP_FPS)

    print(cnt, w, h, fps)

In [169]:
def get_video(num, path='./Videos/Videos/'):
    """
    Функция достающая видео
    num - номер видео (если num == -1, то достаются все видео)
    
    """
    all_names = os.listdir(path)
    names = []
    for name in all_names:
        if name.endswith(".mov") or name.endswith(".mp4"):
            names.append(name)
    print(names)
    if num >= 0:
        name_vid = names[num]
        print(path+name_vid)
        return cv2.VideoCapture(path+name_vid)
    
    videos = []
    for name_vid in names:
        videos.append(cv2.VideoCapture(path+name_vid))
    return videos
    

In [170]:
show_video(0, './Videos/Videos/', 1)

['Clip_1.mov', 'Clip_10.mov', 'Clip_11.mov', 'Clip_2.mov', 'Clip_3.mov', 'Clip_37.mov', 'Clip_4.mov', 'Clip_5.mov', 'Clip_6.mov', 'Clip_7.mov', 'Clip_8.mov', 'Clip_9.mov']
./Videos/Videos/Clip_1.mov
309.0 1920.0 1080.0 29.97


In [171]:
show_video(sleep=1)

['tmp.mov']
./tmptmp.mov
0.0 0.0 0.0 0.0
