In [3]:
import torch
import torchvision
import numpy as np
import matplotlib.pyplot as plt
import time
import copy
import random
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader
import cv2

import PIL.Image as Image
from torchvision.transforms import ToTensor, ToPILImage
import torch.nn.functional as F

# Prefer the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda:0') if torch.cuda.is_available() else torch.device('cpu')

In [52]:
def read_video(filename, num_of_frames=100):
    """Load the leading frames of a video file as a float tensor.

    input:
        filename - name/path of the video file to read
        num_of_frames - upper bound on the number of frames kept (default 100)
    output:
        FloatTensor laid out as [T, C, H, W]
    """
    # Element 0 of the value returned by torchvision.io.read_video holds the video frames.
    frames = torchvision.io.read_video(filename, pts_unit='sec')[0]
    # Keep the first frames and move the last axis to position 1: -> [T, C, H, W].
    clip = frames[:num_of_frames].permute(0, 3, 1, 2)
    return clip.type(torch.FloatTensor)

def write_video(filename, video_array, fps=1):
    """Save a [T, C, H, W] tensor as a video file.

    input:
        filename - destination path of the video
        video_array - tensor shaped [T, C, H, W]
        fps - frames per second of the written video
    """
    # Reorder the axes to [T, H, W, C] and cast to uint8 before handing over to torchvision.
    frames_channels_last = video_array.permute(0, 2, 3, 1)
    frames_uint8 = frames_channels_last.type(torch.uint8)
    torchvision.io.write_video(filename, frames_uint8, fps)
    
def show_frame(input_tensor, title=''):
    """Display a single frame with matplotlib.

    input:
        input_tensor - image tensor shaped [C, H, W], or a one-frame batch
                       [1, C, H, W]; values are cast to uint8 for display
        title - optional title drawn above the image
    """
    # .numpy() only works on CPU tensors that are detached from the autograd graph.
    frame = input_tensor.detach().cpu()
    # permute(1, 2, 0) needs exactly three axes, so drop a leading singleton batch axis.
    if frame.dim() == 4 and frame.shape[0] == 1:
        frame = frame[0]
    image = frame.permute(1, 2, 0).numpy()  # [C, H, W] -> [H, W, C] for imshow
    plt.imshow(image.astype(np.uint8))
    plt.title(title)
    plt.show()
    
def set_random_seed(seed=0):
    """Seed every random number generator used here and request deterministic cuDNN.

    input:
        seed - integer seed applied to `random`, NumPy and PyTorch (CPU and CUDA)
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every CUDA device rather than only the current one; this call is
    # silently ignored when CUDA is not available.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # cuDNN autotuning may select different kernels between runs, so switch it off too.
    torch.backends.cudnn.benchmark = False

def get_device():
    """Return the device selected for this notebook.

    The value is the module-level `device` object (GPU when CUDA is available,
    otherwise CPU), so any model or tensor can be moved with `.to(get_device())`.
    """
    # Reading a module-level name does not need a `global` declaration.
    return device

def read_write_video(file_name, output_name='output.mp4', fps=20.0, detect_every=20):
    """Copy a video to a new file, drawing detections on every `detect_every`-th frame.

    input:
        file_name - path of the source video
        output_name - path of the video to write (default 'output.mp4')
        fps - frame rate of the written video (default 20.0)
        detect_every - run `object_detection` on each frame whose 1-based index is a
                       multiple of this value (default 20); other frames are copied as is
    """
    cap = cv2.VideoCapture(file_name)
    # VideoWriter takes the frame size as (width, height); a swapped size does not
    # match the frames that are written afterwards.
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'MP4V')
    out = cv2.VideoWriter(output_name, fourcc, fps, (frame_width, frame_height))
    counter = 0
    try:
        while cap.isOpened():
            check, frame = cap.read()
            if not check:
                # End of stream or a read failure: there is no frame left to write.
                break
            counter += 1
            if counter % detect_every == 0:
                # object_detection returns an RGB image, while VideoWriter expects BGR.
                frame = cv2.cvtColor(object_detection(frame), cv2.COLOR_RGB2BGR)
            out.write(frame)
    finally:
        # Release both handles even if detection or writing raises.
        cap.release()
        out.release()
    
def get_prediction(img_tensor, threshold):
    """Run the detector on one frame and keep detections scoring above `threshold`.

    input:
        img_tensor - a single image tensor shaped [C, H, W]
        threshold - detections whose score is not greater than this value are dropped
    output:
        (pred_boxes, pred_class): pred_boxes is a list of
        [(box[0], box[1]), (box[2], box[3])] point pairs and pred_class holds the
        matching names from COCO_INSTANCE_CATEGORY_NAMES. Both lists are empty
        when no detection exceeds the threshold.
    """
    model.eval()
    # Feed the image on whatever device the model lives on, so a GPU model works too.
    model_device = next(model.parameters()).device
    with torch.no_grad():
        pred = model([img_tensor.to(model_device)])[0]
    # .cpu() is required before .numpy() when the outputs live on a GPU.
    labels = pred['labels'].detach().cpu().numpy()
    boxes = pred['boxes'].detach().cpu().numpy()
    scores = pred['scores'].detach().cpu().numpy()

    pred_boxes, pred_class = [], []
    for label, box, score in zip(labels, boxes, scores):
        # Scores are compared one by one: a repeated score value cannot truncate the
        # result, and no match yields empty lists instead of an IndexError. For scores
        # sorted in descending order this selects a leading prefix of the detections.
        if score > threshold:
            pred_boxes.append([(box[0], box[1]), (box[2], box[3])])
            pred_class.append(COCO_INSTANCE_CATEGORY_NAMES[label])
    return pred_boxes, pred_class

def object_detection(image, threshold=0.5, rect_th=2, text_size=1, text_th=1):
    """Detect objects in one OpenCV frame and draw their boxes and labels.

    input:
        image - frame as read by cv2 (BGR channel order, [H, W, C])
        threshold - score threshold forwarded to get_prediction
        rect_th - line thickness of the rectangles
        text_size - font scale of the labels
        text_th - line thickness of the label text
    output:
        annotated copy of the frame in RGB channel order
    """
    # cvtColor returns a new array, so the caller's frame is not drawn on.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    # [H, W, C] with values 0..255 -> [C, H, W] scaled by 1/255 for the model.
    tensor_image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.
    boxes, pred_cls = get_prediction(tensor_image, threshold)
    for (first_corner, second_corner), label in zip(boxes, pred_cls):
        # cv2 drawing functions need integer pixel coordinates, while the box values
        # arrive as floating-point numbers.
        pt1 = (int(first_corner[0]), int(first_corner[1]))
        pt2 = (int(second_corner[0]), int(second_corner[1]))
        cv2.rectangle(image, pt1, pt2, color=(255, 0, 0), thickness=rect_th)
        cv2.putText(image, label, pt1, cv2.FONT_HERSHEY_SIMPLEX, text_size, (255, 255, 255), thickness=text_th)
    return image

# Class names indexed by the integer label the detector returns (see get_prediction).
# 'N/A' entries keep the following names at their positions - presumably label ids
# that have no category; verify against the model's label map.
COCO_INSTANCE_CATEGORY_NAMES = [
    # 0-9
    '__background__', 'person', 'bicycle', 'car', 'motorcycle',
    'airplane', 'bus', 'train', 'truck', 'boat',
    # 10-19
    'traffic light', 'fire hydrant', 'N/A', 'stop sign', 'parking meter',
    'bench', 'bird', 'cat', 'dog', 'horse',
    # 20-29
    'sheep', 'cow', 'elephant', 'bear', 'zebra',
    'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A',
    # 30-39
    'N/A', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    # 40-49
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle',
    'N/A', 'wine glass', 'cup', 'fork', 'knife',
    # 50-59
    'spoon', 'bowl', 'banana', 'apple', 'sandwich',
    'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
    # 60-69
    'donut', 'cake', 'chair', 'couch', 'potted plant',
    'bed', 'N/A', 'dining table', 'N/A', 'N/A',
    # 70-79
    'toilet', 'N/A', 'tv', 'laptop', 'mouse',
    'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    # 80-89
    'toaster', 'sink', 'refrigerator', 'N/A', 'book',
    'clock', 'vase', 'scissors', 'teddy bear', 'hair drier',
    # 90
    'toothbrush',
]

In [4]:
# Fix the RNG seeds before the model is built.
set_random_seed(seed=0)

# Faster R-CNN with a ResNet-50 FPN backbone, loaded with weights pre-trained on COCO.
# The move to get_device() is left commented out, so the model stays where it is created.
model = models.detection.fasterrcnn_resnet50_fpn(pretrained=True)  # .to(get_device())
model = model.eval()  # eval() returns the module itself; the assignment shows no cell output

In [49]:
# a = cv2.imread('h-7.jpg')
# b = object_detection_api(a)
# video_func('test.mp4')

In [51]:
# plt.figure(figsize=(20,20))
# plt.imshow(b)

In [None]:
# py run_classifier.py --task_name=MRPC --do_train=true --do_eval=true --data_dir=$GLUE_DIR/MRPC --vocab_file=$BERT_BASE_DIR/vocab.txt --bert_config_file=$BERT_BASE_DIR/bert_config.json --init_checkpoint=$BERT_BASE_DIR/bert_model.ckpt --max_seq_length=128 --train_batch_size=32 --learning_rate=2e-5 --num_train_epochs=3.0 --output_dir=/tmp/mrpc_output

## Алгоритм отслеживания центроидов
