## Import all Libraries

In [1]:
# AWS Rekognition to get bbox
import numpy as np
import boto3
from PIL import Image, ImageDraw, ExifTags, ImageColor, ImageFont
from matplotlib import pyplot as plt
from utils.rekognition import determine_color, draw_animal_count
import cv2
import time
import math
import os
import io
import json
from utils.config import *
from utils.fix_annotation import *

In [2]:
# process whole image.py to get key points
import mmcv
from mmcv.parallel import collate, scatter
from mmcv.runner import load_checkpoint
import torch as tr
#from torchvision import transforms
from mmpose.apis import (inference, inference_top_down_pose_model, init_pose_model,
                         vis_pose_result)
from mmpose.models import build_posenet
from mmpose.datasets.pipelines import Compose

# Font used for the label tags that draw_response paints onto each bounding box.
# NOTE(review): this is an absolute, machine-specific font path; loading presumably
# fails on systems that do not ship this file -- confirm or make it configurable.
FNT = ImageFont.truetype('/usr/share/fonts/default/Type1/n019004l.pfb', 23)


## Get Bounding Boxes from Video Frames

In [3]:
class LoadImage:
    """Pipeline step that places the input image into ``results['img']``.

    Args:
        color_type (str): Flag forwarded to ``mmcv.imread`` when the input is
            a file path.
        channel_order (str): Channel order forwarded to ``mmcv.imread``; for
            array inputs it decides whether a BGR->RGB conversion is applied.
    """

    def __init__(self, color_type='color', channel_order='rgb'):
        self.color_type = color_type
        self.channel_order = channel_order

    def __call__(self, results):
        """Call function to load images into results.
        Args:
            results (dict): A result dict contains the img_or_path.
        Returns:
            dict: ``results`` will be returned containing loaded image.
        Raises:
            TypeError: If ``results['img_or_path']`` is neither a ``str`` nor
                a ``numpy.ndarray``.
        """
        source = results['img_or_path']
        if isinstance(source, str):
            results['image_file'] = source
            img = mmcv.imread(source, self.color_type, self.channel_order)
        elif isinstance(source, np.ndarray):
            results['image_file'] = ''
            if self.color_type == 'color' and self.channel_order == 'rgb':
                # In-memory arrays are treated as BGR and converted to RGB.
                img = cv2.cvtColor(source, cv2.COLOR_BGR2RGB)
            else:
                # Any other setting uses the array as-is. Previously this
                # path left ``img`` unbound and crashed with
                # UnboundLocalError on the assignment below.
                img = source
        else:
            raise TypeError('"img_or_path" must be a numpy array or a str or '
                            'a pathlib.Path object')
        results['img'] = img
        return results

In [4]:
def init_pose_model(config, checkpoint=None, device='cuda:0'):
    """Build a pose network from a config and put it in eval mode.

    This notebook-level definition replaces the ``init_pose_model`` name
    imported from ``mmpose.apis`` earlier in the notebook.

    Args:
        config (str or :obj:`mmcv.Config`): Path of a config file, or an
            already loaded config object.
        checkpoint (str, optional): Checkpoint path. When None, no weights
            are loaded into the model.
        device: Device the checkpoint is mapped to and the model is moved to.
    Returns:
        nn.Module: The constructed model, with the config stored on
        ``model.cfg``.
    Raises:
        TypeError: If ``config`` is neither a str nor an ``mmcv.Config``.
    """
    if not isinstance(config, (str, mmcv.Config)):
        raise TypeError('config must be a filename or Config object, '
                        f'but got {type(config)}')
    cfg = mmcv.Config.fromfile(config) if isinstance(config, str) else config

    # Weights come only from the explicit checkpoint below, never from the
    # config's ``pretrained`` entry.
    cfg.model.pretrained = None
    pose_net = build_posenet(cfg.model)

    if checkpoint is not None:
        load_checkpoint(pose_net, checkpoint, map_location=device)

    # Keep the config on the model so later code can read it from there.
    pose_net.cfg = cfg
    pose_net.to(device)
    pose_net.eval()
    return pose_net

In [5]:
def _box2cs(cfg, box):
    """This encodes bbox(x,y,w,h) into (center, scale)
    Args:
        x, y, w, h
    Returns:
        tuple: A tuple containing center and scale.
        - np.ndarray[float32](2,): Center of the bbox (x, y).
        - np.ndarray[float32](2,): Scale of the bbox w & h.
    """

    x, y, w, h = box[:4]
    input_size = cfg.data_cfg['image_size']
    aspect_ratio = input_size[0] / input_size[1]
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)

    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio

    # pixel std is 200.0
    scale = np.array([w / 200.0, h / 200.0], dtype=np.float32)

    scale = scale * 1.25

    return center, scale

In [6]:
def process_model(model, dataset, person_results, img_or_path):
    """Run a pose model on every bounding box of one image.

    Args:
        model: Pose model that carries its config on ``model.cfg``.
        dataset (str): Dataset type name; only 'AnimalHorse10Dataset' is
            accepted.
        person_results (list[dict]): One dict per detection, each holding a
            'bbox' entry. The first four values are read as x, y, w, h; a
            fifth value, when present, is used as the bbox score.
        img_or_path: Image (array or path) handed to ``LoadImage``.
    Returns:
        tuple: ``(result['preds'], result['output_heatmap'])`` taken from
        the model's forward output.
    Raises:
        NotImplementedError: If ``dataset`` is not 'AnimalHorse10Dataset'.
    """
    boxes = np.array([entry['bbox'] for entry in person_results])
    cfg = model.cfg
    model_device = next(model.parameters()).device

    # Replace the configured first pipeline stage with the local LoadImage,
    # keeping the channel order that stage was configured with.
    first_stage = cfg.test_pipeline[0]
    pipeline = Compose(
        [LoadImage(channel_order=first_stage.get('channel_order', 'rgb'))]
        + cfg.test_pipeline[1:])

    if dataset != 'AnimalHorse10Dataset':
        raise NotImplementedError()
    flip_pairs = []

    samples = []
    for box in boxes:
        center, scale = _box2cs(cfg, box)
        sample = dict(
            img_or_path=img_or_path,
            center=center,
            scale=scale,
            bbox_score=box[4] if len(box) == 5 else 1,
            bbox_id=0,  # need to be assigned if batch_size > 1
            dataset=dataset,
            joints_3d=np.zeros((cfg.data_cfg.num_joints, 3),
                               dtype=np.float32),
            joints_3d_visible=np.zeros((cfg.data_cfg.num_joints, 3),
                                       dtype=np.float32),
            rotation=0,
            ann_info=dict(
                image_size=np.array(cfg.data_cfg['image_size']),
                num_joints=cfg.data_cfg['num_joints'],
                flip_pairs=flip_pairs,
            ),
        )
        samples.append(pipeline(sample))

    batch = collate(samples, samples_per_gpu=1)
    if next(model.parameters()).is_cuda:
        # scatter does not work here, so only the image tensor is moved.
        batch['img'] = batch['img'].to(model_device)
    # Unwrap the per-box meta information from the collated container.
    batch['img_metas'] = [metas[0] for metas in batch['img_metas'].data]

    with tr.no_grad():
        output = model(
            img=batch['img'],
            img_metas=batch['img_metas'],
            return_loss=False,
            return_heatmap=False)
    # NOTE(review): return_heatmap=False is passed above, so the second value
    # is presumably None -- verify against the model's forward().
    return output['preds'], output['output_heatmap']

In [7]:
# Use the first GPU when available, otherwise fall back to the CPU.
device = tr.device("cuda:0" if tr.cuda.is_available() else "cpu")

# One pose model per body part; each dataset name is read back from the
# config that init_pose_model stores on the model.
model_head = init_pose_model(
    config='../myConfigs/train_head_resnet.py',
    checkpoint='../temp_logs/cattle_head/resnet/best.pth',
    device=device,
)
dataset_head = model_head.cfg.data['test']['type']

model_spine = init_pose_model(
    config='../myConfigs/train_spine_hrnet.py',
    checkpoint='../temp_logs/cattle_spine/hrnet/best.pth',
    device=device,
)
dataset_spine = model_spine.cfg.data['test']['type']

model_tail = init_pose_model(
    config='../myConfigs/train_tail_ori_hrnet.py',
    checkpoint='../temp_logs/cattle_tail_ori/hrnet/best.pth',
    device=device,
)
dataset_tail = model_tail.cfg.data['test']['type']

model_leg_front = init_pose_model(
    config='../myConfigs/train_leg_front_hrnet.py',
    checkpoint='../temp_logs/cattle_leg_front/hrnet/best.pth',
    device=device,
)
dataset_leg_front = model_leg_front.cfg.data['test']['type']

model_leg_back = init_pose_model(
    config='../myConfigs/train_leg_back_hrnet.py',
    checkpoint='../temp_logs/cattle_leg_back/hrnet/best.pth',
    device=device,
)
dataset_leg_back = model_leg_back.cfg.data['test']['type']

Use load_from_local loader
Use load_from_local loader
Use load_from_local loader
Use load_from_local loader
Use load_from_local loader


In [8]:
def get_kp_color(label):
    """Return the keypoint colours for a body-part label.

    For the known labels ('Head', 'Spine', 'Tail', 'Leg_front', 'Leg_back')
    the result is a list of hex colour strings: one entry per keypoint,
    followed by a darker final entry that ``vis_pose`` uses for skeleton
    lines and keypoint rims. Any other label yields the tuple (0, 0, 255).
    """
    pink, green, blue = '#EC51F8', '#74F54B', '#4394F9'
    orange, yellow = '#F49736', '#FFFB56'

    palettes = {
        'Head': [pink, green, pink, green, blue, orange,
                 orange, yellow, yellow, blue, '#07178D'],
        'Spine': [blue] * 8 + ['#24518D'],
        'Tail': [pink] * 5 + ['#892B8E'],
        'Leg_front': [orange] * 9 + ['#8C551E'],
        'Leg_back': [green] * 9 + ['#3F8D28'],
    }
    return palettes.get(label, (0, 0, 255))

In [9]:
def get_skeleton(label):
    """Return the keypoint-index pairs to connect for a body-part label.

    Each pair ``[a, b]`` names two keypoint indices joined by a line.
    'Spine', 'Tail' and both leg labels are simple chains of consecutive
    indices; 'Head' has a hand-written edge list. Unknown labels give ``[]``.
    """
    def chain(n_links):
        # [[0, 1], [1, 2], ...] with n_links consecutive pairs.
        return [[i, i + 1] for i in range(n_links)]

    skeletons = {
        'Head': [[4, 0], [4, 2], [0, 2], [1, 3],
                 [5, 6], [7, 8], [0, 1], [1, 5],
                 [5, 7], [7, 9], [2, 3], [3, 6],
                 [6, 8], [8, 9], [4, 9]],
        'Spine': chain(7),
        'Tail': chain(4),
        'Leg_front': chain(8),
        'Leg_back': chain(8),
    }
    return skeletons.get(label, [])

In [10]:
def rgb_to_bgr(color):
    """Return ``color`` as a tuple with its first and third items swapped.

    Items beyond the third (e.g. an alpha value) keep their position.
    """
    channels = list(color)
    channels[0], channels[2] = channels[2], channels[0]
    return tuple(channels)

In [11]:
def vis_pose(points, draw, label):
    """Draw the skeleton lines and keypoints of one pose onto ``draw``.

    Args:
        points: Indexable whose first element is the sequence of keypoints
            to draw, each unpackable into (x, y, score).
        draw: Drawing object providing ``line`` and ``ellipse``.
        label (str): Body-part label selecting colours and skeleton.
    Returns:
        The same ``draw`` object that was passed in.
    """
    keypoints = points[0]
    min_score = 0.4  # keypoints at or below this score are not drawn
    palette = get_kp_color(label)

    # Skeleton first, so the keypoint dots end up on top of the lines.
    for start_idx, end_idx in get_skeleton(label):
        x0, y0, score0 = keypoints[start_idx]
        x1, y1, score1 = keypoints[end_idx]
        if not (score0 > min_score and score1 > min_score):
            continue
        draw.line([(x0, y0), (x1, y1)], fill=palette[-1], width=8)

    for idx, (x, y, score) in enumerate(keypoints):
        if not score > min_score:
            continue
        cx, cy = int(x), int(y)
        # Larger disc in the palette's last colour acts as a rim ...
        draw.ellipse([(cx - 11, cy - 11), (cx + 11, cy + 11)],
                     fill=palette[-1], outline=None)
        # ... around a smaller disc in the keypoint's own colour.
        draw.ellipse([(cx - 6, cy - 6), (cx + 6, cy + 6)],
                     fill=palette[idx], outline=None)
    return draw

In [12]:
def extend_bbox(left, top, width, height, extend_rate):
    """Grow a bounding box by ``extend_rate`` around its centre.

    The width and height are enlarged by ``extend_rate`` (0.2 means 20 %
    larger) and the origin is moved back by half of the added size, so the
    result is centred on the original box and always contains it.

    The previous implementation moved the origin by ``left * extend_rate`` /
    ``top * extend_rate``. That made the shift depend on where the box sits
    in the image, and for boxes far from the origin the "extended" box no
    longer covered the original right/bottom edge.

    Args:
        left, top: Coordinates of the top-left corner.
        width, height: Size of the box.
        extend_rate: Fractional growth; 0 returns the box unchanged.
    Returns:
        tuple: ``(left, top, width, height)`` of the enlarged box.
    """
    added_width = width * extend_rate
    added_height = height * extend_rate
    return (left - added_width / 2,
            top - added_height / 2,
            width + added_width,
            height + added_height)

In [13]:
def get_color(name):
    """Return the hex colour used for a detection label's bounding box.

    Labels without an entry fall back to black ('#000000').
    """
    head_color = '#0096FF'
    side_color = '#FF2600'
    colors = {
        'Cow': '#FF9300',
        'Head': head_color,
        'Head Left': head_color,
        'Head Right': head_color,
        'Tag': '#00FFFF',
        'Knee': '#FFFB00',
        'Hoof': '#00F900',
        'Tail': '#FF40FF',
        'Side Left': side_color,
        'Side Right': side_color,
        'Udder': '#9437FF',
        'Teat': '#FF2F92',
    }
    return colors.get(name, '#000000')

def get_opacity(name):
    """Return the fill opacity (0.0 - 1.0) for a detection label's box.

    Labels without an entry get 0.0.
    """
    opacities = {
        'Cow': 0.3,
        'Tag': 0.3,
        'Head': 0.45,
        'Head Left': 0.45,
        'Head Right': 0.45,
        'Knee': 0.3,
        'Hoof': 0.35,
        'Tail': 0.3,
        'Side Left': 0.3,
        'Side Right': 0.3,
        'Udder': 0.35,
        'Teat': 0.3,
    }
    return opacities.get(name, 0.0)

In [14]:
def get_confidence_cut_off(name):
    """Return the minimum confidence a detection of ``name`` must reach.

    Values are on the same scale as the detections' 'Confidence' field they
    are compared against. Labels without an entry use 80.0.
    """
    cut_offs = {
        'Cow': 79.4,
        'Tag': 86.9,
        'Head': 92.5,
        'Knee': 78.0,
        'Hoof': 92.9,
        'Tail': 73.5,
        'Udder': 35.0,
        'Teat': 73.0,
    }
    return cut_offs.get(name, 80.0)

In [15]:
# Module-level counter kept so existing references to the name keep working;
# draw_response itself does not read or modify it.
tail_count = 0


def _pose_jobs(label):
    """Return ``(extend_rate, jobs)`` for a detection label, or None.

    ``jobs`` is a list of ``(model, dataset, body_part)`` triples that are run,
    in order, on the enlarged bounding box of a detection with that label.
    """
    if label == 'Head':
        return 0.01, [(model_head, dataset_head, 'Head')]
    if label == 'Cow':
        return 0.01, [(model_spine, dataset_spine, 'Spine'),
                      (model_leg_front, dataset_leg_front, 'Leg_front'),
                      (model_leg_back, dataset_leg_back, 'Leg_back')]
    if label == 'Tail':
        return 0.20, [(model_tail, dataset_tail, 'Tail')]
    return None


def draw_response(image, response, animal_target, draw_boundary=True, fill=True, draw_btn=True):
    """Draw bounding boxes, label tags and pose keypoints for one frame.

    All boxes are drawn first; keypoints are drawn in a second pass so they
    end up on top of every box.

    Args:
        image (PIL.Image.Image): Three-channel frame. It is not modified;
            drawing happens on a copy whose first and third channels are
            swapped.
        response (dict): Detection result with a 'CustomLabels' list. Entries
            are used only when they contain 'Geometry'; they must provide
            'Name', 'Confidence' and a 'BoundingBox' with 'Left', 'Top',
            'Width' and 'Height', which are used directly as drawing
            coordinates.
        animal_target: Unused; kept for call compatibility.
        draw_boundary (bool): Draw the box outline.
        fill (bool): Fill the box with the label's semi-transparent colour.
        draw_btn (bool): Draw a tag with the label text at the box corner.
    Returns:
        np.ndarray: The annotated canvas as an array.
    """
    # Pixels handed to the pose models: the frame as received, before the
    # channel swap below. Converted once and reused for every detection.
    source_pixels = np.array(image)

    first, second, third = image.split()
    canvas = Image.merge("RGB", (third, second, first))
    draw = ImageDraw.Draw(canvas, mode='RGBA')

    # ---- pass 1: bounding boxes and label tags -------------------------
    detections = []
    for custom_label in response['CustomLabels']:
        if 'Geometry' not in custom_label:
            continue
        box = custom_label['Geometry']['BoundingBox']
        left, top = box['Left'], box['Top']
        width, height = box['Width'], box['Height']
        label = custom_label['Name']
        if label == 'Udder':
            print('Udder')
        elif label == 'Teat':
            print('Teat')
        # Skip detections below the per-label confidence cut-off.
        if custom_label['Confidence'] < get_confidence_cut_off(label):
            continue
        detections.append((label, box))

        color = get_color(label)
        alpha = round(get_opacity(label) * 255)
        corners = [(left, top), (left + width, top + height)]
        if draw_boundary or fill:
            draw.rectangle(
                xy=corners,
                outline=color if draw_boundary else None,
                # '#RRGGBB' plus a two-digit hex alpha gives a translucent fill.
                fill=color + f'{alpha:0>2X}' if fill else None,
                width=3)
        if draw_btn:
            text_width, text_height = FNT.getsize(label)
            draw.rectangle(xy=[(left, top), (left + text_width, top + text_height)],
                           outline=None, fill=color, width=3)
            draw.text((left, top), label, fill='#000000', font=FNT)

    # ---- pass 2: pose keypoints ----------------------------------------
    for label, box in detections:
        jobs = _pose_jobs(label)
        if jobs is None:
            continue
        extend_rate, pose_models = jobs
        enlarged = list(extend_bbox(box['Left'], box['Top'],
                                    box['Width'], box['Height'], extend_rate))
        detection = [{'bbox': enlarged}]
        for pose_model, dataset, body_part in pose_models:
            preds, _ = process_model(pose_model, dataset, detection, source_pixels)
            draw = vis_pose(preds, draw, body_part)

    # The channel axis is reversed twice here (slice, then cvtColor), so the
    # returned array has the same channel order as the canvas.
    # NOTE(review): the overall RGB/BGR handling across this function and its
    # caller is hard to follow -- confirm the intended channel order.
    img = np.asarray(canvas)[:, :, ::-1].copy()
    inferred_frame = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return inferred_frame

In [16]:
def analyzeVideo(src_video, src_bbox_json, src_img_dir, output_file, fps=5):
    """Annotate every frame of a video and write the result to ``output_file``.

    Frames are read as ``<src_img_dir><frameId>.jpg``, annotated with
    ``draw_response`` using the matching entry of the JSON's 'Frames' list,
    and appended to an mp4 file. Every 50th frame is also saved to
    ``debug_imgs/check_<frameId>.jpg`` together with the elapsed time.

    Args:
        src_video (str): Source video; only used to read its frame rate.
        src_bbox_json (str): JSON file with a 'Frames' list, one entry per
            frame.
        src_img_dir (str): Prefix of the frame images (the file name is
            appended directly, so include the trailing separator).
        output_file (str): Path of the mp4 file to write.
        fps (float): Frame rate used when the source video does not report a
            positive one. The frame rate is now always read through
            ``cv2.CAP_PROP_FPS``; earlier this argument was passed to
            ``cap.get`` as the property id, which only worked because the
            default 5 equals ``cv2.CAP_PROP_FPS``.
    """
    start = time.time()

    # Output frame size is taken from the first frame image.
    with Image.open(src_img_dir + '0.jpg') as first_frame:
        imgSize = first_frame.size

    # The capture is needed only for the frame rate; release it right away.
    cap = cv2.VideoCapture(src_video)
    try:
        frameRate = cap.get(cv2.CAP_PROP_FPS)
    finally:
        cap.release()
    if not frameRate > 0:
        frameRate = fps
    print('FrameRate:', frameRate)

    with open(src_bbox_json) as bbox_json:
        bbox_frames = json.load(bbox_json)

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    videoWriter = cv2.VideoWriter(output_file, fourcc, frameRate, imgSize)
    try:
        for frameId, bbox_data in enumerate(bbox_frames['Frames']):
            with Image.open(src_img_dir + str(frameId) + '.jpg') as img:
                inferred_frame = draw_response(img, bbox_data, animal_target='cow')
            inferred_frame = cv2.cvtColor(inferred_frame, cv2.COLOR_BGR2RGB)

            # Save a debug snapshot every 50 frames.
            if frameId % 50 == 0:
                print("Finish Processing {} frame".format(frameId))
                fig, ax = plt.subplots()
                ax.imshow(inferred_frame)
                ax.set_title("Frame {}".format(int(frameId)))
                fig.savefig('debug_imgs/check_{}.jpg'.format(frameId), dpi=200)
                # Close the figure so snapshots do not pile up in memory.
                plt.close(fig)
                lap = time.time()
                print('lap time: ', lap - start)

            videoWriter.write(inferred_frame)
    finally:
        # Finalise the output file even if a frame fails to process.
        videoWriter.release()
        cv2.destroyAllWindows()

    end = time.time()
    print('total time lapse', end - start)

In [17]:
# Videos to process; video_format[i] is the file extension of video_name_list[i].
video_name_list = ['cattle_multi_1']
video_format = ['.mov']
for v_idx, video in enumerate(video_name_list):
    src_video = f'video_data/input_video/{video}{video_format[v_idx]}'
    src_bbox_json = f'json_data/{video}_new_bbox.json'
    src_img_dir = f'frame_img/{video}/'
    output_video = f'video_data/inferred_video/inferred_fixed_{video}.mp4'
    print(output_video)
    analyzeVideo(src_video, src_bbox_json, src_img_dir, output_video)
    print(f'finished analyzing the video {video}')
    print()

video_data/inferred_video/inferred_fixed_cattle_multi_1.mp4
FrameRate: 30.006466910972193
Finish Processing 0 frame
lap time:  3.503347635269165
Finish Processing 50 frame
lap time:  109.85330510139465
Finish Processing 100 frame
lap time:  226.45562505722046
Finish Processing 150 frame
lap time:  333.6754539012909
Finish Processing 200 frame
lap time:  435.8921248912811
Finish Processing 250 frame
lap time:  540.8767602443695
Finish Processing 300 frame
lap time:  666.1351451873779
Finish Processing 350 frame
lap time:  785.4227914810181
Finish Processing 400 frame
lap time:  889.8876004219055
Finish Processing 450 frame
lap time:  1017.3974685668945
Finish Processing 500 frame
lap time:  1160.2743360996246
Finish Processing 550 frame
lap time:  1297.996387720108
Finish Processing 600 frame
lap time:  1404.5304102897644
Finish Processing 650 frame
lap time:  1485.1052141189575
Finish Processing 700 frame
lap time:  1569.8113861083984
Finish Processing 750 frame
lap time:  1669.6180660