In [1]:
import os
import errno
import argparse
import numpy as np
import cv2
import torch
import torch.nn as nn
import torchvision
import torchvision.datasets as datasets
import torchvision.transforms as transforms
import torchvision.transforms.functional as F
from torchvision import models

In [2]:
##

def _run_in_batches(f, data_dict, out, batch_size):
    data_len = len(out)
    num_batches = int(data_len / batch_size)

    s, e = 0, 0
    for i in range(num_batches):
        s, e = i * batch_size, (i + 1) * batch_size
        batch_data_dict = {k: v[s:e] for k, v in data_dict.items()}
        out[s:e] = f(batch_data_dict)
    if e < len(out):
        batch_data_dict = {k: v[e:] for k, v in data_dict.items()}
        out[e:] = f(batch_data_dict)
        
        
"""     def __call__(self, data_x, batch_size=32):
            out = np.zeros((len(data_x), self.feature_dim), np.float32)
            _run_in_batches(
            lambda x: self.session.run(self.output_var, feed_dict=x),
            {self.input_var: data_x}, out, batch_size)
            return out
"""

In [3]:
## Extracts bounding boxes

def extract_image_patch(image, bbox, patch_shape):
    """Crop a bounding box out of an image, optionally resizing it to a patch.

    Parameters
    ----------
    image : ndarray
        Image indexed as ``image[row, column, ...]``. Only its first two
        dimensions (height, width) are used for clipping.
    bbox : array_like
        Box in ``(x, y, width, height)`` format with ``(x, y)`` the top-left
        corner. The caller's object is never modified.
    patch_shape : sequence of int or None
        Target patch shape as ``(height, width)``. When given, the box width
        is adjusted around its horizontal centre so the box has the patch's
        aspect ratio, and the crop is resized to this shape. When ``None``
        the crop is returned at its native size without resizing.

    Returns
    -------
    ndarray or None
        The (resized) crop, or ``None`` when the box is empty after clipping
        to the image. With ``patch_shape=None`` the result is a slice (view)
        of ``image``.
    """
    # Work on a float copy: integer input would otherwise silently truncate
    # the fractional aspect-ratio correction applied below.
    bbox = np.array(bbox, dtype=float)
    if patch_shape is not None:
        # Correct aspect ratio to patch shape (keeps height, changes width).
        target_aspect = float(patch_shape[1]) / patch_shape[0]
        new_width = target_aspect * bbox[3]
        bbox[0] -= (new_width - bbox[2]) / 2
        bbox[2] = new_width

    # Convert (x, y, w, h) to (left, top, right, bottom).
    bbox[2:] += bbox[:2]
    # Builtin int: the ``np.int`` alias no longer exists in current NumPy.
    bbox = bbox.astype(int)

    # Clip at image boundaries; image.shape[:2] is (height, width), so it is
    # reversed to line up with the (x, y) ordering of the box corners.
    bbox[:2] = np.maximum(0, bbox[:2])
    bbox[2:] = np.minimum(np.asarray(image.shape[:2][::-1]) - 1, bbox[2:])
    if np.any(bbox[:2] >= bbox[2:]):
        return None

    sx, sy, ex, ey = bbox
    patch = image[sy:ey, sx:ex]
    if patch_shape is None:
        # No target shape requested: nothing to resize to.
        return patch
    # cv2.resize expects the target size as (width, height).
    return cv2.resize(patch, tuple(patch_shape[::-1]))

"""
    def encoder(image, boxes):
        image_patches = []
        for box in boxes:
            patch = extract_image_patch(image, box, image_shape[:2])
            if patch is None:
                print("WARNING: Failed to extract image patch: %s." % str(box))
                patch = np.random.uniform(
                    0., 255., image_shape).astype(np.uint8)
            image_patches.append(patch)
        image_patches = np.asarray(image_patches)
        return image_encoder(image_patches, batch_size)
"""

In [4]:
## Normalizes the bounding-box images for torchvision

def normalize_image(image):
    """Turn a single image into a normalized, batched tensor.

    The image is passed through ``Resize(256)``, ``ToTensor()`` and a
    per-channel ``Normalize`` (three mean/std values, i.e. three channels),
    and a leading batch dimension of size one is added to the result.

    Parameters
    ----------
    image
        Image accepted by the torchvision transforms used here.
        NOTE(review): presumably a PIL image -- confirm against the caller.

    Returns
    -------
    torch.Tensor
        The transformed image with an extra leading dimension of size 1.
    """
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    pipeline = transforms.Compose([
        transforms.Resize(256),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ])

    # unsqueeze(0) prepends the batch axis.
    return pipeline(image).unsqueeze(0)

In [5]:
##CNN RESNET50

# Lazily built encoder shared by all ResNet50() calls, so the pretrained
# network is constructed only once instead of on every call.
_RESNET50_ENCODER = None


def _get_resnet50_encoder():
    """Return the cached ResNet-50 feature extractor, building it on first use."""
    global _RESNET50_ENCODER
    if _RESNET50_ENCODER is None:
        backbone = torchvision.models.resnet50(pretrained=True)
        # Keep every child module except the last one, so the network
        # outputs features instead of the final layer's result.
        encoder = torch.nn.Sequential(*list(backbone.children())[:-1])
        # Inference mode: BatchNorm must use its stored running statistics,
        # not the statistics of the (tiny) current batch.
        encoder.eval()
        _RESNET50_ENCODER = encoder
    return _RESNET50_ENCODER


def ResNet50(image_normalized):
    """Compute ResNet-50 features for a normalized image tensor.

    Parameters
    ----------
    image_normalized : torch.Tensor
        Batched input for the network.
        NOTE(review): assumed to be shaped (N, 3, H, W) as produced by
        ``normalize_image`` -- confirm for other callers.

    Returns
    -------
    torch.Tensor
        Flattened features: a 1-D vector (2048 values for this network) when
        the batch holds a single image, otherwise one row per image.
    """
    encoder = _get_resnet50_encoder()
    # Pure feature extraction: no gradients are needed.
    with torch.no_grad():
        features = encoder(image_normalized)
    # Collapse everything but the batch axis, then drop the batch axis when
    # it has size one (squeeze(0) is a no-op for larger batches).
    return features.flatten(start_dim=1).squeeze(0)

In [6]:
##command line arguments

def parse_args():
    """Parse the command-line options of the feature encoder.

    Two string options are defined, both mandatory: ``--mot_dir`` and
    ``--output_dir``.

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``mot_dir`` and ``output_dir``.
    """
    cli = argparse.ArgumentParser(description="RESNET50 features encoder")

    # (flag, help text) for every mandatory option, in declaration order.
    required_options = (
        ("--mot_dir", "Path to MOTChallange directory (train or test)"),
        ("--output_dir", "Output directory. Will be created if it does not exist."),
    )
    for flag, help_text in required_options:
        cli.add_argument(flag, help=help_text, required=True)

    return cli.parse_args()

In [None]:
##main function

def main():
    """Entry point: parse the command-line options.

    NOTE(review): the parsed options are not used for anything yet.
    """
    cli_options = parse_args()


# Run the entry point only when this code is executed as the main module,
# not when it is imported.
# NOTE(review): inside a notebook kernel __name__ is presumably "__main__"
# too, so main() would run when the cell executes and argparse would read the
# kernel's own argv -- confirm this is intended.
if __name__ == "__main__":
    main()