In [None]:
import os

from azure.cognitiveservices.vision.customvision.training import CustomVisionTrainingClient 
from azure.cognitiveservices.vision.customvision.training.models import ImageFileCreateBatch, ImageFileCreateEntry, Region
from msrest.authentication import ApiKeyCredentials

import xml.dom.minidom

In [2]:
# Connection settings for the Custom Vision project (all three values are shown on
# https://www.customvision.ai/). Each one is taken from an environment variable when
# it is set, so real secrets never have to be saved inside the notebook; the "***"
# placeholder remains the fallback and can still be edited by hand.
cv_endpoint = os.environ.get("CUSTOM_VISION_ENDPOINT", "***")  # endpoint URL of the Custom Vision resource
training_key = os.environ.get("CUSTOM_VISION_TRAINING_KEY", "***")  # training key of the resource
project_id = os.environ.get("CUSTOM_VISION_PROJECT_ID", "***")  # id of the target project

# Local dataset directories (relative to the notebook's working directory).
images_dir = "Images"  # extracted images.tar directory - http://vision.stanford.edu/aditya86/ImageNetDogs/images.tar
annotations_dir = "Annotation"  # extracted annotation.tar directory - http://vision.stanford.edu/aditya86/ImageNetDogs/annotation.tar

In [3]:
# Map each class directory's id prefix (text before the first '-') to its tag name
# (text between the first and second '-').
# Entries that are not directories or contain no '-' (e.g. stray files such as
# .DS_Store) are skipped instead of raising IndexError.
# NOTE(review): directory names containing more than one '-' get a truncated tag
# name (only the segment up to the second '-'). Switching to split('-', 1) would fix
# that, but the same derivation is repeated where the images are loaded and must be
# changed together with this one.
ids_dict = {
    dirname.split('-')[0]: dirname.split('-')[1]
    for dirname in os.listdir(images_dir)
    if '-' in dirname and os.path.isdir(os.path.join(images_dir, dirname))
}

In [4]:
# Authenticate against the training API with the training key and open the project.
auth_headers = {"Training-key": training_key}
credentials = ApiKeyCredentials(in_headers=auth_headers)
trainer = CustomVisionTrainingClient(cv_endpoint, credentials)
project = trainer.get_project(project_id)

# Bare last expression: the notebook displays the project's name as the cell output.
project.name

'AiOnAzureDogs'

In [5]:
# Fetch the tags already defined in the project so they can be reused rather than
# created again.
existing_tags = trainer.get_tags(project.id)
existing_tag_count = len(existing_tags)
print('Found %d existsing tags' % existing_tag_count)

Found 120 existsing tags


In [6]:
# Resolve one Custom Vision tag per tag name in ids_dict: reuse the project's
# existing tag when there is one, otherwise create it.
# The name -> tag index replaces a linear scan per name, and it also remembers tags
# created in this loop so a repeated name never triggers a second create_tag call.
tags_by_name = {}
for existing_tag in existing_tags:
    # setdefault keeps the first tag seen for a name, like the original first-match lookup.
    tags_by_name.setdefault(existing_tag.name, existing_tag)

tags = []
loaded_names = set()
for tag_name in ids_dict.values():
    if tag_name in loaded_names:
        # Same name reached through another directory: already in `tags`.
        continue
    if tag_name not in tags_by_name:
        tags_by_name[tag_name] = trainer.create_tag(project.id, tag_name)
    tags.append(tags_by_name[tag_name])
    loaded_names.add(tag_name)
print('%d tags loaded' % len(tags))

120 tags loaded


In [7]:
def _int_child(node, field):
    """Return the integer text of the first ``field`` element found under ``node``."""
    return int(node.getElementsByTagName(field)[0].firstChild.nodeValue)


def _normalized_first_bbox(annotation_path):
    """Read the first annotated object's box, scaled to fractions of the image size.

    Custom Vision regions are expressed in normalized coordinates (0..1 relative to
    the image width/height), so the pixel box is divided by the image size.

    Assumes a Pascal-VOC style annotation with ``<size><width>/<height>`` and
    ``<object>`` containing ``xmin``/``ymin``/``xmax``/``ymax`` — TODO confirm the
    ``<size>`` element is present in every annotation file.

    Returns:
        (left, top, width, height) as floats.

    Raises:
        ValueError: if the annotation declares a non-positive image size.
        IndexError: if an expected element is missing from the XML.
    """
    annotation_xml = xml.dom.minidom.parse(annotation_path)

    size_xml = annotation_xml.getElementsByTagName('size')[0]
    image_width = _int_child(size_xml, 'width')
    image_height = _int_child(size_xml, 'height')
    if image_width <= 0 or image_height <= 0:
        raise ValueError("Annotation %s declares an invalid image size" % annotation_path)

    # Only the first <object> is used; any further objects in the file are ignored.
    object_xml = annotation_xml.getElementsByTagName('object')[0]
    xmin = _int_child(object_xml, 'xmin')
    ymin = _int_child(object_xml, 'ymin')
    xmax = _int_child(object_xml, 'xmax')
    ymax = _int_child(object_xml, 'ymax')

    return (
        xmin / image_width,
        ymin / image_height,
        (xmax - xmin) / image_width,
        (ymax - ymin) / image_height,
    )


# Index the resolved tags by name once (first match wins) instead of scanning per directory.
tag_lookup = {}
for known_tag in tags:
    tag_lookup.setdefault(known_tag.name, known_tag)

# Build one upload entry per image: raw file bytes plus a single tagged region.
# NOTE: every image's bytes are held in memory until the upload cell has run.
images = []
for dir_name in os.listdir(images_dir):
    images_dir_path = os.path.join(images_dir, dir_name)
    if '-' not in dir_name or not os.path.isdir(images_dir_path):
        # Stray files (e.g. .DS_Store) are not class directories; skip them.
        continue
    annotations_dir_path = os.path.join(annotations_dir, dir_name)
    # Same derivation as ids_dict: the segment between the first and second '-'.
    tag_name = dir_name.split('-')[1]
    tag = tag_lookup[tag_name]

    for image_filename in os.listdir(images_dir_path):
        image_name = image_filename.split('.')[0]
        image_path = os.path.join(images_dir_path, image_filename)
        # The annotation file is looked up under the image's name without extension.
        annotation_path = os.path.join(annotations_dir_path, image_name)

        left, top, width, height = _normalized_first_bbox(annotation_path)
        region = Region(tag_id=tag.id, left=left, top=top, width=width, height=height)

        with open(image_path, mode="rb") as image_contents:
            azure_image = ImageFileCreateEntry(name=image_name, contents=image_contents.read(), regions=[region])
        images.append(azure_image)

print('Created %d images' % len(images))

Created 20580 images


In [10]:
from math import ceil
import time

# Upload the prepared entries in fixed-size batches.
# NOTE(review): the service caps the number of images per request (64 according to
# the Custom Vision API docs — confirm), so batch_size must stay at or below it.
batch_size = 50
batch_count = ceil(len(images) / batch_size)
for batch_idx in range(batch_count):
    min_idx = batch_idx * batch_size
    # Slicing clamps at the end of the list, so the last batch may simply be shorter.
    batch_images = images[min_idx:min_idx + batch_size]

    upload_result = trainer.create_images_from_files(project.id, ImageFileCreateBatch(images=batch_images))
    if not upload_result.is_batch_successful:
        print("Image batch upload failed. Batch idx: %d" % batch_idx)
        for image_result in upload_result.images:
            if image_result.status != 'OK':
                # Per-image results expose source_url/status/image but no `id`
                # attribute, so the image is identified by source_url.
                # presumably this carries the entry's name for file uploads — verify.
                print("Image ", image_result.source_url, ", status: ", image_result.status)
    # Pause between requests to stay under the service's rate limit.
    time.sleep(1)

# Reports the project's total image count as returned by the service, not just
# the images sent by this run.
print('uploaded %d images' % trainer.get_image_count(project.id))

uploaded 20491 images
