In [1]:
# import libraries
import boto3
from boto3.dynamodb.conditions import Key, Attr
import botocore
import json
import os
import io
import random
import time
from tqdm import tqdm
from PIL import Image

# Step 1. Set Configuration

In [142]:
# Convert annotation to Yolo format
def convert_Annotation2YoloFormat(bbox, size, class_names=None, threshold=None):
    """Convert one detection record into a YOLO label row.

    Args:
        bbox: Mapping with the keys 'score', 'left', 'right', 'top',
            'bottom' and 'id' (the class name). Numeric values may be
            ``Decimal`` instances; they are converted to ``float``.
        size: ``(width, height)`` of the image the box belongs to.
        class_names: Ordered list of accepted class names. Defaults to the
            notebook-level ``classes`` list.
        threshold: Minimum score (exclusive) a detection must exceed.
            Defaults to the notebook-level ``score_threshold``.

    Returns:
        ``[class_id, x_center, y_center, width, height]`` with the four
        geometry values divided by the image size and rounded to 6 decimals,
        or ``None`` when the score does not exceed the threshold or the class
        name is not in ``class_names``.
    """
    # Fall back to the notebook configuration when no override is supplied.
    if class_names is None:
        class_names = classes
    if threshold is None:
        threshold = score_threshold

    # Ignore detections whose score is not strictly above the threshold.
    if not bbox['score'] > threshold:
        return None

    width, height = size
    xmin = bbox['left']
    xmax = bbox['right']
    ymin = bbox['top']
    ymax = bbox['bottom']
    dw = 1. / width
    dh = 1. / height

    # float() converts Decimal values before the floating point arithmetic.
    x = float(xmax + xmin) / 2.0
    y = float(ymax + ymin) / 2.0
    w = float(xmax - xmin)
    h = float(ymax - ymin)
    x = round(x * dw, 6)
    y = round(y * dh, 6)
    w = round(w * dw, 6)
    h = round(h * dh, 6)

    try:
        class_id = class_names.index(bbox['id'])
    except ValueError:
        # The detected class is not one of the classes we train on.
        return None
    return [class_id, x, y, w, h]
    

In [143]:
def buildDataset(dataset, filehandle):
    """Download annotated webcam frames from S3 and write YOLO label files.

    For every item in ``dataset`` the matching JPEG is fetched from the
    notebook-level ``bucket``, its detections are converted with
    ``convert_Annotation2YoloFormat`` and, when at least one detection is
    kept, the image and a ``.txt`` label file are stored in ``Dataset_Dir``.

    Args:
        dataset: Iterable of mappings with the keys 'objects', 'webcam' and
            'timestamp'.
        filehandle: Writable text file; receives one image path per line for
            every image that was actually saved.

    Side effects:
        Appends each processed timestamp to the per-webcam file found through
        ``timestamp_filename_list`` / ``timestamp_file_list``. Items without
        objects are skipped and their timestamp is not recorded.
    """
    # Iterating the tqdm wrapper advances the bar by itself, so no manual
    # update() call is made (that would count every item twice).
    pbar = tqdm(dataset)
    for annotation_item in pbar:
        # A missing or empty 'objects' attribute means there is nothing to annotate.
        objects = annotation_item.get('objects')
        if objects is None:
            continue

        webcam = annotation_item['webcam']
        timestamp = annotation_item['timestamp']

        # Local file names share the stem "<webcam>_<timestamp>".
        file_name = webcam + "_" + str(timestamp)
        annotation_file_path = os.path.join(Dataset_Dir, file_name + ".txt")
        image_file_name = file_name + ".JPG"
        image_file_path = os.path.join(Dataset_Dir, image_file_name)

        pbar.set_description("Processing : " + image_file_name)

        # S3 object key of the source image: "<webcam>/jpeg/<timestamp>.JPG"
        key = webcam + "/jpeg" + "/" + str(timestamp) + ".JPG"

        try:
            # Download the image from the S3 bucket into memory.
            image_bytes = bucket.Object(key).get().get('Body').read()
            img = Image.open(io.BytesIO(image_bytes))
            size = img.size

            # Keep only the detections that convert to a valid label row.
            annotations = []
            for item in objects:
                annotation = convert_Annotation2YoloFormat(item, size)
                if annotation is not None:
                    annotations.append(annotation)

            # Only images with at least one kept annotation become part of
            # the dataset.
            if annotations:
                img.save(image_file_path, "JPEG")

                with open(annotation_file_path, "w") as annotation_file:
                    for annotation in annotations:
                        annotation_file.write(" ".join([str(x) for x in annotation]) + "\n")

                # Index the image only when it was saved; listing a path that
                # does not exist on disk would break consumers of the index.
                filehandle.write(image_file_path + "\n")
        except botocore.exceptions.ClientError:
            # Best effort: the image could not be fetched from S3, so the item
            # is skipped but still recorded as processed below.
            pass

        # Record the timestamp in '<webcam>.txt' so a later run can resume.
        file_index = timestamp_filename_list.index(webcam + ".txt")
        timestamp_file_list[file_index].write(str(timestamp) + "\n")

In [93]:
# Detections must score strictly above this value to be kept
score_threshold = 0.5
# Class names to keep; a name's position in this list is its YOLO class id
classes = [
    'person',
    'car',
    'bus',
    'truck',
]
# Local directory that receives the images, label files and index files
Dataset_Dir = 'hirescam'

In [133]:
# AWS credentials are read from the environment instead of being embedded in
# the notebook. When a variable is unset the value is None, in which case
# boto3 falls back to its default credential lookup (shared credentials file,
# instance role, ...).
# NOTE: the key pair that used to be hard-coded in this cell has been exposed
# and must be rotated.
AWS_ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_ACCESS_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")

In [117]:
# Webcams to process; each name is matched against the 'webcam' attribute of
# the DynamoDB items and is also the prefix of the image keys in S3
webcam_list = ['muellerblaustein01', 'jahnplatz01']

In [118]:
# Name of the S3 bucket that holds the webcam images
Bucket_Name = "hirescam-webcams"

# Step 2. Get Data from AWS DynamoDB
<br>
If you are resuming a previous annotation run, re-run Steps 2 and 3.

In [134]:
# One progress file name ("<webcam>.txt") per configured webcam, in the same
# order as webcam_list
timestamp_filename_list = [name + ".txt" for name in webcam_list]

In [135]:
# Show the progress file names that will be opened in the next cell
print(timestamp_filename_list)

['muellerblaustein01.txt', 'jahnplatz01.txt']


In [136]:
# Open (or create) the progress file of every webcam and read the last
# timestamp recorded by a previous annotation run.
# The handles stay open on purpose: buildDataset() appends to them and the
# cleanup cell after Step 3 closes them.
timestamp_file_list = []
latest_timestamp_list = []
for filename in timestamp_filename_list:
    timestamp_file = open(filename, "a+")
    timestamp_file_list.append(timestamp_file)

    # "a+" starts at the end of the file; rewind to read the history.
    timestamp_file.seek(0)
    # Skip blank lines so a stray empty line cannot make int() fail.
    timestamp_lines = [line.strip() for line in timestamp_file if line.strip()]

    # An empty history means nothing was processed yet: start from 0.
    if timestamp_lines:
        latest_timestamp_list.append(int(timestamp_lines[-1]))
    else:
        latest_timestamp_list.append(0)

In [137]:
# Show the resume point of every webcam (0 means no history yet)
print(latest_timestamp_list)

[1605811202, 0]


In [138]:
# Generate the filter expression:
#   (webcam == w1 AND timestamp > t1) OR (webcam == w2 AND timestamp > t2) ...
# Attr is the condition builder boto3 documents for FilterExpression; Key is
# meant for KeyConditionExpression in queries.
filter_expression = None
for webcam, latest_timestamp in zip(webcam_list, latest_timestamp_list):
    webcam_condition = Attr('webcam').eq(webcam) & Attr('timestamp').gt(latest_timestamp)
    if filter_expression is None:
        filter_expression = webcam_condition
    else:
        filter_expression = filter_expression | webcam_condition

In [139]:
# get annotations from DynamoDB
dynamodb = boto3.resource('dynamodb',
                          aws_access_key_id=AWS_ACCESS_KEY,
                          aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
                          region_name='eu-central-1'
                         )

table = dynamodb.Table('webcams.analyzer.sagemaker')

# A single scan call returns only one page of results. Keep requesting pages,
# with the same filter, until DynamoDB stops returning a LastEvaluatedKey;
# otherwise matching items beyond the first page are silently dropped.
response = table.scan(FilterExpression=filter_expression)
data = response['Items']
while 'LastEvaluatedKey' in response:
    response = table.scan(
        FilterExpression=filter_expression,
        ExclusiveStartKey=response['LastEvaluatedKey'],
    )
    data.extend(response['Items'])

In [140]:
# Number of items returned by the scan
print(len(data))

805


# Step 3. Annotation

In [141]:
# The items fetched in Step 2 are the records to annotate in this step
dataset = data
print(len(dataset))

805


In [144]:
# Connect to S3 and get a handle on the bucket that stores the webcam images
s3 = boto3.resource(
    's3',
    region_name='eu-central-1',
    aws_access_key_id=AWS_ACCESS_KEY,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)
bucket = s3.Bucket(Bucket_Name)

In [145]:
# Index file listing the images of the dataset; the train and validation
# lists are split from it in Step 4
dataset_file_name = 'dataset.txt'
dataset_file_path = os.path.join(Dataset_Dir, dataset_file_name)

# Create the dataset directory on a first run; without it, opening the index
# file below fails with FileNotFoundError.
if Dataset_Dir:
    os.makedirs(Dataset_Dir, exist_ok=True)

# Append mode keeps the entries written by earlier (interrupted) runs.
dataset_file = open(dataset_file_path, "a+")

In [146]:
# Download the images and write the YOLO label files for every fetched item;
# each saved image is appended to the dataset index file
buildDataset(dataset, dataset_file)

Processing : muellerblaustein01_1605861302.JPG:   9%|███████▊                                                                                  | 70/805 [00:29<05:04,  2.41it/s]


KeyboardInterrupt: 

In [147]:
# Run this cell before pausing or resuming the step above: it closes the
# dataset index file and every per-webcam timestamp file
for open_file in (dataset_file, *timestamp_file_list):
    open_file.close()

# Step 4. Split Train and Valid dataset
This step should be run after you have finished annotation (Steps 2 and 3) for all (or enough) images.

In [149]:
# Fraction of the dataset assigned to the training list; the rest goes to validation
split_ratio = 0.8

In [150]:
# Read the image paths collected in the dataset index file; the context
# manager closes the handle as soon as the lines are loaded.
with open(dataset_file_path, "r") as dataset_file:
    data = dataset_file.readlines()
print(data)

# Shuffle, then cut the list at split_ratio: the first part is the training
# set and the remainder is the validation set.
random.shuffle(data)
train_cnt = int(split_ratio * len(data))
train_data = data[:train_cnt]
valid_data = data[train_cnt:]

train_file_name = "train.txt"
train_file_path = os.path.join(Dataset_Dir, train_file_name)

valid_file_name = "valid.txt"
valid_file_path = os.path.join(Dataset_Dir, valid_file_name)

# Each entry already ends with a newline, so the lines are written unchanged.
with open(train_file_path, 'w') as train_file:
    train_file.writelines(train_data)

with open(valid_file_path, 'w') as valid_file:
    valid_file.writelines(valid_data)

['hirescam\\muellerblaustein01_1605804002.JPG\n', 'hirescam\\muellerblaustein01_1605805201.JPG\n', 'hirescam\\muellerblaustein01_1605811801.JPG\n']


In [None]:
# Show the shuffled list of dataset image paths
print(data)