## Images path
Point to the folder where the images are

In [1]:
# Folder that holds the images (relative path; adjust it to your local layout).
images_path = "../../ImagenesPractica/xview_recognition/"

## Define categories
Define the categories of the images (DON'T MODIFY)

In [19]:
# Class table: integer key -> class name. The cells below only use the *order*
# of the values (they index list(categories.values()) by position), so the
# insertion order of this dict matters.
categories = {13: 'CARGO_PLANE', 15: 'HELICOPTER', 18: 'SMALL_CAR', 19: 'BUS', 23: 'TRUCK', 41: 'MOTORBOAT', 47: 'FISHING_VESSEL', 60: 'DUMP_TRUCK', 64: 'EXCAVATOR', 73: 'BUILDING', 86: 'STORAGE_TANK', 91: 'SHIPPING_CONTAINER'}

## Transformations for data augmentation
Rotation and flip operations.  
Each image is rotated by 90 degrees 3 times; then the original image is flipped and the flipped copy is rotated by 90 degrees 3 more times.  
Each image therefore yields 7 new images (8 in total, counting the original).

In [5]:
def rotate_left(img):
    """Return `img` rotated 90 degrees counter-clockwise."""
    return cv2.rotate(img, cv2.ROTATE_90_COUNTERCLOCKWISE)

def rotate_right(img):
    """Return `img` rotated 90 degrees clockwise."""
    return cv2.rotate(img, cv2.ROTATE_90_CLOCKWISE)

def rotate_180(img):
    """Return `img` rotated by 180 degrees."""
    return cv2.rotate(img, cv2.ROTATE_180)

def flip_image(img):
    """Return a mirrored copy of `img` (cv2.flip with flip code 1)."""
    # A positive flip code mirrors around the vertical axis in OpenCV.
    flip_code = 1
    return cv2.flip(img, flip_code)

In [63]:
def save_image(my_image, filename):
    """Write `my_image` to `filename` using cv2.imwrite.

    Args:
        my_image: image array to store.
        filename: destination path; its extension selects the file format.

    Raises:
        OSError: if cv2.imwrite reports that the image was not written.
    """
    # cv2.imwrite signals failure through its boolean return value rather than
    # by raising, so ignoring it would hide a missing or unwritable output.
    if not cv2.imwrite(filename, my_image):
        raise OSError("Could not write image to " + repr(filename))

## Code to perform data augmentation for an image
This code applies the 7 transformations to an image and creates the JSON data (image entry and annotation entry) for each one.

In [64]:
import cv2

def create_img_json(base_image_id, base_file_name, new_ending):
    """Build the image entry for one augmented variant.

    Args:
        base_image_id: id of the source image; `new_ending` is appended to it.
        base_file_name: source file name without the '.tif' extension.
        new_ending: suffix that identifies the transformation (e.g. 'rli').

    Returns:
        dict with the keys 'id', 'file_name', 'width' and 'height'.
        Width and height are always 224.
    """
    variant_id = base_image_id + new_ending
    variant_file = base_file_name + new_ending + '.tif'
    return dict(id=variant_id, file_name=variant_file, width=224, height=224)

def create_ann_json(base_image_id, category_id, new_ending):
    """Build the annotation entry for one augmented variant.

    Args:
        base_image_id: id of the source image; `new_ending` is appended to it
            to form 'image_id'.
        category_id: category id copied unchanged into the annotation.
        new_ending: suffix that identifies the transformation (e.g. 'rli').

    Returns:
        dict with the keys 'id', 'image_id', 'category_id', 'bbox', 'iscrowd'.
    """
    # NOTE(review): every annotation produced here gets id 1 -- confirm that
    # downstream consumers do not need unique annotation ids.
    annotation = dict(id=1, image_id=base_image_id + new_ending, category_id=category_id)
    # The bounding box spans the full 224x224 frame.
    annotation['bbox'] = [0, 0, 224, 224]
    annotation['iscrowd'] = 0
    return annotation

def create_new_images(json_img, json_ann, save_images=False):
    """Create the JSON entries for the 7 augmented variants of one image.

    The source image is read from disk and transformed into three rotations
    (suffixes 'rli', 'rri', 'rii'), a flip ('fi') and three rotations of the
    flipped image ('fli', 'fri', 'fii'). For each variant one image entry and
    one annotation entry are produced.

    Args:
        json_img: image entry; its 'id' and 'file_name' keys are read.
        json_ann: annotation entry; its 'category_id' key is read.
        save_images: when True, each variant is written next to the source
            file as '<base><suffix>.tif'. Defaults to False, in which case
            nothing is written to disk and only the JSON entries are built.

    Returns:
        (new_json_imgs, new_json_anns): two lists with 7 dicts each, in the
        order rli, rri, rii, fi, fli, fri, fii.

    Raises:
        OSError: if the source image cannot be read.
    """
    base_image_id = json_img['id']
    category_id = json_ann['category_id']

    # Strip only a trailing '.tif'; str.replace would also remove the same
    # text from anywhere else in the path (e.g. a directory name).
    file_name = json_img['file_name']
    if file_name.endswith('.tif'):
        base_file_name = file_name[:-len('.tif')]
    else:
        base_file_name = file_name

    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable, so check it here to fail with a clear message.
    source_path = base_file_name + '.tif'
    original_image = cv2.imread(source_path)
    if original_image is None:
        raise OSError("Could not read image " + repr(source_path))

    flipped_image = flip_image(original_image)
    variants = [
        ('rli', rotate_left(original_image)),
        ('rri', rotate_right(original_image)),
        ('rii', rotate_180(original_image)),
        ('fi', flipped_image),
        ('fli', rotate_left(flipped_image)),
        ('fri', rotate_right(flipped_image)),
        ('fii', rotate_180(flipped_image)),
    ]

    new_json_imgs = []
    new_json_anns = []
    for suffix, variant_image in variants:
        if save_images:
            save_image(variant_image, base_file_name + suffix + '.tif')
        new_json_imgs.append(create_img_json(base_image_id, base_file_name, suffix))
        new_json_anns.append(create_ann_json(base_image_id, category_id, suffix))

    return new_json_imgs, new_json_anns

## Load our data base
Be sure to pick the correct json. DO THIS WITH **train.json**

In [65]:
import json
# Load database
# Path of the annotation file to augment; set it to your train.json before
# running this cell.
json_file = ''
# The with-block closes the file on exit, so no explicit close() is needed.
with open(json_file) as ifs:
    json_data = json.load(ifs)

## Let's take a look at our data
We have a lot of images for SMALL_CAR, BUS, TRUCK, and BUILDING. We need more images for the remaining categories.  
The output of this cell shows the frequency of each class **before** data augmentation.

In [66]:
import json
import numpy as np
# Tally the annotations per class. Every class starts at zero so that classes
# without samples still appear in the printed summary.
category_names = list(categories.values())
counts = dict.fromkeys(category_names, 0)
for json_img, json_ann in zip(json_data['images'], json_data['annotations']):
    # 'category_id' is used as a 1-based position into the class list, not as
    # one of the keys of `categories`.
    category = category_names[json_ann['category_id'] - 1]
    counts[category] += 1

print(counts)

{'CARGO_PLANE': 628, 'HELICOPTER': 49, 'SMALL_CAR': 195133, 'BUS': 6549, 'TRUCK': 10640, 'MOTORBOAT': 1231, 'FISHING_VESSEL': 736, 'DUMP_TRUCK': 1238, 'EXCAVATOR': 706, 'BUILDING': 283491, 'STORAGE_TANK': 1462, 'SHIPPING_CONTAINER': 1522}


## Data augmentation
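We will perform data augmentation now. Don't run this more than once: the cell appends the new entries to `json_data` in place, so running it again would augment the already augmented data.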

In [67]:
import json
import numpy as np
# Augment every image whose class is under-represented and collect the new
# JSON entries; the per-class tally reflects the data *before* augmentation.
category_names = list(categories.values())
# Classes that already have plenty of samples and are left untouched.
well_represented = {'SMALL_CAR', 'BUS', 'TRUCK', 'BUILDING'}

counts = dict.fromkeys(category_names, 0)
new_images = []
new_annotations = []
for json_img, json_ann in zip(json_data['images'], json_data['annotations']):
    # 'category_id' is used as a 1-based position into the class list, not as
    # one of the keys of `categories`.
    category = category_names[json_ann['category_id'] - 1]

    # Data augmentation for categories that have few images
    if category not in well_represented:
        augmented_images, augmented_annotations = create_new_images(json_img, json_ann)
        new_images.extend(augmented_images)
        new_annotations.extend(augmented_annotations)

    counts[category] += 1

print(counts)

# The new entries are appended only after the loop, so the iteration above
# never visits them. This mutates json_data in place: re-running the cell
# would augment the already augmented entries.
json_data['images'] += new_images
json_data['annotations'] += new_annotations

with open('your_file.json', "w") as file:
    json.dump(json_data, file)

{'CARGO_PLANE': 628, 'HELICOPTER': 49, 'SMALL_CAR': 195133, 'BUS': 6549, 'TRUCK': 10640, 'MOTORBOAT': 1231, 'FISHING_VESSEL': 736, 'DUMP_TRUCK': 1238, 'EXCAVATOR': 706, 'BUILDING': 283491, 'STORAGE_TANK': 1462, 'SHIPPING_CONTAINER': 1522}


## Results
The cell below shows the new class frequencies after data augmentation.

In [69]:
import json
import numpy as np
# Tally the annotations per class after augmentation. Every class starts at
# zero so that classes without samples still appear in the printed summary.
category_names = list(categories.values())
counts = dict.fromkeys(category_names, 0)
for json_img, json_ann in zip(json_data['images'], json_data['annotations']):
    # 'category_id' is used as a 1-based position into the class list, not as
    # one of the keys of `categories`.
    category = category_names[json_ann['category_id'] - 1]
    counts[category] += 1

print(counts)

{'CARGO_PLANE': 5024, 'HELICOPTER': 392, 'SMALL_CAR': 195133, 'BUS': 6549, 'TRUCK': 10640, 'MOTORBOAT': 9848, 'FISHING_VESSEL': 5888, 'DUMP_TRUCK': 9904, 'EXCAVATOR': 5648, 'BUILDING': 283491, 'STORAGE_TANK': 11696, 'SHIPPING_CONTAINER': 12176}
