# Satcen Dataset Handling

Perform any processing that may be required on the Satcen dataset.

## Add New Files

Handle the addition of newly received images that do not contain any skiffs.

### Rename the Files

In [15]:
import os
import pathlib

# path to satcen_dataset (contains "full" and "original" subfolders)
satcen_path = pathlib.Path('../satcen_dataset/').resolve()

# folder holding every picture of the full dataset
pictures_path = satcen_path/'full/pictures'

# rename new files: strip the 6-character prefix ("tudor" plus the single
# character that follows it) from files that still carry their original name
for f in os.listdir(pictures_path):

    # only files whose name *starts* with the prefix are renamed; a plain
    # substring test would also truncate names that merely contain "tudor"
    if not f.startswith('tudor'):
        continue

    new_name = f[6:]
    target = pictures_path/new_name

    # never rename to an empty name and never clobber an existing picture
    # (os.rename silently replaces an existing file on POSIX systems)
    if not new_name or target.exists():
        print(f'Skipping {f}: cannot rename to "{new_name}"')
        continue

    os.rename(pictures_path/f, target)

### Construct the Labels

In [51]:
import json

# load the annotations of the original images
# (the context manager guarantees that the file handle is closed)
with open(satcen_path/'original/SatCen_skiffs256.json') as annotations_file:
    annotations = json.load(annotations_file)['batch']['annotations']

# reformat original annotations: keep the image name and the "data" entry
# of every annotated object
labels = [
    {
        'name': annotation['name'],
        'bounding_boxes': [box['data'] for box in annotation['objects']]
    }
    for annotation in annotations
]

# new images do not contain any ships, add bounding_boxes: []
# for the corresponding entry in the labels list
# new images contain "chip" in the filename
# NOTE(review): the names are expected to *start* with "chip" - confirm
# whether a stricter startswith() check is wanted here
# os.listdir returns entries in arbitrary order, sort them so that the
# generated labels file is reproducible between runs
pictures = sorted(os.listdir(satcen_path/'full/pictures'))
for picture in pictures:

    # new image
    if 'chip' in picture:
        labels.append({
            'name': picture,
            'bounding_boxes': []
        })

# calculate number of positive and negative observations
# (an observation is positive when it has at least one bounding box)
no_pos_obs = sum(1 for label in labels if len(label['bounding_boxes']) > 0)
no_neg_obs = len(labels) - no_pos_obs

# print stats
print(f'Total number of labels: {len(labels)}')
print(f'Positive observations: {no_pos_obs}')
print(f'Negative observations: {no_neg_obs}')

# save json
labels_path = satcen_path/'full/labels.json'

# delete if already exists
# (check the target path itself, not the current working directory)
if labels_path.exists():
    os.remove(labels_path)

# write to file
with open(labels_path, 'w') as labels_file:
    json.dump(labels, labels_file)

Total number of labels: 9420
Positive observations: 1525
Negative observations: 7895
