-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4 from NASA-IMPACT/v0_0
Initial codebase for data processing and model infrastructure.
- Loading branch information
Showing
728 changed files
with
187,614 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
# Label map for the marine-debris detector: a single foreground class.
# (TF Object Detection API label maps start class ids at 1; 0 is background.)
item {
  id: 1
  name: 'marine_debris'
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,204 @@ | ||
# SSD with Resnet 101 v1 FPN feature extractor, shared box predictor and focal
# loss (a.k.a Retinanet).
# See Lin et al, https://arxiv.org/abs/1708.02002
# Trained on open image dataset v4, initialized from scratch.

# This config is TPU compatible

model {
  ssd {
    inplace_batchnorm_update: true
    freeze_batchnorm: false
    # Single class: marine_debris (must match the label map).
    num_classes: 1
    box_coder {
      faster_rcnn_box_coder {
        y_scale: 10.0
        x_scale: 10.0
        height_scale: 5.0
        width_scale: 5.0
      }
    }
    matcher {
      argmax_matcher {
        matched_threshold: 0.5
        unmatched_threshold: 0.5
        ignore_thresholds: false
        negatives_lower_than_unmatched: true
        force_match_for_each_row: true
        use_matmul_gather: true
      }
    }
    similarity_calculator {
      iou_similarity {
      }
    }
    encode_background_as_zeros: true
    anchor_generator {
      multiscale_anchor_generator {
        min_level: 3
        max_level: 7
        anchor_scale: 4.0
        aspect_ratios: [1.0, 2.0, 0.5]
        scales_per_octave: 2
      }
    }
    image_resizer {
      fixed_shape_resizer {
        height: 256
        width: 256
      }
    }
    box_predictor {
      weight_shared_convolutional_box_predictor {
        depth: 256
        # Focal-loss prior: bias initialized so background dominates early training.
        class_prediction_bias_init: -4.6
        conv_hyperparams {
          activation: RELU_6,
          regularizer {
            l2_regularizer {
              weight: 0.0001
            }
          }
          initializer {
            random_normal_initializer {
              stddev: 0.01
              mean: 0.0
            }
          }
          batch_norm {
            scale: true,
            decay: 0.997,
            epsilon: 0.001,
          }
        }
        num_layers_before_predictor: 2
        kernel_size: 3
      }
    }
    feature_extractor {
      type: 'ssd_resnet101_v1_fpn'
      fpn {
        min_level: 3
        max_level: 7
      }
      min_depth: 16
      depth_multiplier: 1.0
      conv_hyperparams {
        activation: RELU_6,
        regularizer {
          l2_regularizer {
            weight: 0.0001
          }
        }
        initializer {
          truncated_normal_initializer {
            stddev: 0.03
            mean: 0.0
          }
        }
        batch_norm {
          scale: true,
          decay: 0.997,
          epsilon: 0.001,
        }
      }
      override_base_feature_extractor_hyperparams: true
    }
    loss {
      classification_loss {
        weighted_sigmoid_focal {
          alpha: 0.25
          gamma: 2.0
        }
      }
      localization_loss {
        weighted_smooth_l1 {
        }
      }
      classification_weight: 1.0
      localization_weight: 1.0
    }
    normalize_loss_by_num_matches: true
    normalize_loc_loss_by_codesize: true
    post_processing {
      batch_non_max_suppression {
        score_threshold: 0.1
        iou_threshold: 0.5
        max_detections_per_class: 10
        max_total_detections: 50
        use_static_shapes: true
      }
      score_converter: SIGMOID
    }
  }
}

train_config {
  batch_size: 12 # 14 too big for p3.2xlarge w/ 256 depth, pyr 3-7; 12 works w/ depth 128
  ### Use multilabel boxes so buildings can have multiple properties
  merge_multiple_label_boxes: true
  data_augmentation_options {
    random_horizontal_flip {
    }
  }
  data_augmentation_options {
    random_adjust_brightness {
    }
  }
  data_augmentation_options {
    random_adjust_contrast {
    }
  }
  data_augmentation_options {
    random_crop_image {
      min_area: 0.75
      max_area: 1.0
    }
  }
  optimizer {
    rms_prop_optimizer {
      learning_rate {
        cosine_decay_learning_rate {
          learning_rate_base: 0.0001
          total_steps: 50000
          warmup_learning_rate: 0.0000001
          warmup_steps: 1000
        }
      }
    momentum_optimizer_value: 0.9}
    # use_moving_average: false
  }
  fine_tune_checkpoint: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/ssd_resnet101_v1_fpn_shared_box_predictor_oid_512x512_sync_2019_01_20/model.ckpt"
  from_detection_checkpoint: true # From object detection checkpoint
  load_all_detection_checkpoint_vars: true
  num_steps: 50000 # Keep equal to total_steps of cosine_decay_learning_rate above
  startup_delay_steps: 0.0
  unpad_groundtruth_tensors: false
  sync_replicas: true
}
train_input_reader {
  label_map_path: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/marine_debris.pbtxt"
  shuffle_buffer_size: 8192
  tf_record_input_reader {
    input_path: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/data/tf_records/*train.records"
  }
}
eval_config {
  num_examples: 100 # Set to size of eval TF Serving; deprecated
  #max_evals: 1 # Max out at X evaluations to save time; deprecated
  eval_interval_secs: 300 # Default 300
  max_num_boxes_to_visualize: 50
  visualize_groundtruth_boxes: true
  num_visualizations: 12
  use_moving_averages: false
  include_metrics_per_category: true
  metrics_set: "weighted_pascal_voc_detection_metrics"
}
eval_input_reader {
  label_map_path: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/marine_debris.pbtxt"
  shuffle: true
  num_readers: 4
  tf_record_input_reader {
    input_path: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/data/tf_records/*val.records"
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
import base64 | ||
import boto3 | ||
import json | ||
import os | ||
import rasterio | ||
import requests | ||
import subprocess | ||
from glob import glob | ||
from rasterio.io import MemoryFile | ||
from rasterio.warp import reproject, calculate_default_transform, Resampling | ||
from zipfile import ZipFile | ||
|
||
# Fix: the original `os.environ['AWS_ACCOUNT_NUMBER']` raised KeyError at import
# time when the variable was unset, and ACCOUNT is never read in this file.
ACCOUNT = os.environ.get('AWS_ACCOUNT_NUMBER')
BASE_URL = "https://labeler.nasa-impact.net"
DEFAULT_CRS = 'EPSG:4326'  # target CRS geotiffs are reprojected into
DOWNLOAD_FOLDER = 'downloaded_files'  # local staging dir for S3 downloads
LOGIN_URL = f"{BASE_URL}/accounts/login/"
IL_URL = {
    'geotiff': f"{BASE_URL}/api/geotiffs"
}
S3_URL = "s3://marine-litter-observations"  # f-prefix dropped: no placeholders
class Uploader:
    """Reprojects GeoTIFFs found inside zip archives to EPSG:4326 and uploads
    them to the NASA IMPACT ImageLabeler service."""

    def __init__(self, username, password, client_id, client_secret):
        """
        Initializer: obtains an OAuth bearer token and prepares a work dir.

        Args:
            username (str): ImageLabeler Username
            password (str): ImageLabeler Password
            client_id (str): OAuth client id (basic-auth user for token request)
            client_secret (str): OAuth client secret (basic-auth password)
        """
        self.request_token(username, password, client_id, client_secret)
        Uploader.mkdir('updated')

    def upload_geotiffs(self, file_name):
        """
        Upload geotiffs into imagelabeler

        Args:
            file_name (str): path to downloaded zip archive of geotiffs.
        """
        foldername, _ = os.path.splitext(file_name)
        Uploader.mkdir(foldername)
        with ZipFile(file_name) as zip_file:
            print("================ Reading files ================")
            for compressed_file in zip_file.namelist():
                compressed_file = str(compressed_file)
                _, extension = os.path.splitext(compressed_file)
                # Only .tif members are geotiffs we care about.
                if extension == '.tif':
                    self.process_geotiff(
                        compressed_file,
                        zip_file,
                        foldername
                    )

    def process_geotiff(self, compressed_file, zip_file, foldername):
        """
        Reproject and upload one geotiff into imagelabeler.

        Args:
            compressed_file (str): path of tif file inside the zip archive
            zip_file (zipfile.ZipFile): zipfile instance
            foldername (str): folder in which the reprojected file is written

        Side effects: writes the reprojected tif locally, uploads it, and
        removes the local copy on a 200 response.
        """
        split = compressed_file.split('/')[-1].split('_')
        # e.g. "<date>_<time>_rest..." -> "marine_plastic_<date>T<time>_rest..."
        updated_filename = f"marine_plastic_{'T'.join(split[0:2])}_{'_'.join(split[2:])}"
        filename = f"{foldername}/{updated_filename}"
        mem_tiff = zip_file.read(compressed_file)
        # Fix: the original opened the MemoryFile/dataset and never closed
        # either; use context managers so both are released deterministically.
        with MemoryFile(mem_tiff) as memfile:
            with memfile.open() as tiff_file:
                updated_profile = self.calculate_updated_profile(tiff_file)
                with rasterio.open(filename, 'w', **updated_profile) as dst:
                    # Bands 1..3 (RGB) — profile is updated with count=3.
                    for band in range(1, 4):
                        reproject(
                            source=rasterio.band(tiff_file, band),
                            destination=rasterio.band(dst, band),
                            src_transform=tiff_file.transform,
                            src_crs=tiff_file.crs,
                            dst_transform=updated_profile['transform'],
                            dst_crs=DEFAULT_CRS,
                            resampling=Resampling.nearest
                        )
        _, status_code = self.upload_to_image_labeler(filename)
        if status_code == 200:
            os.remove(filename)
        # Fix: restore the filename placeholder (scrape artifact printed a
        # literal "(unknown)").
        print(f"{filename} uploaded to imagelabeler with: {status_code}")

    def calculate_updated_profile(self, tiff_file):
        """
        Create updated profile for the provided tiff_file

        Args:
            tiff_file (rasterio.io.DatasetReader): opened rasterio dataset.

        Returns:
            dict: updated profile (EPSG:4326, 3 bands, uint8, LZW) for the
            new tiff file
        """
        profile = tiff_file.profile
        transform, width, height = calculate_default_transform(
            tiff_file.crs,
            DEFAULT_CRS,
            tiff_file.width,
            tiff_file.height,
            *tiff_file.bounds
        )
        profile.update(
            crs=DEFAULT_CRS,
            transform=transform,
            width=width,
            height=height,
            count=3,
            nodata=0,
            compress='lzw',
            dtype='uint8'
        )
        return profile

    def request_token(self, username, password, client_id, client_secret):
        """
        Obtain an OAuth bearer token and store it in self.headers.

        Args:
            username (string): registered username of the user using the script
            password (string): password associated with the user

        Raises:
            KeyError: if the token endpoint response contains no
            'access_token' (e.g. bad credentials).

        Side effect: sets self.headers to {"Authorization": "Bearer ..."}.
        """
        payload = {
            "username": username,
            "password": password,
            "grant_type": "password"
        }
        response = requests.post(
            f"{BASE_URL}/authentication/token/",
            data=payload,
            auth=(client_id, client_secret)
        )
        access_token = json.loads(response.text)['access_token']
        self.headers = {
            'Authorization': f"Bearer {access_token}",
        }

    def upload_to_image_labeler(self, file_name, file_type='geotiff'):
        """
        Uploads a single file to the image labeler.

        Args:
            file_name (str): path of the file to upload
            file_type (str): key into IL_URL selecting the endpoint

        Returns:
            tuple: (response text, response status code)
        """
        with open(file_name, 'rb') as upload_file_name:
            file_headers = {
                **self.headers,
            }
            files = {
                'file': (file_name, upload_file_name),
            }
            response = requests.post(
                IL_URL[file_type],
                files=files,
                headers=file_headers
            )
            return response.text, response.status_code

    @classmethod
    def mkdir(cls, dirname):
        # Create dirname if absent; silently no-op when it already exists.
        if not os.path.exists(dirname):
            os.mkdir(dirname)
            print(f'directory created: {dirname}')
def main(profile_name, username, password, client_id, client_secret):
    """Download every .zip of GeoTIFFs from the marine-litter-observations S3
    bucket and upload its contents to ImageLabeler.

    Args:
        profile_name (str): AWS profile to open the boto3 session with
        username (str): ImageLabeler username
        password (str): ImageLabeler password
        client_id (str): OAuth client id
        client_secret (str): OAuth client secret
    """
    session = boto3.session.Session(profile_name=profile_name)
    s3_connection = session.resource('s3')
    bucket = s3_connection.Bucket('marine-litter-observations')
    uploader = Uploader(username, password, client_id, client_secret)
    Uploader.mkdir(DOWNLOAD_FOLDER)
    for s3_object in bucket.objects.all():
        if '.zip' in s3_object.key:
            filename = s3_object.key.split('/')[-1]
            # Fix: restore the {filename} placeholders (scrape artifact left a
            # literal "(unknown)", which also made every zip download to the
            # same local path and overwrite the previous one).
            print(f"================ Downloading file: {filename} ================")
            zip_filename = f"{DOWNLOAD_FOLDER}/{filename}"
            bucket.download_file(s3_object.key, zip_filename)
            print("================ Download complete ================ ")
            print("================ Upload in progress ================")
            uploader.upload_geotiffs(zip_filename)
            print("================ Upload Complete ================")
|
||
|
||
if __name__ == '__main__':
    # Fix: the original call omitted the required `username` and `password`
    # parameters of main() and raised TypeError before doing any work, and ran
    # unconditionally on import. Credentials are sourced from the environment;
    # NOTE(review): variable names IL_USERNAME / IL_PASSWORD are assumed —
    # confirm against the deployment environment.
    main(
        profile_name=os.environ['AWS_PROFILE_NAME'],
        username=os.environ['IL_USERNAME'],
        password=os.environ['IL_PASSWORD'],
        client_id=os.environ['AWS_ACCESS_KEY'],
        client_secret=os.environ['AWS_SECRET_ACCESS_KEY'],
    )
|
Oops, something went wrong.