Merge pull request #4 from NASA-IMPACT/v0_0

Initial codebase for data processing and model infrastructure.
NASA-IMPACT · Apr 8, 2021 · 4e3e876 · 4e3e876
2 parents a8ad6fc + 30bb3b8
commit 4e3e876
Show file tree

Hide file tree

Showing 728 changed files with 187,614 additions and 0 deletions.
diff --git a/configs/marine_debris.pbtxt b/configs/marine_debris.pbtxt
@@ -0,0 +1,4 @@
+item {
+  id: 1
+  name: 'marine_debris'
+}
diff --git a/configs/ssd_resnet101_v1_fpn_marine_debris.config b/configs/ssd_resnet101_v1_fpn_marine_debris.config
@@ -0,0 +1,204 @@
+# SSD with Resnet 101 v1 FPN feature extractor, shared box predictor and focal
+# loss (a.k.a Retinanet).
+# See Lin et al, https://arxiv.org/abs/1708.02002
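+# The base model was trained on Open Images Dataset v4; this config fine-tunes from that checkpoint (see fine_tune_checkpoint below) rather than initializing from scratch.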
+
+# This config is TPU compatible
+
+model {
+  ssd {
+    inplace_batchnorm_update: true
+    freeze_batchnorm: false
+    num_classes: 1
+    box_coder {
+      faster_rcnn_box_coder {
+        y_scale: 10.0
+        x_scale: 10.0
+        height_scale: 5.0
+        width_scale: 5.0
+      }
+    }
+    matcher {
+      argmax_matcher {
+        matched_threshold: 0.5
+        unmatched_threshold: 0.5
+        ignore_thresholds: false
+        negatives_lower_than_unmatched: true
+        force_match_for_each_row: true
+        use_matmul_gather: true
+      }
+    }
+    similarity_calculator {
+      iou_similarity {
+      }
+    }
+    encode_background_as_zeros: true
+    anchor_generator {
+      multiscale_anchor_generator {
+        min_level: 3
+        max_level: 7
+        anchor_scale: 4.0
+        aspect_ratios: [1.0, 2.0, 0.5]
+        scales_per_octave: 2
+      }
+    }
+    image_resizer {
+      fixed_shape_resizer {
+        height: 256
+        width: 256
+      }
+    }
+    box_predictor {
+      weight_shared_convolutional_box_predictor {
+        depth: 256
+        class_prediction_bias_init: -4.6
+        conv_hyperparams {
+          activation: RELU_6,
+          regularizer {
+            l2_regularizer {
+              weight: 0.0001
+            }
+          }
+          initializer {
+            random_normal_initializer {
+              stddev: 0.01
+              mean: 0.0
+            }
+          }
+          batch_norm {
+            scale: true,
+            decay: 0.997,
+            epsilon: 0.001,
+          }
+        }
+        num_layers_before_predictor: 2
+        kernel_size: 3
+      }
+    }
+    feature_extractor {
+      type: 'ssd_resnet101_v1_fpn'
+      fpn {
+        min_level: 3
+        max_level: 7
+      }
+      min_depth: 16
+      depth_multiplier: 1.0
+      conv_hyperparams {
+        activation: RELU_6,
+        regularizer {
+          l2_regularizer {
+            weight: 0.0001
+          }
+        }
+        initializer {
+          truncated_normal_initializer {
+            stddev: 0.03
+            mean: 0.0
+          }
+        }
+        batch_norm {
+          scale: true,
+          decay: 0.997,
+          epsilon: 0.001,
+        }
+      }
+      override_base_feature_extractor_hyperparams: true
+    }
+    loss {
+      classification_loss {
+        weighted_sigmoid_focal {
+          alpha: 0.25
+          gamma: 2.0
+        }
+      }
+      localization_loss {
+        weighted_smooth_l1 {
+        }
+      }
+      classification_weight: 1.0
+      localization_weight: 1.0
+    }
+    normalize_loss_by_num_matches: true
+    normalize_loc_loss_by_codesize: true
+    post_processing {
+      batch_non_max_suppression {
+        score_threshold: 0.1
+        iou_threshold: 0.5
+        max_detections_per_class: 10
+        max_total_detections: 50
+        use_static_shapes: true
+      }
+      score_converter: SIGMOID
+    }
+  }
+}
+
+train_config {
+  batch_size: 12  # 14 too big for p3.2xlarge w/ 256 depth, pyr 3-7; 12 works w/ depth 128
+  ### Use multilabel boxes so an object can carry multiple labels (comment carried over from a buildings config; this model has a single class)
+  merge_multiple_label_boxes: true
+  data_augmentation_options {
+    random_horizontal_flip {
+    }
+  }
+  data_augmentation_options {
+    random_adjust_brightness {
+    }
+  }
+  data_augmentation_options {
+    random_adjust_contrast {
+    }
+  }
+  data_augmentation_options {
+    random_crop_image {
+      min_area: 0.75
+      max_area: 1.0
+    }
+  }
+  optimizer {
+     rms_prop_optimizer {
+       learning_rate {
+         cosine_decay_learning_rate {
+           learning_rate_base: 0.0001
+           total_steps: 50000
+           warmup_learning_rate: 0.0000001
+           warmup_steps: 1000
+         }
+       }
+       momentum_optimizer_value: 0.9}
+    # use_moving_average: false
+  }
+  fine_tune_checkpoint: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/ssd_resnet101_v1_fpn_shared_box_predictor_oid_512x512_sync_2019_01_20/model.ckpt"
+  from_detection_checkpoint: true  # From object detection checkpoint
+  load_all_detection_checkpoint_vars: true
+  num_steps: 50000 # Match to total_steps of cosine_decay_learning_rate above
+  startup_delay_steps: 0.0
+  unpad_groundtruth_tensors: false
+  sync_replicas: true
+}
+train_input_reader {
+  label_map_path: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/marine_debris.pbtxt"
+  shuffle_buffer_size: 8192
+  tf_record_input_reader {
+    input_path: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/data/tf_records/*train.records"
+  }
+}
+eval_config {
+  num_examples: 100  # Set to size of eval TF Serving; deprecated
+  #max_evals: 1  # Max out at X evaluations to save time; deprecated
+  eval_interval_secs: 300  # Default 300
+  max_num_boxes_to_visualize: 50
+  visualize_groundtruth_boxes: true
+  num_visualizations: 12
+  use_moving_averages: false
+  include_metrics_per_category: true
+  metrics_set: "weighted_pascal_voc_detection_metrics"
+}
+eval_input_reader {
+  label_map_path: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/marine_debris.pbtxt"
+  shuffle: true
+  num_readers: 4
+  tf_record_input_reader {
+    input_path: "/home/ubuntu/tensorflow_od_api/models/research/object_detection/marine_litter_training/data/tf_records/*val.records"
+  }
+}
diff --git a/data_utils/S32ImageLabeler.py b/data_utils/S32ImageLabeler.py
@@ -0,0 +1,180 @@
+import base64
+import boto3
+import json
+import os
+import rasterio
+import requests
+import subprocess
+from glob import glob
+from rasterio.io import MemoryFile
+from rasterio.warp import reproject, calculate_default_transform, Resampling
+from zipfile import ZipFile
+
+# AWS account number; read eagerly, so importing this module raises KeyError when the variable is unset.
+ACCOUNT = os.environ['AWS_ACCOUNT_NUMBER']
+# Root URL of the ImageLabeler service that every endpoint below is built from.
+BASE_URL = "https://labeler.nasa-impact.net"
+# Target coordinate reference system that uploaded GeoTIFFs are reprojected into.
+DEFAULT_CRS = 'EPSG:4326'
+# Local folder that zip archives are downloaded into.
+DOWNLOAD_FOLDER = 'downloaded_files'
+LOGIN_URL = f"{BASE_URL}/accounts/login/"
+# Upload endpoint per file type; keyed by the `file_type` passed to upload_to_image_labeler.
+IL_URL = {
+    'geotiff': f"{BASE_URL}/api/geotiffs"
+}
+# Plain string: there is nothing to interpolate, so no f-prefix is needed.
+S3_URL = "s3://marine-litter-observations"
+
+
+class Uploader:
+    """
+    Reprojects GeoTIFFs found in zip archives and uploads them to ImageLabeler.
+    An access token is requested on construction and kept in `self.headers`,
+    which is sent with every upload request.
+    """
+
+    def __init__(self, username, password, client_id, client_secret):
+        """
+        Initializer
+        Args:
+            username (str): ImageLabeler Username
+            password (str): ImageLabeler Password
+            client_id (str): client id, sent as the basic-auth user of the token request
+            client_secret (str): client secret, sent as the basic-auth password of the token request
+        """
+        self.request_token(username, password, client_id, client_secret)
+        Uploader.mkdir('updated')
+
+    def upload_geotiffs(self, file_name):
+        """
+        Upload every '.tif' member of a zip archive into imagelabeler
+        Args:
+            file_name (str): path to the downloaded zip archive of geotiffs.
+        """
+        # Reprojected files are staged in a folder named after the archive (its path minus the extension).
+        foldername, _ = os.path.splitext(file_name)
+        Uploader.mkdir(foldername)
+        with ZipFile(file_name) as zip_file:
+            print("================ Reading files ================")
+            for compressed_file in zip_file.namelist():
+                _, extension = os.path.splitext(compressed_file)
+                if extension == '.tif':
+                    self.process_geotiff(
+                        compressed_file,
+                        zip_file,
+                        foldername
+                    )
+
+    def process_geotiff(self, compressed_file, zip_file, foldername):
+        """
+        Reproject and upload geotiff into imagelabeler
+        Args:
+            compressed_file (str): path of tif file in zip file
+            zip_file (zipfile.ZipFile): zipfile instance
+            foldername (str): foldername of where to store file
+        """
+        # Rebuild the name: the first two '_'-separated parts are joined with 'T',
+        # the remaining parts are kept '_'-separated, and 'marine_plastic_' is prefixed.
+        split = compressed_file.split('/')[-1].split('_')
+        updated_filename = f"marine_plastic_{'T'.join(split[0:2])}_{'_'.join(split[2:])}"
+        filename = f"{foldername}/{updated_filename}"
+        mem_tiff = zip_file.read(compressed_file)
+        # Context managers release the in-memory file and both datasets even when reprojection fails.
+        with MemoryFile(mem_tiff) as memory_file, memory_file.open() as tiff_file:
+            updated_profile = self.calculate_updated_profile(tiff_file)
+            with rasterio.open(filename, 'w', **updated_profile) as dst:
+                # Only bands 1-3 are written, matching count=3 in the updated profile.
+                for band in range(1, 4):
+                    reproject(
+                        source=rasterio.band(tiff_file, band),
+                        destination=rasterio.band(dst, band),
+                        src_transform=tiff_file.transform,
+                        src_crs=tiff_file.crs,
+                        dst_transform=updated_profile['transform'],
+                        dst_crs=DEFAULT_CRS,
+                        resampling=Resampling.nearest
+                    )
+        _, status_code = self.upload_to_image_labeler(filename)
+        # The staged file is kept on any non-200 response so a failed upload can be inspected.
+        if status_code == 200:
+            os.remove(filename)
+        print(f"{filename} uploaded to imagelabeler with: {status_code}")
+
+    def calculate_updated_profile(self, tiff_file):
+        """
+        Create updated profile for the provided tiff_file
+        Args:
+            tiff_file: opened rasterio dataset (as returned by MemoryFile.open()).
+        Returns:
+            dict: updated profile for new tiff file
+        """
+        profile = tiff_file.profile
+        transform, width, height = calculate_default_transform(
+            tiff_file.crs,
+            DEFAULT_CRS,
+            tiff_file.width,
+            tiff_file.height,
+            *tiff_file.bounds
+        )
+        profile.update(
+            crs=DEFAULT_CRS,
+            transform=transform,
+            width=width,
+            height=height,
+            count=3,
+            nodata=0,
+            compress='lzw',
+            dtype='uint8'
+        )
+        return profile
+
+    def request_token(self, username, password, client_id, client_secret):
+        """
+        Request an access token and store the resulting authorization
+        header in `self.headers` ({"Authorization": "Bearer ..."}).
+        Args:
+            username (string) : registered username of the user using the script
+            password (string) : password associated with the user
+            client_id (string) : sent as the basic-auth user of the token request
+            client_secret (string) : sent as the basic-auth password of the token request
+        Exceptions:
+            requests.HTTPError: the token endpoint answered with an error status
+            KeyError: the response body has no 'access_token' entry
+        """
+        payload = {
+            "username": username,
+            "password": password,
+            "grant_type": "password"
+        }
+        response = requests.post(
+            f"{BASE_URL}/authentication/token/",
+            data=payload,
+            auth=(client_id, client_secret)
+        )
+        # Fail with the HTTP error itself instead of an opaque KeyError on the missing token.
+        response.raise_for_status()
+        access_token = json.loads(response.text)['access_token']
+        self.headers = {
+            'Authorization': f"Bearer {access_token}",
+        }
+
+    def upload_to_image_labeler(self, file_name, file_type='geotiff'):
+        """
+        Uploads a single file to the image labeler
+        Args:
+            file_name (str): path of the file to upload
+            file_type (str): key into IL_URL selecting the upload endpoint
+        Returns:
+            response (tuple): response text, response status code
+        """
+        with open(file_name, 'rb') as upload_file_name:
+            file_headers = {
+                **self.headers,
+            }
+            files = {
+                'file': (file_name, upload_file_name),
+            }
+            response = requests.post(
+                IL_URL[file_type],
+                files=files,
+                headers=file_headers
+            )
+            return response.text, response.status_code
+
+    @classmethod
+    def mkdir(cls, dirname):
+        """
+        Create a directory (and any missing parents) unless the path already exists
+        Args:
+            dirname (str): path of the directory to create
+        """
+        if not os.path.exists(dirname):
+            # exist_ok covers the case where the directory appears between the check and the call.
+            os.makedirs(dirname, exist_ok=True)
+            print(f'directory created: {dirname}')
+
+
+def main(profile_name, username, password, client_id, client_secret):
+    """
+    Download every zip archive in the 'marine-litter-observations' bucket
+    and upload the geotiffs it contains to imagelabeler
+    Args:
+        profile_name (str): AWS profile used to create the boto3 session
+        username (str): ImageLabeler Username
+        password (str): ImageLabeler Password
+        client_id (str): client id forwarded to Uploader for the token request
+        client_secret (str): client secret forwarded to Uploader for the token request
+    """
+    session = boto3.session.Session(profile_name=profile_name)
+    s3_connection = session.resource('s3')
+    bucket = s3_connection.Bucket('marine-litter-observations')
+    uploader = Uploader(username, password, client_id, client_secret)
+    Uploader.mkdir(DOWNLOAD_FOLDER)
+    for s3_object in bucket.objects.all():
+        # Match on the suffix: a substring test would also accept keys such as
+        # 'x.zip.md5', which would then fail when opened as a zip archive.
+        if not s3_object.key.endswith('.zip'):
+            continue
+        filename = s3_object.key.split('/')[-1]
+        print(f"================ Downloading file: {filename} ================")
+        zip_filename = f"{DOWNLOAD_FOLDER}/{filename}"
+        bucket.download_file(s3_object.key, zip_filename)
+        print("================ Download complete ================ ")
+        print("================ Upload in progress ================")
+        uploader.upload_geotiffs(zip_filename)
+        print("================ Upload Complete ================")
+
+
+if __name__ == '__main__':
+    # main() also requires the ImageLabeler username and password; the original call
+    # omitted them and raised TypeError. The IMAGELABELER_* variable names are
+    # introduced here. NOTE(review): confirm they match the deployment environment.
+    # The guard keeps an import of this module from starting a full download/upload run.
+    main(
+        profile_name=os.environ['AWS_PROFILE_NAME'],
+        username=os.environ['IMAGELABELER_USERNAME'],
+        password=os.environ['IMAGELABELER_PASSWORD'],
+        client_id=os.environ['AWS_ACCESS_KEY'],
+        client_secret=os.environ['AWS_SECRET_ACCESS_KEY'],
+    )
+