# Download and understand the data at hand

A subset of the High Latitude Dust data has been prepared in advance and uploaded to an S3 bucket (impact-datashare). See https://github.com/nasa-impact/data_share for details.

In [None]:
!pip install -r src/requirements.txt

In [None]:
import boto3
import fiona

import math
import numpy as np
import os
import random
import rasterio.features
import re
import requests
import shutil

from datetime import datetime
from glob import glob
from io import BytesIO
from IPython.display import Image as Display
from PIL import Image


## Set up constants

In [None]:
# AWS account and IAM role; combined below into the role ARN that
# assumed_role_session() passes to STS.
ACCOUNT_NUMBER = "350996086543"
ROLE_NAME = "notebookAccessRole"
ROLE_ARN = f"arn:aws:iam::{ACCOUNT_NUMBER}:role/{ROLE_NAME}"
# Bucket holding the shared, pre-prepared dataset.
SOURCE_BUCKET = "impact-datashare"
# Account-specific S3 location; used as the training job's output_path.
DESTINATION_BUCKET = f"s3://{ACCOUNT_NUMBER}-model-bucket"

# NOTE: URL below requests the MODIS Aqua CorrectedReflectance TrueColor layer.
# If the shapefiles were generated against a different layer, build a matching
# URL instead. The original note points to an `image_url` helper for this,
# which is not defined in the part of the notebook visible here.
# Local folder under which create_split() writes the train/val/test folders.
DATA_FOLDER = "data"
EVENT = "hld-labeled"
IMAGE_FOLDER = "images"
SHAPEFILE_FOLDER = "shapefiles"
# GIBS WMS GetMap request template; the four `{}` placeholders are, in order:
# width, height, bbox, time.
URL = "https://gibs.earthdata.nasa.gov/wms/epsg4326/best/wms.cgi?SERVICE=WMS&REQUEST=GetMap&layers=MODIS_Aqua_CorrectedReflectance_TrueColor&version=1.3.0&crs=EPSG:4326&transparent=false&width={}&height={}&bbox={}&format=image/tiff&time={}"
# Presumably kilometres per degree at the equator (per the name) -- TODO confirm.
KM_PER_DEG_AT_EQ = 111.
# NOTE(review): units are not shown here (likely km per pixel) -- confirm.
RESOLUTION = 0.25

## Set up the environment for data transfer

In [None]:
def assumed_role_session():
    """Return a boto3 session that runs under the notebook access role.

    Asks STS to assume ``ROLE_ARN`` (the "notebookAccessRole" role created
    with AWS CDK), using ``ROLE_NAME`` as the session name, and wraps the
    temporary credentials it returns in a new session pinned to us-east-1.
    """
    sts = boto3.client('sts')
    response = sts.assume_role(RoleArn=ROLE_ARN, RoleSessionName=ROLE_NAME)
    temporary = response['Credentials']

    session_options = {
        'aws_access_key_id': temporary['AccessKeyId'],
        'aws_secret_access_key': temporary['SecretAccessKey'],
        'aws_session_token': temporary['SessionToken'],
        'region_name': 'us-east-1',
    }
    return boto3.session.Session(**session_options)


## Helper methods to download and visualize data

In [None]:
def mkdir(foldername):
    """Create `foldername` (and any missing parents) unless it already exists.

    Prints a short message saying whether the folder was created or was
    already present. Returns nothing.
    """
    if not os.path.exists(foldername):
        os.makedirs(foldername)
        print(f"Created folder: {foldername}")
        return
    print(f"'{foldername}' folder already exists.")

    
def delete_folder(foldername):
    """Recursively remove `foldername` and everything inside it."""
    shutil.rmtree(path=foldername)


In [None]:
# Remove already existing folder for a split and create a new one with passed filenames
def create_split(split, files):
    """
    Rebuild the folder for one split and copy the given examples into it.

    Any existing `<DATA_FOLDER>/<split>` folder is deleted first, then
    recreated. Each tiff is copied together with its companion bitmap, whose
    path is the tiff path with '.tiff' replaced by '_bitmap.png'.

    split: choice of "train", "test", and "val"
    files: list of tiff file paths
    """
    print(f'Preparing {split} split with {len(files)} examples.')
    target_dir = f"{DATA_FOLDER}/{split}"
    # Start from a clean folder so examples from an earlier run do not linger.
    if os.path.exists(target_dir):
        delete_folder(target_dir)
    mkdir(target_dir)
    for tiff_path in files:
        mask_path = tiff_path.replace('.tiff', '_bitmap.png')
        # Copy the image first, then its bitmap, keeping only the base names.
        for source in (tiff_path, mask_path):
            base_name = source.rsplit('/', 1)[-1]
            shutil.copyfile(source, f"{target_dir}/{base_name}")
        
        

In [None]:
# prepare train, val, and test splits
def prepare_splits(source_folder, splits={'train': 0.6, 'val': 0.2, 'test': 0.2}):
    """
    Shuffle the tiff examples in `source_folder` and build the three splits.

    source_folder: folder that is searched (non-recursively) for `*.tiff` files
    splits: fractions keyed by 'train', 'val' and 'test'. The train and val
        sizes are `ceil(total * fraction)`; the test split receives every
        remaining file, so its fraction is implied rather than read.

    Each split is written through `create_split`, which replaces any existing
    folder for that split.
    """
    files = glob(f"{source_folder}/*.tiff")
    print(f"Total examples found: {len(files)}")
    random.shuffle(files)
    length = len(files)
    train_limit = math.ceil(length * splits['train'])
    # The validation size must come from the 'val' fraction, not 'train'.
    val_limit = train_limit + math.ceil(length * splits['val'])
    create_split('train', files[:train_limit])
    create_split('val', files[train_limit:val_limit])
    # Test takes the remainder, so it never overlaps the validation split.
    create_split('test', files[val_limit:])

    

# Check downloaded data

In [None]:
# Obtain a boto3 session under the assumed notebook role.
# NOTE(review): `session` is not referenced by the cells visible here -- confirm
# whether later cells rely on it.
session = assumed_role_session()

In [None]:
from sagemaker.tensorflow import TensorFlow


# Configure the SageMaker TensorFlow training job: the entry-point script is
# looked up in `source_dir`, training runs on a single ml.p3.2xlarge instance
# using the pinned TensorFlow 2.4.1 GPU training image, and output is written
# under DESTINATION_BUCKET.
estimator = TensorFlow(
    entry_point='hld_sagemaker_demo.py',
    # Absolute path on the notebook instance; must be changed if the
    # repository is checked out elsewhere.
    source_dir="/home/ec2-user/SageMaker/workshop_notebooks/chapter-3/src",
    role=ROLE_NAME,
    instance_count=1,
    instance_type='ml.p3.2xlarge',
    py_version='py3',
    output_path=DESTINATION_BUCKET,
    image_uri='763104351884.dkr.ecr.us-east-1.amazonaws.com/tensorflow-training:2.4.1-gpu-py37-cu110-ubuntu18.04',
    # NOTE(review): parameter-server distribution is enabled although
    # instance_count is 1 -- confirm this is intended.
    distribution={
        'parameter_server': {'enabled': True}
    }
)


In [None]:
import sagemaker

sagemaker_session = sagemaker.Session()
# Give every split its own key prefix. `upload_data` defaults to
# key_prefix='data' for each call, which would put the train, val and test
# files under the same S3 prefix and return the same URI three times, so the
# training channels would all point at one mixed set of files.
# NOTE(review): create_split() writes to 'data/<split>' relative to the working
# directory, while these paths start with '../data' -- confirm which directory
# this cell is run from.
train_images = sagemaker_session.upload_data(path='../data/train', key_prefix='data/train')
val_images = sagemaker_session.upload_data(path='../data/val', key_prefix='data/val')
test_images = sagemaker_session.upload_data(path='../data/test', key_prefix='data/test')

In [None]:
# Launch training. The uploaded S3 locations are passed as the input channels
# 'train', 'eval' (the validation data) and 'test'.
estimator.fit({'train': train_images, 'eval': val_images, 'test': test_images})

In [None]:
# Deploy the trained estimator on one ml.t2.large instance. The return value
# is not kept here; it is only shown as the cell output.
estimator.deploy(initial_instance_count=1, instance_type='ml.t2.large')

# Deployment (move to chapter-4)

In [None]:
from sagemaker.tensorflow import TensorFlowModel
# Build a model from an existing model.tar.gz under DESTINATION_BUCKET.
# NOTE(review): the path embeds one specific training job name
# (tensorflow-training-2021-05-05-10-10-34-979); presumably it has to be
# replaced with the job name of your own run -- confirm.
model = TensorFlowModel(framework_version='2.4.1', model_data=f'{DESTINATION_BUCKET}/tensorflow-training-2021-05-05-10-10-34-979/output/model.tar.gz', role=ROLE_NAME)

In [None]:
# Deploy the model on one ml.t2.large instance and keep the returned predictor
# for the inference calls below.
predictor = model.deploy(initial_instance_count=1, instance_type='ml.t2.large')


In [None]:
# Smoke-test the endpoint with one all-zero input of shape (1, 256, 256, 3).
predictor.predict(np.asarray([np.zeros((256, 256, 3))]))

In [None]:
# Display the predictor object as the cell output.
predictor

In [None]:
# Display the endpoint name recorded on the model object.
model.endpoint_name

In [None]:
# NOTE(review): this is a bare attribute reference, not a call. Predictions
# above go through `predictor`; confirm that `model` exposes `predict` at all,
# otherwise this cell raises AttributeError.
model.predict