In [13]:
import azureml.core
from azureml.core import Experiment, Workspace, Dataset, Datastore, ScriptRunConfig
from azureml.core.environment import Environment
from azureml.core.conda_dependencies import CondaDependencies
import os
import shutil
import glob
from os.path import join
import tensorflow as tf
from sys import path
import numpy as np

# check core SDK version number

print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.28.0


In [14]:
# load workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
# get metadata about the workspace
print(ws.name, ws.location, ws.resource_group, sep='\t')
# list the registered datastores
ws.datastores

wetlands	eastus	cic_ai


{'workspaceartifactstore': {
   "name": "workspaceartifactstore",
   "container_name": "azureml",
   "account_name": "wetlands2489098639",
   "protocol": "https",
   "endpoint": "core.windows.net"
 },
 'wetlanddatastore': {
   "name": "wetlanddatastore",
   "container_name": "data",
   "account_name": "wetlands2489098639",
   "protocol": "https",
   "endpoint": "core.windows.net"
 },
 'workspacefilestore': {
   "name": "workspacefilestore",
   "container_name": "azureml-filestore-2640f222-8801-40ec-b413-83a7ec003a55",
   "account_name": "wetlands2489098639",
   "protocol": "https",
   "endpoint": "core.windows.net"
 },
 'workspaceblobstore': {
   "name": "workspaceblobstore",
   "container_name": "azureml-blobstore-2640f222-8801-40ec-b413-83a7ec003a55",
   "account_name": "wetlands2489098639",
   "protocol": "https",
   "endpoint": "core.windows.net"
 }}

In [15]:
# access our registered data share containing image data in this workspace
datastore = Datastore.get(workspace = ws, datastore_name = 'wetlanddatastore')

train_path = (datastore, 'seasonal/training/')
eval_path = (datastore, 'seasonal/eval/')

test_path = (datastore, 'seasonal/predict/test_aoi_David1_DE')

train_dataset = Dataset.File.from_files(path = [train_path])
eval_dataset = Dataset.File.from_files(path = [eval_path])
test_dataset = Dataset.File.from_files(path = [test_path])

# when we combine datasets the selected directories and relative paths to the datastore are brought in
# mount folder
# |-cddatafilestore
# | |-GEE
# | | |-training
# | | |-eval
# | |-Onera
# | | |-training
# | | |-eval
# train_dataset = Dataset.File.from_files(path = [gee_train_path, onera_train_path])
# eval_dataset = Dataset.File.from_files(path = [gee_eval_path, onera_eval_path])



In [16]:
envs = Environment.list(workspace = ws)
env = envs.get('wetland-training')

In [17]:
experiment_name = 'wetland-unet-seasonal'
exp = Experiment(workspace = ws, name = experiment_name)

In [18]:
# define the compute target
ws.compute_targets
wetlandGPU = ws.compute_targets['wetlandsGPU']
wetlandDSVM = ws.compute_targets['wetland-dsvm']
wetlandDSVM2 = ws.compute_targets['wetland-dsvm2']

KeyboardInterrupt: 

## Project specific code

In [7]:
# make sure we have the most current version of github repo
%cd Satellite_ComputerVision
!git pull
%cd ..

/mnt/batch/tasks/shared/LS_root/mounts/clusters/wetlandsbasiccpu/code/Users/mevans/Satellite_ComputerVision
Already up to date.
/mnt/batch/tasks/shared/LS_root/mounts/clusters/wetlandsbasiccpu/code/Users/mevans


In [19]:
# attach our utilities folder to the path to import modules
path.append('/home/azureuser/cloudfiles/code/Users/mevans/Wetland_UNet/azure/scv')

In [20]:
from utils.model_tools import get_binary_model, make_confusion_matrix
from utils.processing import get_training_dataset

In [21]:
# Define some global variabes

# specify surface layers
lidar = ['lidar_intensity']
geomorphon = ["geomorphons"]

# Specify inputs (Sentinel bands) to the model
opticalBands = ['B3', 'B4', 'B5', 'B6']
thermalBands = ['B8', 'B11', 'B12']
senBands = opticalBands + thermalBands

# get band names for three seasons
seasonalBands = [[band+'_summer', band + '_fall', band + '_spring'] for band in senBands]


# specify NAIP bands
naipBands = ['R', 'G', 'B', 'N']

BANDS = [item for sublist in seasonalBands for item in sublist] + naipBands
MORPHS = lidar + geomorphon
RESPONSE = 'wetland'
FEATURES = BANDS + MORPHS + [RESPONSE]
print(FEATURES)
# Specify the size and shape of patches expected by the model.
KERNEL_SIZE = 256
KERNEL_SHAPE = [KERNEL_SIZE, KERNEL_SIZE]
COLUMNS = [
  tf.io.FixedLenFeature(shape=KERNEL_SHAPE, dtype=tf.float32) for k in FEATURES
]
FEATURES_DICT = dict(zip(FEATURES, COLUMNS))

['B3_summer', 'B3_fall', 'B3_spring', 'B4_summer', 'B4_fall', 'B4_spring', 'B5_summer', 'B5_fall', 'B5_spring', 'B6_summer', 'B6_fall', 'B6_spring', 'B8_summer', 'B8_fall', 'B8_spring', 'B11_summer', 'B11_fall', 'B11_spring', 'B12_summer', 'B12_fall', 'B12_spring', 'R', 'G', 'B', 'N', 'lidar_intensity', 'geomorphons', 'wetland']


### Calculate Training Data Stats (optional)

In [22]:
# Create mountcontext and mount the dataset
train_mount = train_dataset.mount()

# open access to the mount point
train_mount.start() 

# Get the mount point
dataset_mount_folder = train_mount.mount_point
print(dataset_mount_folder)

train_files = []
i = 1
for root, dirs, files in os.walk(dataset_mount_folder):
    for f in files:
        if i%2 == 0:
            train_files.append(join(root, f))
        i += 1
# test_files = glob.glob(join(dataset_mount_folder, '*.'))

In [11]:
len(train_files)

919

In [12]:

# read in the training data so we can calculate size

training = get_training_dataset(
    files = train_files,
    ftDict = FEATURES_DICT,
    features = naipBands,#BANDS+MORPHS, # reading data can be memory intensive. for the purpose of calculating class weights, we don't need all the predictor variables
    response = RESPONSE,
    buff = 1,
    batch = 1,
    repeat = False,
    splits = None)#,
    #one_hot = {'geomorphons':11})

In [16]:
# confirm the dataset looks like we expect
iterator = iter(training)

print(iterator.next())

Downloaded path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/seasonal/training/UNET_256wetlandDE100.tfrecord.gz is different from target path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/UNET_256wetlandDE100.tfrecord.gz
(<tf.Tensor: shape=(1, 256, 256, 4), dtype=float32, numpy=
array([[[[-0.7752477 , -0.17200427, -0.734836  ,  1.6820883 ],
         [-0.64721626,  0.27211615, -1.1233104 ,  1.4984117 ],
         [-0.71114206,  0.11474342, -0.98695976,  1.5833576 ],
         ...,
         [-0.6828924 , -0.3518775 , -0.68146133,  1.7162315 ],
         [-0.7713043 , -0.3029089 , -0.6325876 ,  1.7068007 ],
         [-0.7156118 , -0.2944966 , -0.6972377 ,  1.7073463 ]],

        [[-0.5969886 ,  0.33027044, -1.1893197 ,  1.4560373 ],
         [-0.68969655,  0.1046523 , -1.0001936 ,  1.5852379 ],
         [-0.6783959 ,  0.234815  , -1.0794123 ,  1.5229937 ],
         ...,
         [-0.72748935, -0.2686813 , -0.70657074,  1.7027409 ],
         [-0.7931485 , -0.24930568, -0.65

In [18]:
# # calculate some summary statistics used in model training
wetlandPix = 0
nonPix = 0
for example in iterator:
    ones = tf.reduce_sum(example[1])
    zeroes = (256*256) - ones
    wetlandPix += ones
    nonPix += zeroes
    i += 1

# m = get_binary_model(depth = len(naipBands), optim = tf.keras.optimizers.Adam(learning_rate = 0.001, beta_1=0.9, beta_2=0.999), loss = 'mse', mets = [tf.keras.metrics.categorical_accuracy], bias = None)
# train_con_mat = make_confusion_matrix(training, m)

Downloaded path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/seasonal/training/UNET_256wetlandDE1035.tfrecord.gz is different from target path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/UNET_256wetlandDE1035.tfrecord.gz
Downloaded path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/seasonal/training/UNET_256wetlandDE1045.tfrecord.gz is different from target path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/UNET_256wetlandDE1045.tfrecord.gz
Downloaded path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/seasonal/training/UNET_256wetlandDE1050.tfrecord.gz is different from target path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/UNET_256wetlandDE1050.tfrecord.gz
Downloaded path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/seasonal/training/UNET_256wetlandDE1060.tfrecord.gz is different from target path: /tmp/tmpd8v27ea0/c2c7ede0-a39a-4105-a83b-639e600f02d0/UNET_256wetlandDE1060.tfrecord.gz
Downloaded path: /tmp/tmpd8v27ea

In [19]:
# classums = train_con_mat.sum(axis = 1)
BIAS = np.log(wetlandPix/nonPix)#np.log(classums[1]/classums[0])
WEIGHT = nonPix/wetlandPix
TRAIN_SIZE = (nonPix+wetlandPix)/(256*256)#train_con_mat.sum()//(256*256)

print('size = ', TRAIN_SIZE)
print(f'bias = {BIAS}')
print(f'weight = {WEIGHT}')

size =  tf.Tensor(8016.003, shape=(), dtype=float32)
bias = -0.8413100242614746
weight = 2.3194034099578857


In [47]:
# 3507+3459+3507+3752
(0.0957+ -1.4 + -0.42 + -1.3) /4
# (0.91  + 4.02 + 1.5 + 3.7)/4

-0.756075

In [20]:
train_mount.stop()

### Define Training Parameters

In [32]:
# create script run config
# use the azure folder as our script folder
source = 'scv'
script_folder = 'azure'
script_file = 'train_wetland.py'

# copy the training script from github repo to local folder
# shutil.copy(src = os.path.join(source, script_folder, script_file),
# dst = os.path.join(script_folder, script_file))

# Initially we need to copy the directories
# shutil.copytree(src = '/home/azureuser/cloudfiles/code/Users/mevans/Satellite_ComputerVision/azure', dst = '/home/azureuser/cloudfiles/code/Users/mevans/azure')
# shutil.copytree(src = '/home/azureuser/cloudfiles/code/Users/mevans/Satellite_ComputerVision/utils', dst = '/home/azureuser/cloudfiles/code/Users/mevans/azure/utils')



In [22]:
print(FEATURES)
basic = ['B3_summer', 'B3_fall', 'B3_spring', 'B4_summer', 'B4_fall', 'B4_spring', 'B5_summer', 'B5_fall', 'B5_spring', 'B6_summer', 'B6_fall', 'B6_spring', 'B8_summer', 'B8_fall', 'B8_spring', 'B11_summer', 'B11_fall', 'B11_spring', 'B12_summer', 'B12_fall', 'B12_spring', 'R', 'G', 'B', 'N']
wlidar = ['B3_summer', 'B3_fall', 'B3_spring', 'B4_summer', 'B4_fall', 'B4_spring', 'B5_summer', 'B5_fall', 'B5_spring', 'B6_summer', 'B6_fall', 'B6_spring', 'B8_summer', 'B8_fall', 'B8_spring', 'B11_summer', 'B11_fall', 'B11_spring', 'B12_summer', 'B12_fall', 'B12_spring', 'R', 'G', 'B', 'N', 'lidar_intensity']
wgeomorphon = ['B3_summer', 'B3_fall', 'B3_spring', 'B4_summer', 'B4_fall', 'B4_spring', 'B5_summer', 'B5_fall', 'B5_spring', 'B6_summer', 'B6_fall', 'B6_spring', 'B8_summer', 'B8_fall', 'B8_spring', 'B11_summer', 'B11_fall', 'B11_spring', 'B12_summer', 'B12_fall', 'B12_spring', 'R', 'G', 'B', 'N', 'lidar_intensity', 'geomorphons']
full = ['B3_summer', 'B3_fall', 'B3_spring', 'B4_summer', 'B4_fall', 'B4_spring', 'B5_summer', 'B5_fall', 'B5_spring', 'B6_summer', 'B6_fall', 'B6_spring', 'B8_summer', 'B8_fall', 'B8_spring', 'B11_summer', 'B11_fall', 'B11_spring', 'B12_summer', 'B12_fall', 'B12_spring', 'R', 'G', 'B', 'N', 'lidar_intensity', 'geomorphons', 'wetland']


['B3_summer', 'B3_fall', 'B3_spring', 'B4_summer', 'B4_fall', 'B4_spring', 'B5_summer', 'B5_fall', 'B5_spring', 'B6_summer', 'B6_fall', 'B6_spring', 'B8_summer', 'B8_fall', 'B8_spring', 'B11_summer', 'B11_fall', 'B11_spring', 'B12_summer', 'B12_fall', 'B12_spring', 'R', 'G', 'B', 'N', 'lidar_intensity', 'geomorphons', 'wetland']


In [48]:
# define the command line arguments to our training sript
features = 'wgeomorphon'
args = [
    '--train_data', train_dataset.as_mount(),
    '--eval_data', eval_dataset.as_mount(),
    '--test_data', test_dataset.as_mount(),
    '--bands', features,
    '--weight', 2, 
    '--bias', -0.8,
    '-lr', 0.001,
    '--epochs', 100,
    '--batch', 16,
    '--size', 8016, #14225,
    '--kernel_size', 256,
    '--response', 'wetland']
    # '--bands', "B3_summer " "B3_fall " "B3_spring " "B4_summer " "B4_fall " "B4_spring " "B5_summer " "B5_fall " "B5_spring " "B6_summer " "B6_fall " "B6_spring " "B8_summer " "B8_fall " "B8_spring " "B11_summer " "B11_fall " "B11_spring " "B12_summer " "B12_fall " "B12_spring " "R " "G " "B " "N " "lidar_intensity " "geomorphons"
    # '--splits', None]

# TODO: figure out how to pass lists as command line arguments



In [31]:
%cd Wetland_UNet

/mnt/batch/tasks/shared/LS_root/mounts/clusters/wetlandsbasiccpu/code/Users/mevans/Wetland_UNet


In [49]:
#  run the training job
src = ScriptRunConfig(source_directory=script_folder,
                      script=script_file, 
                      arguments=args,
                      compute_target=wetlandGPU,
                      environment=env)
run = exp.submit(
    config=src,
     tags = dict({'splits':'[12,4]', 'one_hot':'geomorphon', 'model':'Unet', 'normalization':'pixel', 'features':f'{features}', 'epochs':'0-100', 'weight':f'{WEIGHT}'})
     )
run

Experiment,Id,Type,Status,Details Page,Docs Page
wetland-unet-seasonal,wetland-unet-seasonal_1652189390_86a54d03,azureml.scriptrun,Preparing,Link to Azure Machine Learning studio,Link to Documentation


In [28]:
from azureml.widgets import RunDetails
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

In [43]:
#  run the training job
src_basic = ScriptRunConfig(source_directory=script_folder,
                      script=script_file, 
                      arguments=args,
                      compute_target=wetlandDSVM2,
                      environment=env)
run_full = exp.submit(config=src_basic,
 tags = dict({'splits':'[21,4,1]', 'one_hot':'None', 'model':'Unet', 'normalization':'pixel', 'features':f'{features}', 'epochs':'0-100', 'weight':f'{WEIGHT}'})
)
run_full

Experiment,Id,Type,Status,Details Page,Docs Page
wetland-unet-seasonal,wetland-unet-seasonal_1652041401_745b2492,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [None]:
RunDetails(run_basic).show()

In [37]:
#  run the training job
features = 'wlidar'
src_lidar = ScriptRunConfig(source_directory=script_folder,
                      script=script_file, 
                      arguments=args,
                      compute_target=wetlandDSVM,
                      environment=env)
run_lidar = exp.submit(config=src_lidar, tags = dict({'splits':'None', 'one_hot':'geomorphons', 'model':'Unet', 'normalization':'pixel', 'features':f'{features}'}))
run_lidar

Experiment,Id,Type,Status,Details Page,Docs Page
wetland-unet-seasonal,wetland-unet-seasonal_1652035384_213cd69c,azureml.scriptrun,Starting,Link to Azure Machine Learning studio,Link to Documentation


In [14]:
features = 'wgeomorphon'
src_geomorphon = ScriptRunConfig(source_directory = script_folder,
    script = script_file.format(features),
    arguments = args,
    compute_target = wetlandGPU,
    environment = env)

run_geomporphon = exp.submit(config = src_geomorphon, tags = dict({'splits':'None', 'one_hot':'geomorphons', 'model':'Unet', 'normalization':'pixel', 'features':f'{features}'}))

In [15]:
from azureml.widgets import RunDetails
RunDetails(run_geomporphon).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…