In [1]:
# Import the necessary packages and libraries #
import scipy
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy
import numpy.matlib
import pandas 
import sklearn
from sklearn import preprocessing
from sklearn import linear_model
import tensorflow
from tensorflow import keras 
import os
from sklearn.neighbors import DistanceMetric

In [2]:
# Sets the number of decimal places shown when pandas DataFrames are rendered.
# The fully-qualified option name is used because the bare "precision" key is
# ambiguous (or missing) in current pandas releases.

pandas.set_option("display.precision", 10)

In [3]:
# Asks TensorFlow's native logging to suppress low-severity messages (presumably INFO and WARNING at level '2').
# NOTE(review): tensorflow is already imported in the first cell; this variable is
# normally expected to be set before that import - confirm it still takes effect here.

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [4]:
# Load only the 'Input' and 'Output' variables from the MATLAB simulation file #

matlabData = sio.loadmat(
    './data/s2_sl2p_uniform_10_replicates_sobol_prosail_inout.mat',
    variable_names=['Input', 'Output'],
)

In [5]:
# Unpack the loaded structs into pandas objects #
# Each field is reached with [0][0] after selecting it by name; the three output
# fields are flattened to 1-D before being wrapped in a Series.

bands = pandas.DataFrame(data=matlabData['Input']['Rho_Toc'][0][0])
angles = pandas.DataFrame(data=matlabData['Input']['Angles'][0][0])
LAI, FAPAR, FCOVER = (
    pandas.Series(data=matlabData['Output'][field][0][0].flatten())
    for field in ('LAI', 'FAPAR', 'FCOVER')
)

In [6]:
# Assemble the (unscaled) calibration table: bands, angles and the three targets side by side #

cal_data = pandas.concat(
    [bands, angles, LAI, FAPAR, FCOVER],
    axis=1,
    join='outer',
)

cal_data.columns = (
    ['B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']   # presumably the columns of `bands`
    + ['A1', 'A2', 'A3']                                # presumably the columns of `angles`
    + ['LAI', 'FAPAR', 'FCOVER']                        # the three output series
)

cal_data

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7,A1,A2,A3,LAI,FAPAR,FCOVER
0,0.1312746755,0.0743505601,0.1729546392,0.2952160738,0.3209243927,0.3303722287,0.1873586099,0.1113007554,0.9638065534,0.5275411500,-0.5752876765,1,0.4391203617,0.4803244610
1,0.1019407701,0.0868185076,0.1599851769,0.3680334831,0.4389734403,0.4759755142,0.3043390179,0.1996883219,0.9995852344,0.5284670738,-0.4777431279,1,0.5296906764,0.5338211214
2,0.0577228876,0.0543735427,0.0726863963,0.1146353863,0.1262129829,0.1300015294,0.1048168652,0.0853760008,0.9807097061,0.6843199822,0.6909005295,1,0.2406051357,0.2324486617
3,0.0608335819,0.0423475494,0.0917885392,0.1738484853,0.1980468499,0.2153382831,0.1396374825,0.0906137034,0.9865515976,0.7869673865,0.8590635747,1,0.3515168619,0.3732491055
4,0.0789422177,0.1359804109,0.1677317262,0.2554469658,0.2962514793,0.3108466204,0.3167107908,0.2088934617,0.9999190888,0.8237423248,-0.9085753290,1,0.2890892994,0.2565897729
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122875,0.0642385315,0.0276301255,0.0866383370,0.3106952678,0.4255780128,0.4466691326,0.1173674161,0.0414608341,0.9641458993,0.8501574022,-0.9266152189,10,0.9454688738,0.9530549586
122876,0.0913131658,0.0504649365,0.1222839075,0.3081750760,0.4047671458,0.4293059492,0.1725548796,0.1005631041,0.9616583754,0.8710934328,0.9265827175,10,0.8085248415,0.8034524935
122877,0.0621419307,0.0380236717,0.0921177459,0.2614424931,0.3577263614,0.3626203975,0.1294881968,0.0624406803,0.9757927762,0.8637999983,-0.9813994186,10,0.8185069725,0.8122050332
122878,0.0514800307,0.0288523379,0.0690024543,0.4158467721,0.6343113015,0.6354447032,0.1810943898,0.0474550174,0.9606931097,0.8268533675,-0.8694016324,10,0.9810850825,0.9992241257


In [7]:
# Standardize the calibration data #
# Every column of cal_data (inputs and the LAI/FAPAR/FCOVER targets alike) is passed
# through a StandardScaler. The fitted scaler is kept in `cal_scaler` so that its
# per-column mean_ / scale_ remain available afterwards (e.g. to export the LAI mean
# and std, or to map predictions back to the original units).
# NOTE(review): the scaler is fitted on the full table, before the reference sample
# and the validation split are taken out - confirm that is intended.

cal_scaler = sklearn.preprocessing.StandardScaler()

# Column labels are carried over from cal_data instead of being retyped.
cal_data_scaled = pandas.DataFrame(cal_scaler.fit_transform(cal_data), columns=cal_data.columns)

cal_data_scaled

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7,A1,A2,A3,LAI,FAPAR,FCOVER
0,1.1600887942,1.1894959571,1.1005309674,-0.7574953553,-1.2136760897,-1.2551477212,-0.5123998591,0.0482701907,-1.5351947333,-1.2590753240,-0.7795178075,-1.5666989036,-1.8949148320,-1.6291084301
1,0.3768666595,1.6844505369,0.8279254564,-0.1275628723,-0.4502226101,-0.3312429285,0.9573384489,1.5671047307,1.3154921199,-1.2529678973,-0.6517039023,-1.5666989036,-1.4265332698,-1.3709908078
2,-0.8037611472,0.3964451147,-1.0070101806,-2.3196717910,-2.4729247349,-2.5265712936,-1.5494518415,-0.3972156885,-0.1884263875,-0.2249564670,0.8795852552,-1.5666989036,-2.9215301414,-2.8250917213
3,-0.7207048794,-0.0809646948,-0.6055016769,-1.8074281556,-2.0083566857,-1.9850791466,-1.1119666599,-0.3072120391,0.2770296632,0.4521095310,1.0999315055,-1.5666989036,-2.3479536034,-2.1457394865
4,-0.2372000684,3.6360876602,0.9907504054,-1.1015319421,-1.3732421509,-1.3790446724,1.1127770292,1.7252840412,1.3420921626,0.6946783564,-1.2162290029,-1.5666989036,-2.6707957989,-2.7086125584
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122875,-0.6297919312,-0.6652193518,-0.7137539294,-0.6235871705,-0.5368542767,-0.5172023756,-1.3917671005,-1.1518452670,-1.5081571622,0.8689131637,-1.2398669079,1.5666989036,0.7236507314,0.6517829598
122876,0.0931071377,0.2412806393,0.0354813671,-0.6453889716,-0.6714434932,-0.6273779690,-0.6983934776,-0.1362434958,-1.7063520463,1.0070079804,1.1884027264,1.5666989036,0.0154489326,-0.0700384081
122877,-0.6857716627,-0.2526146846,-0.5985820716,-1.0496655348,-0.9756683152,-1.0505215854,-1.2394819780,-0.7913317214,-0.5801855863,0.9589002217,-1.3116513655,1.5666989036,0.0670712121,-0.0278080201
122878,-0.9704468859,-0.6166997679,-1.0844430668,0.2860627172,0.8130774569,0.6806459689,-0.5911033299,-1.0488424022,-1.7832601421,0.7151989067,-1.1648991929,1.5666989036,0.9078388454,0.8745459430


In [8]:
# Create subset id's #
# Ids 0..n-1 (n = rows / rep) are repeated `rep` times, so rows i, i+n, i+2n, ...
# receive the same subset_id (presumably the replicates of one simulation case).

rep = 10

n_rows = cal_data_scaled.shape[0]
if n_rows % rep != 0:
    # Without this the column assignment below would fail with a length mismatch.
    raise ValueError(f"Number of rows ({n_rows}) is not a multiple of rep ({rep})")

# Derive the subset count from `rep` (integer division) rather than a second literal 10.
subsets = numpy.arange(0, n_rows // rep)

# Shape (1, n * rep), the same layout numpy.matlib.repmat(subsets, 1, rep) produced.
subset_ids = numpy.tile(subsets, (1, rep))

# Assigning the column already names it 'subset_id'; no column relabelling is needed.
cal_data_scaled['subset_id'] = subset_ids[0]

cal_data_scaled

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7,A1,A2,A3,LAI,FAPAR,FCOVER,subset_id
0,1.1600887942,1.1894959571,1.1005309674,-0.7574953553,-1.2136760897,-1.2551477212,-0.5123998591,0.0482701907,-1.5351947333,-1.2590753240,-0.7795178075,-1.5666989036,-1.8949148320,-1.6291084301,0
1,0.3768666595,1.6844505369,0.8279254564,-0.1275628723,-0.4502226101,-0.3312429285,0.9573384489,1.5671047307,1.3154921199,-1.2529678973,-0.6517039023,-1.5666989036,-1.4265332698,-1.3709908078,1
2,-0.8037611472,0.3964451147,-1.0070101806,-2.3196717910,-2.4729247349,-2.5265712936,-1.5494518415,-0.3972156885,-0.1884263875,-0.2249564670,0.8795852552,-1.5666989036,-2.9215301414,-2.8250917213,2
3,-0.7207048794,-0.0809646948,-0.6055016769,-1.8074281556,-2.0083566857,-1.9850791466,-1.1119666599,-0.3072120391,0.2770296632,0.4521095310,1.0999315055,-1.5666989036,-2.3479536034,-2.1457394865,3
4,-0.2372000684,3.6360876602,0.9907504054,-1.1015319421,-1.3732421509,-1.3790446724,1.1127770292,1.7252840412,1.3420921626,0.6946783564,-1.2162290029,-1.5666989036,-2.6707957989,-2.7086125584,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122875,-0.6297919312,-0.6652193518,-0.7137539294,-0.6235871705,-0.5368542767,-0.5172023756,-1.3917671005,-1.1518452670,-1.5081571622,0.8689131637,-1.2398669079,1.5666989036,0.7236507314,0.6517829598,12283
122876,0.0931071377,0.2412806393,0.0354813671,-0.6453889716,-0.6714434932,-0.6273779690,-0.6983934776,-0.1362434958,-1.7063520463,1.0070079804,1.1884027264,1.5666989036,0.0154489326,-0.0700384081,12284
122877,-0.6857716627,-0.2526146846,-0.5985820716,-1.0496655348,-0.9756683152,-1.0505215854,-1.2394819780,-0.7913317214,-0.5801855863,0.9589002217,-1.3116513655,1.5666989036,0.0670712121,-0.0278080201,12285
122878,-0.9704468859,-0.6166997679,-1.0844430668,0.2860627172,0.8130774569,0.6806459689,-0.5911033299,-1.0488424022,-1.7832601421,0.7151989067,-1.1648991929,1.5666989036,0.9078388454,0.8745459430,12286


In [9]:
# Sample data to create reference database # 
# Draws 100 rows at random; ignore_index=False keeps the original row labels so the
# same rows can be removed from the calibration database afterwards.
# NOTE(review): no random_state is given, so the sample differs on every run.

ref_data = cal_data_scaled.sample(n=100, ignore_index=False)

ref_data

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7,A1,A2,A3,LAI,FAPAR,FCOVER,subset_id
102073,1.9146009090,-0.1862052050,1.7241164684,2.3767153325,2.2268243574,2.0591199297,2.4125373569,1.5603638760,-0.4744403059,-1.2323017149,0.6977375763,1.2185435917,0.7630195241,0.8406327655,3769
17004,-0.0763754017,1.1140656030,0.1105349933,-1.1713620800,-1.4569924447,-1.5275102058,-0.4993412985,0.7516432092,-1.3477029522,0.8589452331,-1.2384316309,-1.2185435917,-1.7339379604,-1.7102364455,4716
49708,-0.9504099732,0.4991663697,-0.9548524507,-1.8771910290,-1.8251159881,-1.7944767916,-1.0828814035,-0.1051855618,1.2872533361,-0.1726569257,0.9098448954,-0.1740776560,-1.2115522266,-1.3718761225,556
84093,-1.6407370008,-0.9235291785,-1.6927726350,-1.7923798713,-1.4208255645,-1.2931168511,-1.2426967980,-0.8166900001,0.4140462440,1.0943275794,1.1945304013,0.5222329679,-0.2717084160,-0.3348812077,10365
36098,-1.3342160431,-0.3624892266,-1.1737580192,-1.9147389853,-2.0035179667,-2.0449652419,-1.6404210731,-0.9967598303,1.0885045323,0.7622158810,1.2063688995,-0.8703882798,-1.6175139580,-1.5802306350,11522
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80164,-0.0435949017,-0.6424506560,-0.0577564331,-0.0170579551,0.0489140345,0.1989454807,0.4848629451,0.3540625153,-0.0352902811,-1.8000892737,-0.6796668395,0.5222329679,0.4479898205,0.4481998822,6436
45654,0.0158531939,-0.2537003267,0.0217738801,0.6599563902,0.6555693385,0.7061803276,-0.4142529265,-0.6008976848,-0.2538632845,-0.3368365924,-0.9036086720,-0.5222329679,0.5947502241,0.5803605433,8790
81057,-1.1070655784,-0.9017168881,-1.1868517804,-0.5983490452,-0.2770009517,-0.2848381918,-0.8920790272,-0.9822944434,0.8417497075,-1.5153212975,-0.7915802401,0.5222329679,0.6114068686,0.5666941539,7329
35695,-0.7018796164,0.1705636029,-0.5347302577,0.3885734665,0.6627136887,0.6947705265,0.5865768430,-0.0609915354,0.9734603381,0.9318147042,1.0808281880,-0.8703882798,0.4112345089,0.3761518110,11119


In [10]:
# Row labels of the sampled reference rows; these are the rows to remove from the calibration database #
# Must be captured before ref_data's index is reset.

index_list = ref_data.index.values

In [11]:
# Resets the indices in the reference data so that they start from zero
# (drop=True discards the old labels instead of keeping them as a column)

ref_data = ref_data.reset_index(drop=True)

In [12]:
# Removes from the calibration database the rows whose labels are in index_list (the sampled reference rows) #

cal_data_scaled = cal_data_scaled.drop(index_list)

In [13]:
# Resets the indices in the calibration data so that they start from zero
# (closes the gaps left by the rows dropped above)

cal_data_scaled = cal_data_scaled.reset_index(drop=True)

In [14]:
# Random 70/30 split of the calibration data, used for the LARS feature selection
# NOTE(review): random_state=None means the split is different on every run.

features_training, features_valid = sklearn.model_selection.train_test_split(
    cal_data_scaled,
    train_size=0.7,
    test_size=0.3,
    shuffle=True,
    stratify=None,
    random_state=None,
)

In [15]:
# Resets the indices in the training data so that they start from zero
# (the shuffled split leaves the rows with their original, unordered labels)

features_training = features_training.reset_index(drop=True)

In [16]:
# Resets the indices in the validation data so that they start from zero
# (the shuffled split leaves the rows with their original, unordered labels)

features_valid = features_valid.reset_index(drop=True)

In [17]:
# Pull the three target columns out of the training split (one Series per target)

LAI_feature_training, FAPAR_feature_training, FCOVER_feature_training = (
    features_training[target] for target in ('LAI', 'FAPAR', 'FCOVER')
)

In [18]:
# Pull the three target columns out of the validation split (one Series per target)

LAI_feature_valid, FAPAR_feature_valid, FCOVER_feature_valid = (
    features_valid[target] for target in ('LAI', 'FAPAR', 'FCOVER')
)

In [19]:
# Drop the target columns and the subset id so that only predictor columns remain
features_training = features_training.drop(columns=['LAI', 'FAPAR', 'FCOVER', 'subset_id'])
features_valid = features_valid.drop(columns=['LAI', 'FAPAR', 'FCOVER', 'subset_id'])

In [20]:
# Fit one LARS regression per target on the training predictors.
# n_nonzero_coefs limits how many predictors each model may use (4, 4 and 3 here).

LAI_feature_model = sklearn.linear_model.Lars(n_nonzero_coefs=4).fit(
    features_training, LAI_feature_training
)

FAPAR_feature_model = sklearn.linear_model.Lars(n_nonzero_coefs=4).fit(
    features_training, FAPAR_feature_training
)

FCOVER_feature_model = sklearn.linear_model.Lars(n_nonzero_coefs=3).fit(
    features_training, FCOVER_feature_training
)

In [21]:
# Predict each target on the validation predictors with its LARS model

LAI_feature_predicted, FAPAR_feature_predicted, FCOVER_feature_predicted = (
    pandas.Series(model.predict(features_valid))
    for model in (LAI_feature_model, FAPAR_feature_model, FCOVER_feature_model)
)

In [22]:
# Names of the predictors each LARS model actually uses (those with a non-zero coefficient)

LAI_features = features_valid.columns[numpy.nonzero(LAI_feature_model.coef_)[0]]
print(LAI_features)

FAPAR_features = features_valid.columns[numpy.nonzero(FAPAR_feature_model.coef_)[0]]
print(FAPAR_features)

FCOVER_features = features_valid.columns[numpy.nonzero(FCOVER_feature_model.coef_)[0]]
print(FCOVER_features)

Index(['B1', 'B5', 'B6', 'B7'], dtype='object')
Index(['B1', 'B2', 'B5', 'B7'], dtype='object')
Index(['B1', 'B5', 'B7'], dtype='object')


In [23]:
# Creates NumPy arrays of the reference and calibration rows,
# restricted to the columns selected by the LAI LARS model (LAI_features)

ref_array = numpy.array(ref_data[LAI_features])

cal_array = numpy.array(cal_data_scaled[LAI_features])

In [24]:
# Gets scikit-learn's Euclidean distance metric object, used for the pairwise distances below
# NOTE(review): DistanceMetric is imported from sklearn.neighbors at the top of the notebook;
# newer scikit-learn releases expose it under sklearn.metrics - confirm against the installed version.

dist = DistanceMetric.get_metric('euclidean')

In [25]:
# Weight of each calibration sample: exp(-d), where d is its Euclidean distance
# to the nearest reference sample (row-wise minimum of the pairwise distance matrix)

probs = numpy.exp(-dist.pairwise(cal_array, ref_array).min(axis=1))

In [26]:
# Initialize the probability column in the calibration database #
# The assignment itself creates and names the 'prob' column, so the column labels
# do not need to be re-applied afterwards.

cal_data_scaled['prob'] = probs

In [27]:
# Display the calibration database with the new (un-normalized) 'prob' column
cal_data_scaled

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7,A1,A2,A3,LAI,FAPAR,FCOVER,subset_id,prob
0,1.1600887942,1.1894959571,1.1005309674,-0.7574953553,-1.2136760897,-1.2551477212,-0.5123998591,0.0482701907,-1.5351947333,-1.2590753240,-0.7795178075,-1.5666989036,-1.8949148320,-1.6291084301,0,0.6346613590
1,0.3768666595,1.6844505369,0.8279254564,-0.1275628723,-0.4502226101,-0.3312429285,0.9573384489,1.5671047307,1.3154921199,-1.2529678973,-0.6517039023,-1.5666989036,-1.4265332698,-1.3709908078,1,0.3981959296
2,-0.8037611472,0.3964451147,-1.0070101806,-2.3196717910,-2.4729247349,-2.5265712936,-1.5494518415,-0.3972156885,-0.1884263875,-0.2249564670,0.8795852552,-1.5666989036,-2.9215301414,-2.8250917213,2,0.3978523439
3,-0.7207048794,-0.0809646948,-0.6055016769,-1.8074281556,-2.0083566857,-1.9850791466,-1.1119666599,-0.3072120391,0.2770296632,0.4521095310,1.0999315055,-1.5666989036,-2.3479536034,-2.1457394865,3,0.5390208897
4,-0.2372000684,3.6360876602,0.9907504054,-1.1015319421,-1.3732421509,-1.3790446724,1.1127770292,1.7252840412,1.3420921626,0.6946783564,-1.2162290029,-1.5666989036,-2.6707957989,-2.7086125584,4,0.7186840286
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122775,-0.6297919312,-0.6652193518,-0.7137539294,-0.6235871705,-0.5368542767,-0.5172023756,-1.3917671005,-1.1518452670,-1.5081571622,0.8689131637,-1.2398669079,1.5666989036,0.7236507314,0.6517829598,12283,0.7949099907
122776,0.0931071377,0.2412806393,0.0354813671,-0.6453889716,-0.6714434932,-0.6273779690,-0.6983934776,-0.1362434958,-1.7063520463,1.0070079804,1.1884027264,1.5666989036,0.0154489326,-0.0700384081,12284,0.6373138791
122777,-0.6857716627,-0.2526146846,-0.5985820716,-1.0496655348,-0.9756683152,-1.0505215854,-1.2394819780,-0.7913317214,-0.5801855863,0.9589002217,-1.3116513655,1.5666989036,0.0670712121,-0.0278080201,12285,0.7139420826
122778,-0.9704468859,-0.6166997679,-1.0844430668,0.2860627172,0.8130774569,0.6806459689,-0.5911033299,-1.0488424022,-1.7832601421,0.7151989067,-1.1648991929,1.5666989036,0.9078388454,0.8745459430,12286,0.7398517162


In [28]:
# Function to normalize the probabilities 

def normalize(data: pandas.DataFrame) -> pandas.DataFrame:
    """Rescale the 'prob' column of ``data`` so that it sums to 1.

    The frame is modified in place (callers rely on this) and is also returned
    so the result can be displayed or chained.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a numeric 'prob' column.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with 'prob' divided by its total.

    Raises
    ------
    ValueError
        If the frame is non-empty and the total of 'prob' is zero, negative or
        NaN; dividing by such a total would silently produce NaN/inf weights.
    """
    # Vectorized sum; skipna=False keeps NaN entries from being silently ignored.
    total = data['prob'].sum(skipna=False)
    if len(data) and not total > 0:
        raise ValueError("cannot normalize 'prob': column total is {!r}".format(total))
    data['prob'] = data['prob'] / total
    return data

In [29]:
# Rescale 'prob' so it sums to 1; normalize() updates cal_data_scaled in place and the returned frame is displayed
normalize(cal_data_scaled)

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7,A1,A2,A3,LAI,FAPAR,FCOVER,subset_id,prob
0,1.1600887942,1.1894959571,1.1005309674,-0.7574953553,-1.2136760897,-1.2551477212,-0.5123998591,0.0482701907,-1.5351947333,-1.2590753240,-0.7795178075,-1.5666989036,-1.8949148320,-1.6291084301,0,0.0000077210
1,0.3768666595,1.6844505369,0.8279254564,-0.1275628723,-0.4502226101,-0.3312429285,0.9573384489,1.5671047307,1.3154921199,-1.2529678973,-0.6517039023,-1.5666989036,-1.4265332698,-1.3709908078,1,0.0000048443
2,-0.8037611472,0.3964451147,-1.0070101806,-2.3196717910,-2.4729247349,-2.5265712936,-1.5494518415,-0.3972156885,-0.1884263875,-0.2249564670,0.8795852552,-1.5666989036,-2.9215301414,-2.8250917213,2,0.0000048401
3,-0.7207048794,-0.0809646948,-0.6055016769,-1.8074281556,-2.0083566857,-1.9850791466,-1.1119666599,-0.3072120391,0.2770296632,0.4521095310,1.0999315055,-1.5666989036,-2.3479536034,-2.1457394865,3,0.0000065575
4,-0.2372000684,3.6360876602,0.9907504054,-1.1015319421,-1.3732421509,-1.3790446724,1.1127770292,1.7252840412,1.3420921626,0.6946783564,-1.2162290029,-1.5666989036,-2.6707957989,-2.7086125584,4,0.0000087432
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122775,-0.6297919312,-0.6652193518,-0.7137539294,-0.6235871705,-0.5368542767,-0.5172023756,-1.3917671005,-1.1518452670,-1.5081571622,0.8689131637,-1.2398669079,1.5666989036,0.7236507314,0.6517829598,12283,0.0000096705
122776,0.0931071377,0.2412806393,0.0354813671,-0.6453889716,-0.6714434932,-0.6273779690,-0.6983934776,-0.1362434958,-1.7063520463,1.0070079804,1.1884027264,1.5666989036,0.0154489326,-0.0700384081,12284,0.0000077532
122777,-0.6857716627,-0.2526146846,-0.5985820716,-1.0496655348,-0.9756683152,-1.0505215854,-1.2394819780,-0.7913317214,-0.5801855863,0.9589002217,-1.3116513655,1.5666989036,0.0670712121,-0.0278080201,12285,0.0000086855
122778,-0.9704468859,-0.6166997679,-1.0844430668,0.2860627172,0.8130774569,0.6806459689,-0.5911033299,-1.0488424022,-1.7832601421,0.7151989067,-1.1648991929,1.5666989036,0.9078388454,0.8745459430,12286,0.0000090007


In [30]:
# Re-applies the column labels of the calibration database #
# NOTE(review): the frame already carries exactly these labels at this point, so this
# statement does not change anything; it only fails if the column count differs.

cal_data_scaled.columns = ['B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'A1', 'A2', 'A3', 'LAI', 'FAPAR', 'FCOVER', 'subset_id', 'prob']

In [31]:
# Assign max probability in each subset to every member of that subset 
# (transform('max') returns one value per original row, so it can be assigned straight back)

cal_data_scaled['prob'] = cal_data_scaled.groupby('subset_id')['prob'].transform('max')

In [32]:
# Re-normalize after the per-subset maximum was broadcast, so 'prob' sums to 1 again (updates cal_data_scaled in place)
normalize(cal_data_scaled)

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7,A1,A2,A3,LAI,FAPAR,FCOVER,subset_id,prob
0,1.1600887942,1.1894959571,1.1005309674,-0.7574953553,-1.2136760897,-1.2551477212,-0.5123998591,0.0482701907,-1.5351947333,-1.2590753240,-0.7795178075,-1.5666989036,-1.8949148320,-1.6291084301,0,0.0000076834
1,0.3768666595,1.6844505369,0.8279254564,-0.1275628723,-0.4502226101,-0.3312429285,0.9573384489,1.5671047307,1.3154921199,-1.2529678973,-0.6517039023,-1.5666989036,-1.4265332698,-1.3709908078,1,0.0000077760
2,-0.8037611472,0.3964451147,-1.0070101806,-2.3196717910,-2.4729247349,-2.5265712936,-1.5494518415,-0.3972156885,-0.1884263875,-0.2249564670,0.8795852552,-1.5666989036,-2.9215301414,-2.8250917213,2,0.0000075626
3,-0.7207048794,-0.0809646948,-0.6055016769,-1.8074281556,-2.0083566857,-1.9850791466,-1.1119666599,-0.3072120391,0.2770296632,0.4521095310,1.0999315055,-1.5666989036,-2.3479536034,-2.1457394865,3,0.0000083298
4,-0.2372000684,3.6360876602,0.9907504054,-1.1015319421,-1.3732421509,-1.3790446724,1.1127770292,1.7252840412,1.3420921626,0.6946783564,-1.2162290029,-1.5666989036,-2.6707957989,-2.7086125584,4,0.0000076371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122775,-0.6297919312,-0.6652193518,-0.7137539294,-0.6235871705,-0.5368542767,-0.5172023756,-1.3917671005,-1.1518452670,-1.5081571622,0.8689131637,-1.2398669079,1.5666989036,0.7236507314,0.6517829598,12283,0.0000082484
122776,0.0931071377,0.2412806393,0.0354813671,-0.6453889716,-0.6714434932,-0.6273779690,-0.6983934776,-0.1362434958,-1.7063520463,1.0070079804,1.1884027264,1.5666989036,0.0154489326,-0.0700384081,12284,0.0000076506
122777,-0.6857716627,-0.2526146846,-0.5985820716,-1.0496655348,-0.9756683152,-1.0505215854,-1.2394819780,-0.7913317214,-0.5801855863,0.9589002217,-1.3116513655,1.5666989036,0.0670712121,-0.0278080201,12285,0.0000083799
122778,-0.9704468859,-0.6166997679,-1.0844430668,0.2860627172,0.8130774569,0.6806459689,-0.5911033299,-1.0488424022,-1.7832601421,0.7151989067,-1.1648991929,1.5666989036,0.9078388454,0.8745459430,12286,0.0000085706


In [33]:
# Find the median of the normalized probabilities; it is used as the cut-off in the next step

median_prob = numpy.median(cal_data_scaled['prob'])

print(median_prob)

8.259931182194922e-06


In [34]:
# Set the weights that are at or below the median to zero
# (only values strictly greater than median_prob are kept unchanged)

cal_data_scaled['prob'] = cal_data_scaled['prob'].where(cal_data_scaled['prob'] > median_prob, 0.0)

In [35]:
# Display the calibration database after the weights at or below the median were zeroed
cal_data_scaled

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7,A1,A2,A3,LAI,FAPAR,FCOVER,subset_id,prob
0,1.1600887942,1.1894959571,1.1005309674,-0.7574953553,-1.2136760897,-1.2551477212,-0.5123998591,0.0482701907,-1.5351947333,-1.2590753240,-0.7795178075,-1.5666989036,-1.8949148320,-1.6291084301,0,0.0000000000
1,0.3768666595,1.6844505369,0.8279254564,-0.1275628723,-0.4502226101,-0.3312429285,0.9573384489,1.5671047307,1.3154921199,-1.2529678973,-0.6517039023,-1.5666989036,-1.4265332698,-1.3709908078,1,0.0000000000
2,-0.8037611472,0.3964451147,-1.0070101806,-2.3196717910,-2.4729247349,-2.5265712936,-1.5494518415,-0.3972156885,-0.1884263875,-0.2249564670,0.8795852552,-1.5666989036,-2.9215301414,-2.8250917213,2,0.0000000000
3,-0.7207048794,-0.0809646948,-0.6055016769,-1.8074281556,-2.0083566857,-1.9850791466,-1.1119666599,-0.3072120391,0.2770296632,0.4521095310,1.0999315055,-1.5666989036,-2.3479536034,-2.1457394865,3,0.0000083298
4,-0.2372000684,3.6360876602,0.9907504054,-1.1015319421,-1.3732421509,-1.3790446724,1.1127770292,1.7252840412,1.3420921626,0.6946783564,-1.2162290029,-1.5666989036,-2.6707957989,-2.7086125584,4,0.0000000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122775,-0.6297919312,-0.6652193518,-0.7137539294,-0.6235871705,-0.5368542767,-0.5172023756,-1.3917671005,-1.1518452670,-1.5081571622,0.8689131637,-1.2398669079,1.5666989036,0.7236507314,0.6517829598,12283,0.0000000000
122776,0.0931071377,0.2412806393,0.0354813671,-0.6453889716,-0.6714434932,-0.6273779690,-0.6983934776,-0.1362434958,-1.7063520463,1.0070079804,1.1884027264,1.5666989036,0.0154489326,-0.0700384081,12284,0.0000000000
122777,-0.6857716627,-0.2526146846,-0.5985820716,-1.0496655348,-0.9756683152,-1.0505215854,-1.2394819780,-0.7913317214,-0.5801855863,0.9589002217,-1.3116513655,1.5666989036,0.0670712121,-0.0278080201,12285,0.0000083799
122778,-0.9704468859,-0.6166997679,-1.0844430668,0.2860627172,0.8130774569,0.6806459689,-0.5911033299,-1.0488424022,-1.7832601421,0.7151989067,-1.1648991929,1.5666989036,0.9078388454,0.8745459430,12286,0.0000085706


In [36]:
# Random 70/30 split of the weighted calibration data, used for the neural network
# NOTE(review): random_state=None means the split is different on every run.

training_data, valid_data = sklearn.model_selection.train_test_split(
    cal_data_scaled,
    train_size=0.7,
    test_size=0.3,
    shuffle=True,
    stratify=None,
    random_state=None,
)

In [37]:
# Resets the indices in the training data so that they start from zero
# (the shuffled split leaves the rows with their original, unordered labels)

training_data = training_data.reset_index(drop=True)

In [38]:
# Resets the indices in the validation data so that they start from zero
# (the shuffled split leaves the rows with their original, unordered labels)

valid_data = valid_data.reset_index(drop=True)

In [39]:
# Pull the three target columns out of the training split (one Series per target)

LAI_training, FAPAR_training, FCOVER_training = (
    training_data[target] for target in ('LAI', 'FAPAR', 'FCOVER')
)

In [40]:
# Pull the three target columns out of the validation split (one Series per target)

LAI_valid, FAPAR_valid, FCOVER_valid = (
    valid_data[target] for target in ('LAI', 'FAPAR', 'FCOVER')
)

In [41]:
# Extracts the probabilities for training and validation as NumPy arrays
# (they are passed to the network fit below as per-sample weights)

training_weights = numpy.array(training_data['prob'])
valid_weights = numpy.array(valid_data['prob'])

In [42]:
# Column-name lists. Only `MatLab` is used by the following cells (to select the network inputs);
# `gee` and `LAI_inputs` are not referenced anywhere else in the visible notebook.
# NOTE(review): `gee` / `LAI_inputs` look like the equivalent band and angle names in another naming scheme - confirm.
gee = ['cosVZA', 'cosSZA', 'cosRAA', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8A', 'B11', 'B12']
# Disabled alternative that also includes the three angle columns:
#MatLab = ['B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'A1', 'A2', 'A3']
MatLab = ['B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
LAI_inputs = ['B3','B4', 'B5', 'B6', 'B7', 'B8A', 'B11', 'B12','cosVZA', 'cosSZA', 'cosRAA']

In [43]:
# Removes the columns that aren't needed for training and validation
# Explicitly subset the inputs: only the columns listed in `MatLab` are kept,
# which drops the angles, targets, subset_id and prob columns.
training_data = training_data[MatLab]
valid_data = valid_data[MatLab]

In [44]:
# Inspect the training inputs after the column selection
training_data

Unnamed: 0,B0,B1,B2,B3,B4,B5,B6,B7
0,-0.4262148424,-0.4796614504,-0.5716884747,-1.0730592345,-1.2290790031,-1.2840944262,-1.7172511418,-1.1385682059
1,-1.4096700439,-0.7124454641,-1.4879335880,-1.6159175756,-1.2559328884,-1.2405576255,-1.0001168402,-0.5180820854
2,1.4426641698,-0.1307646590,1.4210110228,0.9454059416,0.4755168154,0.4998250419,1.0506572400,0.5023795596
3,1.5846958372,-0.1897979553,1.6089934563,1.8946383950,1.7224476885,1.9114265520,3.0669145690,2.3034794989
4,-0.7812638314,0.2126152778,-0.7747040671,-1.4646867552,-1.6363231867,-1.6999644423,-1.2027011255,-0.4463152739
...,...,...,...,...,...,...,...,...
85941,0.6265828677,-0.4189754998,0.5584090341,0.5622087303,0.3013928585,0.2688971650,-0.1747625865,-0.5446638801
85942,-1.0531123085,-0.3559709367,-1.1908864669,-0.4668617114,-0.1308390294,-0.2069432346,0.6878868837,0.4920887664
85943,0.1778350299,0.4756714763,0.3717384904,-0.8355182407,-1.0703734360,-1.0454599183,-1.1596903717,-0.7089431599
85944,-0.5055289226,-0.3658727389,-0.5977826077,-1.4575826766,-1.7287754850,-1.7885332405,-0.7823833631,-0.3015723717


In [45]:
# Early-stopping callback for the LAI network: watches the 'loss' metric with a patience of 3 epochs
# NOTE(review): this monitors the training loss, not 'val_loss', even though validation data is passed to fit - confirm intended.
LAI_callback = tensorflow.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

In [46]:
# Build and compile the neural network for LAI:
# two hidden ReLU layers of 10 units each and a single linear output unit.

LAI_model = tensorflow.keras.models.Sequential()
LAI_model.add(
    tensorflow.keras.layers.Dense(
        10,
        activation=tensorflow.nn.relu,
        input_shape=[len(training_data.columns)],  # one input per predictor column
    )
)
LAI_model.add(tensorflow.keras.layers.Dense(10, activation=tensorflow.nn.relu))
LAI_model.add(tensorflow.keras.layers.Dense(1))

# Mean squared error is both the training loss and a reported metric, alongside MAE.
LAI_model.compile(
    loss='mse',
    optimizer=tensorflow.keras.optimizers.Nadam(),
    metrics=['mse', 'mae'],
)

In [47]:
# Runs NN Model for LAI
# Trains for at most 120 epochs (LAI_callback may stop earlier). The thresholded,
# normalized probabilities are used as per-sample weights for both the training
# and the validation loss.
# NOTE(review): samples whose weight was set to 0.0 presumably do not contribute to the weighted loss - confirm intended.

LAI_history = LAI_model.fit(x = numpy.array(training_data), y = numpy.array(LAI_training), 
                            sample_weight = training_weights,
                            epochs = 120,
                            validation_data = (numpy.array(valid_data), numpy.array(LAI_valid), valid_weights),
                            callbacks=[LAI_callback]
                           )

Train on 85946 samples, validate on 36834 samples
Epoch 1/120
Epoch 2/120
Epoch 3/120
Epoch 4/120
Epoch 5/120
Epoch 6/120
Epoch 7/120
Epoch 8/120
Epoch 9/120
Epoch 10/120
Epoch 11/120
Epoch 12/120
Epoch 13/120
Epoch 14/120
Epoch 15/120
Epoch 16/120
Epoch 17/120
Epoch 18/120
Epoch 19/120
Epoch 20/120
Epoch 21/120
Epoch 22/120
Epoch 23/120
Epoch 24/120
Epoch 25/120
Epoch 26/120
Epoch 27/120
Epoch 28/120
Epoch 29/120
Epoch 30/120
Epoch 31/120
Epoch 32/120
Epoch 33/120
Epoch 34/120
Epoch 35/120
Epoch 36/120
Epoch 37/120
Epoch 38/120
Epoch 39/120
Epoch 40/120
Epoch 41/120
Epoch 42/120
Epoch 43/120
Epoch 44/120
Epoch 45/120
Epoch 46/120
Epoch 47/120
Epoch 48/120
Epoch 49/120
Epoch 50/120
Epoch 51/120
Epoch 52/120
Epoch 53/120
Epoch 54/120
Epoch 55/120
Epoch 56/120
Epoch 57/120
Epoch 58/120
Epoch 59/120
Epoch 60/120
Epoch 61/120
Epoch 62/120
Epoch 63/120
Epoch 64/120
Epoch 65/120
Epoch 66/120
Epoch 67/120
Epoch 68/120
Epoch 69/120
Epoch 70/120
Epoch 71/120
Epoch 72/120
Epoch 73/120
Epoch 74/1

In [48]:
# Save the trained LAI network to disk
LAI_model.save('./models/LAI_models_no_angles')

# TODO: Save LAI mean and std to a CSV (not implemented in this cell)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: ./models/LAI_models_no_angles/assets


In [49]:
# Range of the standardized A1 column.
# Vectorized Series reductions replace the builtin max()/min(), which iterate the
# column element by element in Python (note: the Series methods skip NaN values).
print(cal_data_scaled['A1'].max())
print(cal_data_scaled['A1'].min())

1.3485388024357525
-2.1061824803859754


In [50]:
# Range of the standardized A2 column.
# Vectorized Series reductions replace the builtin max()/min(), which iterate the
# column element by element in Python (note: the Series methods skip NaN values).
print(cal_data_scaled['A2'].max())
print(cal_data_scaled['A2'].min())

1.3984151512355845
-2.482717431098018


In [51]:
# Range of the standardized A3 column.
# Vectorized Series reductions replace the builtin max()/min(), which iterate the
# column element by element in Python (note: the Series methods skip NaN values).
print(cal_data_scaled['A3'].max())
print(cal_data_scaled['A3'].min())

1.2846023306530838
-1.336023951152343
