In [None]:
# Import the necessary packages and libraries #

import scipy
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy 
import pandas 
import sklearn
from sklearn import preprocessing
from sklearn import linear_model
import tensorflow
from tensorflow import keras 
import os
from scipy.stats import norm
from scipy.spatial import distance
from IPython.display import clear_output 

In [None]:
# Ask TensorFlow's native (C++) logging to be quieter by setting its minimum log level to '2'.
# NOTE(review): tensorflow is already imported in the imports cell above; this variable is
# usually expected to be set *before* that import -- confirm it still takes effect here,
# or move this assignment ahead of `import tensorflow`.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

In [None]:
# Read the MATLAB calibration file, keeping only the 'Input' and 'Output' variables #

mat_path = './steps_2_to_5/s2_sl2p_uniform_10_replicates_sobol_prosail_inout.mat'
wanted_variables = ['Input', 'Output']

# loadmat returns a dict keyed by variable name; restricting variable_names skips everything else in the file.
matlabData = sio.loadmat(mat_path, variable_names=wanted_variables)

In [None]:
# Unpack the MATLAB 'Input' / 'Output' structs into pandas objects and build the calibration table #

input_struct = matlabData['Input']
output_struct = matlabData['Output']

# Each struct field is reached with [0][0] after selecting it by name.
bands = pandas.DataFrame(data=input_struct['Rho_Toc'][0][0])
angles = pandas.DataFrame(data=input_struct['Angles'][0][0])

# The three output variables are flattened to 1-D before becoming Series.
LAI, FAPAR, FCOVER = (
    pandas.Series(data=output_struct[field][0][0].flatten())
    for field in ('LAI', 'FAPAR', 'FCOVER')
)

# Column-wise concatenation: inputs first, then the three outputs.
cal_columns = ['B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'A1', 'A2', 'A3', 'LAI', 'FAPAR', 'FCOVER']
cal_data = pandas.concat([bands, angles, LAI, FAPAR, FCOVER], axis=1, join='outer')
cal_data.columns = cal_columns

cal_data

In [None]:
# Centre every column; additionally scale the input columns by their centred Euclidean norm #

input_df = pandas.concat([bands, angles], axis=1, join='outer')
input_df.columns = ['B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'A1', 'A2', 'A3']

# Subtract each column's mean, then multiply by (sum of squares) ** -0.5 of the centred column.
input_df_centred = input_df.sub(input_df.mean())
inverse_norms = input_df_centred.pow(2).sum().pow(-0.5)
input_df_normed = input_df_centred.mul(inverse_norms)

# The outputs are only mean-centred (no rescaling); their means are kept in separate variables.
LAI_mean, FAPAR_mean, FCOVER_mean = LAI.mean(), FAPAR.mean(), FCOVER.mean()

LAI_centred = LAI - LAI_mean
FAPAR_centred = FAPAR - FAPAR_mean
FCOVER_centred = FCOVER - FCOVER_mean

scaled_parts = [input_df_normed, LAI_centred, FAPAR_centred, FCOVER_centred]
cal_data_scaled = pandas.concat(scaled_parts, axis=1, join='outer')
cal_data_scaled.columns = ['B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'A1', 'A2', 'A3', 'LAI', 'FAPAR', 'FCOVER']

cal_data_scaled

In [None]:
# Sample data to create reference database #

# Draw 100 rows of the scaled calibration table as the reference database.
# A fixed random_state makes the draw reproducible: without it every
# "Restart & Run All" would pick different reference rows and therefore
# produce different probabilities further down.
# ignore_index=False keeps the original row labels, which the next cells use to
# remove these rows from the calibration table.
ref_data = cal_data_scaled.sample(n=100, random_state=42, ignore_index=False)

ref_data

In [None]:
# List of indices to remove from the calibration database #
# These are the row labels of the sampled reference rows. This cell must run before
# ref_data's index is reset in the next cell, otherwise the original labels are lost.

index_list = ref_data.index.values

In [None]:
# Replace the sampled row labels with a fresh default index (the old labels are discarded,
# not kept as a column); they were already captured in index_list.
ref_data = ref_data.reset_index(drop=True)

In [None]:
# Removes the indices from calibration database that are in the reference database #
# index_list holds row labels, so this drops exactly the rows that were sampled into ref_data.

cal_data_scaled = cal_data_scaled.drop(index_list)

In [None]:
# Give the remaining calibration rows a fresh default index after the drop.
# The probability loop below uses the iterrows() label as an .iloc position, so it relies on this.
cal_data_scaled = cal_data_scaled.reset_index(drop=True)

In [None]:
# Initialize the probability column in the calibration database #

# Use a float literal so the column is created as float64. The probabilities written
# into it later are floats (exp(-distance)); initialising with the integer 0 would
# create an int64 column, and writing floats into an int column element-by-element
# triggers dtype-incompatibility warnings in recent pandas.
# Adding the column this way already names it 'prob', so no column re-labelling is needed.
cal_data_scaled['prob'] = 0.0

cal_data_scaled

In [None]:
# Calculates the conditional probability for each sample in the calibration database #
#
# For every calibration row the probability is max_k exp(-d_k), where d_k is the
# Euclidean distance (over the eight band columns plus LAI) to reference row k.
# Because exp is monotonic this equals exp(-min_k d_k), so only the distance to the
# nearest reference row is needed.

# Columns spanning the space in which distances are measured.
match_columns = ['B0', 'B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'LAI']

cal_matrix = cal_data_scaled[match_columns].to_numpy(dtype=float)
ref_matrix = ref_data[match_columns].to_numpy(dtype=float)

# One vectorised call replaces the nested iterrows() loops, which re-sliced both
# DataFrames on every inner iteration. The intermediate matrix has shape
# (n_calibration_rows, n_reference_rows); keep that in mind for memory if the
# reference database is made much larger.
nearest_ref_distance = distance.cdist(cal_matrix, ref_matrix, metric='euclidean').min(axis=1)

# Whole-column assignment is positional and yields a float64 column, so it depends
# neither on the index labels matching row positions nor on the dtype 'prob' was
# initialised with.
cal_data_scaled['prob'] = numpy.exp(-nearest_ref_distance)