In [11]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [12]:
# Project folders, given relative to the notebook's working directory.
# os.path.join picks the right separator for the current OS and avoids the
# doubled backslash that plain string concatenation produced.
cwd = os.path.join("..", "..")
cwd_Images_Raw = os.path.join(cwd, "Sentinel-2 Images Raw")
cwd_Images_Processed = os.path.join(cwd, "Sentinel-2 Images Processed")
cwd_Images_Results = os.path.join(cwd, "Results")

In [13]:
# Both sheets live in the same results workbook, so build its path once
# (os.path.join, as for the thresholds file) and open the file a single time.
results_workbook = os.path.join(cwd_Images_Results, "Final (101 Sites) HI, Roman, Info.xlsx")
with pd.ExcelFile(results_workbook) as workbook:
    # header=1: the column names of the "Homogeneity" sheet are on its second row.
    df_HI = pd.read_excel(workbook, sheet_name="Homogeneity", header=1)
    df_Roman = pd.read_excel(workbook, sheet_name="Representativeness")

# Attach the 900 m CV / sill of each site to its representativeness rows,
# keep only the columns used below, and order rows by site and then ROI label.
# NOTE(review): the merge is an inner join, so sites missing from the
# "Homogeneity" sheet are dropped without warning - confirm that is intended.
df = (
    df_Roman
    .merge(df_HI[['Site', 'CV 900', 'Sill 900']], on='Site')
    .loc[:, ['Site', 'Spatial Representativeness', 'RAW Score', 'ST Score',
             'Roman Metrics', 'CV 900', 'Sill 900']]
    .sort_values(['Site', 'Spatial Representativeness'])
    .reset_index(drop=True)
)
df.head()

Unnamed: 0,Site,Spatial Representativeness,RAW Score,ST Score,Roman Metrics,CV 900,Sill 900
0,AT-Mmg,100-300,9.436081,0.676006,RAW,0.065007,5.14696
1,AT-Mmg,100-600,27.656817,0.433034,RAW,0.065007,5.14696
2,AT-Mmg,100-900,5.195374,0.259116,RAW,0.065007,5.14696
3,AT-Mmg,300-600,13.563862,0.734345,RAW,0.065007,5.14696
4,AT-Mmg,300-900,3.173046,0.466587,RAW,0.065007,5.14696


In [14]:
# Read the table of per-ROI RAW / ST score thresholds from the results folder.
thresholds_path = os.path.join(cwd_Images_Results, "SP - AIO Thresholds V2.xlsx")
df_thresholds = pd.read_excel(thresholds_path)
df_thresholds

Unnamed: 0,ROI,RAW,ST
0,100-300,0.8,0.65
1,100-600,0.6,0.5
2,100-900,0.55,0.45
3,300-600,3.0,0.75
4,300-900,1.25,0.5


In [15]:
# Pull each threshold by its ROI label rather than by row position, so that
# reordering rows in the spreadsheet cannot silently swap thresholds.
# Labels are stripped to tolerate stray whitespace in the Excel cells.
_thresholds_by_roi = (
    df_thresholds
    .assign(ROI=df_thresholds['ROI'].astype(str).str.strip())
    .set_index('ROI')
)

# Minimum RAW score required for each ROI.
RAW_100_300 = _thresholds_by_roi.loc['100-300', 'RAW']
RAW_100_600 = _thresholds_by_roi.loc['100-600', 'RAW']
RAW_100_900 = _thresholds_by_roi.loc['100-900', 'RAW']
RAW_300_600 = _thresholds_by_roi.loc['300-600', 'RAW']
RAW_300_900 = _thresholds_by_roi.loc['300-900', 'RAW']

# Minimum ST score required for each ROI.
ST_100_300 = _thresholds_by_roi.loc['100-300', 'ST']
ST_100_600 = _thresholds_by_roi.loc['100-600', 'ST']
ST_100_900 = _thresholds_by_roi.loc['100-900', 'ST']
ST_300_600 = _thresholds_by_roi.loc['300-600', 'ST']
ST_300_900 = _thresholds_by_roi.loc['300-900', 'ST']

In [16]:
# One entry per site, in order of first appearance in df.
# Taking .unique() of the CV / sill *values* would collapse sites that happen
# to share a value and shift every later entry, so the three lists would no
# longer line up; selecting the first row of each site keeps them aligned.
_first_row_per_site = df.drop_duplicates(subset='Site')
list_sites = list(_first_row_per_site['Site'])
list_CV = list(_first_row_per_site['CV 900'])
list_sill = list(_first_row_per_site['Sill 900'])

In [17]:
def threshold_check_goodfit(df, thresholds=None):
    """Return the ROI labels whose RAW and ST scores both reach their thresholds.

    Rows are matched to thresholds by their 'Spatial Representativeness'
    label, not by row position, so a site with a missing or re-ordered ROI
    row is never compared against another ROI's threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single site with the columns 'Spatial Representativeness',
        'RAW Score' and 'ST Score'.
    thresholds : dict, optional
        Maps an ROI label to ``(minimum RAW, minimum ST)``. When omitted, the
        notebook-level RAW_* / ST_* constants are used.

    Returns
    -------
    list of str
        Passing ROI labels, in the order of ``thresholds``. ROIs that have no
        row in ``df`` are skipped.
    """
    if thresholds is None:
        thresholds = {
            '100-300': (RAW_100_300, ST_100_300),
            '100-600': (RAW_100_600, ST_100_600),
            '100-900': (RAW_100_900, ST_100_900),
            '300-600': (RAW_300_600, ST_300_600),
            '300-900': (RAW_300_900, ST_300_900),
        }

    passing = []
    for roi, (raw_min, st_min) in thresholds.items():
        roi_rows = df[df['Spatial Representativeness'] == roi]
        if roi_rows.empty:
            # This site has no row for the ROI, so it cannot pass.
            continue
        raw_score = roi_rows['RAW Score'].iloc[0]
        st_score = roi_rows['ST Score'].iloc[0]
        if raw_score >= raw_min and st_score >= st_min:
            passing.append(roi)
    return passing

In [18]:
def threshold_check_badfit(df, thresholds=None):
    """Return the ROI labels whose RAW score reaches its threshold.

    Only the RAW score is tested; the ST score is ignored. Rows are matched
    to thresholds by their 'Spatial Representativeness' label, not by row
    position, so a site with a missing or re-ordered ROI row is never
    compared against another ROI's threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single site with the columns 'Spatial Representativeness'
        and 'RAW Score'.
    thresholds : dict, optional
        Maps an ROI label to its minimum RAW score. When omitted, the
        notebook-level RAW_* constants are used.

    Returns
    -------
    list of str
        Passing ROI labels, in the order of ``thresholds``. ROIs that have no
        row in ``df`` are skipped.
    """
    if thresholds is None:
        thresholds = {
            '100-300': RAW_100_300,
            '100-600': RAW_100_600,
            '100-900': RAW_100_900,
            '300-600': RAW_300_600,
            '300-900': RAW_300_900,
        }

    passing = []
    for roi, raw_min in thresholds.items():
        roi_rows = df[df['Spatial Representativeness'] == roi]
        if roi_rows.empty:
            # This site has no row for the ROI, so it cannot pass.
            continue
        if roi_rows['RAW Score'].iloc[0] >= raw_min:
            passing.append(roi)
    return passing

In [20]:
# Homogeneity limits applied to the 900 m statistics of each site.
CV_MAX = 0.25
SILL_MAX = 150
NOT_SUITABLE = "Not suitable for validation!"
LARGE_ROIS = ('300-600', '300-900')


def _pick_best_roi(site_df, passing_rois, homogeneous, use_st):
    """Choose the best ROI of one site among those that passed the thresholds.

    Parameters
    ----------
    site_df : pandas.DataFrame
        Rows of a single site (index reset), one row per ROI.
    passing_rois : list of str
        ROI labels returned by the threshold check for this site.
    homogeneous : bool
        Result of the homogeneity test for this site.
    use_st : bool
        True when the site's 'Roman Metrics' is 'RAW or ST' (both scores are
        used); False when only the RAW score is used.

    Returns
    -------
    tuple
        ``(best ROI label or NOT_SUITABLE, RAW score or None, ST score or None)``.
    """
    # A homogeneous site needs at least one passing ROI; a heterogeneous site
    # needs at least one of the 300-m based ROIs to pass.
    if homogeneous:
        suitable = len(passing_rois) > 0
    else:
        suitable = any(roi in passing_rois for roi in LARGE_ROIS)
    if not suitable:
        return NOT_SUITABLE, None, None

    # NOTE(review): for heterogeneous sites the candidates still include any
    # passing 100-m based ROI, exactly as in the original logic - confirm
    # whether they should be restricted to LARGE_ROIS.
    candidates = site_df[site_df['Spatial Representativeness'].isin(passing_rois)]
    idx_max_raw = candidates['RAW Score'].idxmax()

    if not use_st:
        # RAW-only sites: take the ROI with the highest RAW score, no ST score.
        return (
            candidates.loc[idx_max_raw, 'Spatial Representativeness'],
            candidates.loc[idx_max_raw, 'RAW Score'],
            None,
        )

    idx_max_st = candidates['ST Score'].idxmax()
    both_large_pass = all(roi in passing_rois for roi in LARGE_ROIS)
    if homogeneous or both_large_pass:
        # When the RAW and ST maxima disagree, keep the row with the smaller
        # index (the ROI that sorts first, i.e. the "smaller ROI").
        chosen = min(idx_max_raw, idx_max_st)
    else:
        # Heterogeneous site with only one 300-m based ROI passing:
        # follow the ST maximum.
        chosen = idx_max_st
    return (
        candidates.loc[chosen, 'Spatial Representativeness'],
        candidates.loc[chosen, 'RAW Score'],
        candidates.loc[chosen, 'ST Score'],
    )


list_homo_test = []
list_roman_metrics = []
list_check = []
list_RAW = []
list_ST = []
for temp_Site in list_sites:
    temp_df = df[df['Site'] == temp_Site].reset_index(drop=True)
    # Read CV and sill from the site's own rows. The previous code indexed
    # lists of unique values by site position (which can be misaligned) and
    # assigned the CV to temp_sill, so the sill was never actually tested.
    temp_CV = temp_df.loc[0, 'CV 900']
    temp_sill = temp_df.loc[0, 'Sill 900']

    good_fit = temp_df.loc[0, 'Roman Metrics'] == 'RAW or ST'
    if good_fit:
        # Variogram good fit: both RAW and ST scores are checked.
        list_roman_metrics.append('RAW or ST')
        temp_list = threshold_check_goodfit(temp_df)
        homogeneous = bool(temp_CV < CV_MAX and temp_sill < SILL_MAX)
    else:
        # Bad fit: only the RAW score is checked.
        # NOTE(review): the sill is not part of the homogeneity test in this
        # branch, as in the original logic - confirm that is intended.
        list_roman_metrics.append('RAW')
        temp_list = threshold_check_badfit(temp_df)
        homogeneous = bool(temp_CV < CV_MAX)

    list_homo_test.append('Homogeneity' if homogeneous else 'Heterogeneity')
    temp_BestROI, temp_RAW, temp_ST = _pick_best_roi(temp_df, temp_list, homogeneous, good_fit)
    list_check.append(temp_BestROI)
    list_RAW.append(temp_RAW)
    list_ST.append(temp_ST)
list_check

['100-600',
 '100-300',
 'Not suitable for validation!',
 '300-600',
 'Not suitable for validation!',
 '300-600',
 '300-600',
 '300-600',
 'Not suitable for validation!',
 '100-300',
 '300-600',
 '100-600',
 'Not suitable for validation!',
 '300-900',
 '100-300',
 'Not suitable for validation!',
 'Not suitable for validation!',
 'Not suitable for validation!',
 '300-600',
 '100-300',
 '300-600',
 '100-300',
 '300-600',
 '300-600',
 '300-600',
 '300-600',
 '300-600',
 'Not suitable for validation!',
 '300-600',
 '100-900',
 'Not suitable for validation!',
 'Not suitable for validation!',
 '300-600',
 '300-600',
 '300-900',
 '300-600',
 '100-300',
 'Not suitable for validation!',
 'Not suitable for validation!',
 '100-300',
 '100-300',
 'Not suitable for validation!',
 '100-300',
 'Not suitable for validation!',
 'Not suitable for validation!',
 'Not suitable for validation!',
 '300-600',
 '100-300',
 'Not suitable for validation!',
 '300-900',
 '300-900',
 '100-300',
 '300-600',
 '300-6

In [21]:
# Assemble the per-site summary table from the lists filled by the selection loop.
final_columns = dict(
    zip(
        ["Site", "Homogeneity Test", "Roman Metrics", "Best ROI", "RAW Score", "ST Score"],
        [list_sites, list_homo_test, list_roman_metrics, list_check, list_RAW, list_ST],
    )
)
df_Final = pd.DataFrame(final_columns)
df_Final

Unnamed: 0,Site,Homogeneity Test,Roman Metrics,Best ROI,RAW Score,ST Score
0,AT-Mmg,Homogeneity,RAW,100-600,27.656817,
1,ATGE,Heterogeneity,RAW or ST,100-300,4.855959,0.907602
2,ATLAS-Mohammed V,Heterogeneity,RAW or ST,Not suitable for validation!,,
3,ATLAS-Mohammed V New,Heterogeneity,RAW or ST,300-600,51.318989,0.799848
4,BASP,Heterogeneity,RAW or ST,Not suitable for validation!,,
...,...,...,...,...,...,...
96,US-OPE,Heterogeneity,RAW,Not suitable for validation!,,
97,US-SERC,Homogeneity,RAW or ST,100-300,4.158097,1.059717
98,US-SERC New,Homogeneity,RAW,Not suitable for validation!,,
99,Utqiagvik,Heterogeneity,RAW or ST,100-300,5.815504,0.865331


In [22]:
# Write the summary next to the other results; os.path.join (as used for the
# thresholds file) supplies the correct path separator on every OS.
df_Final.to_csv(os.path.join(cwd_Images_Results, "Best ROI GJW.csv"), index=False)