# Apply ICE CREAMS Model to Labelled Validation

In [29]:
from fastai.tabular.all import load_learner
import pandas as pd

This script uses a large labelled dataset created to validate ICE CREAMS. It consists of a selection of labelled Sentinel-2 pixels from across Europe and across the classes that ICE CREAMS predicts. This script focuses on Magnoliopsida (Seagrass) and uses a cut-off of 0.246 NDVI (equivalent to 20 % Seagrass cover): pixels labelled or predicted as Magnoliopsida with an NDVI below this cut-off are treated as seagrass absence. First we load the pickled model, then we load the labelled validation data. We apply the model to the labelled data and then compare the ratios and numbers of True Positive (TP), True Negative (TN), False Positive (FP) and False Negative (FN). These are then used to calculate accuracy and F1 scores.

In [23]:
# Restore the exported learner from its pickle file (path is relative to the working directory).
# SECURITY: load_learner unpickles the file, which can execute arbitrary code while loading;
# only point this at a model file from a source you trust.
learn = load_learner('ICECREAMS_V1_1.pkl')

If you only need to load model weights and optimizer state, use the safe `Learner.load` instead.
  warn("load_learner` uses Python's insecure pickle module, which can execute malicious arbitrary code when loading. Only load files you trust.\nIf you only need to load model weights and optimizer state, use the safe `Learner.load` instead.")


In [3]:
# Read the labelled Sentinel-2 validation pixels into a DataFrame.
# low_memory=False avoids chunked parsing, which can leave columns with mixed inferred types.
validation_csv = 'Data/Input/Validation/validation_Europe_LabelledS2.csv'
df_test = pd.read_csv(validation_csv, sep=",", low_memory=False)

In [4]:
# Show the loaded validation table (pandas truncates the rendering of a frame this large).
df_test

Unnamed: 0,x,y,Label_Char,Label_Date,Image_ID,Image_Date,Lat_Long_EPSG,Label_Method,Reflectance_B02,Reflectance_B03,...,Reflectance_Stan_B05,Reflectance_Stan_B06,Reflectance_Stan_B07,Reflectance_Stan_B11,Reflectance_Stan_B12,Reflectance_Stan_B8A,Reflectance_Stan_B01,Reflectance_Stan_B09,NDVI,NDWI
0,562545,5200985,Bare Sediment,20180914,S2B_MSIL2A_20180909T110609_N0500_R137_T30TWT_20230801T163945,20180909,32630,PhotoQuadrats,646,807,...,0.745232,0.814714,0.882834,0.527248,0.032698,0.942779,0.000000,1.000000,0.149590,-0.163297
1,562545,5201005,Bare Sediment,20180914,S2B_MSIL2A_20180909T110609_N0500_R137_T30TWT_20230801T163945,20180909,32630,PhotoQuadrats,668,811,...,0.717984,0.821526,0.886921,0.470027,0.024523,0.967302,0.000000,1.000000,0.162602,-0.170332
2,562545,5201035,Magnoliopsida,20180914,S2B_MSIL2A_20180909T110609_N0500_R137_T30TWT_20230801T163945,20180909,32630,PhotoQuadrats,518,632,...,0.608871,0.776882,0.896505,0.426075,0.000000,0.970430,0.013441,1.000000,0.336504,-0.280182
3,562545,5201055,Magnoliopsida,20180914,S2B_MSIL2A_20180909T110609_N0500_R137_T30TWT_20230801T163945,20180909,32630,PhotoQuadrats,292,396,...,0.403125,0.696875,0.793750,0.493750,0.110417,0.921875,0.000000,1.000000,0.573487,-0.467742
4,562545,5201085,Magnoliopsida,20180914,S2B_MSIL2A_20180909T110609_N0500_R137_T30TWT_20230801T163945,20180909,32630,PhotoQuadrats,269,334,...,0.376840,0.753680,0.848871,0.484789,0.105005,0.982336,0.057900,1.000000,0.673995,-0.535466
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61157,539395,5237445,Magnoliopsida,20240918,S2B_MSIL2A_20240917T110619_N0511_R137_T30TWT_20240917T142353,20240917,32630,Expert,468,626,...,0.414113,0.798289,0.877406,0.210264,0.000000,0.965788,0.063435,0.904490,0.606572,-0.495974
61158,539395,5238055,Magnoliopsida,20240918,S2B_MSIL2A_20240917T110619_N0511_R137_T30TWT_20240917T142353,20240917,32630,Expert,565,785,...,0.643262,0.929078,0.970213,0.139716,0.000000,0.952482,0.135461,0.975887,0.510168,-0.370237
61159,539395,5238065,Magnoliopsida,20240918,S2B_MSIL2A_20240917T110619_N0511_R137_T30TWT_20240917T142353,20240917,32630,Expert,575,778,...,0.565452,0.912955,0.974359,0.155196,0.000000,0.996626,0.125506,1.000000,0.508030,-0.392662
61160,539395,5238075,Magnoliopsida,20240918,S2B_MSIL2A_20240917T110619_N0511_R137_T30TWT_20240917T142353,20240917,32630,Expert,565,767,...,0.550591,0.888962,0.948752,0.151117,0.000000,0.970434,0.122208,0.973719,0.554622,-0.413833


In [5]:
# Collapse the multi-class label into seagrass presence/absence.
# A pixel counts as 'Presence' only when it is labelled Magnoliopsida AND its NDVI
# is not below the 0.246 cut-off; everything else is 'Absence'.
label_is_seagrass = df_test.Label_Char == "Magnoliopsida"
label_low_ndvi = df_test.NDVI.lt(0.246)

# case_when uses the first condition that matches a row, so the low-NDVI rule
# has to come before the general Magnoliopsida rule.
caselist = [
    (df_test.Label_Char != "Magnoliopsida", 'Absence'),
    (label_is_seagrass & label_low_ndvi, 'Absence'),
    (label_is_seagrass, 'Presence'),
]

df_test = df_test.assign(
    Label_Char_PvA=df_test.Label_Char.case_when(caselist=caselist)
)

In [6]:
# Wrap the validation frame in a test DataLoader derived from the learner's own
# DataLoaders, using batches of 4000 rows.
# NOTE(review): presumably this reuses the preprocessing the model was trained with — confirm against fastai docs.
dl = learn.dls.test_dl(df_test, bs=4000)
# get_preds returns a pair; only the first element (the predictions) is kept.
preds,_ = learn.get_preds(dl=dl)

In [7]:
# Position of the highest score in each row of the prediction matrix.
class_idxs = preds.argmax(axis=1)

# Translate every winning index into its class name using the learner's vocabulary.
vocab = learn.dls.vocab
res = [vocab[idx] for idx in class_idxs]

In [8]:
# max over axis 1 yields a (values, indices) pair; keep only the values, i.e. the
# score of the winning class for every row.
class_probs = preds.max(axis=1).values

In [9]:
# Predicted class indices as a plain Python list.
# NOTE(review): NumPred is not referenced by any other cell visible in this notebook.
NumPred= class_idxs.tolist()

In [10]:
# Winning-class scores as a plain Python list, used as the 'Prob' column of the results table.
PredProbs =class_probs.tolist()

In [11]:
# Assemble the per-pixel results table: coordinates, presence/absence label and NDVI
# from the validation frame, plus the predicted class and its probability.
# The prediction lists are attached as whole columns instead of zipping rows together,
# so a length mismatch between the frame and the predictions raises a ValueError
# rather than being silently truncated to the shortest input.
res_df = (
    df_test[['x', 'y', 'Label_Char_PvA', 'NDVI']]
    .reset_index(drop=True)  # fresh 0..n-1 index, matching what the zipped construction produced
    .assign(Pred_Class=res, Prob=PredProbs)
)
res_df

Unnamed: 0,x,y,Label_Char_PvA,NDVI,Pred_Class,Prob
0,562545,5200985,Absence,0.149590,Bare Sediment,0.867221
1,562545,5201005,Absence,0.162602,Bare Sediment,0.913725
2,562545,5201035,Presence,0.336504,Magnoliopsida,0.965743
3,562545,5201055,Presence,0.573487,Magnoliopsida,0.938045
4,562545,5201085,Presence,0.673995,Magnoliopsida,0.984415
...,...,...,...,...,...,...
61157,539395,5237445,Presence,0.606572,Chlorophyta,0.716153
61158,539395,5238055,Presence,0.510168,Chlorophyta,0.988131
61159,539395,5238065,Presence,0.508030,Chlorophyta,0.990363
61160,539395,5238075,Presence,0.554622,Chlorophyta,0.978681


In [12]:
# Cross-tabulate the presence/absence label against the raw predicted class:
# number of pixels (non-null 'x' values) in each (label, predicted class) pair.
res_df.groupby(['Label_Char_PvA','Pred_Class'])['x'].count()

Label_Char_PvA  Pred_Class       
Absence         Bare Sand             5672
                Bare Sediment        17166
                Chlorophyta            275
                Magnoliopsida         7068
                Microphytobenthos    11233
                Phaeophyceae          4685
                Water                 4475
                Xanthophyceae          778
Presence        Bare Sediment           33
                Chlorophyta           2561
                Magnoliopsida         6438
                Microphytobenthos      312
                Phaeophyceae           465
                Xanthophyceae            1
Name: x, dtype: int64

In [13]:
# Collapse the predicted class into seagrass presence/absence, using the same rule
# as for the labels: 'Presence' only when the prediction is Magnoliopsida AND the
# pixel's NDVI is not below the 0.246 cut-off.
pred_is_seagrass = res_df.Pred_Class == "Magnoliopsida"
pred_low_ndvi = res_df.NDVI.lt(0.246)

# case_when uses the first condition that matches a row, so the low-NDVI rule
# has to come before the general Magnoliopsida rule.
caselist = [
    (res_df.Pred_Class != "Magnoliopsida", 'Absence'),
    (pred_is_seagrass & pred_low_ndvi, 'Absence'),
    (pred_is_seagrass, 'Presence'),
]
res_df = res_df.assign(
    Pred_Class_PvA=res_df.Pred_Class.case_when(caselist=caselist)
)

In [14]:
# Confusion counts: number of pixels for each (label, prediction) presence/absence pair.
# reset_index turns the two group keys back into ordinary columns; the count sits in column 'x'.
# A pair that never occurs in the data has no row in this table.
Results=res_df.groupby(['Label_Char_PvA','Pred_Class_PvA'])['x'].count().reset_index()

In [15]:
# Optional export of the confusion counts. Left disabled so that re-running the
# notebook does not overwrite an existing file; uncomment to write the CSV.
#Results.to_csv("Validation_PvA_Seagrass_61162Pixels_V1_1.csv")

In [16]:
def _confusion_count(counts, label, pred):
    """Return the pixel count for one (label, prediction) cell of the confusion table.

    Parameters
    ----------
    counts : DataFrame with columns 'Label_Char_PvA', 'Pred_Class_PvA' and 'x'
        (the per-pair pixel counts).
    label : value of 'Label_Char_PvA' to select ('Presence' or 'Absence').
    pred : value of 'Pred_Class_PvA' to select ('Presence' or 'Absence').

    Returns
    -------
    One-element Series indexed by 'x' holding the count. If the requested pair
    has no row in ``counts`` the count is 0 instead of raising IndexError.
    """
    cell = counts[(counts.Label_Char_PvA == label) & (counts.Pred_Class_PvA == pred)]
    # Summing the (at most one) matching row yields its count, or 0 when nothing matched.
    return cell[['x']].sum()


# The four cells of the presence/absence confusion matrix.
TP = _confusion_count(Results, 'Presence', 'Presence')
TN = _confusion_count(Results, 'Absence', 'Absence')
FP = _confusion_count(Results, 'Absence', 'Presence')
FN = _confusion_count(Results, 'Presence', 'Absence')

# Precision: share of predicted presences that are correct.
# Recall: share of labelled presences that were found.
# F1: harmonic mean of the two. A zero denominator gives NaN rather than an exception,
# because the operands are pandas Series.
Precision = TP / (TP + FP)
Recall = TP / (TP + FN)
F1 = (2 * Precision * Recall) / (Precision + Recall)

In [17]:
# Overall accuracy as a percentage: correctly classified pixels (TP + TN) over all pixels.
Accuracy=((TP+TN)/(TP+TN+FN+FP))*100

In [18]:
# Show the accuracy, expressed in percent (the formula multiplies by 100).
Accuracy

x    87.526569
dtype: float64

In [19]:
# Show the F1 score (harmonic mean of precision and recall, on a 0-1 scale).
F1

x    0.627944
dtype: float64

In [20]:
# Show the precision, TP / (TP + FP), on a 0-1 scale.
Precision

x    0.601964
dtype: float64

In [21]:
# Show the recall, TP / (TP + FN), on a 0-1 scale.
Recall

x    0.656269
dtype: float64