# Reproducibility results

This notebook computes the errors on each of the patches downloaded with the script `reproducibility.py`. Before running this notebook, the following commands are therefore expected to have been run:

```
mkdir reproducibility_results
python reproducibility.py DownloadAll --basepath reproducibility_results --method percentile
```

In [1]:
from ee_ipl_uv import local_image
from sklearn.metrics import accuracy_score,confusion_matrix
from glob import glob
import numpy as np

def compute_confusion(raster_img):
    """
    Build the two 2x2 confusion matrices of a single patch.

    The reference labels come from the "fixedmask" band: a pixel is labelled 1
    when its value is 192 or 255 and 0 otherwise. Two predictions are compared
    against that reference:

    * the "cloudscore" band, used as stored (presumably already 0/1 labels --
      TODO confirm; values other than 0 and 1 are not counted because
      ``labels=[0, 1]``),
    * bit 4 of the "BQA" band (1 when the bit is set, 0 otherwise).

    Pixels where band "B2" is NaN, or that are masked in any of the three
    layers above, are left out of both matrices.

    :param raster_img: object exposing ``bandNames()`` (list of band names) and
        ``memmap`` (array with the bands along the last axis).
    :return: tuple ``(cm_multi, cm_fmask)``; the first matrix scores the
        "cloudscore" band and the second the BQA bit, both against "fixedmask".
    """
    names = raster_img.bandNames()

    def read_band(band):
        # Slice one band out of the last axis of the raster.
        return raster_img.memmap[..., names.index(band)]

    def nan_masked(values):
        # Masked view of `values` whose mask flags the NaN entries.
        return np.ma.masked_array(values, np.isnan(values))

    cloud_raw = read_band("cloudscore")

    # Reference labels: 192 and 255 are the two values counted as positive.
    truth_raw = nan_masked(read_band("fixedmask"))
    truth = np.int16((truth_raw == 192) | (truth_raw == 255))

    # Second prediction: test bit 4 of the quality band.
    bqa = nan_masked(read_band("BQA"))
    bqa_pred = np.int16((bqa & (1 << 4)) != 0)

    multi_pred = nan_masked(cloud_raw)

    # Start from the pixels without B2 data and add every layer's own mask.
    invalid = np.isnan(read_band("B2"))
    for layer in (truth, multi_pred, bqa_pred):
        invalid = invalid | np.ma.getmaskarray(layer)
    valid = ~invalid

    # The same valid-pixel selection feeds both matrices, so their totals match.
    y_true = np.ma.getdata(truth)[valid]
    cm_multi = confusion_matrix(y_true, np.ma.getdata(multi_pred)[valid], labels=[0, 1])
    cm_fmask = confusion_matrix(y_true, np.ma.getdata(bqa_pred)[valid], labels=[0, 1])
    return cm_multi, cm_fmask


# Sanity check on one downloaded patch before looping over the whole folder.
raster_img = local_image.LocalImage.Load("reproducibility_results/LC80150312014226LGN00_009_006_percentile")

cm1,cm2 = compute_confusion(raster_img)

# Both matrices are built from the same pixel selection, so their totals must agree.
assert np.sum(cm1) == np.sum(cm2), "Same number of pixels taken into account"


In [2]:
def accuracy_score_confusion(cm):
    """
    Overall accuracy of a confusion matrix: sum of the diagonal entries
    divided by the sum of all entries, returned as a Python float.
    """
    correct = np.diag(cm).sum()
    total = np.sum(cm)
    return float(correct / total)

def commission_error(cm):
    """
    Commission error (false positive rate, type I error).

    With cm[i,j] counting samples known to be i that are predicted j, this is
    cm[0,1] divided by the sum of row 0, returned as a Python float.
    """
    negatives_row = cm[0, :]
    wrongly_flagged = cm[0, 1]
    n_negatives = np.sum(negatives_row)
    return float(wrongly_flagged / n_negatives)

def omission_error(cm):
    """
    Omission error (1 - true positive rate, type II error).

    With cm[i,j] counting samples known to be i that are predicted j, this is
    cm[1,0] divided by the sum of row 1, returned as a Python float.
    """
    positives_row = cm[1, :]
    missed = cm[1, 0]
    n_positives = np.sum(positives_row)
    return float(missed / n_positives)


In [3]:
import os

# Each downloaded patch is expected in its own "*_percentile" folder.
files_read = glob("reproducibility_results/*_percentile")

# Running totals of the per-patch confusion matrices.
cm_multi, cm_fmask = np.zeros((2,2)), np.zeros((2,2))

i = 0
for f in files_read:
    if os.path.exists(os.path.join(f,"raster.tif")):
        i += 1
        raster_img = local_image.LocalImage.Load(f)
        cm1, cm2 = compute_confusion(raster_img)
        # Both matrices come from the same pixel selection, so the totals must agree.
        assert np.sum(cm1) == np.sum(cm2), "Same number of pixels taken into account"
        print("%d/%d %s %.4f %.4f" % (i, len(files_read), f,
                                      accuracy_score_confusion(cm1),
                                      accuracy_score_confusion(cm2)))
        cm_multi += cm1
        cm_fmask += cm2
    else:
        # Folder without a raster: report it and leave it out of the totals.
        print("file %s does not exists" % f)

print("Processed %d files correctly" % i)

1/2661 reproducibility_results/LC80290372013257LGN00_010_003_percentile 0.9719 0.9724
2/2661 reproducibility_results/LC80980762014216LGN00_003_010_percentile 1.0000 1.0000
3/2661 reproducibility_results/LC81320352013243LGN00_002_007_percentile 0.9433 0.9602
4/2661 reproducibility_results/LC80290372013257LGN00_011_012_percentile 0.9438 0.9090
5/2661 reproducibility_results/LC81360302014162LGN00_001_008_percentile 1.0000 1.0000
6/2661 reproducibility_results/LC81750732014035LGN00_010_004_percentile 0.9554 0.8834
7/2661 reproducibility_results/LC81180382014244LGN00_004_009_percentile 1.0000 1.0000
8/2661 reproducibility_results/LC81820302014180LGN00_005_012_percentile 1.0000 0.9998
9/2661 reproducibility_results/LC81590362014051LGN00_005_004_percentile 0.9954 1.0000
10/2661 reproducibility_results/LC81020802014100LGN00_003_008_percentile 0.9059 0.8406
11/2661 reproducibility_results/LC81990402014267LGN00_012_010_percentile 0.9994 0.9919
12/2661 reproducibility_results/LC81020802014100LGN0

In [4]:
# Overall accuracy over all processed patches: (cloudscore band, BQA bit 4), both scored against fixedmask.
accuracy_score_confusion(cm_multi),accuracy_score_confusion(cm_fmask)

(0.940353883684437, 0.8820711931375219)

In [5]:
# Commission error (false positive rate) over all processed patches: (cloudscore band, BQA bit 4).
commission_error(cm_multi),commission_error(cm_fmask)

(0.05995950778795814, 0.16584511672426025)

In [6]:
# Omission error (1 - true positive rate) over all processed patches: (cloudscore band, BQA bit 4).
omission_error(cm_multi),omission_error(cm_fmask)

(0.05904677925427456, 0.026292553581977022)

In [7]:
# Total number of pixels counted in the accumulated confusion matrix.
np.sum(cm_multi)

663442793.0