## Modelamiento de cambios estructurales para detección de cambios en bosques y matorrales de Chile central usando series de tiempo de datos NDVI Landsat-5, -7, -8, y -9

### Este Notebook muestra el uso del modelo Continuous Change Detection and Classification (CCDC)

- **Interfaz**: Google Earth Engine (GEE)
- **Lenguaje**: Python
- **Última actualización**: Julio 2023
- **Autor**: Ignacio Fuentes San Roman \ ignacio.fuentes.sanroman@gmail.com \ Universidad de las Américas (UDLA)

In [None]:
# Import the Earth Engine client library, then authenticate and initialise it.
import ee
ee.Authenticate()  # alternative left by the author: auth_mode='paste'
# The Cloud project is hard-coded; change it when running under another account.
ee.Initialize(project='intrepid-charge-305912')

# Template for running under a different Cloud project:
# import ee
# ee.Authenticate()
# ee.Initialize(project='my-project')

In [None]:
# Folium setup.
import folium
import sys
import math
import re
import numpy as np
import pandas as pd
import time
from multiprocessing import Pool
import itertools
import datetime
import matplotlib.pyplot as plt
from scipy import interpolate, stats
import plotly.express as px
from collections import Counter
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


print(folium.__version__)

In [None]:
# Mount Google Drive (Colab only); figures are later saved under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Throw-away figure: it exists only to create one artist per legend entry.
fig, ax = plt.subplots()
line_specs = [
    ('red', '-', 'npphen'),
    ('orange', '--', 'PELT'),
    ('k', ':', 'CCDC'),
    ('green', '--', 'complaints'),
]
for colour, dash, text in line_specs:
    ax.plot(np.linspace(1,100), np.linspace(1,100), c=colour, ls=dash, label=text)
ax.scatter(np.linspace(1,100), np.linspace(1,100), label='NDVI')
# fig.savefig('/content/drive/MyDrive/test.png', dpi=300)

# Second figure holding nothing but the legend harvested from the axes above.
figsize = (3, 3)
fig_leg = plt.figure(figsize=figsize)
ax_leg = fig_leg.add_subplot(111)
handles, labels = ax.get_legend_handles_labels()
ax_leg.legend(handles, labels, loc='center')
# No frame, ticks or axis labels around the legend.
ax_leg.axis('off')
fig_leg.savefig('/content/drive/MyDrive/legend.png', dpi=300)


In [None]:
def maskl(img):
    """Mask flagged pixels of a Landsat image and rescale its bands.

    A pixel is masked when its ``QA_PIXEL`` value has any of the bit values
    2, 4, 8, 16 or 32 set (named dilated cloud, cirrus, cloud, shadow and
    snow by this code). The whole image is then multiplied by 0.0000275 and
    shifted by -0.2, and the input's properties are copied onto the result.

    NOTE(review): the scale/offset is applied to every band of ``img``,
    not only to the surface-reflectance bands.
    """
    qa = ee.Image(img).select('QA_PIXEL')
    prop_names = img.propertyNames()
    cleaned = img
    # dilated cloud, cirrus, cloud, cloud shadow, snow
    for flag in (2, 4, 8, 16, 32):
        cleaned = cleaned.updateMask(qa.bitwiseAnd(flag).eq(0))
    rescaled = cleaned.multiply(0.0000275).add(-0.2)
    return rescaled.copyProperties(img, prop_names)


def ndvil8(img):
    """Append an ``ndvi`` band computed as the normalized difference of
    ``SR_B5`` and ``SR_B4``, keeping the properties of ``img``."""
    index = img.normalizedDifference(['SR_B5', 'SR_B4']).rename('ndvi')
    with_index = img.addBands(index)
    return with_index.copyProperties(img, img.propertyNames())


def ndvil57(img):
    """Append an ``ndvi`` band computed as the normalized difference of
    ``SR_B4`` and ``SR_B3``, keeping the properties of ``img``."""
    index = img.normalizedDifference(['SR_B4', 'SR_B3']).rename('ndvi')
    with_index = img.addBands(index)
    return with_index.copyProperties(img, img.propertyNames())


def get_date(x):
    """Wrap ``x`` in ``ee.Date`` and format it as ``YYYY-MM-dd``."""
    as_date = ee.Date(x)
    return as_date.format('YYYY-MM-dd')


def mosaicking(collection):
    """Return a function that builds a one-day NDVI mosaic from ``collection``.

    The returned ``inner(date)`` selects the ``ndvi`` band of the images
    falling in ``[date, date + 1 day)``, mosaics them, and returns a constant
    -99 image in which every pixel whose mosaic value is >= -1 is replaced by
    that mosaic value.
    """
    def inner(date):
        start = ee.Date(date)
        daily = collection.select('ndvi').filterDate(start, start.advance(1, 'day'))
        # Build the mosaic once and reuse it for both the test and the fill.
        mosaic = daily.mosaic()
        return ee.Image(-99.).where(mosaic.gte(-1), mosaic)
    return inner


def getting_mean(collection):
    """Return a per-date selector over ``collection``.

    Despite the name, no averaging happens: ``inner(date)`` returns the first
    image of ``collection`` within ``[date, date + 1 day)``.
    """
    def inner(date):
        day_start = ee.Date(date)
        day_end = ee.Date(date).advance(1, 'day')
        return collection.filterDate(day_start, day_end).first()
    return inner


def set_id(fea):
    """Store the element's own id in an ``id`` property and return the element."""
    identifier = fea.id()
    return fea.set('id', identifier)


def to_year(img):
    """Convert day-number pixel values to fractional years.

    Computes ``2000 + (img - 730485) / 365``; 730485 is presumably the day
    number this dataset assigns to the start of 2000 (TODO confirm). Leap
    years are not accounted for.
    """
    days_since_2000 = img.subtract(730485)
    fraction = days_since_2000.divide(365)
    base = ee.Image(2000)
    return base.add(ee.Image(fraction))


def to_day(img):
    """Return ``(img - 2016) * 365``, i.e. the offset from 2016 scaled by 365."""
    offset = img.subtract(2016)
    return offset.multiply(365)


def resample(target):
    """Return a function that reprojects an image to the projection of the
    ``ndvi`` band of ``target``."""
    def inner(img):
        target_proj = ee.Image(target).select('ndvi').projection()
        return img.reproject(target_proj)
    return inner


def sample(geo):
    """Return a function that summarises an image over ``geo`` at 30 m scale.

    ``inner(img)`` yields a geometry-less ``ee.Feature`` with two properties:
    ``date`` (the image's ``system:time_start`` as ``YYYY-MM-dd``) and
    ``ndvi`` (the first value of the mean ``reduceRegion`` result).
    """
    def inner(img):
        stamp = ee.Date(img.get('system:time_start'))
        region_mean = img.reduceRegion('mean', geo.geometry(), scale=30)
        props = {'date': stamp.format('YYYY-MM-dd'), 'ndvi': region_mean.values().get(0)}
        return ee.Feature(None, props)
    return inner


def sampleS2(geo):
    """Return a function that summarises an image over ``geo`` at 10 m scale.

    ``inner(img)`` yields a geometry-less ``ee.Feature`` with two properties:
    ``date`` (the image's ``system:time_start`` as ``YYYY-MM-dd``) and
    ``ndvi`` (the first value of the mean ``reduceRegion`` result).
    """
    def inner(img):
        stamp = ee.Date(img.get('system:time_start'))
        region_mean = img.reduceRegion('mean', geo.geometry(), scale=10)
        props = {'date': stamp.format('YYYY-MM-dd'), 'ndvi': region_mean.values().get(0)}
        return ee.Feature(None, props)
    return inner

In [None]:
def getS2_CLOUD_PROBABILITY(geo):
    """Join Sentinel-2 images over ``geo`` with their cloud-probability images.

    Images of ``COPERNICUS/S2_HARMONIZED`` with ``CLOUDY_PIXEL_PERCENTAGE``
    below 20 are inner-joined on ``system:index`` with
    ``COPERNICUS/S2_CLOUD_PROBABILITY``; the bands of each matched pair are
    combined into one image. Returns the result as an ``ee.ImageCollection``.
    """
    s2 = (ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
          .filterBounds(geo)
          .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20)))
    cloud_prob = ee.ImageCollection("COPERNICUS/S2_CLOUD_PROBABILITY").filterBounds(geo)
    same_index = ee.Filter.equals(leftField='system:index',
                                  rightField='system:index')
    pairs = ee.Join.inner().apply(primary=s2, secondary=cloud_prob, condition=same_index)

    def _merge(pair):
        # Primary image first, then the bands of the secondary image.
        return ee.Image(pair.get('primary')).addBands(pair.get('secondary'))

    return ee.ImageCollection(pairs.map(_merge))


def projectShadows(cloudMask, sunAzimuth, offset):
    """Shift ``cloudMask`` along a direction derived from the sun azimuth.

    The azimuth (degrees) is converted to radians and rotated by pi/2. The
    rounded cosine and sine of that angle, each times 15, are multiplied by
    ``offset`` and used to translate the mask's projection. Returns the
    translated image.
    """
    quarter_turn = ee.Number(0.5).multiply(np.pi)
    azimuth = ee.Number(sunAzimuth).multiply(np.pi).divide(180.0).add(quarter_turn)
    shift = ee.Number(offset)
    dx = azimuth.cos().multiply(15.0).round().multiply(shift)
    dy = azimuth.sin().multiply(15.0).round().multiply(shift)
    source_proj = cloudMask.projection()
    return cloudMask.changeProj(source_proj, source_proj.translate(dx, dy))


def scale(img):
    """Divide the listed reflectance bands by 10000 and re-attach the rest.

    ``B1``, ``B9``, ``B10`` and ``probability`` are appended unscaled; the
    properties of ``img`` are copied onto the result.
    """
    reflectance = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12']
    untouched = ['B1', 'B9', 'B10', 'probability']
    rescaled = img.select(reflectance).divide(10000).addBands(img.select(untouched))
    return rescaled.copyProperties(img, img.propertyNames())


def masking(img):
    """Scale an image, add an ``ndvi`` band and mask clouds and shadows.

    Clouds are pixels with ``probability`` >= 30. Shadows are obtained by
    shifting the cloud mask with ``projectShadows``; pixels closer than 5
    (square root of ``fastDistanceTransform``) to a projected shadow are
    masked as well.

    NOTE(review): the value passed as the shadow ``offset`` is the image's
    ``MEAN_INCIDENCE_ZENITH_ANGLE_B10`` property -- confirm this is intended.
    """
    prop_names = img.propertyNames()
    scaled = ee.Image(scale(img))
    cloud = scaled.select('probability').gte(30)
    shadow = projectShadows(
        cloud,
        scaled.get('MEAN_SOLAR_AZIMUTH_ANGLE'),
        img.get('MEAN_INCIDENCE_ZENITH_ANGLE_B10'),
    )
    near_shadow = shadow.fastDistanceTransform().sqrt().lt(5)
    # 1 only where the pixel is neither cloud nor near a projected shadow.
    keep = cloud.Not().multiply(near_shadow.Not())
    ndvi_band = scaled.normalizedDifference(['B8', 'B4']).rename('ndvi')
    return scaled.addBands(ndvi_band).updateMask(keep).copyProperties(scaled, prop_names)


def getNDVI(img):
    """Return the normalized difference of ``B8`` and ``B4`` carrying the
    properties of ``img``."""
    index = img.normalizedDifference(['B8', 'B4'])
    return index.copyProperties(img, img.propertyNames())


def set_date(date):
    """Return a function that sets the ``date`` property of its argument to
    ``date``."""
    def inner(i):
        tagged = i.set('date', date)
        return tagged
    return inner


def set_date2(img):
    """Set ``date`` to the image's ``system:time_start`` formatted as
    ``YYYY-MM-dd``."""
    acquired = ee.Date(img.get('system:time_start'))
    return img.set('date', acquired.format('YYYY-MM-dd'))


# **Landsat**

## Polygons for evaluation

In [None]:
# Evaluation polygons; set_id copies each feature's id into an 'id' property
# so it can be filtered on later.
forested = ee.FeatureCollection('users/ignaciofuentessanroman/IWF_forests').map(set_id)
# forested = ee.FeatureCollection('users/ignaciofuentessanroman/forests_non').map(set_id)
deforested = ee.FeatureCollection('users/ignaciofuentessanroman/IWF_deforestation').map(set_id)

# Client-side lists of the distinct ids (each getInfo() is a server request).
ids_forested = forested.aggregate_array('id').distinct().getInfo()
ids_deforested = deforested.aggregate_array('id').distinct().getInfo()

# Area-of-interest asset.
aoi = ee.FeatureCollection('users/ignisfausto/regions5_7')

# coll = l8.filterBounds(denuncias_subset).map(mask).map(ndvi)
# dates = coll.aggregate_array('system:time_start').map(get_date).distinct()

In [None]:
# Reference polygons and the client-side list of their 'ID' property values.
reference = ee.FeatureCollection('users/ignaciofuentessanroman/PolValConsolidadosBosquePlantacion')
reference_id = reference.aggregate_array('ID').getInfo()

## CCDC segmentation outputs

In [None]:
# Earlier asset set, kept for reference:
# TStart = ee.ImageCollection([ee.Image('users/ignisfausto/ccdcTStart1'),
#                              ee.Image('users/ignisfausto/ccdcStart2')]).mosaic()
# TEnd = ee.ImageCollection([ee.Image('users/ignisfausto/ccdcEnd1'),
#                            ee.Image('users/ignisfausto/ccdcEnd2')]).mosaic()

# Mosaic of the four exported tStart assets.
TStart = ee.ImageCollection([ee.Image('users/ignisfausto/ccdcTStart7_1'),
                             ee.Image('users/ignisfausto/ccdcTStart6_1'),
                             ee.Image('users/ignisfausto/ccdcTStart5_1'),
                             ee.Image('users/ignisfausto/ccdcTStart13_1')]).mosaic()
# Land-cover asset (name suggests Chile 2014 -- TODO confirm contents).
LC = ee.Image('users/ignaciofuentessanroman/LC_CHILE_2014_b')


In [None]:
# Per-scene magnitude and tStart assets; the asset suffixes look like WRS
# path/row codes (001083 ... 233085) -- TODO confirm.
mag1 = ee.Image('users/ignisfausto/ccdc_magnitude_001083_095')
mag2 = ee.Image('users/ignisfausto/ccdc_magnitude_001084_095')
mag3 = ee.Image('users/ignisfausto/ccdc_magnitude_001085_095')
mag4 = ee.Image('users/ignisfausto/ccdc_magnitude_233083_095')
mag5 = ee.Image('users/ignisfausto/ccdc_magnitude_233084_095')
mag6 = ee.Image('users/ignisfausto/ccdc_magnitude_233085_095')
bks1 = ee.Image('users/ignisfausto/ccdc_tStart_001083_095')
bks2 = ee.Image('users/ignisfausto/ccdc_tStart_001084_095')
bks3 = ee.Image('users/ignisfausto/ccdc_tStart_001085_095')
bks4 = ee.Image('users/ignisfausto/ccdc_tStart_233083_095')
bks5 = ee.Image('users/ignisfausto/ccdc_tStart_233084_095')
bks6 = ee.Image('users/ignisfausto/ccdc_tStart_233085_095')
# One mosaic per variable across the six scenes.
magMosaic = ee.ImageCollection([mag1, mag2, mag3, mag4, mag5, mag6]).mosaic()
bksMosaic = ee.ImageCollection([bks1, bks2, bks3, bks4, bks5, bks6]).mosaic()

In [None]:
# Convert the tStart values to fractional years and keep only values >= 2016.
# NOTE: this binds the name `mask` to an ee.Image, not to a function.
bksMosaic = to_year(bksMosaic)
mask = bksMosaic.gte(2016)
bksMosaic = bksMosaic.updateMask(mask)

# Keep negative magnitudes only. A constant 0 band is prepended and the first
# four magnitude bands follow, which shifts them by one band position --
# presumably to line each magnitude up with the start of the following
# segment; TODO confirm the band layout of both assets.
mask2 = magMosaic.lt(0)
mask2 = ee.Image(0).addBands(mask2.select([0,1,2,3]))

bksMosaic = bksMosaic.updateMask(mask).updateMask(mask2)
# Drop band 0 and keep the next four bands.
bksMosaic = bksMosaic.select([1,2,3,4])
magMosaic = magMosaic.updateMask(mask.select([1,2,3,4]).addBands(ee.Image(0))).updateMask(mask2.select([1,2,3,4]).addBands(ee.Image(0)))
# Smallest remaining break date per pixel.
bks_min = bksMosaic.reduce('min')

# Unmasked copy: masked entries take unmask()'s default fill value.
bksMosaic2 = bksMosaic.unmask()
# 1 in the band(s) whose value equals the per-pixel minimum.
mask3 = bksMosaic2.eq(bks_min.addBands(bks_min).addBands(bks_min).addBands(bks_min))
# Entries equal to 0 are replaced by the per-pixel minimum.
bksMosaic2 = bksMosaic2.where(bksMosaic2.eq(0), bks_min)
# bksMosaic2 = bksMosaic2.updateMask(bksMosaic2.neq(0))

magMosaic2 = magMosaic.updateMask(mask3.addBands(0))

In [None]:
# Per-pixel count of TStart bands (the 'count' reducer), as an integer.
length = TStart.reduce('count').toInt()
# Per-pixel maximum TStart value, converted to a fractional year.
last = to_year(TStart.reduce('max'))#.focal_mode(3)

## Mapping breaks length and last break

Zoom out to see changes and common spatial patterns in break periods.
In the last-break layer the palette runs from red to blue: red marks dates earlier than or equal to 2016-01-01, while blue marks dates equal to 2022-05-01.

In [None]:
# Reference polygon the map is centred on.
fea = reference.filter(ee.Filter.eq('ID', 370)).first()

geo = ee.Feature(fea)
# NOTE: `id` and `map` shadow Python built-ins; names kept as in the notebook.
id = geo.getMapId()
length_id = length.getMapId({'min': 1, 'max': 5, 'palette': '0000FF, 00FFFF, 00FF00, FFFF00, FF0000'})
years_id = last.getMapId({'min': 2016, 'max': 2022.5, 'palette': 'FF0000, FFFF00, 00FF00, 00FFFF, 0000FF'})
defo_polys = reference.getMapId()
# GeoJSON coordinates are (lon, lat); folium expects (lat, lon).
centroid = geo.centroid().getInfo()['geometry']['coordinates'][::-1]
map = folium.Map(location=centroid, zoom_start=16)

# One overlay per Earth Engine tile source; defo_polys is deliberately left out.
for tile_source, layer_name in ((length_id, 'length_breaks'),
                                (years_id, 'last_break'),
                                (id, 'feature')):
    folium.TileLayer(
        tiles=tile_source['tile_fetcher'].url_format,
        attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
        overlay=True,
        name=layer_name,
    ).add_to(map)
map.add_child(folium.LayerControl())
map

## Checking length of breaks of non disturbed polygons (n-1 breaks)

In [None]:
# Frequency histogram of `last` inside every forested polygon (scale 30).
ids = []
deforested0 = []
for n in ids_forested:
    test = forested.filter(ee.Filter.eq('id', n)).first()
    val = last.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested0.append(val.getInfo())

In [None]:
# For each polygon: (count under the '2000.0082191780823' key, count of
# everything else). That key is presumably the no-break value -- TODO confirm.
no_change_key = '2000.0082191780823'
data = []
for histogram in deforested0:
    total = np.sum(list(histogram.values()))
    unchanged = histogram[no_change_key] if no_change_key in histogram else 0
    data.append((unchanged, total - unchanged))
# Number of polygons where the other values outnumber the no-change key.
np.sum([1 for pair in data if pair[1] > pair[0]])

In [None]:
# Per polygon: pixel counts with a last-break year <= 2016 (low) and > 2016 (high).
low = [np.sum([n[m] for m in n.keys() if float(m) <= 2016]) for n in deforested0]
high = [np.sum([n[m] for m in n.keys() if float(m) > 2016]) for n in deforested0]
# Count polygons dominated by `low`; pairing with zip avoids assuming that
# there are exactly 100 polygons.
np.sum([int(lo > hi) for lo, hi in zip(low, high)])

In [None]:
# Frequency histogram of `last` inside every deforested polygon (scale 30).
# The ids must come from the same collection that is filtered below.
ids, deforested1 = [], []
for n in ids_deforested:
    test = deforested.filter(ee.Filter.eq('id', n)).first()
    val = last.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested1.append(val.getInfo())

In [None]:
# Per polygon: pixel counts with a last-break year <= 2016 (low) and > 2016 (high).
low = [np.sum([n[m] for m in n.keys() if float(m) <= 2016]) for n in deforested1]
high = [np.sum([n[m] for m in n.keys() if float(m) > 2016]) for n in deforested1]
# Count polygons dominated by `high`; pairing with zip avoids assuming that
# there are exactly 100 polygons.
np.sum([int(lo < hi) for lo, hi in zip(low, high)])

In [None]:
# Per-polygon flag: 1 when `high` outnumbers `low` (works for any polygon count).
[int(lo < hi) for lo, hi in zip(low, high)]


## counting polygons with breaks (n-1)

In [None]:
# Tally of the per-polygon values in deforested0, shown as (values, counts).
tally = Counter(deforested0)
(tally.keys(), tally.values())

This means that 68 non-disturbed polygons don't present any structural break, while 32 do. Let's evaluate a histogram of the last dates in these structural breaks (the first bin, i.e. around 2000, implies no structural breaks, no change).

In [None]:
# Mode of `last` inside every forested polygon (scale 30).
ids = []
deforested0 = []
for n in ids_forested:
    test = forested.filter(ee.Filter.eq('id', n)).first()
    val = last.reduceRegion('mode', test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested0.append(val.getInfo())

In [None]:
deforested0

In [None]:
# Replace missing results (None) with NaN, then plot a 20-bin histogram.
deforested0_bins = [np.nan if n is None else n for n in deforested0]
plt.hist(deforested0_bins, 20)
plt.show()

In [None]:
# Hand-entered 0/1 flag per polygon (100 entries), followed by the number of 0s.
bol = (
    [1] * 49 + [0]
    + [1, 0, 1, 0, 1, 1, 1, 0, 1, 0]
    + [0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1]
    + [0, 0, 0, 0, 1] + [0] * 9
    + [1] * 6 + [0] * 4 + [1] * 5
)
bol.count(0)

In [None]:
# Keep only mode values later than 2016.1 (NaN compares False and becomes None).
vals = [n if n > 2016.1 else None for n in deforested0_bins]
# Manual flags of the polygons that do have such a value; identity test for None.
[bol[i] for i, n in enumerate(vals) if n is not None]

In [None]:
np.sum(bol) - 18

A lot of these polygons present breaks before the year 2016; therefore, we need to count those polygons that present breaks after 2016.

In [None]:
np.sum([1 if n > 2016 else 0 for n in deforested0_bins])

This means that 19 polygons present breaks even though they shouldn't, based on our reference data.

## Evaluation on a single polygon
Let's bring in the entire Landsat collection and sample our polygon to get the mean NDVI for each scene.

In [None]:
# Cloud-mask with the `maskl` function: the name `mask` is bound to an
# ee.Image elsewhere in this notebook and cannot be passed to .map().
l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(maskl).map(ndvil8)
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(maskl).map(ndvil8)
l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(maskl).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(maskl).map(ndvil57).filterDate('2000-01-01', '2022-05-01')

# Merged NDVI time series of the four sensors, in chronological order.
landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-05-01')

In [None]:
# Distinct WRS_ROW values in the series (the variable is named `paths` but
# it holds rows).
paths = landsat.aggregate_array('WRS_ROW').distinct().getInfo()

In [None]:
# Keep only the scenes whose WRS_ROW equals the first value listed in `paths`.
landsat = landsat.filter(ee.Filter.eq('WRS_ROW', paths[0]))

In [None]:
data = landsat.map(sample(fea)).getInfo()

In [None]:
# Unpack the sampled features into parallel lists; a feature without an
# 'ndvi' property contributes None.
dates = [pd.to_datetime(n['properties']['date']) for n in data['features']]
ndvi = [n['properties']['ndvi'] if 'ndvi' in n['properties'] else None for n in data['features']]
plt.scatter(dates, ndvi)
plt.xlim(pd.to_datetime('2000-01-01'), pd.to_datetime('2022-05-01'))
plt.grid()

In [None]:
# Cloud-mask with the `maskl` function: the name `mask` is bound to an
# ee.Image elsewhere in this notebook and cannot be passed to .map().
l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(maskl).map(ndvil8)
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(maskl).map(ndvil8)
l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(maskl).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(maskl).map(ndvil57).filterDate('2000-01-01', '2022-05-01')

landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
landsat = landsat.filter(ee.Filter.eq('WRS_PATH', 1)) #
# landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 233)).map(resample(landsat1.first()))

# landsat_again = landsat1#.merge(landsat2)
# Mean NDVI per scene over the polygon.
data = landsat.map(sample(fea)).getInfo()
dates = [pd.to_datetime(f['properties']['date']) for f in data['features']]
ndvi = [f['properties'].get('ndvi') for f in data['features']]

# Per-band histogram of the segment start years inside the polygon; for each
# band with data, take the mean of its most populated bucket.
dectionary = to_year(TStart).reduceRegion('histogram', fea.geometry(), scale=30).getInfo()
bm = [dectionary[k]['bucketMeans'] for k in dectionary if dectionary[k] is not None]
bh = [np.argmax(dectionary[k]['histogram']) for k in dectionary if dectionary[k] is not None]
common = [bm[i][k] for i, k in enumerate(bh)]
# common = [common[0], np.mean(common[1:])] # comment
fig = plt.figure(figsize=(10,4))
plt.scatter(dates, ndvi)

if len(common) > 0:
    # Fractional year -> date, using a 365-day year.
    floors = [math.floor(v) for v in common]
    decimals = [v - floors[i] for i, v in enumerate(common)]
    days = [365 * v for v in decimals]
    new_dates = [datetime.date(y, 1, 1) + datetime.timedelta(days=days[i]) for i, y in enumerate(floors)]
    for i, break_date in enumerate(new_dates):
        # The first entry is skipped -- presumably the start of the first
        # segment rather than a break; TODO confirm.
        if i != 0:
            print(break_date)
            plt.axvline(break_date, color='k', ls='--')
plt.show()

In [None]:
starts = last.sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()

In [None]:
dectionary = last.reduceRegion('histogram', fea.geometry(), scale=30).getInfo()

### What's the distribution of last structural breaks?

In [None]:
# Bar chart of the 'constant' band histogram: bucket means against counts.
plt.bar(dectionary['constant']['bucketMeans'], dectionary['constant']['histogram'], width=0.2)
plt.ylabel('Frequency')
plt.show()

### Let's plot the time series and the mean date for the last break

In [None]:
# Histogram of `last` as a table: one row per bucket.
df = pd.DataFrame(data={'year':dectionary['constant']['bucketMeans'], 'count':dectionary['constant']['histogram']})
# Drop the buckets at or below 2000.5.
df = df[df['year'] > 2000.5]
# Unweighted mean of the bucket means that contain at least one pixel.
date = df[df['count'] > 0].mean()['year']
# Fractional year -> date, using a 365-day year.
floor = math.floor(date)
decimal = date - floor
days = 365 * decimal
new_date = datetime.date(floor, 1, 1) + datetime.timedelta(days=days)

# NDVI series with the mean last-break date drawn as a vertical line.
plt.scatter(dates, ndvi)
plt.xlim(pd.to_datetime('2000-01-01'), pd.to_datetime('2022-05-01'))
plt.axvline(new_date, color='k')
plt.grid()
plt.show()

## Checking breaks on disturbed polygons

In [None]:
# Mode of `last` inside every deforested polygon (scale 30).
ids2 = []
deforested1 = []
for n in ids_deforested:
    test = deforested.filter(ee.Filter.eq('id', n)).first()
    val = last.reduceRegion('mode', test.geometry(), 30).values().get(0) #length
    ids2.append(n)
    deforested1.append(val.getInfo())

In [None]:
# Mode of `last` inside every forested polygon (scale 30).
ids = []
deforested0 = []
for n in ids_forested:
    test = forested.filter(ee.Filter.eq('id', n)).first()
    val = last.reduceRegion('mode', test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested0.append(val.getInfo())

In [None]:
deforested1_bin = [round(n) if n else np.nan for n in deforested1]

In [None]:
(Counter(deforested1_bin).keys(), Counter(deforested1_bin).values())

So, 82 polygons present one or more breaks, and 18 either do not present any break or fall in masked data (classified as open shrublands, prairies, ...). Let's now evaluate the dates of the last breaks.

In [None]:
bins2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,1,0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,1,1,1,0,1,0,0,0,1,1,1,1,1,1,0,0,0,0,0,1,0,0,0,0,1,1,1,0,0,0,1,1,1,1,0,0,0,1,1,1,1,1,1,1,1,0,0,0]

In [None]:
np.sum(bins2)

In [None]:
deforested1

In [None]:
# Mode of `last` (a fractional year) inside every deforested polygon (scale 30).
ids2 = []
deforested1_year = []
for n in ids_deforested:
    test = deforested.filter(ee.Filter.eq('id', n)).first()
    val = last.reduceRegion('mode', test.geometry(), 30).values().get(0)
    ids2.append(n)
    deforested1_year.append(val.getInfo())


In [None]:
# Number of polygons whose modal last break is after 2016. Entries can be
# None when a polygon returns no value, and None cannot be compared to a number.
len([n for n in deforested1_year if n is not None and n > 2016])

In [None]:
# None -> NaN first, so the comparison below is always defined.
deforested1_year_bin = [np.nan if n is None else n for n in deforested1_year]
# 1 when the modal last break is after 2016, otherwise 0 (NaN compares False).
deforested1_year_bin = [1 if n > 2016 else 0 for n in deforested1_year_bin]

In [None]:
deforested1_year_bin

In [None]:
[bins2[i] for i, n in enumerate(deforested1_year_bin) if n == 0]

In [None]:
np.sum(deforested1_year_bin)

In [None]:
(Counter(deforested1_year_bin).keys(), Counter(deforested1_year_bin).values())

Now, 78 polygons present structural breaks after 2016; let's plot a histogram.

In [None]:
plt.hist([np.nan if n is None else n for n in deforested1_year], 20)
plt.show()

## Some general metrics

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


# Hard-coded predictions, ordered like `obs`: the first 100 entries are the
# class-0 polygons (81 predicted 0, 19 predicted 1) and the next 100 are the
# class-1 polygons (79 predicted 1, 21 predicted 0).
# NOTE(review): the text above this section reports 78 polygons with breaks
# after 2016, while 79 is used here -- confirm which figure is current.
pred = np.hstack([np.repeat(0, 81), np.repeat(1, 19), np.repeat(1, 79), np.repeat(0, 21)])
obs = np.hstack([np.repeat(0, 100), np.repeat(1, 100)])

In [None]:
print(precision_score(obs, pred), recall_score(obs, pred), f1_score(obs, pred), accuracy_score(obs, pred))

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
ids_deforested[12:13]

In [None]:
# NDVI series and detected breaks for a handful of deforested polygons.
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
for n in ids[:]:
    print(n)
    # Resolve the polygon once per id instead of rebuilding the lookup in
    # every expression below.
    feature = deforested.filter(ee.Filter.eq('id', n)).first()
    geometry = feature.geometry()

    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geometry)
    # Use only the WRS path with the most Landsat-8 scenes over the polygon.
    paths = l8.aggregate_histogram('WRS_PATH').getInfo()
    path = max(paths, key=paths.get)
    same_path = ee.Filter.eq('WRS_PATH', float(path))

    l8 = l8.filter(same_path).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil8)
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filter(same_path).filterBounds(geometry).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filter(same_path).filterBounds(geometry).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil57)
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filter(same_path).filterBounds(geometry).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil57)

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start')

    # Mean NDVI per scene; a feature without an 'ndvi' property yields None.
    data = landsat.map(sample(feature)).getInfo()
    dates = [pd.to_datetime(f['properties']['date']) for f in data['features']]
    ndvi = [f['properties'].get('ndvi') for f in data['features']]

    # For each band with data, the mean of its most populated histogram bucket.
    dectionary = bksMosaic2.reduceRegion('histogram', geometry, scale=30).getInfo()
    bm = [dectionary[k]['bucketMeans'] for k in dectionary if dectionary[k] is not None]
    bh = [np.argmax(dectionary[k]['histogram']) for k in dectionary if dectionary[k] is not None]
    common = [means[k] for means, k in zip(bm, bh)]

    fig = plt.figure(figsize=(10,4))
    plt.scatter(dates, ndvi)

    if len(common) > 0:
        # Fractional year -> date, using a 365-day year.
        floors = [math.floor(v) for v in common]
        decimals = [v - floors[i] for i, v in enumerate(common)]
        days = [365 * v for v in decimals]
        new_dates = [datetime.date(y, 1, 1) + datetime.timedelta(days=days[i]) for i, y in enumerate(floors)]
        # A separate loop name keeps the polygon id `n` intact; the first
        # entry is not drawn, as in the other plotting cells.
        for i, break_date in enumerate(new_dates):
            if i != 0:
                print(break_date)
                plt.axvline(break_date, color='k', ls='--')
    plt.show()

In [None]:
# Reference polygon the map is centred on.
fea = reference.filter(ee.Filter.eq('ID', 370)).first()

geo = ee.Feature(fea)
# NOTE: `id` and `map` shadow Python built-ins; names kept as in the notebook.
id = geo.getMapId()
# length_id = length.getMapId({'min': 1, 'max': 5, 'palette': '0000FF, 00FFFF, 00FF00, FFFF00, FF0000'})
years_id = bksMosaic2.select(0).getMapId({'min': 2000, 'max': 2022.5, 'palette': 'FF0000, FFFF00, 00FF00, 00FFFF, 0000FF'})
defo_polys = reference.getMapId()
# GeoJSON coordinates are (lon, lat); folium expects (lat, lon).
centroid = geo.centroid().getInfo()['geometry']['coordinates'][::-1]
map = folium.Map(location=centroid, zoom_start=16)

# Overlays actually shown; the length_breaks and defo_polys layers are left out.
for tile_source, layer_name in ((years_id, 'last_break'), (id, 'feature')):
    folium.TileLayer(
        tiles=tile_source['tile_fetcher'].url_format,
        attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
        overlay=True,
        name=layer_name,
    ).add_to(map)
map.add_child(folium.LayerControl())
map

In [None]:
bksMosaic = bksMosaic.updateMask(bksMosaic.gt(0))

In [None]:
[bm[i][n] for i, n in enumerate(bh)]

In [None]:
bh

In [None]:
# NDVI series and detected breaks for a handful of forested polygons.
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
for ñ, n in enumerate(ids[:]):
    print(n)
    # NOTE(review): for the last id the imagery is filtered with reference
    # polygon 370, yet sampling and the histogram below still use forested
    # polygon `n` -- confirm this is intended.
    if ñ == 4:
        fea = reference.filter(ee.Filter.eq('ID', 370)).first().geometry()
    else:
        fea = forested.filter(ee.Filter.eq('id', n)).first().geometry()
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea)
    # WRS path with the most Landsat-8 scenes; only used by the else-branch below.
    paths = l8.aggregate_histogram('WRS_PATH').getInfo()
    path = [i for i in paths if paths[i]==sorted(paths.values())[-1]][0]


    # With five ids, ñ is always < 6, so the else-branch (single WRS path)
    # never runs.
    if ñ < 6:
        l8 = l8.filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil8)
        l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil8)
        l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil57)
        l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil57)
    else:
        l8 = l8.filter(ee.Filter.eq('WRS_PATH', float(path))).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil8)
        l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filter(ee.Filter.eq('WRS_PATH', float(path))).filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil8)
        l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filter(ee.Filter.eq('WRS_PATH', float(path))).filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil57)
        l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filter(ee.Filter.eq('WRS_PATH', float(path))).filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil57)

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start')

    # Mean NDVI per scene; a feature without an 'ndvi' property yields None.
    data = landsat.map(sample(forested.filter(ee.Filter.eq('id', n)).first())).getInfo()
    dates = [pd.to_datetime(n['properties']['date']) for n in data['features']]
    ndvi = [n['properties']['ndvi'] if 'ndvi' in n['properties'] else None for n in data['features']]

    # For each band with data, the mean of its most populated histogram bucket.
    dectionary = bksMosaic2.reduceRegion('histogram', forested.filter(ee.Filter.eq('id', n)).first().geometry(), scale=30).getInfo()
    bm = [dectionary[n]['bucketMeans'] for n in dectionary if dectionary[n] is not None]
    bh = [np.argmax(dectionary[n]['histogram']) for n in dectionary if dectionary[n] is not None]
    common = [bm[i][n] for i, n in enumerate(bh)]

    fig = plt.figure(figsize=(10,4))
    plt.scatter(dates, ndvi)

    if len(common) > 0:
        # Fractional year -> date, using a 365-day year.
        floors = [math.floor(n) for n in common]
        decimals = [n - floors[i] for i,n in enumerate(common)]
        days = [365 * n for n in decimals]
        new_dates = [datetime.date(n, 1, 1) + datetime.timedelta(days=days[i]) for i,n in enumerate(floors)]
        # This loop rebinds `n` (the polygon id) to a date; the first entry
        # is not drawn.
        for i, n in enumerate(new_dates):
            if i != 0:
                print(n)
                plt.axvline(n, color='k', ls='--')
    plt.show()

In [None]:
data = bksMosaic2.select(0).unmask(0).sample(forested.filter(ee.Filter.eq('id', '00000000000000000001')).first().geometry(), scale=30).getInfo()


In [None]:
[n['properties']['tStart_1'] for n in data['features']]

In [None]:
# NDVI series and detected breaks for reference polygon 370.
# Here `fea` is a geometry, not a feature.
fea = reference.filter(ee.Filter.eq('ID', 370)).first().geometry()
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea)
l8 = l8.filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil8)
l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil8)
l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil57)
l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea).filterDate('2000-01-01', '2022-06-01').map(maskl).map(ndvil57)
landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start')

# Mean NDVI per scene; a feature without an 'ndvi' property yields None.
data = landsat.map(sample(reference.filter(ee.Filter.eq('ID', 370)).first())).getInfo()
dates = [pd.to_datetime(n['properties']['date']) for n in data['features']]
ndvi = [n['properties']['ndvi'] if 'ndvi' in n['properties'] else None for n in data['features']]

# selfMask() hides zero-valued pixels before the histogram is computed. For
# each band with data, take the mean of its most populated bucket.
dectionary = bksMosaic2.selfMask().reduceRegion('histogram', fea, scale=30).getInfo()
bm = [dectionary[n]['bucketMeans'] for n in dectionary if dectionary[n] is not None]
bh = [np.argmax(dectionary[n]['histogram']) for n in dectionary if dectionary[n] is not None]
common = [bm[i][n] for i, n in enumerate(bh)]

fig = plt.figure(figsize=(10,4))
plt.scatter(dates, ndvi)

if len(common) > 0:
    # Fractional year -> date, using a 365-day year.
    floors = [math.floor(n) for n in common]
    decimals = [n - floors[i] for i,n in enumerate(common)]
    days = [365 * n for n in decimals]
    new_dates = [datetime.date(n, 1, 1) + datetime.timedelta(days=days[i]) for i,n in enumerate(floors)]
    # The first entry is not drawn.
    for i, n in enumerate(new_dates):
        if i != 0:
            print(n)
            plt.axvline(n, color='k', ls='--')
plt.show()

In [None]:
common

In [None]:

# Publication figure: NDVI series and breaks for one deforested polygon.
for n in ids_deforested[10:11]:
    print(n)
    # Resolve the polygon once per id.
    feature = deforested.filter(ee.Filter.eq('id', n)).first()
    geometry = feature.geometry()
    # Cloud-mask with the `maskl` function: the name `mask` is bound to an
    # ee.Image elsewhere in this notebook and cannot be passed to .map().
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(geometry).map(maskl).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geometry).map(maskl).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(geometry).map(maskl).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(geometry).map(maskl).map(ndvil57).filterDate('2000-01-01', '2022-05-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')

    # Mean NDVI per scene; a feature without an 'ndvi' property yields None.
    data = landsat.map(sample(feature)).getInfo()
    dates = [pd.to_datetime(f['properties']['date']) for f in data['features']]
    ndvi = [f['properties'].get('ndvi') for f in data['features']]

    # For each band with data, the mean of its most populated histogram bucket.
    dectionary = to_year(ee.Image('users/ignisfausto/ccdcTStart13_1')).reduceRegion('histogram', geometry, scale=30).getInfo()
    bm = [dectionary[k]['bucketMeans'] for k in dectionary if dectionary[k] is not None]
    bh = [np.argmax(dectionary[k]['histogram']) for k in dectionary if dectionary[k] is not None]
    common = [bm[i][k] for i, k in enumerate(bh)]
    # Collapse all entries after the first into their mean (one line drawn).
    common = [common[0], np.mean(common[1:])]
    fig = plt.figure(figsize=(10,4))
    plt.scatter(dates, ndvi)

    if len(common) > 0:
        # Fractional year -> date, using a 365-day year.
        floors = [math.floor(v) for v in common]
        decimals = [v - floors[i] for i, v in enumerate(common)]
        days = [365 * v for v in decimals]
        new_dates = [datetime.date(y, 1, 1) + datetime.timedelta(days=days[i]) for i, y in enumerate(floors)]
        # A separate loop name keeps the polygon id `n` intact; the first
        # entry is not drawn.
        for i, break_date in enumerate(new_dates):
            if i != 0:
                plt.axvline(break_date, color='k', ls='--')

    plt.xlim(pd.to_datetime('2000-01-01'), pd.to_datetime('2022-05-01'))
    plt.ylabel('NDVI', fontsize=16)
    plt.tick_params(axis='both', labelsize=14)
    plt.grid()
    fig.savefig('/content/drive/MyDrive/ccdc_defo_series000000a.png', dpi=300)
    plt.show()

In [None]:
common[1:]

In [None]:
bm

In [None]:
[type(dectionary[n]) for n in dectionary]

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
df

In [None]:
sample

In [None]:
def num2date(values):
    """Convert decimal years to calendar dates.

    Each value is split into its integer year and its fractional remainder.
    The remainder is scaled by a fixed 365-day year (leap years are not
    treated specially) and added to 1 January of that year.

    Args:
        values: sequence of decimal years, e.g. ``2016.5``.

    Returns:
        A list of ``datetime.date`` objects, one per input value, in order.
    """
    converted = []
    for value in values:
        year = math.floor(value)
        # Adding a timedelta to a date only uses its whole days.
        offset = datetime.timedelta(days=365 * (value - year))
        converted.append(datetime.date(year, 1, 1) + offset)
    return converted

In [None]:
# Figure: one map per polygon of the `forested` collection showing the most
# recent CCDC break date (decimal year) inside a 300-unit buffer of its bounds.
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
for i, n in enumerate(ids):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first polygon is read from a different CCDC asset than the others.
    asset = 'users/ignisfausto/ccdcTStart13_1' if i == 0 else 'users/ignisfausto/ccdcTStart6_1'
    # Kept under a name other than `sample` so the sample() helper that other
    # cells call is not overwritten by a dictionary.
    rect = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(rect['properties']['constant']).astype('float')
    array[array < 2001] = np.nan  # also blanks the -99 default value

    # Colour-bar ticks: four evenly spaced values across the valid dates.
    # NaNs are removed explicitly rather than by dropping the last sorted
    # element, which discarded the largest valid date whenever the window
    # contained no NaN.
    values = np.unique(array[~np.isnan(array)])
    if len(values) > 1:
        values = np.linspace(np.min(values), np.max(values), 4)
    else:
        values = np.linspace(2000.0081787109375, 2022.5, 4)

    # Decimal year -> calendar date, using a fixed 365-day year.
    new_dates = []
    for value in values:
        year = math.floor(value)
        new_dates.append(datetime.date(year, 1, 1) + datetime.timedelta(days=365 * (value - year)))

    # Corner coordinates of the sampled window, used only for tick labels.
    coords = np.array(buffered.coordinates().getInfo())[0, :, :]
    longs = coords[:, 0]
    lats = coords[:, 1]

    f1 = ax[i].imshow(array, vmin=np.nanmin(values), vmax=np.nanmax(values), cmap='viridis')
    cb = fig.colorbar(f1, ticks=values, ax=ax[i])
    cb.ax.set_yticklabels([d.strftime('%Y %m %d') for d in new_dates])
    cb.ax.tick_params(labelsize=10)
    # NOTE(review): tick positions are hard-coded pixel indices; they assume
    # a window of roughly 30 x 25 pixels — confirm against array.shape.
    ax[i].set_xticks([0, 10, 20, 30])
    ax[i].set_xticklabels(labels=np.round(np.linspace(np.min(longs), np.max(longs), 4), 4))
    ax[i].set_yticks(np.linspace(0, 25, 4))
    ax[i].set_yticklabels(labels=np.round(np.linspace(np.max(lats), np.min(lats), 4), 4))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)
    # Panel letter, placed left of the axes; the offset is tuned per panel.
    if (i == 2) | (i == 3):
        ax[i].text(-23, 0, labs[i], fontsize=12, weight='bold')
    else:
        ax[i].text(-25, 0, labs[i], fontsize=12, weight='bold')
    # plt.show()
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/ccdc_undbreaks.png', dpi=300)
# fig.tight_layout()

In [None]:
!pip install rasterio
import rasterio as rio

In [None]:
# Write the post-2016 CCDC break dates around each `forested` polygon to a
# GeoTIFF, reusing the metadata of an existing raster with the same label.
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
for i, n in enumerate(ids[:]):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first polygon is read from a different CCDC asset than the others.
    asset = 'users/ignisfausto/ccdcTStart13_1' if i == 0 else 'users/ignisfausto/ccdcTStart6_1'
    # Kept under a name other than `sample` so the sample() helper that other
    # cells call is not overwritten by a dictionary.
    rect = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(rect['properties']['constant']).astype('float')
    array[array <= 2016] = np.nan
    # Open the template inside a context manager so its file handle is
    # released; the original left the dataset open on every iteration.
    with rio.open('/content/drive/MyDrive/testing{}landsat.tif'.format(labs[i])) as template:
        meta_out = template.meta
    # NOTE(review): assumes the template's width/height/dtype are compatible
    # with `array` — confirm, nothing here checks it.
    # days = (days - 5844.)/365 + 2016
    with rio.open('/content/drive/MyDrive/{}_{}.tif'.format('defoCCDC', '{}landsat'.format(labs[i])), "w", **meta_out) as dest:
        dest.write(array.reshape(1, array.shape[0], array.shape[1]))


In [None]:
import matplotlib.dates as mdates
from datetime import date, timedelta

# Figure: per-polygon histograms of post-2016 CCDC break dates for the five
# `forested` polygons.
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
# Compare dates with a date: ordering a datetime.date against a pandas
# Timestamp is rejected by current pandas versions.
cutoff = datetime.date(2016, 1, 1)
for i, n in enumerate(ids):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first polygon is read from a different CCDC asset than the others.
    asset = 'users/ignisfausto/ccdcTStart13_1' if i == 0 else 'users/ignisfausto/ccdcTStart6_1'
    # Kept under a name other than `sample` so the sample() helper that other
    # cells call is not overwritten by a dictionary.
    rect = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(rect['properties']['constant']).astype('float')
    array[array < 2001] = np.nan

    # All valid break dates in the window, one entry per pixel.
    values = array.ravel()
    values = values[~np.isnan(values)]

    # Decimal year -> calendar date, using a fixed 365-day year.
    new_dates = []
    for value in values:
        year = math.floor(value)
        new_dates.append(datetime.date(year, 1, 1) + datetime.timedelta(days=365 * (value - year)))
    new_dates = [d for d in new_dates if d > cutoff]

    if (i == 1) | (i == 4):
        # Empty placeholder panel: twelve zero-height bars over 2016-2022.
        ax[i].bar(pd.date_range('2016-01-01', '2022-01-01', 12), np.repeat(0, 12))
        ax[i].set_ylim(0, 50)
        ax[i].set_ylabel('Frequency', fontsize=11)
        ax[i].tick_params(axis='both', labelsize=10)
        ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
        ax[i].tick_params(axis='x', rotation=30)
    elif i == 3:
        # NOTE(review): this panel draws a hand-entered single bar instead of
        # the sampled dates — confirm that is intended.
        # The end date was written '2021-03, 19'; spelled here as an ISO date.
        ax[i].bar(pd.date_range('2019-03-19', '2021-03-19', 3), [0,1,0], width=30)
        start, end = ax[i].get_xlim()
        ax[i].set_ylim(0, 1.2)
        ax[i].set_ylabel('Frequency', fontsize=11)
        ax[i].tick_params(axis='both', labelsize=10)
        # Six evenly spaced ticks across the current limits, expressed in the
        # axis' own date units so the result does not depend on the date epoch.
        ax[i].set_xticks(np.linspace(start, end, 6))
        ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
        ax[i].tick_params(axis='x', rotation=30)
    else:
        ax[i].hist(new_dates, 30)
        start, end = ax[i].get_xlim()
        ax[i].set_ylabel('Frequency', fontsize=11)
        ax[i].tick_params(axis='both', labelsize=10)
        # Same epoch-independent tick placement as above.
        ax[i].set_xticks(np.linspace(start, end, 6))
        ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
        ax[i].tick_params(axis='x', rotation=30)

    # if (i == 2) | (i == 3):
    #     ax[i].text(-23, 0, labs[i], fontsize=12, weight='bold')
    # else:
    #     ax[i].text(-25, 0, labs[i], fontsize=12, weight='bold')
    # plt.show()
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/ccdc_undhist.png', dpi=300)
# fig.tight_layout()

In [None]:
# Figure: one map per polygon of the `deforested` collection showing the most
# recent CCDC break date (decimal year) inside a 300-unit buffer of its bounds.
labs = ['F', 'G', 'H', 'I', 'J']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first two polygons are read from a different CCDC asset.
    asset = 'users/ignisfausto/ccdcTStart13_1' if i < 2 else 'users/ignisfausto/ccdcTStart6_1'
    # Kept under a name other than `sample` so the sample() helper that other
    # cells call is not overwritten by a dictionary.
    rect = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(rect['properties']['constant']).astype('float')
    array[array < 2001] = np.nan  # also blanks the -99 default value
    print(array.shape)

    # Colour-bar ticks: four evenly spaced values across the valid dates.
    # NaNs are removed explicitly rather than by dropping the last sorted
    # element, which discarded the largest valid date whenever the window
    # contained no NaN.
    values = np.unique(array[~np.isnan(array)])
    if len(values) > 1:
        values = np.linspace(np.min(values), np.max(values), 4)
    else:
        values = np.linspace(2000.0081787109375, 2022.5, 4)

    # Decimal year -> calendar date, using a fixed 365-day year.
    new_dates = []
    for value in values:
        year = math.floor(value)
        new_dates.append(datetime.date(year, 1, 1) + datetime.timedelta(days=365 * (value - year)))

    # Corner coordinates of the sampled window, used only for tick labels.
    coords = np.array(buffered.coordinates().getInfo())[0, :, :]
    longs = coords[:, 0]
    lats = coords[:, 1]

    f1 = ax[i].imshow(array, vmin=np.nanmin(values), vmax=np.nanmax(values), cmap='viridis')
    cb = fig.colorbar(f1, ticks=values, ax=ax[i])
    cb.ax.set_yticklabels([d.strftime('%Y %m %d') for d in new_dates])
    cb.ax.tick_params(labelsize=10)
    ax[i].set_xticks(np.linspace(0, array.shape[1], 4))
    ax[i].set_xticklabels(labels=np.round(np.linspace(np.min(longs), np.max(longs), 4), 4))
    ax[i].set_yticks(np.linspace(0, array.shape[0], 4))
    ax[i].set_yticklabels(labels=np.round(np.linspace(np.max(lats), np.min(lats), 4), 4))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)
    # Panel letter, placed left of the axes; the offset is tuned per panel.
    label_x = {0: -39, 1: -29, 2: -26, 4: -21}.get(i, -23)
    ax[i].text(label_x, 0, labs[i], fontsize=12, weight='bold')
    # plt.show()
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/ccdc_defobreaks.png', dpi=300)
# fig.tight_layout()

In [None]:
# Write the post-2016 CCDC break dates around each `deforested` polygon to a
# GeoTIFF, reusing the metadata of an existing raster with the same label.
labs = ['F', 'G', 'H', 'I', 'J']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
for i, n in enumerate(ids[:]):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first two polygons are read from a different CCDC asset.
    asset = 'users/ignisfausto/ccdcTStart13_1' if i < 2 else 'users/ignisfausto/ccdcTStart6_1'
    # Kept under a name other than `sample` so the sample() helper that other
    # cells call is not overwritten by a dictionary.
    rect = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(rect['properties']['constant']).astype('float')
    array[array <= 2016] = np.nan
    # Open the template inside a context manager so its file handle is
    # released; the original left the dataset open on every iteration.
    with rio.open('/content/drive/MyDrive/testing{}landsat.tif'.format(labs[i])) as template:
        meta_out = template.meta
    # NOTE(review): assumes the template's width/height/dtype are compatible
    # with `array` — confirm, nothing here checks it.
    # days = (days - 5844.)/365 + 2016
    with rio.open('/content/drive/MyDrive/{}_{}.tif'.format('defoCCDC', '{}landsat'.format(labs[i])), "w", **meta_out) as dest:
        dest.write(array.reshape(1, array.shape[0], array.shape[1]))

In [None]:
# Figure: per-polygon histograms of post-2016 CCDC break dates for the five
# `deforested` polygons. Relies on `mdates` imported by an earlier cell.
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
# Compare dates with a date: ordering a datetime.date against a pandas
# Timestamp is rejected by current pandas versions.
cutoff = datetime.date(2016, 1, 1)
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first two polygons are read from a different CCDC asset.
    asset = 'users/ignisfausto/ccdcTStart13_1' if i < 2 else 'users/ignisfausto/ccdcTStart6_1'
    # Kept under a name other than `sample` so the sample() helper that other
    # cells call is not overwritten by a dictionary.
    rect = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(rect['properties']['constant']).astype('float')
    array[array < 2001] = np.nan

    # All valid break dates in the window, one entry per pixel.
    values = array.ravel()
    values = values[~np.isnan(values)]

    # Decimal year -> calendar date, using a fixed 365-day year.
    new_dates = []
    for value in values:
        year = math.floor(value)
        new_dates.append(datetime.date(year, 1, 1) + datetime.timedelta(days=365 * (value - year)))
    new_dates = [d for d in new_dates if d > cutoff]

    ax[i].hist(new_dates, 30)
    start, end = ax[i].get_xlim()
    ax[i].set_ylabel('Frequency', fontsize=11)
    ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)
    # Six evenly spaced ticks across the current limits, expressed in the
    # axis' own date units so the result does not depend on the date epoch
    # (the original rebuilt dates from year 1, which only matched old
    # matplotlib releases).
    ax[i].set_xticks(np.linspace(start, end, 6))

    # if (i == 2) | (i == 3):
    #     ax[i].text(-23, 0, labs[i], fontsize=12, weight='bold')
    # else:
    #     ax[i].text(-25, 0, labs[i], fontsize=12, weight='bold')
    # plt.show()
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/ccdc_defohist.png', dpi=300)
# fig.tight_layout()

In [None]:
def dict2list(c, p):
    """Accumulator for ``ee.List.iterate``: collect the histogram keys of one entry.

    Reads the module-level ``keys``, which must still be the dictionary
    returned by ``reduceRegion`` when this function is invoked.

    Args:
        c: current element, a key of ``keys``.
        p: state accumulated so far (treated as an ``ee.List``).

    Returns:
        The accumulated list with the key list of ``keys[c]`` appended as
        one element.
    """
    entry = ee.Dictionary(keys.get(c))
    collected = ee.List(p)
    return collected.add(entry.keys())

# Break dates converted by to_year(), kept only where the value exceeds 2016
# and where the land-cover image LC is one of 212, 241, 251, 410 or 420.
changes = to_year(TStart)
changes = changes.updateMask(
    changes.gt(2016)
)
changes = changes.updateMask(
    LC.eq(212).Or(LC.eq(241).Or(LC.eq(251)).Or(LC.eq(410)).Or(LC.eq(420)))
)


def addBands(c, p):
    """Accumulator for ``iterate``: append a presence band for break value ``c``.

    Reads the module-level ``changes`` image.

    Args:
        c: break value as a numeric string.
        p: image accumulated so far.

    Returns:
        ``p`` with one more band, named ``'B'`` followed by ``c`` with its
        ``'.'`` replaced by ``'_'``. The band is the max, across the bands of
        ``changes``, of the self-masked test ``changes == c``.
    """
    band_name = ee.String('B').cat(ee.String(c).replace('[.]', '_'))
    matches = changes.eq(ee.Number.parse(c)).selfMask()
    presence = matches.reduce('max').rename(band_name)
    return ee.Image(p).addBands(presence)


def b2ic(ic):
    """Build a mapper that turns a break-value key into a time-stamped image.

    Args:
        ic: multi-band image holding one ``'B<value>'`` band per break value.

    Returns:
        A one-argument function. Given a key ``k`` (numeric string) it selects
        the band ``'B'`` + ``k`` (with ``'.'`` replaced by ``'_'``) from
        ``ic``, self-masks it, and sets ``system:time_start`` to 2016-01-01
        advanced by ``k - 2016`` years and ``date`` to ``k``.
    """
    def key_to_image(k):
        band_name = ee.String('B').cat(ee.String(k).replace('[.]', '_'))
        elapsed_years = ee.Number.parse(k).subtract(2016)
        stamp = ee.Date.fromYMD(2016, 1, 1).advance(elapsed_years, 'year')
        selected = ic.select(band_name).selfMask()
        return selected.setMulti({'system:time_start': stamp.millis(), 'date':k})
    return key_to_image


def prepocessIc(img):
    """Turn one joined presence image into a two-band (dated, neighbours) image.

    Args:
        img: image produced by the save-all join; it carries a ``date``
            property (numeric string) and an ``inner`` property holding the
            list of matched images.

    Returns:
        An image with two bands: the input multiplied by its ``date`` value,
        and the per-pixel max over the ``inner`` images, renamed to
        ``'B<date>_mx'``. All properties of the input image are copied onto
        the result.
    """
    def wrap(i):
        # Give every matched image the same band name so they can be reduced.
        return ee.Image(i).rename('ig')

    source = ee.Image(img)
    name = ee.String('B').cat(ee.String(source.get('date')).replace('[.]', '_'))
    dated = source.multiply(ee.Number.parse(source.get('date')))
    matched = ee.ImageCollection.fromImages(ee.List(source.get('inner')).map(wrap))
    combined = dated.addBands(matched.reduce('max').rename(name.cat('_mx')))
    # Copy metadata from the input image. The original reassigned `img` to
    # the new image first, so copyProperties copied that image onto itself.
    return combined.copyProperties(source, source.propertyNames())


# Frequency histogram of `changes` over the AOI, keyed by band name.
keys = changes.reduceRegion(
    reducer=ee.Reducer.frequencyHistogram(),
    geometry=aoi,
    scale=30,
    maxPixels=1e13,
)
ks = keys.keys()
# dict2list reads `keys` while it is still the dictionary: the right-hand side
# is built before `keys` is rebound to the flat list of distinct break values.
keys = ee.List(ee.List(ks).iterate(dict2list, ee.List([]))).flatten().distinct()
# One presence band per distinct break value; slice(1) discards the first
# band, presumably the one contributed by the ee.Image() seed — TODO confirm.
bands = ee.Image(keys.iterate(addBands, ee.Image())).slice(1)
ic = keys.map(b2ic(bands))

# An earlier variant of this cell used a 20-day window and OR-ed two
# identical maxDifference filters.

# NOTE: despite the name, this is 30 days expressed in milliseconds.
twoDaysMillis = 30 * 24 * 60 * 60 * 1000

# Match images whose timestamps differ by at most the window AND whose
# left-hand timestamp is greater than or equal to the right-hand one.
timeFilter = ee.Filter.And(
    ee.Filter.maxDifference(
        difference=twoDaysMillis,
        leftField='system:time_start',
        rightField='system:time_start',
    ),
    ee.Filter.greaterThanOrEquals(
        leftField='system:time_start',
        rightField='system:time_start',
    ),
)

# Store every match of an image in its 'inner' property, ordered by time.
saveAllJoin = ee.Join.saveAll(
    matchesKey='inner',
    ordering='system:time_start',
    ascending=True,
)

IC = saveAllJoin.apply(ic, ic, timeFilter)
# print(IC.first().getInfo())

def get_dates(c, p):
    """Accumulator for ``iterate``: append the band names of the images joined to ``c``.

    Args:
        c: image carrying an ``inner`` property with its matched images.
        p: state accumulated so far (treated as an ``ee.List``).

    Returns:
        The accumulated list with one more element: the band names obtained
        by stacking the ``inner`` images with ``toBands()``.
    """
    matched = ee.ImageCollection.fromImages(ee.Image(c).get('inner'))
    return ee.List(p).add(matched.toBands().bandNames())

# print(ee.List(IC.iterate(get_dates, ee.List([]))).slice(1).getInfo())
IC = IC.map(prepocessIc)
Bands = ee.ImageCollection(IC).toBands()
# print(Bands.bandNames().getInfo())
# prepocessIc emits two bands per image, so even positions hold the dated
# break band and odd positions the '_mx' band.
bs1 = ee.List.sequence(0, Bands.bandNames().size().subtract(1), 2)
bs2 = ee.List.sequence(1, Bands.bandNames().size().subtract(1), 2)
bands1 = Bands.select(bs1)
# Number of unmasked '_mx' pixels inside a square kernel of radius 50.
bands2 = Bands.select(bs2).reduceNeighborhood(reducer=ee.Reducer.count(),
                                            kernel=ee.Kernel.square(50),
                                            optimization='boxcar')


def _stable_vs_changed(hist):
    """Split a value->count histogram into (count of '0.0', count of everything else)."""
    stable = hist.get('0.0', 0)
    return stable, np.sum(list(hist.values())) - stable


# Sweep the neighbourhood-count threshold and print precision, recall, F1 and
# accuracy for each value.
for size in range(1000, 1400, 100)[:]:
    print(size)
    # Keep a break only where its neighbourhood count is below the threshold.
    bandsx = bands1.multiply(bands2.lt(size))
    bandsx = bandsx.selfMask().toDouble().reduce('max')

    # 0 where nothing was kept, otherwise the kept break value.
    ig = ee.Image(0).reproject(bandsx.projection()).where(bandsx.gt(0), bandsx)

    # Histogram of `ig` inside every polygon of the `forested` collection.
    ids, deforested0 = [], []
    for n in ids_forested[:]:
        test = forested.filter(ee.Filter.eq('id', n)).first()
        val = ig.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
        ids.append(n)
        deforested0.append(val.getInfo())

    # Same for the `deforested` collection.
    # NOTE(review): this loop also iterates ids_forested — confirm the two
    # collections share ids, otherwise ids_deforested was probably intended.
    ids, deforested1 = [], []
    for n in ids_forested[:]:
        test = deforested.filter(ee.Filter.eq('id', n)).first()
        val = ig.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
        ids.append(n)
        deforested1.append(val.getInfo())

    # Forested polygons predicted stable: fewer changed than unchanged pixels.
    # A missing '0.0' key counts as zero unchanged pixels; the original raised
    # KeyError for a histogram with a single non-zero key.
    data0 = sum(1 for stable, changed in map(_stable_vs_changed, deforested0) if changed < stable)
    # Deforested polygons predicted changed: more changed than unchanged pixels.
    data1 = sum(1 for stable, changed in map(_stable_vs_changed, deforested1) if changed > stable)

    # Class sizes come from the data instead of a hard-coded 100, and the
    # counts are plain ints so np.repeat accepts them even when they are zero.
    n_stable, n_changed = len(deforested0), len(deforested1)
    pred = np.hstack([np.repeat(0, data0), np.repeat(1, n_stable - data0), np.repeat(1, data1), np.repeat(0, n_changed - data1)])
    obs = np.hstack([np.repeat(0, n_stable), np.repeat(1, n_changed)])

    print(precision_score(obs, pred), recall_score(obs, pred), f1_score(obs, pred), accuracy_score(obs, pred))

In [None]:
# Recompute the metrics from the histograms left in deforested0/deforested1.
# Each histogram becomes (unchanged pixels, changed pixels); a missing '0.0'
# key counts as zero unchanged pixels. The original raised KeyError when a
# histogram held a single non-zero key.
data0 = [(n.get('0.0', 0), np.sum(list(n.values())) - n.get('0.0', 0)) for n in deforested0]
# Plain int counts, so np.repeat accepts them even when nothing qualifies.
data0 = sum(1 for n in data0 if n[1] < n[0])

data1 = [(n.get('0.0', 0), np.sum(list(n.values())) - n.get('0.0', 0)) for n in deforested1]
data1 = sum(1 for n in data1 if n[1] > n[0])

# Class sizes are taken from the data instead of a hard-coded 100.
pred = np.hstack([np.repeat(0, data0), np.repeat(1, len(deforested0) - data0), np.repeat(1, data1), np.repeat(0, len(deforested1) - data1)])
obs = np.hstack([np.repeat(0, len(deforested0)), np.repeat(1, len(deforested1))])

print(precision_score(obs, pred), recall_score(obs, pred), f1_score(obs, pred), accuracy_score(obs, pred))

In [None]:
[(n['0.0'], 0) if (len(list(n.keys())) == 1) else (n['0.0'], np.sum(list(n.values())[1:])) if '0.0' in n.keys() else (0, np.sum(list(n.values())[1:])) for n in deforested0]

In [None]:
['x' for n in deforested0 if '0.0' in n.keys()]

In [None]:
# Client-side list with the 'ID' property of every feature in `reference`.
reference_id = reference.aggregate_array('ID').getInfo()

In [None]:
def dict2list(c, p):
    """Accumulator for ``ee.List.iterate``: collect the histogram keys of one entry.

    Reads the module-level ``keys``, which must still be the dictionary
    returned by ``reduceRegion`` when this function is invoked.

    Args:
        c: current element, a key of ``keys``.
        p: state accumulated so far (treated as an ``ee.List``).

    Returns:
        The accumulated list with the key list of ``keys[c]`` appended as
        one element.
    """
    entry = ee.Dictionary(keys.get(c))
    collected = ee.List(p)
    return collected.add(entry.keys())


def addBands(c, p):
    """Accumulator for ``iterate``: append a presence band for break value ``c``.

    Reads the module-level ``changes`` image.

    Args:
        c: break value as a numeric string.
        p: image accumulated so far.

    Returns:
        ``p`` with one more band, named ``'B'`` followed by ``c`` with its
        ``'.'`` replaced by ``'_'``. The band is the max, across the bands of
        ``changes``, of the self-masked test ``changes == c``.
    """
    band_name = ee.String('B').cat(ee.String(c).replace('[.]', '_'))
    matches = changes.eq(ee.Number.parse(c)).selfMask()
    presence = matches.reduce('max').rename(band_name)
    return ee.Image(p).addBands(presence)


def b2ic(ic):
    """Build a mapper that turns a break-value key into a time-stamped image.

    Args:
        ic: multi-band image holding one ``'B<value>'`` band per break value.

    Returns:
        A one-argument function. Given a key ``k`` (numeric string) it selects
        the band ``'B'`` + ``k`` (with ``'.'`` replaced by ``'_'``) from
        ``ic``, self-masks it, and sets ``system:time_start`` to 2000-01-01
        advanced by ``k - 2000`` years and ``date`` to ``k``.
    """
    def key_to_image(k):
        band_name = ee.String('B').cat(ee.String(k).replace('[.]', '_'))
        elapsed_years = ee.Number.parse(k).subtract(2000)
        stamp = ee.Date.fromYMD(2000, 1, 1).advance(elapsed_years, 'year')
        selected = ic.select(band_name).selfMask()
        return selected.setMulti({'system:time_start': stamp.millis(), 'date':k})
    return key_to_image


# Break values from to_year(), kept only where they exceed 2002
# (an earlier run used 2016).
changes = to_year(TStart)
changes = changes.updateMask(
    changes.gt(2002)
)

# Frequency histogram of `changes` over the AOI, keyed by band name.
keys = changes.reduceRegion(
    reducer=ee.Reducer.frequencyHistogram(),
    geometry=aoi,
    scale=30,
    maxPixels=1e13,
)
ks = keys.keys()
# dict2list reads `keys` while it is still the dictionary; afterwards `keys`
# is the flat list of distinct break values.
keys = ee.List(ee.List(ks).iterate(dict2list, ee.List([]))).flatten().distinct()
bands = ee.Image(keys.iterate(addBands, ee.Image())).slice(1)
ic = keys.map(b2ic(bands))
# Collapse the per-value presence images into a single max band.
ic = ee.ImageCollection.fromImages(ic).toBands().selfMask().toDouble().reduce('max')
# 0 everywhere, replaced by `ic` wherever `ic` is positive.
ig = ee.Image(0).reproject(ic.projection()).where(ic.gt(0), ic)

# Histogram of `ig` inside each reference polygon, in reference_id order.
ids = []
deforested0 = []
for n in reference_id:
    test = reference.filter(ee.Filter.eq('ID', n)).first()
    val = ig.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested0.append(val.getInfo())

# data = [(n['0.0'], 0) if (len(list(n.keys())) == 1) else (n['0.0'], np.sum(list(n.values())[1:])) if '0.0' in n.keys() else (0, np.sum(list(n.values())[1:])) for n in deforested0]
# data = np.sum([1 for n in data if n[1] < n[0]])

In [None]:
# Print every reference polygon's histogram with its NAME and cobDesde.
# deforested0[i] was computed for reference_id[i], so the feature is looked up
# by that ID; the original filtered on the loop position, which only matches
# when the IDs happen to be 0..N-1 in order.
for i, (ref_id, n) in enumerate(zip(reference_id, deforested0)):
    feature = reference.filter(ee.Filter.eq('ID', ref_id)).first()
    print(i, n,
          feature.get('NAME').getInfo(),
          feature.get('cobDesde').getInfo())

In [None]:
# Per reference polygon: pixel counts for values '0.0' and '1.0' of its
# histogram, plus the polygon's NAME and cobDesde properties.
# NOTE: this rebinds `changes` (an ee.Image in other cells) to a Python list.
no_changes, changes, name, lc = [], [], [], []
# deforested0[i] was computed for reference_id[i], so the feature is looked up
# by that ID; the original filtered on the loop position, which only matches
# when the IDs happen to be 0..N-1 in order.
for ref_id, n in zip(reference_id, deforested0):
    no_changes.append(n.get('0.0', 0))
    changes.append(n.get('1.0', 0))
    feature = reference.filter(ee.Filter.eq('ID', ref_id)).first()
    name.append(feature.get('NAME').getInfo())
    lc.append(feature.get('cobDesde').getInfo())


In [None]:
# Per-polygon table built from the four lists filled in the previous cell.
dfcha2 = pd.DataFrame(data={'changes':changes, 'no_change':no_changes, 'name':name, 'lc':lc})

In [None]:
dfcha2.iloc[:84][dfcha2['changes'] > dfcha2['no_change']]

In [None]:
dfcha.iloc[:84][dfcha['changes'] > dfcha['no_change']]

In [None]:
len(dfcha2.iloc[84:238][dfcha2['changes'] < dfcha2['no_change']])

In [None]:
dfcha.iloc[:84][dfcha['changes'] > dfcha['no_change']]

In [None]:
len(dfcha.iloc[84:238][dfcha['changes'] < dfcha['no_change']])

In [None]:
# Hand-entered counts. Index 0 of each pair feeds the zeros and index 1 the
# ones in the label vectors built below.
ref_estable = (84, 0)
estable = (77, 7)
ref_tala = (0, 148)
tala = (41, 107)

# Predicted labels in the order: estable -> 0, estable -> 1, tala -> 1, tala -> 0.
pred = np.repeat([0, 1, 1, 0], [estable[0], estable[1], tala[1], tala[0]])
obs = np.repeat([0, 1], [ref_estable[0], ref_tala[1]])
print(*(score(obs, pred) for score in (precision_score, recall_score, f1_score, accuracy_score)))

In [None]:
# Same metric computation for two hand-entered subsets; one line is printed
# per subset and the names keep the values of the last one.
for ref_estable, estable, ref_tala, tala in (
    ((65, 0), (60, 5), (0, 63), (17, 46)),  # native
    ((19, 0), (17, 2), (0, 75), (14, 61)),  # plantation
):
    # Predicted labels in the order: estable -> 0, estable -> 1, tala -> 1, tala -> 0.
    pred = np.repeat([0, 1, 1, 0], [estable[0], estable[1], tala[1], tala[0]])
    obs = np.repeat([0, 1], [ref_estable[0], ref_tala[1]])
    print(*(score(obs, pred) for score in (precision_score, recall_score, f1_score, accuracy_score)))

In [None]:
# Client-side lists of the 'NAME' and 'Fultima' properties of every feature
# in `reference`.
types = reference.aggregate_array('NAME').getInfo()
last = reference.aggregate_array('Fultima').getInfo()

In [None]:
# (unchanged pixels, changed pixels) for every histogram in deforested0; a
# histogram without a '0.0' key has zero unchanged pixels.
data1 = [
    (h['0.0'] if '0.0' in h else 0,
     np.sum(list(h.values())) - (h['0.0'] if '0.0' in h else 0))
    for h in deforested0
]
# Number of polygons with more changed than unchanged pixels.
data1 = np.sum([1 for unchanged, changed in data1 if changed > unchanged])

In [None]:
# Number of reference features whose NAME is 'tala'.
# NOTE(review): the variable is called `sequia` although the filter selects
# 'tala' — confirm which class is intended.
sequia = reference.filter(ee.Filter.eq('NAME', 'tala'))
sequia.size().getInfo()

In [None]:
# Hand-entered count pairs.
# NOTE(review): the first line unpacks into two names, whereas `tala` and
# `sequia` are each bound to a tuple of two tuples — confirm this is intended.
# `sequia` is also rebound here from the value assigned in another cell.
estable, ref_estable = (78, 6), (84, 0)
tala = (28, 100), (0, 128)
sequia = (37, 54), (0, 91)


In [None]:
np.repeat(0, 78)

In [None]:
# Label vectors from hand-entered counts: three consecutive groups of
# 84, 128 and 91 observations, each split into predicted 0s then 1s.
pred = np.repeat([0, 1, 0, 1, 0, 1], [78, 6, 28, 100, 37, 54])
obs = np.repeat([0, 1, 1], [84, 128, 91])

print(*(score(obs, pred) for score in (precision_score, recall_score, f1_score, accuracy_score)))

In [None]:
# Label vectors for two classes of 100 polygons each, built from the counts
# currently held in data0 and data1.
pred = np.hstack([
    np.repeat(0, data0),
    np.repeat(1, 100-data0),
    np.repeat(1, data1),
    np.repeat(0, 100-data1),
])
obs = np.hstack([
    np.repeat(0, 100),
    np.repeat(1, 100),
])

print(*(score(obs, pred) for score in (precision_score, recall_score, f1_score, accuracy_score)))

In [None]:
len([(n, types[i], pd.to_datetime(last[i], format='%d/%m/%Y')) for i, n in enumerate(deforested0) if (types[i] == 'tala') & (pd.to_datetime(last[i], format='%d/%m/%Y') > pd.to_datetime('2016-01-01'))])

In [None]:
def dict2list(c, p):
    """Accumulator for ``ee.List.iterate``: collect the histogram keys of one entry.

    Reads the module-level ``keys``, which must still be the dictionary
    returned by ``reduceRegion`` when this function is invoked.

    Args:
        c: current element, a key of ``keys``.
        p: state accumulated so far (treated as an ``ee.List``).

    Returns:
        The accumulated list with the key list of ``keys[c]`` appended as
        one element.
    """
    entry = ee.Dictionary(keys.get(c))
    collected = ee.List(p)
    return collected.add(entry.keys())

# Break dates converted by to_year(), kept only where the value exceeds 2016
# and where the land-cover image LC is one of 212, 241, 251, 410 or 420.
changes = to_year(TStart)
changes = changes.updateMask(
    changes.gt(2016)
)
changes = changes.updateMask(
    LC.eq(212).Or(LC.eq(241).Or(LC.eq(251)).Or(LC.eq(410)).Or(LC.eq(420)))
)


def addBands(c, p):
    """Accumulator for ``iterate``: append a presence band for break value ``c``.

    Reads the module-level ``changes`` image.

    Args:
        c: break value as a numeric string.
        p: image accumulated so far.

    Returns:
        ``p`` with one more band, named ``'B'`` followed by ``c`` with its
        ``'.'`` replaced by ``'_'``. The band is the max, across the bands of
        ``changes``, of the self-masked test ``changes == c``.
    """
    band_name = ee.String('B').cat(ee.String(c).replace('[.]', '_'))
    matches = changes.eq(ee.Number.parse(c)).selfMask()
    presence = matches.reduce('max').rename(band_name)
    return ee.Image(p).addBands(presence)


def b2ic(ic):
    """Build a mapper that turns a break-value key into a time-stamped image.

    Args:
        ic: multi-band image holding one ``'B<value>'`` band per break value.

    Returns:
        A one-argument function. Given a key ``k`` (numeric string) it selects
        the band ``'B'`` + ``k`` (with ``'.'`` replaced by ``'_'``) from
        ``ic``, self-masks it, and sets ``system:time_start`` to 2016-01-01
        advanced by ``k - 2016`` years and ``date`` to ``k``.
    """
    def key_to_image(k):
        band_name = ee.String('B').cat(ee.String(k).replace('[.]', '_'))
        elapsed_years = ee.Number.parse(k).subtract(2016)
        stamp = ee.Date.fromYMD(2016, 1, 1).advance(elapsed_years, 'year')
        selected = ic.select(band_name).selfMask()
        return selected.setMulti({'system:time_start': stamp.millis(), 'date':k})
    return key_to_image


def prepocessIc(img):
    """Turn one joined presence image into a two-band (dated, neighbours) image.

    Args:
        img: image produced by the save-all join; it carries a ``date``
            property (numeric string) and an ``inner`` property holding the
            list of matched images.

    Returns:
        An image with two bands: the input multiplied by its ``date`` value,
        and the per-pixel max over the ``inner`` images, renamed to
        ``'B<date>_mx'``. All properties of the input image are copied onto
        the result.
    """
    def wrap(i):
        # Give every matched image the same band name so they can be reduced.
        return ee.Image(i).rename('ig')

    source = ee.Image(img)
    name = ee.String('B').cat(ee.String(source.get('date')).replace('[.]', '_'))
    dated = source.multiply(ee.Number.parse(source.get('date')))
    matched = ee.ImageCollection.fromImages(ee.List(source.get('inner')).map(wrap))
    combined = dated.addBands(matched.reduce('max').rename(name.cat('_mx')))
    # Copy metadata from the input image. The original reassigned `img` to
    # the new image first, so copyProperties copied that image onto itself.
    return combined.copyProperties(source, source.propertyNames())


# Frequency histogram of `changes` over the AOI, keyed by band name.
keys = changes.reduceRegion(reducer=ee.Reducer.frequencyHistogram(),
                            geometry=aoi,
                            scale=30,
                            maxPixels=1e13)
ks = keys.keys()
# dict2list reads `keys` while it is still the dictionary; afterwards `keys`
# is the flat list of distinct break values.
keys = ee.List(ee.List(ks).iterate(dict2list, ee.List([]))).flatten().distinct()
bands = ee.Image(keys.iterate(addBands, ee.Image())).slice(1)
ic = keys.map(b2ic(bands))

# NOTE: despite the name, this is 20 days expressed in milliseconds.
twoDaysMillis = 20 * 24 * 60 * 60 * 1000

# NOTE(review): both operands of the Or are the same filter, so this is
# equivalent to a single maxDifference filter.
timeFilter = ee.Filter.Or(ee.Filter.maxDifference(difference=twoDaysMillis,
                                                leftField='system:time_start',
                                                rightField='system:time_start'),
                        ee.Filter.maxDifference(difference=twoDaysMillis,
                                                leftField='system:time_start',
                                                rightField='system:time_start'))

# Store every match of an image in its 'inner' property, ordered by time.
saveAllJoin = ee.Join.saveAll(matchesKey='inner',
                            ordering='system:time_start',
                            ascending=True)

IC = saveAllJoin.apply(ic, ic, timeFilter)
IC = IC.map(prepocessIc)
Bands = ee.ImageCollection(IC).toBands()

# prepocessIc emits two bands per image, so even positions hold the dated
# break band and odd positions the '_mx' band.
bs1 = ee.List.sequence(0, Bands.bandNames().size().subtract(1), 2)
bs2 = ee.List.sequence(1, Bands.bandNames().size().subtract(1), 2)
bands1 = Bands.select(bs1)
# Number of unmasked '_mx' pixels inside a square kernel of radius 50.
bands2 = Bands.select(bs2).reduceNeighborhood(reducer=ee.Reducer.count(),
                                            kernel=ee.Kernel.square(50),
                                            optimization='boxcar')
# for size in range(100, 2501, 200):
#     print(size)
# Keep a break only where its neighbourhood count is below 2500.
bandsx = bands1.multiply(bands2.lt(2500))
bandsx = bandsx.selfMask().toDouble().reduce('max')

# 0 where nothing was kept, otherwise the kept break value.
ig = ee.Image(0).reproject(bandsx.projection()).where(bandsx.gt(0), bandsx)
# Starts an asset export task every time this cell runs.
ee.batch.Export.image.toAsset(image=ig,
                              maxPixels=1e13,
                              scale=30,
                              assetId='users/ignaciofuentessanroman/filteredCCDC',
                              description='filteredCCDC_export',
                              region=aoi.geometry()).start()

# Histogram of `ig` inside every polygon of the `forested` collection.
ids, deforested0 = [], []
for n in ids_forested[:]:
    test = forested.filter(ee.Filter.eq('id', n)).first()
    val = ig.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested0.append(val.getInfo())

# Same for the `deforested` collection.
# NOTE(review): this loop also iterates ids_forested — confirm the two
# collections share ids, otherwise ids_deforested was probably intended.
ids, deforested1 = [], []
for n in ids_forested[:]:
    test = deforested.filter(ee.Filter.eq('id', n)).first()
    val = ig.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested1.append(val.getInfo())

# Each histogram becomes (unchanged pixels, changed pixels); a missing '0.0'
# key counts as zero unchanged pixels. The original indexed '0.0'
# unconditionally and raised KeyError for fully changed polygons.
data0 = [(n.get('0.0', 0), np.sum(list(n.values())) - n.get('0.0', 0)) for n in deforested0]
# Plain int counts, so np.repeat accepts them even when nothing qualifies.
data0 = sum(1 for n in data0 if n[1] < n[0])

data1 = [(n.get('0.0', 0), np.sum(list(n.values())) - n.get('0.0', 0)) for n in deforested1]
data1 = sum(1 for n in data1 if n[1] > n[0])

# Class sizes are taken from the data instead of a hard-coded 100.
pred = np.hstack([np.repeat(0, data0), np.repeat(1, len(deforested0) - data0), np.repeat(1, data1), np.repeat(0, len(deforested1) - data1)])
obs = np.hstack([np.repeat(0, len(deforested0)), np.repeat(1, len(deforested1))])

print(precision_score(obs, pred), recall_score(obs, pred), f1_score(obs, pred), accuracy_score(obs, pred))

In [None]:
[(n['0.0'], np.sum(list(n.values())) - n['0.0']) if '0.0' in n.keys() else (0, np.sum(list(n.values()))) for n in deforested1]

In [None]:
deforested1

In [None]:
# def process(k):
#     name = ee.String('B').cat(ee.String(k).replace('[.]', '_'))
#     # name1 = ee.String('B').cat(ee.String(k).replace('[.]', '_')).cat('_count')
#     img = bands.select(name).multiply(ee.Number.parse(k)).selfMask()#.where(bands.select(name1).gt(100), 0)
#     return img.toDouble().rename('ccdc')
# ig = ee.ImageCollection.fromImages(keys.map(process))

In [None]:
# Interactive map centred on one `deforested` polygon, with three overlays:
# `length`, `bands1` (rendered 2016-2022.5) and the polygon itself.
# NOTE: `id` and `map` shadow Python builtins for the rest of the session.
fea = deforested.filter(ee.Filter.eq('id', ids_deforested[22])).first()
geo = ee.Feature(fea)
id = geo.getMapId()
length_id = length.getMapId({'min': 1, 'max': 5, 'palette': '0000FF, 00FFFF, 00FF00, FFFF00, FF0000'})
years_id = bands1.getMapId({'min': 2016, 'max': 2022.5, 'palette': 'FF0000, FFFF00, 00FF00, 00FFFF, 0000FF'})
# Requested but not added to the map (a 'defo_polys' layer was disabled).
defo_polys = deforested.getMapId()
# GeoJSON coordinates are reversed before being passed as the map location.
centroid = geo.centroid().getInfo()['geometry']['coordinates'][::-1]
map = folium.Map(location=centroid, zoom_start=16)
for layer_name, map_id in (
    ('length_breaks', length_id),
    ('last_break', years_id),
    ('feature', id),
):
    folium.TileLayer(
        tiles=map_id['tile_fetcher'].url_format,
        attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
        overlay=True,
        name=layer_name,
    ).add_to(map)
map.add_child(folium.LayerControl())
map


In [None]:
# 0 everywhere, replaced by `bands1` wherever `bands1` is positive.
ig = ee.Image(0).reproject(bands1.projection()).where(bands1.gt(0), bands1)
# Histogram of `ig` inside each `forested` polygon; only the first entry of
# the reduceRegion result is kept.
ids = []
deforested0 = []
for n in ids_forested:
    test = forested.filter(ee.Filter.eq('id', n)).first()
    val = ig.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested0.append(val.getInfo())

In [None]:
deforested0

In [None]:
[n for n in deforested0 if (len(list(n.keys())) == 1)]

In [None]:
# 0 everywhere, replaced by `bands1` wherever `bands1` is positive.
ig = ee.Image(0).reproject(bands1.projection()).where(bands1.gt(0), bands1)
# Histogram of `ig` inside each `deforested` polygon; only the first entry of
# the reduceRegion result is kept. The ids iterated are those in ids_forested.
ids = []
deforested1 = []
for n in ids_forested:
    test = deforested.filter(ee.Filter.eq('id', n)).first()
    val = ig.reduceRegion(ee.Reducer.frequencyHistogram(), test.geometry(), 30).values().get(0)
    ids.append(n)
    deforested1.append(val.getInfo())

In [None]:
# (count of '0.0', count of every other value) per histogram in deforested1;
# a histogram without a '0.0' key contributes 0 as its first element.
data = [(n['0.0'], np.sum(list(n.values())) - n['0.0']) if '0.0' in n.keys() else (0, np.sum(list(n.values()))) for n in deforested1]

In [None]:
# (count of '0.0', count of every other value) per histogram in deforested1;
# a histogram without a '0.0' key contributes 0 as its first element.
data = [(n['0.0'], np.sum(list(n.values())) - n['0.0']) if '0.0' in n.keys() else (0, np.sum(list(n.values()))) for n in deforested1]
# Number of polygons whose second count exceeds the first.
np.sum([1 for n in data if n[1] > n[0]])

In [None]:
[1 for n in data if n[1] > n[0]]

In [None]:
import datetime


def get_days(df, origin='2016-01-01'):
    '''Build a converter from a break index to days elapsed since `origin`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose index holds the break indices and whose ``date`` column
        holds the matching timestamps.
    origin : str or datetime-like, optional
        Reference date the day offsets are measured from (default 2016-01-01).

    Returns
    -------
    callable
        ``f(x)`` giving the whole days between ``origin`` and
        ``df.loc[x, 'date']`` as a float when ``x > 0``, and ``np.nan``
        otherwise.
    '''
    # Parse the reference date once instead of on every call.
    origin = pd.to_datetime(origin)

    def to_days(x):
        if x > 0:
            # Always return a float so both branches share one output type.
            return float((df.loc[x, 'date'] - origin).days)
        return np.nan

    return to_days

In [None]:
# Re-index the dates from position `ix_anom` onwards by their 'ix' column and
# convert every entry of `breaks` into a day offset (NaN where get_days
# returns NaN).
df_dates_anom = df_dates.reset_index().iloc[ix_anom:].set_index('ix')
vfunc = np.vectorize(get_days(df_dates_anom))
days = vfunc(breaks)

# Four evenly spaced tick values over the valid day offsets. NaN-aware
# reductions are used instead of dropping the last sorted unique value, which
# only worked when exactly one NaN was present.
values = np.linspace(np.nanmin(days), np.nanmax(days), 4)
dates = [pd.to_datetime('2016-01-01') + datetime.timedelta(n) for n in values]

In [None]:
# Load the ten `ccdcS2corrTStart*` image assets; they are stored under two
# different user folders.
image = ee.Image("users/ignaciofuentessanroman/ccdcS2corrTStart13_1")
image2 = ee.Image("users/ignaciofuentessanroman/ccdcS2corrTStart6_1")
image3 = ee.Image("users/ignaciofuentessanroman/ccdcS2corrTStart7_2")
image4 = ee.Image("users/ignaciofuentessanroman/ccdcS2corrTStart7_5")
image5 = ee.Image('users/ignisfausto/ccdcS2corrTStart7_1')
# NOTE: image7 is assigned before image6.
image7 = ee.Image('users/ignisfausto/ccdcS2corrTStart7_3')
image6 = ee.Image('users/ignisfausto/ccdcS2corrTStart13_1')
image8 = ee.Image('users/ignisfausto/ccdcS2corrTStart7_4')
image9 = ee.Image('users/ignisfausto/ccdcS2corrTStart13_2')
image10 = ee.Image('users/ignisfausto/ccdcS2corrTStart5_1')

In [None]:
# Mosaic the ten tStart tiles into a single image.
TStartS2 = ee.ImageCollection([image, image2, image3, image4, image5, image6, image7, image8, image9, image10]).mosaic()
# NOTE(review): presumably a 2014 land-cover map of Chile, judging by the asset name - confirm.
LC = ee.Image('users/ignaciofuentessanroman/LC_CHILE_2014_b')

In [None]:
# Per-pixel 'count' reduction across the bands of the mosaic, cast to integer.
lengthS2 = TStartS2.reduce('count').toInt()
# Per-pixel 'max' reduction across the bands, passed through `to_year`
# (helper defined elsewhere) and then a focal-mode filter.
lastS2 = to_year(TStartS2.reduce('max')).focal_mode()

In [None]:
# Interactive map centred on one deforested polygon (entry 22 of
# `ids_deforested`) with the `lengthS2` and `lastS2` layers on top.
fea = deforested.filter(ee.Filter.eq('id', ids_deforested[22])).first()

geo = ee.Feature(fea)
id = geo.getMapId()
length_id = lengthS2.getMapId({
    'min': 1,
    'max': 5,
    'palette': '0000FF, 00FFFF, 00FF00, FFFF00, FF0000',
})
years_id = lastS2.getMapId({
    'min': 2016,
    'max': 2022.5,
    'palette': 'FF0000, FFFF00, 00FF00, 00FFFF, 0000FF',
})
defo_polys = deforested.getMapId()
# The coordinate pair is reversed before being handed to folium as `location`.
centroid = geo.centroid().getInfo()['geometry']['coordinates'][::-1]
map = folium.Map(location=centroid, zoom_start=16)

_ee_attr = 'Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>'
_layers = (
    (length_id, 'length_breaks'),
    (years_id, 'last_break'),
    (id, 'feature'),
)
for _tiles, _layer_name in _layers:
    folium.TileLayer(
        tiles=_tiles['tile_fetcher'].url_format,
        attr=_ee_attr,
        overlay=True,
        name=_layer_name,
    ).add_to(map)
# folium.TileLayer(
#     tiles=defo_polys['tile_fetcher'].url_format,
#     attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
#     overlay=True,
#     name='defo_polys',
#   ).add_to(map)
map.add_child(folium.LayerControl())
# Last expression of the cell: renders the map in the notebook output.
map

In [None]:
# Five-panel figure (A-E): map of the values returned by `to_year` (shown as
# dates on the colourbar) around each selected forested polygon.
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
# Horizontal position of the panel letter as a fraction of the image width;
# panels not listed use -0.8.
label_offsets = {0: -0.8, 1: -0.9, 2: -0.7, 4: -0.85}
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
for i, n in enumerate(ids):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # Only the second panel reads the 'ignaciofuentessanroman' asset.
    if i == 1:
        asset = 'users/ignaciofuentessanroman/ccdcS2corrTStart13_1'
    else:
        asset = 'users/ignisfausto/ccdcS2corrTStart13_1'
    sample = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(sample['properties']['constant']).astype('float')
    # Values below 2016.3 (including the -99 fill value) are blanked out.
    array[array < 2016.3] = np.nan

    # Colourbar ticks: four evenly spaced values over the valid pixels. NaNs
    # are removed explicitly rather than assumed to be the last unique value.
    values = np.unique(array[~np.isnan(array)])
    if len(values) > 1:
        values = np.linspace(values[0], values[-1], 4)
    else:
        values = np.linspace(2000.0081787109375, 2022.5, 4)

    # Convert fractional years to calendar dates (365-day year approximation).
    floors = [math.floor(v) for v in values]
    decimal = [v - year for v, year in zip(values, floors)]
    days = [365 * frac for frac in decimal]
    new_dates = [datetime.date(year, 1, 1) + datetime.timedelta(days=offset) for year, offset in zip(floors, days)]

    # Corner coordinates of the sampled rectangle, used for the axis labels.
    coords = np.array(buffered.coordinates().getInfo())[0, :, :]
    longs = coords[:, 0]
    lats = coords[:, 1]

    f1 = ax[i].imshow(array, vmin=np.nanmin(values), vmax=np.nanmax(values), cmap='viridis')
    cb = fig.colorbar(f1, ticks=values, ax=ax[i])
    cb.ax.set_yticklabels([d.strftime('%Y %m %d') for d in new_dates])
    cb.ax.tick_params(labelsize=10)
    ax[i].set_xticks(np.linspace(0, array.shape[1], 4))
    ax[i].set_xticklabels(labels=np.round(np.linspace(np.min(longs), np.max(longs), 4), 4))
    ax[i].set_yticks(np.linspace(0, array.shape[0], 4))
    ax[i].set_yticklabels(labels=np.round(np.linspace(np.max(lats), np.min(lats), 4), 4))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)
    ax[i].text(array.shape[1] * label_offsets.get(i, -0.8), 0, labs[i], fontsize=12, weight='bold')
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/ccdc_S2undbreaks.png', dpi=300)

In [None]:
# Write the sampled raster around each selected forested polygon to a GeoTIFF,
# reusing the metadata of an existing 'testing{label}sentinel.tif' file.
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']

for i, n in enumerate(ids):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # Only the second entry reads the 'ignaciofuentessanroman' asset.
    if i == 1:
        asset = 'users/ignaciofuentessanroman/ccdcS2corrTStart13_1'
    else:
        asset = 'users/ignisfausto/ccdcS2corrTStart13_1'
    sample = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(sample['properties']['constant']).astype('float')
    # Values below 2016.3 (including the -99 fill value) are blanked out.
    array[array < 2016.3] = np.nan
    # Read the template metadata inside a context manager so the dataset is
    # closed again instead of staying open for the rest of the session.
    with rio.open('/content/drive/MyDrive/testing{}sentinel.tif'.format(labs[i])) as src:
        meta_out = src.meta
    with rio.open('/content/drive/MyDrive/{}_{}.tif'.format('defoCCDC', '{}sentinel'.format(labs[i])), "w", **meta_out) as dest:
        # The writer receives a (band, row, col) array with a single band.
        dest.write(array.reshape(1, array.shape[0], array.shape[1]))

In [None]:
# Five-panel figure: histogram of the dates derived from the sampled raster
# around each selected forested polygon.
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
for i, n in enumerate(ids):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # Only the second panel reads the 'ignaciofuentessanroman' asset.
    if i == 1:
        asset = 'users/ignaciofuentessanroman/ccdcS2corrTStart13_1'
    else:
        asset = 'users/ignisfausto/ccdcS2corrTStart13_1'
    sample = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(sample['properties']['constant']).astype('float')
    # Values below 2016.3 (including the -99 fill value) are blanked out.
    array[array < 2016.3] = np.nan

    values = array.ravel()
    values = values[~np.isnan(values)]

    # Convert fractional years to calendar dates (365-day year approximation).
    floors = [math.floor(v) for v in values]
    decimal = [v - year for v, year in zip(values, floors)]
    days = [365 * frac for frac in decimal]
    new_dates = [datetime.date(year, 1, 1) + datetime.timedelta(days=offset) for year, offset in zip(floors, days)]

    ax[i].hist(new_dates, bins=30)
    start, end = ax[i].get_xlim()
    ax[i].set_ylabel('Frequency', fontsize=11)
    ax[i].tick_params(axis='both', labelsize=10)
    # The x-limits are already in the axis' own date units, so six evenly
    # spaced ticks are placed directly in that unit. Adding them to
    # date(1, 1, 1) is only valid for one particular Matplotlib date epoch
    # and needed names imported in a later cell.
    ax[i].set_xticks(np.linspace(start, end, 6))
    ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax[i].tick_params(axis='x', rotation=30)
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/ccdc_S2undhist.png', dpi=300)

In [None]:
from datetime import date, timedelta
# Scratch check: interpret the x-limit `start` as a day offset from 0001-01-01.
# NOTE(review): whether that matches the axis units depends on the Matplotlib
# date epoch in use - confirm.
date(1,1,1) + timedelta(days=start)


In [None]:
# Five-panel figure (F-J): map of the values returned by `to_year` (shown as
# dates on the colourbar) around each selected deforested polygon.
labs = ['F', 'G', 'H', 'I', 'J']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
# Horizontal position of the panel letter as a fraction of the image width;
# panels not listed use -0.6.
label_offsets = {0: -1.05, 1: -0.85, 2: -0.72, 3: -0.7}
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first two panels read the 'ignaciofuentessanroman' asset.
    if i in (0, 1):
        asset = 'users/ignaciofuentessanroman/ccdcS2corrTStart13_1'
    else:
        asset = 'users/ignisfausto/ccdcS2corrTStart13_1'
    sample = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(sample['properties']['constant']).astype('float')
    # Values below 2016.3 (including the -99 fill value) are blanked out.
    array[array < 2016.3] = np.nan

    # Colourbar ticks: four evenly spaced values over the valid pixels. NaNs
    # are removed explicitly rather than assumed to be the last unique value.
    values = np.unique(array[~np.isnan(array)])
    if len(values) > 1:
        values = np.linspace(values[0], values[-1], 4)
    else:
        values = np.linspace(2000.0081787109375, 2022.5, 4)

    # Convert fractional years to calendar dates (365-day year approximation).
    floors = [math.floor(v) for v in values]
    decimal = [v - year for v, year in zip(values, floors)]
    days = [365 * frac for frac in decimal]
    new_dates = [datetime.date(year, 1, 1) + datetime.timedelta(days=offset) for year, offset in zip(floors, days)]

    # Corner coordinates of the sampled rectangle, used for the axis labels.
    coords = np.array(buffered.coordinates().getInfo())[0, :, :]
    longs = coords[:, 0]
    lats = coords[:, 1]

    f1 = ax[i].imshow(array, vmin=np.nanmin(values), vmax=np.nanmax(values), cmap='viridis')
    cb = fig.colorbar(f1, ticks=values, ax=ax[i])
    cb.ax.set_yticklabels([d.strftime('%Y %m %d') for d in new_dates])
    cb.ax.tick_params(labelsize=10)
    ax[i].set_xticks(np.linspace(0, array.shape[1], 4))
    ax[i].set_xticklabels(labels=np.round(np.linspace(np.min(longs), np.max(longs), 4), 4))
    ax[i].set_yticks(np.linspace(0, array.shape[0], 4))
    ax[i].set_yticklabels(labels=np.round(np.linspace(np.max(lats), np.min(lats), 4), 4))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)
    ax[i].text(array.shape[1] * label_offsets.get(i, -0.6), 0, labs[i], fontsize=12, weight='bold')
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/ccdc_S2defobreaks.png', dpi=300)

In [None]:
# Write the sampled raster around each selected deforested polygon to a
# GeoTIFF, reusing the metadata of an existing 'testing{label}sentinel.tif'.
labs = ['F', 'G', 'H', 'I', 'J']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']

for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first two entries read the 'ignaciofuentessanroman' asset.
    if i in (0, 1):
        asset = 'users/ignaciofuentessanroman/ccdcS2corrTStart13_1'
    else:
        asset = 'users/ignisfausto/ccdcS2corrTStart13_1'
    sample = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(sample['properties']['constant']).astype('float')
    # Values below 2016.3 (including the -99 fill value) are blanked out.
    array[array < 2016.3] = np.nan
    # Read the template metadata inside a context manager so the dataset is
    # closed again instead of staying open for the rest of the session.
    with rio.open('/content/drive/MyDrive/testing{}sentinel.tif'.format(labs[i])) as src:
        meta_out = src.meta
    with rio.open('/content/drive/MyDrive/{}_{}.tif'.format('defoCCDC', '{}sentinel'.format(labs[i])), "w", **meta_out) as dest:
        # The writer receives a (band, row, col) array with a single band.
        dest.write(array.reshape(1, array.shape[0], array.shape[1]))

In [None]:
# Five-panel figure: histogram of the dates derived from the sampled raster
# around each selected deforested polygon.
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    # The first two panels read the 'ignaciofuentessanroman' asset.
    if i in (0, 1):
        asset = 'users/ignaciofuentessanroman/ccdcS2corrTStart13_1'
    else:
        asset = 'users/ignisfausto/ccdcS2corrTStart13_1'
    sample = to_year(ee.Image(asset).reduce('max').toFloat()).focal_mode().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    array = np.array(sample['properties']['constant']).astype('float')
    # Values below 2016.3 (including the -99 fill value) are blanked out.
    array[array < 2016.3] = np.nan

    values = array.ravel()
    values = values[~np.isnan(values)]

    # Convert fractional years to calendar dates (365-day year approximation).
    floors = [math.floor(v) for v in values]
    decimal = [v - year for v, year in zip(values, floors)]
    days = [365 * frac for frac in decimal]
    new_dates = [datetime.date(year, 1, 1) + datetime.timedelta(days=offset) for year, offset in zip(floors, days)]

    ax[i].hist(new_dates, bins=30)
    start, end = ax[i].get_xlim()
    ax[i].set_ylabel('Frequency', fontsize=11)
    ax[i].tick_params(axis='both', labelsize=10)
    # The x-limits are already in the axis' own date units, so six evenly
    # spaced ticks are placed directly in that unit. Adding them to
    # date(1, 1, 1) is only valid for one particular Matplotlib date epoch.
    ax[i].set_xticks(np.linspace(start, end, 6))
    ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    ax[i].tick_params(axis='x', rotation=30)
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/ccdc_S2defohist.png', dpi=300)

In [None]:
# Display `bksMosaic` (defined in an earlier cell).
bksMosaic

In [None]:
# For every forested polygon: scatter the sampled NDVI series and mark the
# dates derived from the TStartS2 mosaic with dashed vertical lines.
for n in ids_forested:
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    S2 = (
        getS2_CLOUD_PROBABILITY(fea.geometry())
        .filterDate('2016-01-01', '2022-06-01')
        .map(masking)
    )

    data = S2.select('ndvi').map(sampleS2(fea)).getInfo()
    dates = [pd.to_datetime(feat['properties']['date']) for feat in data['features']]
    # Features without an 'ndvi' property contribute None.
    ndvi = [feat['properties'].get('ndvi') for feat in data['features']]

    # Mean of every band of the focal-mode-filtered `to_year(TStartS2)` image
    # over the polygon at 10 m scale.
    sample = to_year(TStartS2).focal_mode().reduceRegion(
        reducer='mean', geometry=fea.geometry(), scale=10
    ).getInfo()
    # Keep only truthy band values (drops None and 0) before converting.
    values = num2date([band_value for band_value in sample.values() if band_value])

    plt.scatter(dates, ndvi)
    # NOTE(review): the first converted value is skipped - presumably the
    # start of the series rather than a break; confirm.
    for z in values[1:]:
        plt.axvline(z, ls='--', color='k')
    plt.title(n)
    plt.show()


In [None]:
# Same NDVI/break plot, restricted to five selected forested polygons; the
# plotted break dates are also printed.
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
for n in ids:
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    S2 = (
        getS2_CLOUD_PROBABILITY(fea.geometry())
        .filterDate('2016-01-01', '2022-06-01')
        .map(masking)
    )

    data = S2.select('ndvi').map(sampleS2(fea)).getInfo()
    dates = [pd.to_datetime(feat['properties']['date']) for feat in data['features']]
    # Features without an 'ndvi' property contribute None.
    ndvi = [feat['properties'].get('ndvi') for feat in data['features']]

    # Mean of every band of the focal-mode-filtered `to_year(TStartS2)` image
    # over the polygon at 10 m scale.
    sample = to_year(TStartS2).focal_mode().reduceRegion(
        reducer='mean', geometry=fea.geometry(), scale=10
    ).getInfo()
    # Keep only truthy band values (drops None and 0) before converting.
    values = num2date([band_value for band_value in sample.values() if band_value])

    plt.scatter(dates, ndvi)
    # NOTE(review): the first converted value is skipped - presumably the
    # start of the series rather than a break; confirm.
    for z in values[1:]:
        print(z)
        plt.axvline(z, ls='--', color='k')
    plt.title(n)
    plt.show()

In [None]:
# NDVI/break plot for five selected deforested polygons; the plotted break
# dates are also printed.
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
for n in ids:
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    S2 = (
        getS2_CLOUD_PROBABILITY(fea.geometry())
        .filterDate('2016-01-01', '2022-06-01')
        .map(masking)
    )

    data = S2.select('ndvi').map(sampleS2(fea)).getInfo()
    dates = [pd.to_datetime(feat['properties']['date']) for feat in data['features']]
    # Features without an 'ndvi' property contribute None.
    ndvi = [feat['properties'].get('ndvi') for feat in data['features']]

    # Mean of every band of the focal-mode-filtered `to_year(TStartS2)` image
    # over the polygon at 10 m scale.
    sample = to_year(TStartS2).focal_mode().reduceRegion(
        reducer='mean', geometry=fea.geometry(), scale=10
    ).getInfo()
    # Keep only truthy band values (drops None and 0) before converting.
    values = num2date([band_value for band_value in sample.values() if band_value])

    plt.scatter(dates, ndvi)
    # NOTE(review): the first converted value is skipped - presumably the
    # start of the series rather than a break; confirm.
    for z in values[1:]:
        print(z)
        plt.axvline(z, ls='--', color='k')
    plt.title(n)
    plt.show()

In [None]:
# Truthy band values of the most recent `sample` dictionary.
values = [band_value for band_value in sample.values() if band_value]
values


In [None]:
# Modal value of `lengthS2` inside every forested polygon (10 m scale).
ids = []
deforested0 = []
for n in ids_forested:
    test = forested.filter(ee.Filter.eq('id', n)).first()
    val = lengthS2.reduceRegion(
        reducer='mode', geometry=test.geometry(), scale=10
    ).values().get(0)
    ids.append(n)
    # One blocking request per polygon.
    deforested0.append(val.getInfo())

In [None]:
# Distinct values in `deforested0` and how often each occurs.
_counts = Counter(deforested0)
(_counts.keys(), _counts.values())

In [None]:
# Modal value of `lastS2` inside every forested polygon (10 m scale).
ids = []
deforested0 = []
for n in ids_forested:
    test = forested.filter(ee.Filter.eq('id', n)).first()
    val = lastS2.reduceRegion(
        reducer='mode', geometry=test.geometry(), scale=10
    ).values().get(0)
    ids.append(n)
    # One blocking request per polygon.
    deforested0.append(val.getInfo())

In [None]:
# Replace missing results (None) with NaN, then plot a 20-bin histogram.
deforested0_bins = [n if n is not None else np.nan for n in deforested0]
plt.hist(deforested0_bins, bins=20)
plt.show()

In [None]:
# Display the NaN-filled values in `deforested0_bins`.
deforested0_bins

In [None]:
# Sum of `bol` minus a hard-coded 23.
# NOTE(review): neither `bol` nor the origin of 23 is visible in this part of the notebook - confirm.
np.sum(bol) - 23

In [None]:
# Keep values above 2016.3; everything else (including NaN) becomes None.
vals2 = [n if n > 2016.3 else None for n in deforested0_bins]
# Sum the `bol` entries at the positions that passed the threshold.
np.sum([bol[pos] for pos, kept in enumerate(vals2) if kept is not None])

In [None]:
# Values above 2016.3: print how many there are and plot a 20-bin histogram.
_above_threshold = [n for n in deforested0_bins if n > 2016.3]
print(len(_above_threshold))
plt.hist(_above_threshold, 20)

In [None]:
# Modal value of `lengthS2` inside every deforested polygon (10 m scale).
ids2 = []
deforested1 = []
for n in ids_deforested:
    test = deforested.filter(ee.Filter.eq('id', n)).first()
    val = lengthS2.reduceRegion(
        reducer='mode', geometry=test.geometry(), scale=10
    ).values().get(0)
    ids2.append(n)
    # One blocking request per polygon.
    deforested1.append(val.getInfo())

In [None]:
# Round every truthy value; falsy results (None or 0) become NaN.
deforested1_bin = [np.nan if not n else round(n) for n in deforested1]
# Distinct rounded values and how often each occurs.
_counts = Counter(deforested1_bin)
(_counts.keys(), _counts.values())

In [None]:
# Modal value of `lastS2` inside every deforested polygon (10 m scale).
ids2 = []
deforested1_year = []
for n in ids_deforested:
    test = deforested.filter(ee.Filter.eq('id', n)).first()
    val = lastS2.reduceRegion(
        reducer='mode', geometry=test.geometry(), scale=10
    ).values().get(0)
    ids2.append(n)
    # One blocking request per polygon.
    deforested1_year.append(val.getInfo())

In [None]:
# 1 where the value is above 2016.3, otherwise 0 (missing results count as 0).
deforested1_year_bin = [
    1 if (n is not None and n > 2016.3) else 0
    for n in deforested1_year
]
# Distinct flags and how often each occurs.
_counts = Counter(deforested1_year_bin)
(_counts.keys(), _counts.values())

In [None]:
# Entries of `bins2` at the positions whose flag in `deforested1_year_bin` is 0.
[bins2[pos] for pos, flag in enumerate(deforested1_year_bin) if flag == 0]

In [None]:
# Sum of `bins2` minus a hard-coded 19.
# NOTE(review): the origin of the constant is not visible in this part of the notebook - confirm.
np.sum(bins2)-19

In [None]:
# 20-bin histogram of the values above 2016.3; missing results and earlier
# values are replaced by NaN. `or` short-circuits, so `None` is never compared
# with a number (the bitwise `|` evaluated both sides and raised TypeError).
plt.hist([np.nan if (n is None) or (n <= 2016.3) else n for n in deforested1_year], 20)
plt.show()

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


# Scores from hard-coded counts: 100 observations labelled 0 (73 predicted 0,
# 27 predicted 1) followed by 100 labelled 1 (70 predicted 1, 30 predicted 0).
obs = np.concatenate([np.repeat(0, 100), np.repeat(1, 100)])
pred = np.concatenate([
    np.repeat(label, count)
    for label, count in ((0, 73), (1, 27), (1, 70), (0, 30))
])
print(
    precision_score(obs, pred),
    recall_score(obs, pred),
    f1_score(obs, pred),
    accuracy_score(obs, pred),
)

In [None]:
# Sample `last` at 30 m over the reference polygons named 'estable' and split
# the 'constant' values at 2016 (<= 2016 -> "no", > 2016 -> "yes").
stable_polys = reference.filter(ee.Filter.eq('NAME', 'estable'))
stable_data = last.sampleRegions(collection=stable_polys, scale=30).getInfo()
stable_data1 = [feat['properties']['constant'] for feat in stable_data['features']]
stable_data_no = [year for year in stable_data1 if year <= 2016]
stable_data_yes = [year for year in stable_data1 if year > 2016]

# Samples with ID <= 64 go to the "native" group ...
stable_native = [feat for feat in stable_data['features'] if feat['properties']['ID'] <= 64]
stable_native_no = [
    feat['properties']['constant'] for feat in stable_native
    if feat['properties']['constant'] <= 2016
]
stable_native_yes = [
    feat['properties']['constant'] for feat in stable_native
    if feat['properties']['constant'] > 2016
]

# ... and samples with ID > 64 to the "plant" group.
stable_plant = [feat for feat in stable_data['features'] if feat['properties']['ID'] > 64]
stable_plant_no = [
    feat['properties']['constant'] for feat in stable_plant
    if feat['properties']['constant'] <= 2016
]
stable_plant_yes = [
    feat['properties']['constant'] for feat in stable_plant
    if feat['properties']['constant'] > 2016
]
print(
    len(stable_data_no), len(stable_data_yes),
    len(stable_native), len(stable_native_no), len(stable_native_yes),
    len(stable_plant), len(stable_plant_no), len(stable_plant_yes),
)


In [None]:
# Recall from hard-coded counts. Each (label, count) pair below contributes
# `count` predictions of `label`; the first six pairs belong to observations
# labelled 0 and the last two to observations labelled 1.
obs = np.concatenate([
    np.repeat(0, 2131 + 178 + 2627 + 6643 + 2514 + 4152),
    np.repeat(1, 1840 + 3742),
])

pre = np.concatenate([
    np.repeat(label, count)
    for label, count in (
        (0, 2131), (1, 178),
        (0, 2627), (1, 6643),
        (0, 2514), (1, 4152),
        (0, 1840), (1, 3742),
    )
])
recall_score(obs, pre)


In [None]:
# Polygon IDs that are excluded from the 'tala' reference polygons.
# NOTE(review): the name suggests disturbances dated before 2016 - confirm.
lw2016 =  [85, 90, 113, 115, 116, 117, 118, 119, 128, 130, 138]

In [None]:
# Sample `last` at 30 m over the reference polygons named 'tala' (minus the
# IDs listed in `lw2016`) and split the 'constant' values at 2016.
tala_polys = (
    reference
    .filter(ee.Filter.eq('NAME', 'tala'))
    .filter(ee.Filter.inList('ID', lw2016).Not())
)
tala_data = last.sampleRegions(collection=tala_polys, scale=30)
# The overall counts are evaluated server-side; only the sizes are fetched.
tala_data_no = tala_data.filter(ee.Filter.lte('constant', 2016)).size().getInfo()
tala_data_yes = tala_data.filter(ee.Filter.gt('constant', 2016)).size().getInfo()

# Polygons with 84 <= ID < 150 form the "native" group ...
_native_ids = ee.Filter.And(ee.Filter.gte('ID', 84), ee.Filter.lt('ID', 150))
tala_native = tala_polys.filter(_native_ids)
tala_native = last.sampleRegions(collection=tala_native, scale=30).getInfo()
tala_native_no = [
    feat['properties']['constant'] for feat in tala_native['features']
    if feat['properties']['constant'] <= 2016
]
tala_native_yes = [
    feat['properties']['constant'] for feat in tala_native['features']
    if feat['properties']['constant'] > 2016
]

# ... and every other polygon the "plant" group.
tala_plant = tala_polys.filter(_native_ids.Not())
tala_plant = last.sampleRegions(collection=tala_plant, scale=30).getInfo()
tala_plant_no = [
    feat['properties']['constant'] for feat in tala_plant['features']
    if feat['properties']['constant'] <= 2016
]
tala_plant_yes = [
    feat['properties']['constant'] for feat in tala_plant['features']
    if feat['properties']['constant'] > 2016
]
print(
    tala_data_no, tala_data_yes,
    len(tala_native['features']), len(tala_native_no), len(tala_native_yes),
    len(tala_plant['features']), len(tala_plant_no), len(tala_plant_yes),
)


In [None]:
def _binary_scores(n_stable, n_change, stable_no, stable_yes, change_no, change_yes):
    '''Return (precision, recall, F1, accuracy) for change vs. stable samples.

    Observed labels are 0 for the ``n_stable`` stable samples followed by 1
    for the ``n_change`` changed samples.  Predictions use the same ordering:
    stable samples without / with a value after 2016 (0 / 1), then changed
    samples with / without a value after 2016 (1 / 0).
    '''
    observed = np.hstack([np.repeat(0, n_stable), np.repeat(1, n_change)])
    predicted = np.hstack([
        np.repeat(0, stable_no), np.repeat(1, stable_yes),
        np.repeat(1, change_yes), np.repeat(0, change_no),
    ])
    return (
        precision_score(observed, predicted),
        recall_score(observed, predicted),
        f1_score(observed, predicted),
        accuracy_score(observed, predicted),
    )


# 'tala' samples against the stable samples: all samples, then the native and
# plantation subsets.
print(*_binary_scores(
    len(stable_data_no) + len(stable_data_yes), tala_data_no + tala_data_yes,
    len(stable_data_no), len(stable_data_yes), tala_data_no, tala_data_yes,
))
print('native = ', *_binary_scores(
    len(stable_native), len(tala_native['features']),
    len(stable_native_no), len(stable_native_yes),
    len(tala_native_no), len(tala_native_yes),
))
print('plantation = ', *_binary_scores(
    len(stable_plant), len(tala_plant['features']),
    len(stable_plant_no), len(stable_plant_yes),
    len(tala_plant_no), len(tala_plant_yes),
))

In [None]:
# Sample `last` at 30 m over the reference polygons named 'incendio' and split
# the 'constant' values at 2016.
fire_polys = reference.filter(ee.Filter.eq('NAME', 'incendio'))
fire_data = last.sampleRegions(collection=fire_polys, scale=30)
# The overall counts are evaluated server-side; only the sizes are fetched.
fire_data_no = fire_data.filter(ee.Filter.lte('constant', 2016)).size().getInfo()
fire_data_yes = fire_data.filter(ee.Filter.gt('constant', 2016)).size().getInfo()

# Polygons with 84 <= ID <= 406 form the "native" group.
fire_native = fire_polys.filter(
    ee.Filter.And(ee.Filter.gte('ID', 84), ee.Filter.lte('ID', 406))
)
fire_native = last.sampleRegions(collection=fire_native, scale=30).getInfo()
fire_native_no = [
    feat['properties']['constant'] for feat in fire_native['features']
    if feat['properties']['constant'] <= 2016
]
fire_native_yes = [
    feat['properties']['constant'] for feat in fire_native['features']
    if feat['properties']['constant'] > 2016
]

# Polygons with ID > 406 form the "plant" group; they are fetched in two
# requests, split at ID 460.
fire_plant = fire_polys.filter(ee.Filter.gt('ID', 406))
fire_plant1 = last.sampleRegions(
    collection=fire_plant.filter(ee.Filter.lt('ID', 460)), scale=30
).getInfo()
fire_plant2 = last.sampleRegions(
    collection=fire_plant.filter(ee.Filter.gte('ID', 460)), scale=30
).getInfo()
fire_plant_no1 = [
    feat['properties']['constant'] for feat in fire_plant1['features']
    if feat['properties']['constant'] <= 2016
]
fire_plant_no2 = [
    feat['properties']['constant'] for feat in fire_plant2['features']
    if feat['properties']['constant'] <= 2016
]
fire_plant_no = fire_plant_no1 + fire_plant_no2
fire_plant_yes1 = [
    feat['properties']['constant'] for feat in fire_plant1['features']
    if feat['properties']['constant'] > 2016
]
fire_plant_yes2 = [
    feat['properties']['constant'] for feat in fire_plant2['features']
    if feat['properties']['constant'] > 2016
]
fire_plant_yes = fire_plant_yes1 + fire_plant_yes2
print(fire_data_no, fire_data_yes)

In [None]:
# Fire samples against the stable samples. Observed: 0 for every stable
# sample, 1 for every fire sample. Predicted: stable samples without / with a
# value after 2016 (0 / 1), then fire samples with / without one (1 / 0).
_fire_observed = np.hstack([
    np.repeat(0, len(stable_data_no) + len(stable_data_yes)),
    np.repeat(1, fire_data_no + fire_data_yes),
])
_fire_predicted = np.hstack([
    np.repeat(0, len(stable_data_no)),
    np.repeat(1, len(stable_data_yes)),
    np.repeat(1, fire_data_yes),
    np.repeat(0, fire_data_no),
])
print(
    precision_score(_fire_observed, _fire_predicted),
    recall_score(_fire_observed, _fire_predicted),
    f1_score(_fire_observed, _fire_predicted),
    accuracy_score(_fire_observed, _fire_predicted),
)

# Stable vs. fire samples over native forest.
#
# Reference and predicted vectors are built from the same four counts so
# they always have the same length and the same group order
# (stable first, then fire). The previous version sized the reference
# vector with len(stable_native) and len(fire_native['features']) while the
# prediction vector used the `_no`/`_yes` lists; if `stable_native` is a
# getInfo() dict like `fire_native`, len() counts dict keys, not samples.
# NOTE(review): `stable_native_no`/`stable_native_yes` are defined outside
# this cell; presumably they split the stable samples at 2016 like the
# fire lists do -- confirm.
n_stable_no, n_stable_yes = len(stable_native_no), len(stable_native_yes)
n_fire_no, n_fire_yes = len(fire_native_no), len(fire_native_yes)

# Reference label: 0 = stable polygon, 1 = fire polygon.
y_true_native = np.repeat([0, 1], [n_stable_no + n_stable_yes, n_fire_no + n_fire_yes])
# Predicted label: 1 for the `_yes` groups, 0 for the `_no` groups.
y_pred_native = np.repeat([0, 1, 1, 0], [n_stable_no, n_stable_yes, n_fire_yes, n_fire_no])

# Printed in the order: precision, recall, F1, accuracy.
print('native = ',
      precision_score(y_true_native, y_pred_native),
      recall_score(y_true_native, y_pred_native),
      f1_score(y_true_native, y_pred_native),
      accuracy_score(y_true_native, y_pred_native))
# Stable vs. fire samples over plantations.
#
# Reference and predicted vectors are built from the same four counts so
# they always have the same length and the same group order
# (stable first, then fire). The previous version sized the reference
# vector with len(stable_plant) and the raw feature counts of
# fire_plant1/fire_plant2 while the prediction vector used the `_no`/`_yes`
# lists; if `stable_plant` is a getInfo() dict like `fire_plant1`, len()
# counts dict keys, not samples.
# NOTE(review): `stable_plant_no`/`stable_plant_yes` are defined outside
# this cell; presumably they split the stable samples at 2016 like the
# fire lists do -- confirm.
n_stable_no, n_stable_yes = len(stable_plant_no), len(stable_plant_yes)
n_fire_no, n_fire_yes = len(fire_plant_no), len(fire_plant_yes)

# Reference label: 0 = stable polygon, 1 = fire polygon.
y_true_plant = np.repeat([0, 1], [n_stable_no + n_stable_yes, n_fire_no + n_fire_yes])
# Predicted label: 1 for the `_yes` groups, 0 for the `_no` groups.
y_pred_plant = np.repeat([0, 1, 1, 0], [n_stable_no, n_stable_yes, n_fire_yes, n_fire_no])

# Printed in the order: precision, recall, F1, accuracy.
print('plantation = ',
      precision_score(y_true_plant, y_pred_plant),
      recall_score(y_true_plant, y_pred_plant),
      f1_score(y_true_plant, y_pred_plant),
      accuracy_score(y_true_plant, y_pred_plant))

In [None]:
def _sequia_constants(sampled):
    """Return the `constant` property of every feature in a sampleRegions().getInfo() result."""
    return [feat['properties']['constant'] for feat in sampled['features']]


# Reference polygons labelled 'sequia' (Spanish for drought), sampled at 30 m.
sequia_polys = reference.filter(ee.Filter.eq('NAME', 'sequia'))
sequia_data = last.sampleRegions(sequia_polys, scale=30)
# Server-side counts: sampled value <= 2016 (`_no`) and > 2016 (`_yes`).
sequia_data_no = sequia_data.filter(ee.Filter.lte('constant', 2016)).size().getInfo()
sequia_data_yes = sequia_data.filter(ee.Filter.gt('constant', 2016)).size().getInfo()

# Polygons with 84 <= ID <= 328 form the 'native' subset; they are sampled
# in two requests split at ID 300 and then merged.
_native_id_range = ee.Filter.And(ee.Filter.gte('ID', 84), ee.Filter.lte('ID', 328))
sequia_native = sequia_polys.filter(_native_id_range)
sequia_native1 = last.sampleRegions(sequia_native.filter(ee.Filter.lt('ID', 300)), scale=30).getInfo()
sequia_native2 = last.sampleRegions(sequia_native.filter(ee.Filter.gte('ID', 300)), scale=30).getInfo()

_native_values1 = _sequia_constants(sequia_native1)
_native_values2 = _sequia_constants(sequia_native2)
sequia_native_no1 = [value for value in _native_values1 if value <= 2016]
sequia_native_no2 = [value for value in _native_values2 if value <= 2016]
sequia_native_no = sequia_native_no1 + sequia_native_no2
sequia_native_yes1 = [value for value in _native_values1 if value > 2016]
sequia_native_yes2 = [value for value in _native_values2 if value > 2016]
sequia_native_yes = sequia_native_yes1 + sequia_native_yes2

# Every other 'sequia' polygon (ID outside 84..328) forms the 'plant' subset.
sequia_plant = sequia_polys.filter(_native_id_range.Not())
sequia_plant = last.sampleRegions(sequia_plant, scale=30).getInfo()
_plant_values = _sequia_constants(sequia_plant)
sequia_plant_no = [value for value in _plant_values if value <= 2016]
sequia_plant_yes = [value for value in _plant_values if value > 2016]

print(
    sequia_data_no,
    sequia_data_yes,
    len(sequia_native1['features']) + len(sequia_native2['features']),
    len(sequia_native_no),
    len(sequia_native_yes),
    len(sequia_plant['features']),
    len(sequia_plant_no),
    len(sequia_plant_yes),
)

In [None]:
# Stable vs. 'sequia' samples, all cover types together.
# Reference label: 0 for every stable sample, 1 for every sequia sample.
# Predicted label: 1 for the `_yes` counts, 0 for the `_no` counts.
n_stable_no, n_stable_yes = len(stable_data_no), len(stable_data_yes)
y_true = np.repeat([0, 1], [n_stable_no + n_stable_yes, sequia_data_no + sequia_data_yes])
y_pred = np.repeat([0, 1, 1, 0], [n_stable_no, n_stable_yes, sequia_data_yes, sequia_data_no])

# Printed in the order: precision, recall, F1, accuracy.
scores = tuple(
    metric(y_true, y_pred)
    for metric in (precision_score, recall_score, f1_score, accuracy_score)
)
print(*scores)

# Stable vs. 'sequia' samples over native forest.
#
# Reference and predicted vectors are built from the same four counts so
# they always have the same length and the same group order
# (stable first, then sequia). The previous version sized the reference
# vector with len(stable_native) and the raw feature counts of
# sequia_native1/sequia_native2 while the prediction vector used the
# `_no`/`_yes` lists; if `stable_native` is a getInfo() dict like
# `sequia_native1`, len() counts dict keys, not samples.
# NOTE(review): `stable_native_no`/`stable_native_yes` are defined outside
# this cell; presumably they split the stable samples at 2016 like the
# sequia lists do -- confirm.
n_stable_no, n_stable_yes = len(stable_native_no), len(stable_native_yes)
n_sequia_no, n_sequia_yes = len(sequia_native_no), len(sequia_native_yes)

# Reference label: 0 = stable polygon, 1 = sequia polygon.
y_true_native = np.repeat([0, 1], [n_stable_no + n_stable_yes, n_sequia_no + n_sequia_yes])
# Predicted label: 1 for the `_yes` groups, 0 for the `_no` groups.
y_pred_native = np.repeat([0, 1, 1, 0], [n_stable_no, n_stable_yes, n_sequia_yes, n_sequia_no])

# Printed in the order: precision, recall, F1, accuracy.
print('native = ',
      precision_score(y_true_native, y_pred_native),
      recall_score(y_true_native, y_pred_native),
      f1_score(y_true_native, y_pred_native),
      accuracy_score(y_true_native, y_pred_native))

## All

In [None]:
# Combined evaluation over stable, sequia, fire and tala samples.
#
# Reference label: 1 only for the tala samples; stable, fire and sequia
# samples are all labelled 0.
# NOTE(review): the per-class cells label fire and sequia samples as 1;
# here they are labelled 0. Confirm this is intended before relying on
# these scores.
n_stable_no, n_stable_yes = len(stable_data_no), len(stable_data_yes)
n_reference_0 = (n_stable_no + n_stable_yes
                 + fire_data_no + fire_data_yes
                 + sequia_data_no + sequia_data_yes)
n_reference_1 = tala_data_no + tala_data_yes
y_true = np.repeat([0, 1], [n_reference_0, n_reference_1])

# Predicted label, grouped as stable, sequia, fire, tala:
# 1 for the `_yes` counts, 0 for the `_no` counts.
y_pred = np.repeat(
    [0, 1, 1, 0, 1, 0, 1, 0],
    [n_stable_no, n_stable_yes,
     sequia_data_yes, sequia_data_no,
     fire_data_yes, fire_data_no,
     tala_data_yes, tala_data_no],
)

# Printed in the order: precision, recall, F1, accuracy.
scores = tuple(
    metric(y_true, y_pred)
    for metric in (precision_score, recall_score, f1_score, accuracy_score)
)
print(*scores)

In [None]:
# Combined evaluation over the native-forest samples of every class.
#
# Reference label: 1 only for the tala samples; stable, fire and sequia
# samples are all labelled 0.
# NOTE(review): the per-class cells label fire and sequia samples as 1;
# here they are labelled 0. Confirm this is intended.
native_counts = {
    'stable': (len(stable_native_no), len(stable_native_yes)),
    'sequia': (len(sequia_native_no), len(sequia_native_yes)),
    'fire': (len(fire_native_no), len(fire_native_yes)),
    'tala': (len(tala_native_no), len(tala_native_yes)),
}
n_reference_0 = sum(sum(native_counts[name]) for name in ('stable', 'fire', 'sequia'))
n_reference_1 = sum(native_counts['tala'])
y_true = np.repeat([0, 1], [n_reference_0, n_reference_1])

# Predicted label, grouped as stable, sequia, fire, tala:
# 1 for the `_yes` lists, 0 for the `_no` lists.
y_pred = np.repeat(
    [0, 1, 1, 0, 1, 0, 1, 0],
    [native_counts['stable'][0], native_counts['stable'][1],
     native_counts['sequia'][1], native_counts['sequia'][0],
     native_counts['fire'][1], native_counts['fire'][0],
     native_counts['tala'][1], native_counts['tala'][0]],
)

# Printed in the order: precision, recall, F1, accuracy.
scores = tuple(
    metric(y_true, y_pred)
    for metric in (precision_score, recall_score, f1_score, accuracy_score)
)
print(*scores)



# Combined evaluation over the plantation samples of every class.
#
# Reference label: 1 only for the tala samples; stable, fire and sequia
# samples are all labelled 0.
# NOTE(review): the per-class cells label fire and sequia samples as 1;
# here they are labelled 0. Confirm this is intended.
plant_counts = {
    'stable': (len(stable_plant_no), len(stable_plant_yes)),
    'sequia': (len(sequia_plant_no), len(sequia_plant_yes)),
    'fire': (len(fire_plant_no), len(fire_plant_yes)),
    'tala': (len(tala_plant_no), len(tala_plant_yes)),
}
n_reference_0 = sum(sum(plant_counts[name]) for name in ('stable', 'fire', 'sequia'))
n_reference_1 = sum(plant_counts['tala'])
y_true = np.repeat([0, 1], [n_reference_0, n_reference_1])

# Predicted label, grouped as stable, sequia, fire, tala:
# 1 for the `_yes` lists, 0 for the `_no` lists.
y_pred = np.repeat(
    [0, 1, 1, 0, 1, 0, 1, 0],
    [plant_counts['stable'][0], plant_counts['stable'][1],
     plant_counts['sequia'][1], plant_counts['sequia'][0],
     plant_counts['fire'][1], plant_counts['fire'][0],
     plant_counts['tala'][1], plant_counts['tala'][0]],
)

# Printed in the order: precision, recall, F1, accuracy.
scores = tuple(
    metric(y_true, y_pred)
    for metric in (precision_score, recall_score, f1_score, accuracy_score)
)
print(*scores)

In [None]:
# Sample counts for the native subsets, as (no, yes) pairs for tala, fire and sequia.
tuple(
    len(group)
    for group in (
        tala_native_no, tala_native_yes,
        fire_native_no, fire_native_yes,
        sequia_native_no, sequia_native_yes,
    )
)

In [None]:
# Load the validation table from Google Drive. Later cells read its
# `change` (reference) and `classification` (prediction) columns.
# NOTE(review): 'vali_inde' presumably means independent validation -- confirm.
df = pd.read_csv('/content/drive/MyDrive/vali_inde.csv')

In [None]:
# Display the table that was just loaded.
df

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

In [None]:
# Precision, recall, F1 and accuracy of `classification` against `change`.
observed, predicted = df['change'], df['classification']
tuple(
    metric(observed, predicted)
    for metric in (precision_score, recall_score, f1_score, accuracy_score)
)

In [None]:
# Score each of the ten vali{n}.csv tables (n = 0..9) and collect one value
# per table in each list. `df` is left holding the last table read.
precisions, recalls, f1s, accuracies = [], [], [], []
for n in range(10):
    df = pd.read_csv('/content/drive/MyDrive/vali{}.csv'.format(n))
    observed, predicted = df['change'], df['classification']
    for collected, metric in (
        (precisions, precision_score),
        (recalls, recall_score),
        (f1s, f1_score),
        (accuracies, accuracy_score),
    ):
        collected.append(metric(observed, predicted))


In [None]:
# Mean precision, recall, F1 and accuracy over the tables scored above.
tuple(np.mean(values) for values in (precisions, recalls, f1s, accuracies))

In [None]:
# Score each of the ten cali{n}.csv tables (n = 0..9) and collect one value
# per table in each list. `df` is left holding the last table read
# (cali9.csv), which the following cells keep using.
precisions, recalls, f1s, accuracies = [], [], [], []
for n in range(10):
    df = pd.read_csv('/content/drive/MyDrive/cali{}.csv'.format(n))
    observed, predicted = df['change'], df['classification']
    for collected, metric in (
        (precisions, precision_score),
        (recalls, recall_score),
        (f1s, f1_score),
        (accuracies, accuracy_score),
    ):
        collected.append(metric(observed, predicted))

In [None]:
# Mean precision, recall, F1 and accuracy over the tables scored above.
tuple(np.mean(values) for values in (precisions, recalls, f1s, accuracies))

In [None]:
# Names of the ten columns at positions 1..10; the cells below use them as
# the feature set.
df.columns[1:11]

In [None]:
# Pairwise correlation matrix of the ten feature columns (pandas default method).
df[df.columns[1:11]].corr()

In [None]:
# Show the feature correlation matrix as an image; the colour scale is
# clipped to the range [0.5, 1].
plt.imshow(df[df.columns[1:11]].corr(), vmin=0.5, vmax=1)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.inspection import permutation_importance
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer

from scipy.stats import spearmanr
from scipy.cluster import hierarchy
from scipy.spatial.distance import squareform

In [None]:
# scikit-learn's bundled breast-cancer sample dataset, split into train and
# test parts with a fixed seed. This is sample data, not the notebook's own
# validation table `df`.
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

In [None]:
# Display the sample dataset's feature matrix.
X

In [None]:
# Shape of the training feature matrix from the sample-dataset split.
X_train.shape

In [None]:
# Shape of the training target vector from the sample-dataset split.
y_train.shape

In [None]:
# Random forest fitted on the ten feature columns of `df` (positions 1..10)
# with `change` as the target.
feature_matrix = df[df.columns[1:11]].values
change_labels = df['change'].values
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(feature_matrix, change_labels)

In [None]:
# Left panel: Ward dendrogram of the ten feature columns.
# Right panel: their Spearman correlation matrix, reordered by dendrogram leaf.
feature_names = df.columns[1:11]
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))

corr = spearmanr(df[feature_names].values).correlation
# Force exact symmetry and a unit diagonal so that the derived distance
# matrix is symmetric with a zero diagonal.
corr = (corr + corr.T) / 2
np.fill_diagonal(corr, 1)

# Distance = 1 - |correlation|, condensed and clustered with Ward's linkage.
distance_matrix = 1 - np.abs(corr)
dist_linkage = hierarchy.ward(squareform(distance_matrix))
dendro = hierarchy.dendrogram(
    dist_linkage, labels=feature_names, ax=ax1, leaf_rotation=90
)
dendro_idx = np.arange(len(dendro["ivl"]))

# Reorder rows and columns of the correlation matrix to follow the leaves.
leaf_order = dendro["leaves"]
ax2.imshow(corr[np.ix_(leaf_order, leaf_order)])
ax2.set_xticks(dendro_idx)
ax2.set_yticks(dendro_idx)
ax2.set_xticklabels(dendro["ivl"], rotation="vertical")
ax2.set_yticklabels(dendro["ivl"])
fig.tight_layout()
plt.show()

In [None]:
from collections import defaultdict

# Cut the feature dendrogram at distance 0.1 and keep the first feature of
# every resulting cluster.
cluster_ids = hierarchy.fcluster(dist_linkage, 0.1, criterion="distance")
cluster_id_to_feature_ids = defaultdict(list)
for idx, cluster_id in enumerate(cluster_ids):
    cluster_id_to_feature_ids[cluster_id].append(idx)
selected_features = [v[0] for v in cluster_id_to_feature_ids.values()]

# `dist_linkage` was built from df.columns[1:11], so `selected_features`
# holds positions within those ten columns. The reduced model is therefore
# fitted on that column subset with `change` as the target. The previous
# version indexed the breast-cancer sample matrix (`X_train`/`y_train`)
# with these positions, which trained the model on unrelated data.
X_train_sel = df[df.columns[1:11]].values[:, selected_features]

clf_sel = RandomForestClassifier(n_estimators=100, random_state=42)
clf_sel.fit(X_train_sel, df['change'].values)

In [None]:
# Positions (within the clustered feature columns) of the one feature kept
# per cluster.
selected_features

In [None]:
# Left panel: impurity-based importances of the full model `clf`.
# Right panel: permutation importances of the reduced model `clf_sel`.
feature_names = df.columns[1:11]

# `clf_sel` was fitted on the `selected_features` columns only, so it has to
# be scored on the same column subset and labelled with the matching names.
# Passing all ten columns fails whenever fewer than ten were selected; when
# all ten are selected this is identical to the previous behaviour.
X_selected = df[feature_names].values[:, selected_features]
selected_names = feature_names[selected_features]

result = permutation_importance(clf_sel, X_selected, df['change'].values, n_repeats=10, random_state=42)
perm_sorted_idx = result.importances_mean.argsort()

tree_importance_sorted_idx = np.argsort(clf.feature_importances_)
# Bar centres at 0.5, 1.5, ... so each bar sits inside the [0, n] axis range.
tree_indices = np.arange(0, len(clf.feature_importances_)) + 0.5

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
ax1.barh(tree_indices, clf.feature_importances_[tree_importance_sorted_idx], height=0.7)
ax1.set_yticks(tree_indices)
ax1.set_yticklabels(feature_names[tree_importance_sorted_idx])
ax1.set_ylim((0, len(clf.feature_importances_)))
ax2.boxplot(
    result.importances[perm_sorted_idx].T,
    vert=False,
    labels=selected_names[perm_sorted_idx],
)
fig.tight_layout()
plt.show()

In [None]:
# Six `ccdc_tStart_*` image assets mosaicked into a single image.
tstart_asset_ids = [
    'users/ignisfausto/ccdc_tStart_001083_095',
    'users/ignisfausto/ccdc_tStart_001084_095',
    'users/ignisfausto/ccdc_tStart_001085_095',
    'users/ignisfausto/ccdc_tStart_233083_095',
    'users/ignisfausto/ccdc_tStart_233084_095',
    'users/ignisfausto/ccdc_tStart_233085_095',
]
TStart = ee.ImageCollection([ee.Image(asset_id) for asset_id in tstart_asset_ids]).mosaic()

# NOTE(review): presumably a 2014 land-cover map of Chile, judging by the
# asset name -- confirm.
LC = ee.Image('users/ignaciofuentessanroman/LC_CHILE_2014_b')


In [None]:
# Per-pixel 'count' reduction of the TStart mosaic, cast to integer.
# NOTE(review): presumably the number of unmasked tStart bands, i.e. the
# number of CCDC segments per pixel -- confirm.
length = TStart.reduce('count').toInt()
# Per-pixel 'max' reduction of the TStart mosaic, converted with `to_year`
# (defined elsewhere in the notebook). This rebinds the global `last`.
# NOTE(review): presumably the start year of the most recent segment -- confirm.
last = to_year(TStart.reduce('max'))#.focal_mode(3)

In [None]:
# Interactive map centred on reference polygon ID 370, with the break-count
# layer, the last-break-year layer and the polygon itself as overlays.
# NOTE: `id` and `map` shadow Python builtins; the names are kept because
# other cells may rely on them.
fea = reference.filter(ee.Filter.eq('ID', 370)).first()

geo = ee.Feature(fea)
id = geo.getMapId()
length_id = length.getMapId({'min': 1, 'max': 5, 'palette': '0000FF, 00FFFF, 00FF00, FFFF00, FF0000'})
years_id = last.getMapId({'min': 2016, 'max': 2022.5, 'palette': 'FF0000, FFFF00, 00FF00, 00FFFF, 0000FF'})
# Map id of all reference polygons; currently not added to the map.
defo_polys = reference.getMapId()

# The centroid coordinates are reversed before being passed as the map location.
centroid = list(reversed(geo.centroid().getInfo()['geometry']['coordinates']))
map = folium.Map(location=centroid, zoom_start=16)

# Overlays are added in this order: break count, last break year, polygon.
for layer_name, map_id in (
    ('length_breaks', length_id),
    ('last_break', years_id),
    ('feature', id),
):
    folium.TileLayer(
        tiles=map_id['tile_fetcher'].url_format,
        attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
        overlay=True,
        name=layer_name,
    ).add_to(map)

map.add_child(folium.LayerControl())
map