## Modelamiento de cambios estructurales para detección de cambios en bosques y matorrales de Chile central usando series de tiempo de datos NDVI Landsat-5, -7, -8, y -9

### Este Notebook muestra el uso del modelo non-parametric phenology (npphen)

- **Interfaz**: Google Earth Engine (GEE)
- **Lenguaje**: Python
- **Última actualización**: Julio 2023
- **Autor**: Ignacio Fuentes San Roman \ ignacio.fuentes.sanroman@gmail.com \ Universidad de Las Américas (UDLA)

In [None]:
# Import, authenticate and initialize the Earth Engine library.
import ee
# The trailing comment records an alternative call: ee.Authenticate(auth_mode='paste').
ee.Authenticate()#auth_mode='paste'
# Must run after authentication and before any other `ee` call in this notebook.
ee.Initialize()

In [None]:
# Install the library used for Kernel Density Estimation (KDE);
# it provides the FFTKDE class imported in the next cell.
!pip install KDEpy

In [None]:
# Folium setup.
import folium
import sys
import re
import datetime
import numpy as np
import pandas as pd
import time
from multiprocessing import Pool
import itertools
from datetime import datetime
from KDEpy import FFTKDE
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from scipy import interpolate, stats
import plotly.express as px
from sklearn.cluster import KMeans
from skimage import filters
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score

# Show which folium version is installed.
print(folium.__version__)

In [None]:
def mask(img):
    """
    Mask out flagged pixels of an image and rescale its band values.

    Parameters:
    img (ee.Image): Image that contains a 'QA_PIXEL' band.

    Returns:
    The masked and rescaled image, carrying the properties of `img`.

    Notes:
    - A pixel is kept only when none of the 'QA_PIXEL' flag values
      2 (dilated), 4 (cirrus), 8 (clouds), 16 (shadows) and 32 (snow) is set.
    - After masking, the image is multiplied by 0.0000275 and shifted by -0.2.
    """
    quality = ee.Image(img).select('QA_PIXEL')
    property_names = img.propertyNames()

    # One flag value per unwanted condition: dilated, cirrus, clouds, shadows, snow.
    cleaned = img
    for flag in (2, 4, 8, 16, 32):
        cleaned = cleaned.updateMask(quality.bitwiseAnd(flag).eq(0))

    rescaled = cleaned.multiply(0.0000275).add(-0.2)
    return rescaled.copyProperties(img, property_names)


def ndvil8(img):
    """
    Append an 'ndvi' band computed from the 'SR_B5' and 'SR_B4' bands.

    Parameters:
    img (ee.Image): Image holding the 'SR_B5' and 'SR_B4' bands.

    Returns:
    The input image with the extra 'ndvi' band and the properties of `img`.
    """
    property_names = img.propertyNames()
    ndvi_band = img.normalizedDifference(['SR_B5', 'SR_B4']).rename('ndvi')
    with_ndvi = img.addBands(ndvi_band)
    return with_ndvi.copyProperties(img, property_names)


def ndvil57(img):
    """
    Append an 'ndvi' band computed from the 'SR_B4' and 'SR_B3' bands.

    Parameters:
    img (ee.Image): Image holding the 'SR_B4' and 'SR_B3' bands.

    Returns:
    The input image with the extra 'ndvi' band and the properties of `img`.
    """
    property_names = img.propertyNames()
    ndvi_band = img.normalizedDifference(['SR_B4', 'SR_B3']).rename('ndvi')
    with_ndvi = img.addBands(ndvi_band)
    return with_ndvi.copyProperties(img, property_names)

In [None]:
def get_date(x):
    """
    Format a date as 'YYYY-MM-dd'.

    Parameters:
    x: Any value accepted by the ee.Date constructor.

    Returns:
    The result of ee.Date(x).format('YYYY-MM-dd').
    """
    as_date = ee.Date(x)
    return as_date.format('YYYY-MM-dd')


def mosaicking(collection):
    """
    Build a function that mosaics the 'ndvi' images of a single day.

    Parameters:
    collection (ee.ImageCollection): Collection containing an 'ndvi' band.

    Returns:
    function: A callable taking a date. It selects the 'ndvi' images of
    `collection` acquired from that date up to one day later, mosaics them
    and returns an ee.Image holding the mosaic values where they are >= -1
    and the constant -999 elsewhere.
    """
    def inner(date):
        start = ee.Date(date)
        daily = collection.select('ndvi').filterDate(start, start.advance(1, 'day'))
        # Build the mosaic once and reuse it for both the test and the values.
        mosaic = daily.mosaic()
        # -999 is kept wherever the mosaic is not >= -1.
        return ee.Image(-999).where(mosaic.gte(-1), mosaic)
    return inner


def set_id(fea):
    """
    Copy the identifier of a feature into its 'id' property.

    Parameters:
    fea (ee.Feature): The input feature.

    Returns:
    The value returned by fea.set('id', fea.id()).
    """
    identifier = fea.id()
    return fea.set('id', identifier)


def resample(target):
    """
    Build a function that reprojects images onto the projection of `target`.

    Parameters:
    target (ee.Image): Image whose 'ndvi' band provides the projection.

    Returns:
    function: A callable taking an image and returning it reprojected to
    the projection of the 'ndvi' band of `target`.
    """
    def inner(img):
        target_projection = ee.Image(target).select('ndvi').projection()
        return img.reproject(target_projection)
    return inner


def help(func):
    """
    Print the docstring attached to `func`.

    Parameters:
    func (function): Object whose `__doc__` attribute is printed.
    """
    docstring = func.__doc__
    print(docstring)


In [None]:
def sample(geo):
    """
    Build a function that reduces an image to its mean value over `geo`.

    Parameters:
    geo: Object exposing a `geometry()` method (an ee.Feature in this notebook).

    Returns:
    function: A callable taking an image and returning a geometry-less
    ee.Feature with a 'date' property (image 'system:time_start' formatted
    as 'YYYY-MM-dd') and an 'ndvi' property (first value of the mean
    reduction over the geometry at scale 30).
    """
    def inner(img):
        timestamp = img.get('system:time_start')
        date = ee.Date(timestamp).format('YYYY-MM-dd')
        region_stats = img.reduceRegion('mean', geo.geometry(), scale=30)
        ndvi = region_stats.values().get(0)
        return ee.Feature(None, {'date': date, 'ndvi': ndvi})
    return inner
def help(func):
    """
    Write the docstring of `func` to standard output.

    Parameters:
    func (function): Object whose `__doc__` attribute is printed.
    """
    text = func.__doc__
    print(text)


In [None]:
# Load the forested and deforested polygon collections; set_id copies each
# feature's identifier into an 'id' property so it can be filtered on later.
forested = ee.FeatureCollection('users/ignaciofuentessanroman/IWF_forests').map(set_id)
# Alternative forested asset, kept for reference:
# forested = ee.FeatureCollection('users/ignaciofuentessanroman/forests_non').map(set_id)
deforested = ee.FeatureCollection('users/ignaciofuentessanroman/IWF_deforestation').map(set_id)

# Fetch the distinct 'id' values of both collections with getInfo().
ids_forested = forested.aggregate_array('id').distinct().getInfo()
ids_deforested = deforested.aggregate_array('id').distinct().getInfo()

In [None]:
# Additional polygon collection; a later cell filters it by its 'ID' property.
final = ee.FeatureCollection('users/ignaciofuentessanroman/PolValConsolidadosBosquePlantacion')

## Testing deforested polygon

In [None]:
# Pick one deforested polygon by its 'id' property.
fea = deforested.filter(ee.Filter.eq('id', '00000000000000000048')).first()

# Per-sensor collections intersecting the polygon, masked and with an 'ndvi'
# band added. Landsat 8/9 use ndvil8 (SR_B5/SR_B4); Landsat 5/7 use ndvil57
# (SR_B4/SR_B3).
l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
# NOTE(review): Landsat 5/7 stop at 2022-05-01 here while the merged
# collection below is filtered to 2022-06-01 — confirm this is intended.
l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')

# Single NDVI collection of all sensors, sorted by 'system:time_start'.
landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')

In [None]:
# Interactive folium map with two yearly mean composites and the polygon.
geo = ee.Feature(fea)
# NOTE(review): `id` and `map` shadow Python builtins. The names are kept
# because other cells of the notebook may rely on them.
id = geo.getMapId()
# Yearly mean composites rendered with bands SR_B5 / SR_B4 / SR_B3.
l8_2016 = l8.filterDate('2016-01-01', '2017-01-01').mean().getMapId({'bands':['SR_B5', 'SR_B4', 'SR_B3'], 'min': 0, 'max': 0.3})
# NOTE(review): despite its name this composite is built from `l9` — confirm.
l8_2021 = l9.filterDate('2021-01-01', '2022-01-01').mean().getMapId({'bands':['SR_B5', 'SR_B4', 'SR_B3'], 'min': 0, 'max': 0.3})
# The centroid comes back as [x, y]; it is reversed for folium's `location`.
centroid = geo.centroid().getInfo()['geometry']['coordinates'][::-1]
map = folium.Map(location=centroid, zoom_start=16)
# Layer names describe the collection and year each composite is built from
# (they previously read 's2_2018' / 's2_2021').
folium.TileLayer(
    tiles=l8_2016['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='l8_2016',
  ).add_to(map)
folium.TileLayer(
    tiles=l8_2021['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='l9_2021',
  ).add_to(map)
folium.TileLayer(
    tiles=id['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='feature',
  ).add_to(map)
map.add_child(folium.LayerControl())
map

In [None]:
# Distinct 'WRS_PATH' values present in the merged collection.
paths = landsat.aggregate_array('WRS_PATH').distinct().getInfo()
paths

## Reprojecting different tiles to single projection

In [None]:
# Empty list; `ids` is reassigned by later cells.
ids = []

In [None]:
# Keep the images of WRS path 233 as they are and reproject the images of
# path 1 onto the projection of the first path-233 image.
landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 233))
landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 1)).map(resample(landsat1.first()))

landsat_again = landsat1.merge(landsat2)

In [None]:
# Same as the previous cell with the roles of the two paths swapped: path 1
# is the reference and path 233 is reprojected. Whichever of the two cells
# runs last defines `landsat_again`.
landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 1)) #
landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 233)).map(resample(landsat1.first()))

landsat_again = landsat1.merge(landsat2)

In [None]:
# Reduce every image to a (date, mean NDVI over the polygon) feature and
# fetch the result with getInfo().
data = landsat_again.map(sample(fea)).getInfo()

In [None]:
# Parse the date of every sampled feature.
dates = [pd.to_datetime(feature['properties']['date']) for feature in data['features']]
# Features without an 'ndvi' property contribute None.
ndvi = [feature['properties'].get('ndvi') for feature in data['features']]

In [None]:
# Scatter plot of the polygon-mean NDVI time series.
fig = plt.figure(figsize=(10,4))
plt.scatter(dates, ndvi)
plt.xlim(pd.to_datetime('2000-01-01'), pd.to_datetime('2022-06-01'))
plt.grid()
plt.ylabel('NDVI', fontsize=16)
plt.tick_params(axis='both', labelsize=14)
# NOTE(review): assumes Google Drive is mounted at /content/drive — confirm.
plt.savefig('/content/drive/MyDrive/def_00000048.png', dpi=300,)

In [None]:
# Turn the collection into one multi-band float image and sample it as pixel
# arrays over the polygon geometry; -99 is the default for missing values.
stack = landsat_again.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()

In [None]:
# Same sampling over the polygon's bounding box buffered by 300 (presumably
# metres — confirm). This overrides the `stack` of the previous cell.
stack = landsat_again.toBands().toFloat().sampleRectangle(fea.geometry().bounds().buffer(300), defaultValue=-99.).getInfo()

In [None]:
# Band names returned by sampleRectangle, one per image.
imgs = list(stack['properties'])
# Each band name is expected to contain the acquisition date as 8 digits
# (YYYYMMDD). A raw string is used so that `\d` is not treated as an
# (invalid) string escape sequence.
dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', n)[0], format='%Y%m%d') for n in imgs]
# 'ix' remembers the original position of every image before sorting by date.
df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
df_dates = df_dates.sort_values('date')
# Position (in date order) of the first image acquired after 2016-01-01.
ix_anom = df_dates.reset_index()[df_dates.reset_index()['date'] > '2016-01-01'].index[0]
# Stack the pixel arrays in chronological order: axis 0 is time.
reorder_ix = df_dates['ix'].values
reorder_imgs = [imgs[n] for n in reorder_ix]
stacked = np.stack([stack['properties'][n] for n in reorder_imgs])
# Negative values (including the -99 default) are treated as missing.
stacked[stacked < 0] = np.nan

In [None]:
# Display the second image (index 1 along the time axis) of the stack.
plt.imshow(stacked[1, :, :])
plt.colorbar()

In [None]:
# Spatial mean of every image against its date.
# NOTE(review): np.mean propagates NaN, so an image containing any NaN pixel
# is plotted as NaN; later cells use np.nanmean instead.
plt.scatter(df_dates['date'].values, np.apply_over_axes(np.mean, stacked, [1,2]))

## npphen Functions

In [None]:
def matched_indices(x, y, masked=False):
    """
    Locate, for every element of `x`, an index into `y` via a sorted search.

    Parameters:
    - x: numpy array of values to look up.
    - y: 1-D numpy array searched through its argsort order.
    - masked: bool, optional
        When True, a masked array is returned in which positions whose value
        in `x` differs from the located element of `y` are masked.

    Returns:
    - Array (or masked array) of indices into `y`, shaped like `x`.
    """
    order = y.argsort()
    flat_x = x.ravel()

    # Insertion position of every x value in sorted y, mapped back to the
    # corresponding position in the unsorted y.
    hits = order[y.searchsorted(flat_x, sorter=order)]
    located = hits.reshape(x.shape)

    if masked:
        # A located element that differs from the looked-up value is invalid.
        invalid = flat_x != y[hits]
        return np.ma.array(located, mask=invalid.reshape(x.shape))
    return located


def calc_kernel(kernel, sample):
    """Evaluate the callable `kernel` on `sample` and return the result.

    Kept as a module-level function so it can be handed to a worker pool.
    """
    evaluated = kernel(sample)
    return evaluated


def Phen(array, dates, nGS, rge=[0, 1], h=2, plot=False, dask='multiprocessing'):
    """
    Calculate the Phenology index.

    NOTE: only the signature and this docstring are present here. The body
    holds no statements, so calling the function returns None. The
    parameters and returns below describe the intended interface.

    Parameters:
    - array: numpy array
        The input array.
    - dates: list
        The list of dates corresponding to the array.
    - nGS: numpy array
        The array of growing season days.
    - rge: list, optional
        The range of values for the index. Default is [0, 1].
    - h: int, optional
        The value of h. Default is 2.
    - plot: bool, optional
        Whether to plot the results. Default is False.
    - dask: str, optional
        The dask method to use. Default is 'multiprocessing'.

    Returns (intended):
    - ref: numpy array
        The calculated Phenology index.
    - fig: matplotlib figure
        The plotted figure (if plot=True).
    """
    # Function code here


def PhenKplot(array, dates, nGS, rge=[0, 1], h=2, ylim=(0, 0.15)):
    """
    Plot the Phenology index.

    NOTE: this definition is a placeholder without statements (it returns
    None) and is replaced by the definition of the same name that follows
    it in this cell.

    Parameters:
    - array: numpy array
        The input array.
    - dates: list
        The list of dates corresponding to the array.
    - nGS: numpy array
        The array of growing season days.
    - rge: list, optional
        The range of values for the index. Default is [0, 1].
    - h: int, optional
        The value of h. Default is 2.
    - ylim: tuple, optional
        The y-axis limits for the plot. Default is (0, 0.15).

    Returns (intended):
    - ref: numpy array
        The calculated Phenology index.
    """
    # Function code here


def PhenKplot(array, dates, nGS, rge=[0, 1], h=2, ylim=(0, 0.15)):
    """
    Plot the Phenology index.

    NOTE: the plotting part of this function is not included in this
    notebook (see the trailing placeholder comment). As written, the function
    validates its inputs, prepares the day-of-year table and then returns
    None, unless one of the early NaN returns below applies.

    Parameters:
    - array: numpy array
        The input array (one value per date, NaN = missing).
    - dates: list
        The list of dates corresponding to the array.
    - nGS: numpy array
        The array of growing season days; only its length is used here.
    - rge: list, optional
        The range of values for the index. Default is [0, 1].
    - h: int, optional
        1 keeps the day of year; 2 shifts it by half a year. Default is 2.
    - ylim: tuple, optional
        The y-axis limits for the plot. Default is (0, 0.15).

    Returns:
    - numpy array of NaN with len(nGS) elements when `array` is all NaN,
      has fewer than 10 observations, or has fewer than 10 % valid values.

    Raises:
    - SystemExit (through sys.exit) when `rge`, `dates` or `h` are invalid.
    """
    if (len(rge)!=2):
        sys.exit("rge must be a vector of maximum 2")
    if (rge[0] > rge[1]):
        sys.exit("rge vector order must be minimum/maximum")
    if (len(dates) != len(array)):
        sys.exit("N of dates and files do not match")
    if np.isnan(array).all():
        return(np.repeat(np.nan, len(nGS)))

    df = pd.DataFrame(data={'x':dates, 'y':array})
    df['x'] = pd.to_datetime(df['x'], format='%Y-%m-%d')
    df['doy'] = df['x'].dt.dayofyear
    # Too little data to estimate a phenology. The two conditions are
    # evaluated separately: the former `(a) | (b) < c` parsed as
    # `((a) | b) < c`, which disabled the "fewer than 10 rows" check.
    n_valid = len(df.dropna(subset=['y']))
    if (len(df) < 10) or (n_valid < 0.1 * len(df)):
        return(np.repeat(np.nan, len(nGS)))
    if (h!=1) & (h!=2):
        sys.exit("Invalid h")
    # Row 0: days 1..365; row 1: the same days starting at day 185.
    # NOTE(review): `dogs` and `xmin` are presumably used by the omitted part.
    dogs = np.vstack([np.arange(1,366,1),
                      np.array(np.arange(185,366,1).tolist() + np.arange(1,185,1).tolist())])

    if (h==2):
        # Shift the day of year by half a year.
        df['doy'] = df['doy'].apply(lambda x: x + 182 if x < 184 else x - 183)
    xmin = 1

    # Function code here


def PhenAnoma(array, dates, nGS, anop, refp, h=2, rge=[0, 1], plot=False, dask='multiprocessing'):
    """
    Compute anomalies against a kernel-density phenological reference.

    A two-dimensional kernel density of (day, value) is estimated from the
    reference period. For every day the value with the highest normalised
    density is taken as the expected value, and it is subtracted from the
    observations of the anomaly period.

    Parameters:
    - array: numpy array
        One value per date (NaN = missing).
    - dates: sequence of dates matching `array`.
    - nGS: not used by this function.
    - anop: [first, last] row labels of the anomaly period. Label slicing is
        inclusive; a last value beyond the final row is truncated.
    - refp: [first, last] row labels of the reference period (inclusive).
    - h: int, optional
        1 keeps the day of year; 2 shifts it by half a year. Default is 2.
    - rge: list, optional
        [minimum, maximum] of the evaluated value range. Default is [0, 1].
    - plot: bool, optional
        When True a figure of the anomalies is created and returned too.
    - dask: str, optional
        Density backend: 'FFT' (KDEpy FFTKDE), 'multiprocessing' (scipy
        gaussian_kde evaluated by 4 worker processes) or 'dask' (scipy
        gaussian_kde evaluated in this process).

    Returns:
    - anom: numpy array of anomalies for the anomaly period, or
      (anom, fig) when plot=True.
    - An array of NaN with len(dates) - anop[0] elements (never a tuple)
      when `array` is all NaN, has fewer than 10 observations or has fewer
      than 10 % valid values.

    Raises:
    - SystemExit (through sys.exit) for invalid `rge`, `dates`, `refp`,
      `anop`, `h` or `dask` values.
    """
    if (len(rge)!=2):
        sys.exit("rge must be a vector of maximum 2")
    if (rge[0] > rge[1]):
        sys.exit("rge vector order must be minimum/maximum")
    if (len(dates) != len(array)):
        sys.exit("N of dates and files do not match")
    if np.isnan(array).all():
        return(np.repeat(np.nan, len(dates) - anop[0]))

    ref_min = min(refp)
    ref_max = max(refp)
    ano_min = min(anop)
    ano_max = max(anop)
    ano_len = ano_max-ano_min+1

    if (ref_min >= ref_max) | (ano_min >= ano_max):
        sys.exit("for refp or anop, lower value > upper value")

    df = pd.DataFrame(data={'x':dates, 'y':array})
    df['x'] = pd.to_datetime(df['x'], format='%Y-%m-%d')
    df['doy'] = df['x'].dt.dayofyear
    # Too little data to estimate a reference. The two conditions are
    # evaluated separately: the former `(a) | (b) < c` parsed as
    # `((a) | b) < c`, which disabled the "fewer than 10 rows" check.
    n_valid = len(df.dropna(subset=['y']))
    if (len(df) < 10) or (n_valid < 0.1 * len(df)):
        return(np.repeat(np.nan, len(dates) - anop[0]))
    if h not in (1, 2):
        sys.exit("Invalid h")

    if (h==2):
        # Shift the day of year by half a year.
        df['doy'] = df['doy'].apply(lambda x: x + 182 if x < 184 else x - 183)

    # Label-based (inclusive) slices of the reference and anomaly periods.
    df1 = df.loc[ref_min:ref_max]
    df2 = df.loc[ano_min:ano_max]

    # Evaluation grid: 365 day positions x 500 value positions.
    xmin = 0
    xmax = 366
    ymin = rge[0]
    ymax = rge[1]
    x, y = np.mgrid[xmin:xmax:365j, ymin:ymax:500j]
    positions = np.vstack([x.ravel(), y.ravel()])
    values = df1[['doy', 'y']].dropna(how='any').values.T

    if dask == 'FFT':
        fftkde = FFTKDE(kernel="gaussian", bw=0.5).fit(values.T)
        fft_res = fftkde.evaluate(positions.T)
        f = np.reshape(fft_res.T, x.shape)
    elif dask == 'multiprocessing':
        kernel = stats.gaussian_kde(values, bw_method='silverman')
        # Split the grid into one chunk per worker process.
        cores = 4
        torun = np.array_split(positions, cores, axis=1)
        # The context manager releases the worker processes once the
        # (blocking) starmap call has returned.
        with Pool(processes=cores) as pool:
            results = pool.starmap(calc_kernel, zip(itertools.repeat(kernel), torun))
        f = np.reshape(np.concatenate(results).T, x.shape)
    elif dask == 'dask':
        kernel = stats.gaussian_kde(values, bw_method='silverman')
        f = np.reshape(kernel(positions).T, x.shape)
    else:
        # Previously an unknown method only failed later on the undefined `f`.
        sys.exit("Invalid dask method")

    eval_points = np.linspace(rge[0], rge[1], 500)
    # Normalise the density of every day so that each row sums to one.
    k1con = f/f.sum(axis=1).reshape((f.shape[0], 1))
    maxy = k1con.max(axis=1)
    # Expected value per day: the value of highest density (median on ties).
    maxy = np.array([np.median(eval_points[np.where(k1con[n,:] == maxy[n])]) for n in range(k1con.shape[0])])
    # Day 366 (31 December of a leap year with h=1) has no row of its own in
    # the 365-row reference, so it is mapped onto the last row.
    day_rows = np.clip(df2['doy'].values.astype(int) - 1, 0, len(maxy) - 1)
    anom = df2['y'].values - maxy[day_rows]
    if plot:
        fig = plt.figure()
        plt.plot(df['x'].iloc[ano_min:ano_max], anom[0:ano_len])
        plt.axhline(0, c="red")
        plt.xlabel('Time')
        plt.ylabel('Anomaly')
        return anom, fig
    else:
        return anom

In [None]:
# Run PhenKplot on the time series of pixel (8, 8) with h=1. The third
# argument holds the days 1..365 in steps of round(365/48) = 8.
PhenKplot(stacked[:, 8, 8], df_dates['date'].values, np.arange(1, 366, round(365/48)), ylim=(0, 1), h=1)
plt.show()

In [None]:
# Same call as the previous cell: pixel (8, 8), h=1, days in steps of 8.
PhenKplot(stacked[:, 8, 8], df_dates['date'].values, np.arange(1, 366, round(365/48)), ylim=(0, 1), h=1)
plt.show()

In [None]:
# PhenKplot on the NaN-aware spatial mean of every image, with h=2.
# NOTE(review): assumes PhenKplot returns a matplotlib figure; the definition
# visible in this notebook is truncated and returns None — confirm.
fig = PhenKplot(np.apply_over_axes(np.nanmean, stacked, (1, 2)).ravel(), df_dates['date'].values, np.arange(1, 366, round(365/48)), ylim=(0, 1), h=2)
fig.savefig('/content/drive/MyDrive/npphen_def_0000000a.png', dpi=300, )
plt.show()

In [None]:
# Position (in date order) of the first image acquired after 2016-01-01.
ix_anom = df_dates.reset_index()[df_dates.reset_index()['date'] > '2016-01-01'].index[0]
# Standard deviation of all pixels of all images before that position.
std_all = np.nanstd(stacked[:ix_anom, :, :])
# Anomalies of the spatial-mean series: rows [0, ix_anom] are the reference
# period and rows from ix_anom onward the anomaly period. With plot=True
# PhenAnoma returns (anomalies, figure); [0] keeps the anomalies.
# NOTE(review): on PhenAnoma's early NaN returns a bare array comes back, in
# which case [0] would select its first element instead.
anom_mean = PhenAnoma(np.apply_over_axes(np.nanmean, stacked, (1, 2)).ravel(),
                        df_dates['date'].values,
                        np.arange(1, 366, round(365/48)),
                        [ix_anom, len(df_dates)],
                        [0, ix_anom], h=1,
                        rge=[0, 1],
                        plot=True,
                        dask='multiprocessing')[0]

In [None]:
# Anomalies of the mean series paired with the dates of the anomaly period.
test_mean = pd.DataFrame(data={'an':anom_mean, 'date':df_dates.reset_index().iloc[ix_anom: len(df_dates)]['date']})
test_mean2 = test_mean.dropna(subset=['an'])
# 1 where the absolute anomaly exceeds twice the reference standard deviation.
test_mean2['bool'] = [int(n) for n in (np.abs(test_mean2['an']) > 2 * std_all)] # & an < 0
# Running count within groups that restart at every unflagged row, so a
# flagged row holds its position in the current run of flagged rows.
test_mean2['cumsum'] = test_mean2.groupby(test_mean2['bool'].eq(0).cumsum()).cumcount().tolist()
# Only negative anomalies are kept.
test_mean2 = test_mean2[test_mean2['an'] < 0]
test_mean2 = test_mean2.reset_index()
fig = plt.figure(figsize=(10, 4))
plt.scatter(test_mean['date'], test_mean['an'])
plt.axhline(0, color='red')
plt.ylabel('Anomalies', fontsize=16)
plt.tick_params(axis='both', labelsize=14)
# When a run reaches a count of 5, mark the date four rows before that row.
if(len(test_mean2[test_mean2['cumsum'] == 5]) > 0):
        ix_mean = np.where(test_mean2['cumsum'] == 5)[0][0]
        plt.axvline(test_mean2.iloc[ix_mean-4]['date'], color='k', ls='--')
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/npphen_def_anom_00000048.png', dpi=300)

In [None]:
def defo_break(array, dates, nGS, ix_anom, h=2, rge=[0,1], plot=False, dask='FFT', ano_number=5):
    """
    Calculate the deforestation break.

    Anomalies of the period starting at `ix_anom` are computed with PhenAnoma
    against the reference rows [0, ix_anom]. An anomaly is flagged when its
    absolute value exceeds twice the standard deviation of the values before
    `ix_anom`. A break is reported when a negative anomaly holds the count
    `ano_number` in a run of flagged anomalies.

    Parameters:
    - array: numpy array
        The input array (one value per row of `dates`).
    - dates: pandas DataFrame
        The DataFrame containing the 'date' and 'ix' columns.
    - nGS: numpy array
        The array of growing season days (passed on to PhenAnoma).
    - ix_anom: int
        Position of the first row of the anomaly period.
    - h: int, optional
        The value of h. Default is 2.
    - rge: list, optional
        The range of values for the index. Default is [0, 1].
    - plot: bool, optional
        Whether PhenAnoma plots the anomalies. Default is False.
    - dask: str, optional
        The density method used by PhenAnoma. Default is 'FFT'.
    - ano_number: int, optional
        Run count of flagged anomalies that triggers a break. Default is 5.

    Returns:
    - ix: value of the 'ix' column at the start of the run
        (np.nan when no break is found).
    - date: datetime
        The date at the start of the run (1900-01-01 when no break is found).
    """
    anomalies_array = PhenAnoma(array, dates['date'].values, nGS, [ix_anom, len(dates)], [0, ix_anom], h=h, rge=rge, plot=plot, dask=dask)
    # With plot=True PhenAnoma may return (anomalies, figure).
    if isinstance(anomalies_array, tuple):
        anomalies_array = anomalies_array[0]
    std = np.nanstd(array[:ix_anom])
    anomaly_rows = dates.reset_index().iloc[ix_anom: len(dates)]
    df = pd.DataFrame(data={'an':anomalies_array,
                            'date':anomaly_rows['date'],
                            'ix':anomaly_rows['ix']})
    df = df.reset_index()
    df2 = df.copy().dropna(subset=['an'])
    df2['bool'] = [int(n) for n in (np.abs(df2['an']) > 2 * std)]
    # Running count within groups that restart at every unflagged anomaly.
    df2['cumsum'] = df2.groupby(df2['bool'].eq(0).cumsum()).cumcount().tolist()
    df2 = df2[df2['an'] < 0]

    bks = df2[df2['cumsum'] == ano_number]
    if len(bks) > 0:
        # Step back to the start of the run. The offset follows `ano_number`
        # (4 for the default of 5) instead of being hard-coded.
        first_row = df.loc[bks.iloc[0].name - (ano_number - 1)]
        return first_row['ix'], first_row['date']
    else:
        return np.nan, pd.to_datetime('1900-01-01')


def defo_breakS2(array, dates, nGS, ix_anom, h=2, rge=[0,1], plot=False, dask='FFT', ano_number=5):
    """
    Calculate the deforestation break for Sentinel-2 data.

    Anomalies of the period starting at `ix_anom` are computed with PhenAnoma
    using all rows as reference. An anomaly is flagged when its absolute
    value exceeds 1.25 times the interquartile range of `array`. Candidates
    are rows with a run count of at least 3 whose 5-step moving average of
    the value differences is negative.

    Parameters:
    - array: numpy array
        The input array (one value per row of `dates`).
    - dates: pandas DataFrame
        The DataFrame containing the 'date' and 'ix' columns.
    - nGS: numpy array
        The array of growing season days (passed on to PhenAnoma).
    - ix_anom: int
        Position of the first row of the anomaly period.
    - h: int, optional
        The value of h. Default is 2.
    - rge: list, optional
        The range of values for the index. Default is [0, 1].
    - plot: bool, optional
        Whether PhenAnoma plots the anomalies. Default is False.
    - dask: str, optional
        The density method used by PhenAnoma. Default is 'FFT'.
    - ano_number: int, optional
        Not used by this function. Default is 5.

    Returns:
    - ix: value of the 'ix' column at the detected break
        (np.nan when no break is found).
    - date: datetime
        The date of the detected break (1900-01-01 when no break is found).
    """
    anomalies_array = PhenAnoma(array, dates['date'].values, nGS, [ix_anom, len(dates)], [0, len(dates)], h=h, rge=rge, plot=plot, dask=dask)
    # With plot=True PhenAnoma may return (anomalies, figure).
    if isinstance(anomalies_array, tuple):
        anomalies_array = anomalies_array[0]
    iqr = np.nanquantile(array, 0.75) - np.nanquantile(array, 0.25)
    anomaly_rows = dates.reset_index().iloc[ix_anom: len(dates)]
    # Only the values of the anomaly period are stored, so every column has
    # the same length. The full-length array only fitted when ix_anom == 0.
    df = pd.DataFrame(data={'an':anomalies_array,
                            'date':anomaly_rows['date'],
                            'ix':anomaly_rows['ix'],
                            'ndvi':np.asarray(array)[ix_anom: len(dates)]})

    df = df.dropna(subset=['an'])
    df = df.reset_index()
    df['bool'] = [int(n) for n in (np.abs(df['an']) > 1.25 * iqr)]
    # Running count within groups that restart at every unflagged anomaly.
    df['cumsum'] = df.groupby(df['bool'].eq(0).cumsum()).cumcount().tolist()
    df['diff'] = df['ndvi'].diff(1)
    df['ma'] = df['diff'].rolling(5).mean()
    df2 = df[(df['cumsum'] >= 3) & (df['ma'] < 0)]
    # `df` already owns an 'index' column, so this reset stores the row
    # labels of `df` in a column named 'level_0'.
    df2 = df2.reset_index()
    if len(df2) > 0:
        if df2.iloc[0]['an'] > iqr:
            # Use the candidate with the steepest average decrease.
            idx_mean = df2['ma'].idxmin()
            idx_mean = df2.loc[idx_mean]['level_0']
            return df.loc[idx_mean]['ix'], df.loc[idx_mean]['date']
        else:
            # Use the row four steps before the first candidate.
            idx_mean = df2.iloc[0]['level_0']
            return df.loc[idx_mean-4]['ix'], df.loc[idx_mean-4]['date']
    else:
        return np.nan, pd.to_datetime('1900-01-01')

def help():
    """
    Print the docstrings of defo_break and defo_breakS2, each under a heading.
    """
    sections = (
        ("defo_break:", defo_break),
        ("\ndefo_breakS2:", defo_breakS2),
    )
    for heading, documented in sections:
        print(heading)
        print(documented.__doc__)


In [None]:
import datetime


def get_days(df):
    """Return a function converting a break label to days since 2016-01-01.

    The returned function looks up the 'date' of row `x` in `df` and returns
    the elapsed days as a float. Any `x` that is not greater than 0 gives NaN.
    """
    origin = pd.to_datetime('2016-01-01')

    def to_days(x):
        if x > 0:
            return (df.loc[x]['date'] - origin).days + 0.0
        return np.nan

    return to_days

In [None]:
# Exact zeros are replaced by a small positive value.
# NOTE(review): the reason is not visible here — confirm.
stacked[stacked == 0] = 0.001
# Run defo_break on the time series (axis 0) of every pixel. The first
# output plane holds the break indices and the second one the break dates.
breaks, break_dates = np.apply_along_axis(defo_break,
                                          0,
                                          stacked,
                                          df_dates,
                                          np.arange(1, 366, round(365/48)),
                                          ix_anom,
                                          h=2,
                                          rge=[0, 1],
                                          plot=False,
                                          dask='FFT')

In [None]:
# Rows of the anomaly period (positions ix_anom onward in date order),
# indexed by the original image index 'ix' so breaks can be looked up.
df_dates_anom = df_dates.reset_index().iloc[ix_anom:].set_index('ix')

# Define a function get_days that converts breaks to days from '2016-01-01'
def get_days(df):
    """Return a function converting a break label to days since 2016-01-01.

    The returned function looks up the 'date' of row `x` in `df` and returns
    the elapsed days as a float. Any `x` that is not greater than 0 gives NaN.
    """
    start = pd.to_datetime('2016-01-01')

    def days_since_start(x):
        if x > 0:
            return (df.loc[x]['date'] - start).days + 0.0
        return np.nan

    return days_since_start

# Create a vectorized version of the get_days function using np.vectorize
vfunc = np.vectorize(get_days(df_dates_anom))

# Apply the vectorized function to the array breaks and store the result in days
days = vfunc(breaks)

# Four evenly spaced colour-bar tick values spanning the valid days.
# nanmin/nanmax skip the pixels without a break (NaN) however many there
# are, and keep the largest value when no pixel is NaN.
values = np.linspace(np.nanmin(days), np.nanmax(days), 4)

# Create a list of dates by adding the values as days to '2016-01-01'.
# `timedelta` (imported at the top of the notebook) is used directly because
# the name `datetime` is bound to both the module and the class in this file.
dates = [pd.to_datetime('2016-01-01') + timedelta(days=float(n)) for n in values]

In [None]:
# Coordinates of the first ring of the buffered bounding box; the first
# column is used as longitude and the second as latitude.
coords = np.array(fea.geometry().bounds().buffer(300).coordinates().getInfo())[0, :, :]
longs = coords[:, 0]
lats = coords[:, 1]

In [None]:
# Map of the break day per pixel; the colour bar is labelled with dates.
fig, ax = plt.subplots()
f1 = ax.imshow(days, vmin=np.nanmin(days), vmax=np.nanmax(days))
cb = fig.colorbar(f1, ticks=values)
cb.ax.set_yticklabels([n.strftime('%Y %m %d') for n in dates])
# NOTE(review): tick positions 0..25 are hard-coded and so assume an image of
# roughly 25 pixels per side — confirm for other polygons.
plt.xticks([0, 5, 10, 15, 20, 25], labels=np.round(np.linspace(np.min(longs), np.max(longs), 6), 4))
plt.yticks([0, 5, 10, 15, 20, 25], labels=np.round(np.linspace(np.max(lats), np.min(lats), 6), 4))
plt.tick_params(axis='x', rotation=30)
fig.savefig('/content/drive/MyDrive/npphen_defobreaks_0000048.png', dpi=300)


In [None]:
# Define the labels and ids
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']

# Create a figure with subplots
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))

# Iterate over the ids
for i, n in enumerate(ids):
    # Filter the forested data based on the id
    fea = forested.filter(ee.Filter.eq('id', n)).first()

    # Create a buffered geometry
    buffered = fea.geometry().bounds().buffer(300)

    # Retrieve the Landsat images for different collections and apply filters
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    # Merge the Landsat collections and select the 'ndvi' band
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')

    # The third polygon uses WRS path 1 only; the others use path 233 plus
    # path 1 reprojected onto the projection of the first path-233 image
    if i == 2:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 1))
        landsat_again = landsat1
    else:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 233))
        landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 1)).map(resample(landsat1.first()))
        landsat_again = landsat1.merge(landsat2)

    # Convert the Landsat images to a stack and extract the information
    stack = landsat_again.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # Raw string so that `\d` is not treated as an (invalid) string escape
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    ix_anom = df_dates.reset_index()[df_dates.reset_index()['date'] > '2016-01-01'].index[0]
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Zero and negative values (including the -99 default) count as missing
    stacked[stacked <= 0] = np.nan

    # Calculate the deforestation breaks
    breaks, break_dates = np.apply_along_axis(defo_break,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    # Reset the index of the dates dataframe
    df_dates_anom = df_dates.reset_index().iloc[ix_anom:].set_index('ix')

    # Convert the break indices to days since 2016-01-01
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)

    # Four evenly spaced colour-bar tick values spanning the valid days;
    # nanmin/nanmax skip pixels without a break however many there are
    values = np.linspace(np.nanmin(days), np.nanmax(days), 4)
    # `timedelta` is used directly because the name `datetime` is bound to
    # both the module and the class in this notebook
    dates = [pd.to_datetime('2016-01-01') + timedelta(days=float(v)) for v in values]

    # Get the coordinates of the geometry
    coords = np.array(buffered.coordinates().getInfo())[0, :, :]
    longs = coords[:, 0]
    lats = coords[:, 1]

    # Plot the deforestation breaks
    f1 = ax[i].imshow(days, vmin=np.nanmin(days), vmax=np.nanmax(days), cmap='viridis')
    cb = fig.colorbar(f1, ticks=values, ax=ax[i])
    cb.ax.set_yticklabels([d.strftime('%Y %m %d') for d in dates])
    cb.ax.tick_params(labelsize=10)
    ax[i].set_xticks(np.linspace(0, days.shape[1], 4))
    ax[i].set_xticklabels(labels=np.round(np.linspace(np.min(longs), np.max(longs), 4), 4))
    ax[i].set_yticks(np.linspace(0, days.shape[0], 4))
    ax[i].set_yticklabels(labels=np.round(np.linspace(np.max(lats), np.min(lats), 4), 4))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)

    # Add labels to the subplots
    if i in (2, 3):
        ax[i].text(-23, 0, labs[i], fontsize=12, weight='bold')
    else:
        ax[i].text(-25, 0, labs[i], fontsize=12, weight='bold')

# Adjust the layout of the subplots
fig.tight_layout()

# Save the figure
fig.savefig('/content/drive/MyDrive/npphen_undbreaks.png', dpi=300)

In [None]:
import rasterio as rio

In [None]:
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']

# For each undisturbed site, run the per-pixel break detection on the Landsat
# NDVI stack and write the resulting raster to a GeoTIFF on Drive.
for i, n in enumerate(ids):
    # The fifth site (label E) is read from `final` (ID 370) instead of `forested`.
    if i == 4:
        fea = final.filter(ee.Filter.eq('ID', 370)).first()
    else:
        fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)

    # Restrict every sensor to the WRS path with the highest Landsat-8 scene
    # count over the feature.
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry())
    paths = l8.aggregate_histogram('WRS_PATH').getInfo()
    path = max(paths, key=paths.get)
    on_path = ee.Filter.eq('WRS_PATH', float(path))
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).filter(on_path).map(mask).map(ndvil8)
    l8 = l8.filter(on_path).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).filter(on_path).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).filter(on_path).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')

    # Fetch the pixel values client-side over the buffered bounding box,
    # using -99 as the default value.
    stack = landsat.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is parsed from the 8-digit YYYYMMDD token in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Position, in date order, of the first acquisition after 2016-01-01.
    by_date = df_dates.reset_index()
    ix_anom = by_date[by_date['date'] > '2016-01-01'].index[0]
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Non-positive NDVI (which includes the -99 fill) is treated as missing.
    stacked[stacked <= 0] = np.nan

    # Apply the break detector to every pixel's time series (axis 0 is time).
    breaks, break_dates = np.apply_along_axis(defo_break,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    df_dates_anom = df_dates.reset_index().iloc[ix_anom:].set_index('ix')
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)
    # Rescale to a fractional year; presumably `days` is a day offset from
    # 2016-01-01 -- confirm against get_days.
    days = days/365 + 2016

    # Copy the dataset metadata from the template raster, closing the handle
    # once it has been read.
    with rio.open('/content/drive/MyDrive/testing{}landsat.tif'.format(labs[i])) as template:
        meta_out = template.meta
    with rio.open('/content/drive/MyDrive/defoNpphen_{}landsat2.tif'.format(labs[i]), "w", **meta_out) as dest:
        dest.write(days.reshape(1, days.shape[0], days.shape[1]))

In [None]:
# Select feature ID 370 from `final` and assemble its Landsat 5/7/8/9 NDVI
# collection on a single WRS path.
fea = final.filter(ee.Filter.eq('ID', 370)).first()
buffered = fea.geometry().bounds().buffer(300)

# Landsat-8 scene counts per WRS path over the feature; the path with the
# largest count is kept.
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry())
paths = l8.aggregate_histogram('WRS_PATH').getInfo()
top_count = sorted(paths.values())[-1]
path = [key for key in paths if paths[key] == top_count][0]
same_path = ee.Filter.eq('WRS_PATH', float(path))

l9 = (ee.ImageCollection("LANDSAT/LC09/C02/T1_L2")
      .filterBounds(fea.geometry())
      .filter(same_path)
      .map(mask)
      .map(ndvil8))
l8 = l8.filter(same_path).map(mask).map(ndvil8)
l7 = (ee.ImageCollection("LANDSAT/LE07/C02/T1_L2")
      .filterBounds(fea.geometry())
      .filter(same_path)
      .map(mask)
      .map(ndvil57)
      .filterDate('2000-01-01', '2022-06-01'))
l5 = (ee.ImageCollection("LANDSAT/LT05/C02/T1_L2")
      .filterBounds(fea.geometry())
      .filter(same_path)
      .map(mask)
      .map(ndvil57)
      .filterDate('2000-01-01', '2022-06-01'))

# Merge all sensors, keep only the NDVI band, and order by acquisition time.
landsat = (l9.merge(l8).merge(l7).merge(l5)
           .select('ndvi')
           .sort('system:time_start')
           .filterDate('2000-01-01', '2022-06-01'))

In [None]:
# Fetch the NDVI stack over the buffered bounding box of `fea`, using -99 as
# the default value.
stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry().bounds().buffer(300), defaultValue=-99.).getInfo()
imgs = list(stack['properties'])
# The acquisition date is parsed from the 8-digit YYYYMMDD token in each band name.
dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
df_dates = df_dates.sort_values('date')
# Position, in date order, of the first acquisition after 2016-01-01.
by_date = df_dates.reset_index()
ix_anom = by_date[by_date['date'] > '2016-01-01'].index[0]
reorder_ix = df_dates['ix'].values
reorder_imgs = [imgs[k] for k in reorder_ix]
stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
# Non-positive NDVI (which includes the -99 fill) is treated as missing.
stacked[stacked <= 0] = np.nan

# Apply the break detector to every pixel's time series (axis 0 is time).
breaks, break_dates = np.apply_along_axis(defo_break,
                                            0,
                                            stacked,
                                            df_dates,
                                            np.arange(1, 366, round(365/48)),
                                            ix_anom,
                                            h=2,
                                            rge=[0, 1],
                                            plot=False,
                                            dask='FFT')

df_dates_anom = df_dates.reset_index().iloc[ix_anom:].set_index('ix')
vfunc = np.vectorize(get_days(df_dates_anom))
days = vfunc(breaks)
# Rescale to a fractional year; presumably `days` is a day offset from
# 2016-01-01 -- confirm against get_days.
days = days/365 + 2016

In [None]:
# Write the break raster held in `days` for panel E.
# The panel label is fixed here instead of being read from `labs[i]`, which
# depended on whichever loop happened to run last in the kernel.
panel = 'E'
# Copy the dataset metadata from the template raster and close it right away.
with rio.open('/content/drive/MyDrive/testing{}landsat2.tif'.format(panel)) as template:
    meta_out = template.meta
with rio.open('/content/drive/MyDrive/defoNpphen_{}landsat2.tif'.format(panel), "w", **meta_out) as dest:
    dest.write(days.reshape(1, days.shape[0], days.shape[1]))

In [None]:
# Spatially averaged NDVI anomaly check for feature ID 370: flags the feature
# as disturbed (1) when a run of strongly anomalous observations is found
# after 2016-01-01, otherwise 0.
data = []
feature_id = 370
fea = final.filter(ee.Filter.eq('ID', feature_id)).first()

# Restrict every sensor to the WRS path with the highest Landsat-8 scene count.
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry())
paths = l8.aggregate_histogram('WRS_PATH').getInfo()
path = max(paths, key=paths.get)
on_path = ee.Filter.eq('WRS_PATH', float(path))
l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).filter(on_path).map(mask).map(ndvil8)
l8 = l8.filter(on_path).map(mask).map(ndvil8)
l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).filter(on_path).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).filter(on_path).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
# Sampled over the feature geometry itself (no 300 m buffer in this cell).
stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
imgs2 = list(stack['properties'])
# The acquisition date is parsed from the 8-digit YYYYMMDD token in each band name.
dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs2]
df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
df_dates2 = df_dates2.sort_values('date')
reorder_ix2 = df_dates2['ix'].values
reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
stacked2 = np.stack([stack['properties'][name] for name in reorder_imgs2])
# Negative values (which include the -99 fill) are treated as missing.
stacked2[stacked2 < 0] = np.nan

# Mean NDVI of all pixels per acquisition date.
mean_ndvi = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()

fig = plt.figure(figsize=(12, 4))
plt.scatter(df_dates2['date'], mean_ndvi)
# Title with the feature actually plotted; the previous `n` was a leftover
# loop variable from another cell.
plt.title(str(feature_id))
plt.grid()
plt.show()

# Position, in date order, of the first acquisition after 2016-01-01; earlier
# observations are the reference period, later ones are tested.
by_date2 = df_dates2.reset_index()
ix_anom = by_date2[by_date2['date'] > '2016-01-01'].index[0]
std_all = np.nanstd(stacked2[:ix_anom, :, :])
anom_mean = PhenAnoma(mean_ndvi,
                        df_dates2['date'].values,
                        np.arange(1, 366, round(365/48)),
                        [ix_anom, len(df_dates2)],
                        [0, ix_anom], h=1,
                        rge=[0, 1],
                        plot=True,
                        dask='multiprocessing')[0]
test_mean = pd.DataFrame(data={'an': anom_mean, 'date': by_date2.iloc[ix_anom: len(df_dates2)]['date']})
test_mean = test_mean.dropna(subset=['an'])
# Flag anomalies larger in magnitude than twice the reference-period std.
test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
# Running position inside each group that starts at a non-flagged observation.
test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
test_mean = test_mean[test_mean['an'] < 0]
test_mean = test_mean.reset_index()
plt.scatter(test_mean['date'], test_mean['an'])
plt.axhline(0, color='red')

run_hits = np.where(test_mean['cumsum'] == 5)[0]
if len(run_hits) > 0:
    ix_mean = run_hits[0]
    # NOTE(review): rows with non-negative anomalies were dropped above, so
    # `ix_mean-4` is a position in the filtered frame and can wrap around
    # when ix_mean < 4 -- confirm this is the intended break date.
    break_date = test_mean.iloc[ix_mean-4]['date']
    plt.axvline(break_date)
    data.append(1)
else:
    # No break detected: there is no date to report (previously this path
    # reached the print below with `ix_mean` undefined).
    break_date = None
    data.append(0)
plt.show()
print(feature_id, data[-1], break_date)

In [None]:
import matplotlib.dates as mdates
from datetime import date, timedelta


ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))

# One histogram per undisturbed site showing the per-pixel break dates that
# fall after 2016-01-01.
for i, n in enumerate(ids):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
    # The third site uses WRS path 1 only; the others combine path 233 with
    # path-1 scenes passed through `resample` against the first path-233 image.
    if i == 2:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 1))
        landsat_again = landsat1
    else:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 233))
        landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 1)).map(resample(landsat1.first()))
        landsat_again = landsat1.merge(landsat2)

    # Fetch the pixel values client-side, using -99 as the default value.
    stack = landsat_again.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is parsed from the 8-digit YYYYMMDD token in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Position, in date order, of the first acquisition after 2016-01-01.
    by_date = df_dates.reset_index()
    ix_anom = by_date[by_date['date'] > '2016-01-01'].index[0]
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Non-positive NDVI (which includes the -99 fill) is treated as missing.
    stacked[stacked <= 0] = np.nan

    # Apply the break detector to every pixel's time series (axis 0 is time).
    breaks, break_dates = np.apply_along_axis(defo_break,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    # Keep only break dates later than the start of the monitoring period.
    monitoring_start = pd.to_datetime('2016-01-01')
    break_dates = [d for d in break_dates.ravel() if d > monitoring_start]
    ax[i].hist(break_dates, 30)
    start, end = ax[i].get_xlim()
    ax[i].set_ylabel('Frequency', fontsize=11)
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    # Six evenly spaced ticks across the current x-limits. The limits are
    # already in matplotlib date numbers, so they are used directly rather
    # than being rebuilt from a hard-coded 0001-01-01 epoch.
    ax[i].set_xticks(np.linspace(start, end, 6))
    ax[i].tick_params(axis='x', rotation=30)
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/npphen_undhist.png', dpi=300)

In [None]:
labs = ['F', 'G', 'H', 'I', 'J']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))

# Horizontal offset (in data units) of the panel letter for each subplot;
# any index not listed uses -23.
label_x = {0: -39, 1: -29, 2: -26, 4: -21}

# One map per deforested site showing the per-pixel break value returned by
# get_days, with the colorbar labelled as calendar dates.
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
    # The fourth site uses WRS path 1 only; the others combine path 233 with
    # path-1 scenes passed through `resample` against the first path-233 image.
    if i == 3:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 1))
        landsat_again = landsat1
    else:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 233))
        landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 1)).map(resample(landsat1.first()))
        landsat_again = landsat1.merge(landsat2)

    # Fetch the pixel values client-side, using -99 as the default value.
    stack = landsat_again.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is parsed from the 8-digit YYYYMMDD token in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Position, in date order, of the first acquisition after 2016-01-01.
    by_date = df_dates.reset_index()
    ix_anom = by_date[by_date['date'] > '2016-01-01'].index[0]
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Non-positive NDVI (which includes the -99 fill) is treated as missing.
    stacked[stacked <= 0] = np.nan

    # Apply the break detector to every pixel's time series (axis 0 is time).
    breaks, break_dates = np.apply_along_axis(defo_break,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    df_dates_anom = df_dates.reset_index().iloc[ix_anom:].set_index('ix')
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)

    # Four evenly spaced colorbar ticks over the unique values, with the
    # largest unique value excluded.
    values = np.sort(np.unique(days))[:-1]
    values = np.linspace(np.min(values), np.max(values), 4)
    # Tick values are treated as day offsets from 2016-01-01. pd.Timedelta is
    # used because the notebook binds the name `datetime` to the class, which
    # has no `timedelta` attribute.
    dates = [pd.to_datetime('2016-01-01') + pd.Timedelta(days=float(v)) for v in values]

    # Corner coordinates of the buffered bounding box, used for axis labels.
    coords = np.array(buffered.coordinates().getInfo())[0, :, :]
    longs = coords[:, 0]
    lats = coords[:, 1]

    f1 = ax[i].imshow(days, vmin=np.nanmin(days), vmax=np.nanmax(days), cmap='viridis')
    cb = fig.colorbar(f1, ticks=values, ax=ax[i])
    cb.ax.set_yticklabels([d.strftime('%Y %m %d') for d in dates])
    cb.ax.tick_params(labelsize=10)
    # Label pixel positions with longitude/latitude; latitude runs from max
    # at the top row to min at the bottom row.
    ax[i].set_xticks(np.linspace(0, days.shape[1], 4))
    ax[i].set_xticklabels(labels=np.round(np.linspace(np.min(longs), np.max(longs), 4), 4))
    ax[i].set_yticks(np.linspace(0, days.shape[0], 4))
    ax[i].set_yticklabels(labels=np.round(np.linspace(np.max(lats), np.min(lats), 4), 4))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)
    ax[i].text(label_x.get(i, -23), 0, labs[i], fontsize=12, weight='bold')
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/npphen_defobreaks.png', dpi=300)

In [None]:
labs = ['F', 'G', 'H', 'I', 'J']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']

# For each deforested site, run the per-pixel break detection on the Landsat
# NDVI stack and write the resulting raster to a GeoTIFF on Drive.
# (No figure is created here: the cell only exports rasters.)
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
    # The fourth site uses WRS path 1 only; the others combine path 233 with
    # path-1 scenes passed through `resample` against the first path-233 image.
    if i == 3:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 1))
        landsat_again = landsat1
    else:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 233))
        landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 1)).map(resample(landsat1.first()))
        landsat_again = landsat1.merge(landsat2)

    # Fetch the pixel values client-side, using -99 as the default value.
    stack = landsat_again.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is parsed from the 8-digit YYYYMMDD token in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Position, in date order, of the first acquisition after 2016-01-01.
    by_date = df_dates.reset_index()
    ix_anom = by_date[by_date['date'] > '2016-01-01'].index[0]
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Non-positive NDVI (which includes the -99 fill) is treated as missing.
    stacked[stacked <= 0] = np.nan

    # Apply the break detector to every pixel's time series (axis 0 is time).
    breaks, break_dates = np.apply_along_axis(defo_break,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    df_dates_anom = df_dates.reset_index().iloc[ix_anom:].set_index('ix')
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)
    # Rescale to a fractional year; presumably `days` is a day offset from
    # 2016-01-01 -- confirm against get_days.
    days = days/365 + 2016

    # Copy the dataset metadata from the template raster, closing the handle
    # once it has been read.
    with rio.open('/content/drive/MyDrive/testing{}landsat.tif'.format(labs[i])) as template:
        meta_out = template.meta
    with rio.open('/content/drive/MyDrive/defoNpphen_{}landsat.tif'.format(labs[i]), "w", **meta_out) as dest:
        dest.write(days.reshape(1, days.shape[0], days.shape[1]))

In [None]:
labs = ['F', 'G', 'H', 'I', 'J']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']

# NOTE(review): this cell writes to the same output paths as the preceding
# export cell in the notebook; running both overwrites the same files.
# For each deforested site, run the per-pixel break detection on the Landsat
# NDVI stack and write the resulting raster to a GeoTIFF on Drive.
# (No figure is created here: the cell only exports rasters.)
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
    # The fourth site uses WRS path 1 only; the others combine path 233 with
    # path-1 scenes passed through `resample` against the first path-233 image.
    if i == 3:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 1))
        landsat_again = landsat1
    else:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 233))
        landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 1)).map(resample(landsat1.first()))
        landsat_again = landsat1.merge(landsat2)

    # Fetch the pixel values client-side, using -99 as the default value.
    stack = landsat_again.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is parsed from the 8-digit YYYYMMDD token in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Position, in date order, of the first acquisition after 2016-01-01.
    by_date = df_dates.reset_index()
    ix_anom = by_date[by_date['date'] > '2016-01-01'].index[0]
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Non-positive NDVI (which includes the -99 fill) is treated as missing.
    stacked[stacked <= 0] = np.nan

    # Apply the break detector to every pixel's time series (axis 0 is time).
    breaks, break_dates = np.apply_along_axis(defo_break,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    df_dates_anom = df_dates.reset_index().iloc[ix_anom:].set_index('ix')
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)
    # Rescale to a fractional year; presumably `days` is a day offset from
    # 2016-01-01 -- confirm against get_days.
    days = days/365 + 2016

    # Copy the dataset metadata from the template raster, closing the handle
    # once it has been read.
    with rio.open('/content/drive/MyDrive/testing{}landsat.tif'.format(labs[i])) as template:
        meta_out = template.meta
    with rio.open('/content/drive/MyDrive/defoNpphen_{}landsat.tif'.format(labs[i]), "w", **meta_out) as dest:
        dest.write(days.reshape(1, days.shape[0], days.shape[1]))

In [None]:
import matplotlib.dates as mdates
from datetime import date, timedelta


ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))

# One histogram per deforested site showing the per-pixel break dates that
# fall after 2016-01-01.
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(buffered).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
    # The fourth site uses WRS path 1 only; the others combine path 233 with
    # path-1 scenes passed through `resample` against the first path-233 image.
    if i == 3:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 1))
        landsat_again = landsat1
    else:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 233))
        landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 1)).map(resample(landsat1.first()))
        landsat_again = landsat1.merge(landsat2)

    # Fetch the pixel values client-side, using -99 as the default value.
    stack = landsat_again.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is parsed from the 8-digit YYYYMMDD token in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Position, in date order, of the first acquisition after 2016-01-01.
    by_date = df_dates.reset_index()
    ix_anom = by_date[by_date['date'] > '2016-01-01'].index[0]
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Non-positive NDVI (which includes the -99 fill) is treated as missing.
    stacked[stacked <= 0] = np.nan

    # Apply the break detector to every pixel's time series (axis 0 is time).
    breaks, break_dates = np.apply_along_axis(defo_break,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    # Keep only break dates later than the start of the monitoring period.
    monitoring_start = pd.to_datetime('2016-01-01')
    break_dates = [d for d in break_dates.ravel() if d > monitoring_start]
    ax[i].hist(break_dates, 30)
    start, end = ax[i].get_xlim()
    ax[i].set_ylabel('Frequency', fontsize=11)
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    # Six evenly spaced ticks across the current x-limits. The limits are
    # already in matplotlib date numbers, so they are used directly rather
    # than being rebuilt from a hard-coded 0001-01-01 epoch.
    ax[i].set_xticks(np.linspace(start, end, 6))
    ax[i].tick_params(axis='x', rotation=30)
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/npphen_defohist.png', dpi=300)

In [None]:
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))

# Horizontal offset of the panel letter, as a multiple of the image width;
# any index not listed uses -0.8.
label_factor = {0: -0.8, 1: -0.9, 2: -0.7, 4: -0.85}

# One map per undisturbed site showing the per-pixel break value computed
# from the Sentinel-2 NDVI stack, with the colorbar labelled as dates.
for i, n in enumerate(ids):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking).select('ndvi')
    # Fetch the pixel values client-side, using -99 as the default value.
    stack = S2.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is parsed from the first 8-digit token in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Non-positive NDVI (which includes the -99 fill) is treated as missing.
    stacked[stacked <= 0] = np.nan
    # Position, in date order, of the first acquisition after 2016-01-01.
    by_date = df_dates.reset_index()
    ix_anom = by_date[by_date['date'] > '2016-01-01'].index[0]
    # Apply the break detector to every pixel's time series (axis 0 is time).
    breaks, break_dates = np.apply_along_axis(defo_breakS2,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    # Unlike the Landsat cells, the whole date table is passed to get_days
    # (no slicing at ix_anom).
    df_dates_anom = df_dates.reset_index().set_index('ix')
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)

    # Four evenly spaced colorbar ticks over the unique values, with the
    # largest unique value excluded.
    values = np.sort(np.unique(days))[:-1]
    values = np.linspace(np.min(values), np.max(values), 4)
    # Tick values are treated as day offsets from 2016-01-01. pd.Timedelta is
    # used because the notebook binds the name `datetime` to the class, which
    # has no `timedelta` attribute.
    dates2 = [pd.to_datetime('2016-01-01') + pd.Timedelta(days=float(v)) for v in values]
    # Corner coordinates of the buffered bounding box, used for axis labels.
    coords = np.array(buffered.coordinates().getInfo())[0, :, :]
    longs = coords[:, 0]
    lats = coords[:, 1]

    f1 = ax[i].imshow(days, vmin=np.nanmin(days), vmax=np.nanmax(days), cmap='viridis')
    cb = fig.colorbar(f1, ticks=values, ax=ax[i])
    cb.ax.set_yticklabels([d.strftime('%Y %m %d') for d in dates2])
    cb.ax.tick_params(labelsize=10)
    # Label pixel positions with longitude/latitude; latitude runs from max
    # at the top row to min at the bottom row.
    ax[i].set_xticks(np.linspace(0, days.shape[1], 4))
    ax[i].set_xticklabels(labels=np.round(np.linspace(np.min(longs), np.max(longs), 4), 4))
    ax[i].set_yticks(np.linspace(0, days.shape[0], 4))
    ax[i].set_yticklabels(labels=np.round(np.linspace(np.max(lats), np.min(lats), 4), 4))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)
    ax[i].text(days.shape[1]*label_factor.get(i, -0.8), 0, labs[i], fontsize=12, weight='bold')

fig.tight_layout()
fig.savefig('/content/drive/MyDrive/npphen_S2undbreaks.png', dpi=300)

### Estimación para zonas estables

In [None]:
# Break-date rasters for the five stable (forested) reference polygons.
# For each polygon the Sentinel-2 NDVI stack is sampled over the buffered
# bounding box, defo_breakS2 is run on every pixel series, and the resulting
# break offsets are written to a GeoTIFF as fractional years.
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']

for i, n in enumerate(ids):
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking).select('ndvi')
    # One band per image; pixels without data are returned as -99.
    stack = S2.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is the first 8-digit run found in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Re-order the bands chronologically before stacking along axis 0.
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][band] for band in reorder_imgs])
    # Non-positive values (including the -99 fill) are treated as missing.
    stacked[stacked <= 0] = np.nan
    # Position, in chronological order, of the first image after 2016-01-01.
    sorted_dates = df_dates.reset_index()
    ix_anom = sorted_dates[sorted_dates['date'] > '2016-01-01'].index[0]
    breaks, break_dates = np.apply_along_axis(defo_breakS2,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    df_dates_anom = df_dates.reset_index().set_index('ix')
    # NOTE(review): get_days is defined elsewhere; presumably it maps a break
    # index to a day offset from 2016-01-01 - confirm.
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)
    # Day offsets divided by 365 and shifted by 2016 (fractional years).
    days = days/365 + 2016

    # Borrow the raster profile of the matching template and close the
    # template dataset right away instead of leaking its file handle.
    with rio.open('/content/drive/MyDrive/testing{}sentinel.tif'.format(labs[i])) as src:
        meta_out = src.meta
    with rio.open('/content/drive/MyDrive/{}_{}.tif'.format('defoNpphen', '{}sentinel'.format(labs[i])), "w", **meta_out) as dest:
        dest.write(days.reshape(1, days.shape[0], days.shape[1]))

In [None]:
# Give the x axis of the first five axes of the current `fig` a year-month
# date formatter, then export the figure.
for i in range(5):
    panel_xaxis = fig.axes[i].xaxis
    panel_xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
fig.savefig('/content/drive/MyDrive/npphen_S2undhisto.png', dpi=300)

### Estimación para zonas con cambios

In [None]:
# Break-date rasters for the five polygons with known changes.
# Same pipeline as the stable polygons: sample the Sentinel-2 NDVI stack,
# run defo_breakS2 per pixel and write the break offsets (as fractional
# years) to a GeoTIFF. No figure is drawn here, so none is created.
labs = ['F', 'G', 'H', 'I', 'J']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking).select('ndvi')
    if i in (2, 3):
        # For these two polygons only the first listed MGRS tile is kept.
        tile = S2.aggregate_array('MGRS_TILE').distinct().getInfo()
        S2 = S2.filter(ee.Filter.eq('MGRS_TILE', tile[0]))
    # One band per image; pixels without data are returned as -99.
    stack = S2.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is the first 8-digit run found in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Re-order the bands chronologically before stacking along axis 0.
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][band] for band in reorder_imgs])
    # Non-positive values (including the -99 fill) are treated as missing.
    stacked[stacked <= 0] = np.nan
    # Position, in chronological order, of the first image after 2016-01-01.
    sorted_dates = df_dates.reset_index()
    ix_anom = sorted_dates[sorted_dates['date'] > '2016-01-01'].index[0]
    breaks, break_dates = np.apply_along_axis(defo_breakS2,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    df_dates_anom = df_dates.reset_index().set_index('ix')
    # NOTE(review): get_days is defined elsewhere; presumably it maps a break
    # index to a day offset from 2016-01-01 - confirm.
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)
    # Day offsets divided by 365 and shifted by 2016 (fractional years).
    days = days/365 + 2016
    # Borrow the raster profile of the matching template and close the
    # template dataset right away instead of leaking its file handle.
    with rio.open('/content/drive/MyDrive/testing{}sentinel.tif'.format(labs[i])) as src:
        meta_out = src.meta
    with rio.open('/content/drive/MyDrive/{}_{}.tif'.format('defoNpphen', '{}sentinel'.format(labs[i])), "w", **meta_out) as dest:
        dest.write(days.reshape(1, days.shape[0], days.shape[1]))

In [None]:
# Maps of the detected break date for the five polygons with known changes,
# one panel per polygon, with a colour bar labelled in calendar dates and
# axes labelled with the longitude/latitude of the sampled rectangle.
labs = ['A', 'B', 'C', 'D', 'E']
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
for i, n in enumerate(ids):
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking).select('ndvi')
    if i in (2, 3):
        # For these two polygons only the first listed MGRS tile is kept.
        tile = S2.aggregate_array('MGRS_TILE').distinct().getInfo()
        S2 = S2.filter(ee.Filter.eq('MGRS_TILE', tile[0]))
    # One band per image; pixels without data are returned as -99.
    stack = S2.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is the first 8-digit run found in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Re-order the bands chronologically before stacking along axis 0.
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][band] for band in reorder_imgs])
    # Non-positive values (including the -99 fill) are treated as missing.
    stacked[stacked <= 0] = np.nan
    # Position, in chronological order, of the first image after 2016-01-01.
    sorted_dates = df_dates.reset_index()
    ix_anom = sorted_dates[sorted_dates['date'] > '2016-01-01'].index[0]
    breaks, break_dates = np.apply_along_axis(defo_breakS2,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    df_dates_anom = df_dates.reset_index().set_index('ix')
    # NOTE(review): get_days is defined elsewhere; presumably it maps a break
    # index to a day offset from 2016-01-01 - confirm.
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)

    # Four evenly spaced colour-bar ticks; the largest unique value is dropped
    # first (np.unique sorts NaN last, so this is the NaN entry when present).
    values = np.sort(np.unique(days))[:-1]
    values = np.linspace(np.min(values), np.max(values), 4)
    # pd.Timedelta is used because `datetime` is bound to the datetime class
    # by the notebook imports, which has no `timedelta` attribute.
    dates2 = [pd.to_datetime('2016-01-01') + pd.Timedelta(days=float(v)) for v in values]
    coords = np.array(buffered.coordinates().getInfo())[0, :, :]
    longs = coords[:, 0]
    lats = coords[:, 1]

    f1 = ax[i].imshow(days, vmin=np.nanmin(days), vmax=np.nanmax(days), cmap='viridis')
    cb = fig.colorbar(f1, ticks=values, ax=ax[i])
    cb.ax.set_yticklabels([d.strftime('%Y %m %d') for d in dates2])
    cb.ax.tick_params(labelsize=10)
    # Pixel positions are relabelled with the coordinate range of the rectangle.
    ax[i].set_xticks(np.linspace(0, days.shape[1], 4))
    ax[i].set_xticklabels(labels=np.round(np.linspace(np.min(longs), np.max(longs), 4), 4))
    ax[i].set_yticks(np.linspace(0, days.shape[0], 4))
    ax[i].set_yticklabels(labels=np.round(np.linspace(np.max(lats), np.min(lats), 4), 4))
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].tick_params(axis='x', rotation=30)

fig.tight_layout()
fig.savefig('/content/drive/MyDrive/npphen_S2defobreaks.png', dpi=300)

In [None]:
# Histograms of the per-pixel break dates for the five polygons with known
# changes, one panel per polygon.
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
fig, ax = plt.subplots(5, 1, figsize=(3.6, 10))
for i, n in enumerate(ids):
    print(n)
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    buffered = fea.geometry().bounds().buffer(300)
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking).select('ndvi')
    if i in (2, 3):
        # For these two polygons only the first listed MGRS tile is kept.
        tile = S2.aggregate_array('MGRS_TILE').distinct().getInfo()
        S2 = S2.filter(ee.Filter.eq('MGRS_TILE', tile[0]))
    # One band per image; pixels without data are returned as -99.
    stack = S2.toBands().toFloat().sampleRectangle(buffered, defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is the first 8-digit run found in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Re-order the bands chronologically before stacking along axis 0.
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][band] for band in reorder_imgs])
    # Non-positive values (including the -99 fill) are treated as missing.
    stacked[stacked <= 0] = np.nan
    # Position, in chronological order, of the first image after 2016-01-01.
    sorted_dates = df_dates.reset_index()
    ix_anom = sorted_dates[sorted_dates['date'] > '2016-01-01'].index[0]
    breaks, break_dates = np.apply_along_axis(defo_breakS2,
                                                0,
                                                stacked,
                                                df_dates,
                                                np.arange(1, 366, round(365/48)),
                                                ix_anom,
                                                h=2,
                                                rge=[0, 1],
                                                plot=False,
                                                dask='FFT')

    # Keep only break dates later than 2016-01-01.
    break_dates = [d for d in break_dates.ravel() if d > pd.to_datetime('2016-01-01')]
    ax[i].hist(break_dates, 30)
    start, end = ax[i].get_xlim()
    ax[i].set_ylabel('Frequency', fontsize=11)
    ax[i].tick_params(axis='both', labelsize=10)
    ax[i].xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
    # Six evenly spaced ticks across the current limits. The limits are
    # already Matplotlib date numbers, so they are used directly instead of
    # being converted through a year-1 epoch (Matplotlib >= 3.3 counts days
    # from 1970-01-01 by default, which put the old ticks outside the data).
    ax[i].set_xticks(np.linspace(start, end, 6))
    ax[i].tick_params(axis='x', rotation=30)
fig.tight_layout()
fig.savefig('/content/drive/MyDrive/npphen_S2defohisto.png', dpi=300)

In [None]:
# Add the per-date spatial NDVI average of `stacked` as a column and display
# the table.
# NOTE(review): np.apply_over_axes reduces axis 1 first and axis 2 afterwards,
# so with unevenly distributed NaNs the value is a mean of intermediate means,
# not np.nanmean(stacked, axis=(1, 2)) - confirm which one is intended.
df_dates['ndvi'] = np.apply_over_axes(np.nanmean, stacked, (1, 2)).ravel()
df_dates

In [None]:
# Display the `days` array left over from the last cell that computed it.
days

In [None]:
# Histogram of the break dates later than 1990-01-01.
fig = plt.figure()
# Float entries (e.g. NaN placeholders) are skipped. `and` short-circuits, so
# a float is never compared with a Timestamp; the previous `&` evaluated both
# sides and raised TypeError on exactly the values the guard was meant to skip.
valid_break_dates = [
    d for d in break_dates.ravel()
    if not isinstance(d, float) and d > pd.to_datetime('1990-01-01')
]
plt.hist(valid_break_dates, 30)
plt.ylabel('Frequency', fontsize=16)
plt.tick_params(axis='both', labelsize=14)
fig.savefig('/content/drive/MyDrive/hist0000c.png', dpi=300)

In [None]:
# Display the current `break_dates` object.
break_dates

In [None]:
# Flag anomalies whose magnitude exceeds twice `std_all`, count how long each
# run of consecutive flagged rows is, and plot the negative anomalies only.
test_mean = test_mean.dropna(subset=['an'])
exceeds_threshold = np.abs(test_mean['an']) > 2 * std_all
test_mean['bool'] = [int(flag) for flag in exceeds_threshold]
# A new group starts at every non-flagged row; cumcount is the position
# inside the group.
run_id = test_mean['bool'].eq(0).cumsum()
test_mean['cumsum'] = test_mean.groupby(run_id).cumcount().tolist()
test_mean = test_mean[test_mean['an'] < 0].reset_index()
plt.scatter(test_mean['date'], test_mean['an'])
plt.axhline(0, color='red')


In [None]:
# NaN-aware standard deviation of the full series at pixel [8, 8] of
# `stacked`; used as the anomaly threshold scale in later cells.
std = np.nanstd(stacked[:, 8, 8])
std

In [None]:
# Merge the four Landsat NDVI collections, sort them by acquisition time and
# keep 2000-01-01 .. 2022-05-01.
landsat_testing = (
    l9.merge(l8).merge(l7).merge(l5)
    .select('ndvi')
    .sort('system:time_start')
    .filterDate('2000-01-01', '2022-05-01')
)

# Images on WRS path 1 are passed through `resample` with the first path-233
# image as argument before both paths are merged again.
# NOTE(review): `resample` is defined elsewhere; presumably it aligns images
# to the reference grid - confirm.
landsat1_test = landsat_testing.filter(ee.Filter.eq('WRS_PATH', 233))
reference_img = landsat1_test.first()
landsat2_test = landsat_testing.filter(ee.Filter.eq('WRS_PATH', 1)).map(resample(reference_img))

landsat_test = landsat1_test.merge(landsat2_test)



In [None]:
# Sample every image of `landsat_test` with `sample(fea)` and scatter the
# returned NDVI values against their dates (None where 'ndvi' is missing).
data = landsat_test.map(sample(fea)).getInfo()
dates, ndvi = [], []
for feat in data['features']:
    props = feat['properties']
    dates.append(pd.to_datetime(props['date']))
    ndvi.append(props['ndvi'] if 'ndvi' in props else None)
plt.scatter(dates, ndvi)
plt.grid()

In [None]:
# Download the whole collection as one multi-band rectangle around the feature
# (geometry buffered by 50); pixels without data are returned as -99.
stack_test = landsat_test.toBands().toFloat().sampleRectangle(fea.geometry().buffer(50), defaultValue=-99.).getInfo()

In [None]:
# Turn the downloaded rectangle into a chronologically ordered array with
# time on axis 0, and locate the first image after 2016-01-01.
imgs2 = list(stack_test['properties'])
# The acquisition date is the first 8-digit run found in each band name.
dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
df_dates2 = pd.DataFrame(data={'date':dates_imgs2, 'ix':range(len(dates_imgs2))})
df_dates2 = df_dates2.sort_values('date')
reorder_ix2 = df_dates2['ix'].values
reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
stacked2 = np.stack([stack_test['properties'][band] for band in reorder_imgs2])
# Negative values are treated as missing; this also covers the -99 fill.
stacked2[stacked2 < 0] = np.nan
sorted_dates2 = df_dates2.reset_index()
ix_anom = sorted_dates2[sorted_dates2['date'] > '2016-01-01'].index[0]

In [None]:
# Scatter the series of pixel [8, 8] of `stacked2` against the sorted dates.
plt.scatter(df_dates2['date'], stacked2[:, 8, 8])
plt.grid()

In [None]:
# Display the raw series of pixel [8, 8].
stacked2[:, 8, 8]

In [None]:
# Run PhenAnoma on the series of pixel [8, 8] and keep the first element of
# its result. The two index pairs split the series at `ix_anom`:
# [ix_anom, len] and [0, ix_anom].
# NOTE(review): PhenAnoma is defined elsewhere; presumably the first pair is
# the anomaly period and the second the reference period - confirm.
anom = PhenAnoma(stacked2[:, 8, 8],
                 df_dates2['date'].values,
                 np.arange(1, 366, round(365/48)),
                 [ix_anom, len(df_dates2)],
                 [0, ix_anom], h=1,
                 rge=[0, 1],
                 plot=True,
                 dask='multiprocessing')[0]

In [None]:
# anom = anom[~np.isnan(anom)]
test = pd.DataFrame(data={'an':anom, 'date':df_dates2.reset_index().iloc[ix_anom: len(df_dates2)]['date']})
test = test.dropna(subset=['an'])
test['bool'] = [int(n) for n in (np.abs(test['an']) > 2 * std)]
test['cumsum'] = test.groupby(test['bool'].eq(0).cumsum()).cumcount().tolist()
test = test.reset_index()
plt.scatter(test['date'], test['an'])
plt.axhline(0, color='red')
if(len(test[test['cumsum'] == 5]) > 0):
    ix = np.where(test['cumsum'] == 5)[0][0]
    plt.axvline(test.iloc[ix-4]['date'])
else:
    pass


In [None]:
# Apply PhenAnoma to every pixel series (1-D slices along axis 0 of
# `stacked2`), using the same split at `ix_anom` as the single-pixel run but
# with h=2, no plotting and the 'FFT' option.
ans = np.apply_along_axis(PhenAnoma,
                          0,
                          stacked2,
                          df_dates2['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates2)],
                          [0, ix_anom],
                          h=2,
                          rge=[0, 1],
                          plot=False,
                          dask='FFT')

In [None]:
# plt.scatter(df_dates2.reset_index().iloc[973: len(df_dates2)]['date'], ans[:, 8, 8])
test = pd.DataFrame(data={'an':ans[:, 8, 8], 'date':df_dates2.reset_index().iloc[ix_anom: len(df_dates2)]['date']})
test = test.dropna(subset=['an'])
test['bool'] = [int(n) for n in (np.abs(test['an']) > 2 * std)]
test['cumsum'] = test.groupby(test['bool'].eq(0).cumsum()).cumcount().tolist()
test = test.reset_index()
plt.scatter(test['date'], test['an'])
plt.axhline(0, color='red')
if(len(test[test['cumsum'] == 5]) > 0):
    ix = np.where(test['cumsum'] == 5)[0][0]
    plt.axvline(test.iloc[ix-4]['date'])
else:
    pass

In [None]:
# Offset of the date at row `ix` of `test` from `ix_anom`, measured as its
# list position in df_dates2['date'] minus ix_anom.
df_dates2['date'].tolist().index(test.iloc[ix]['date']) - ix_anom

In [None]:
# Show the anomaly image at index 300 along the first axis of `ans`.
plt.imshow(ans[300, :, :])
plt.colorbar()

In [None]:
 test_mean = pd.DataFrame(data={'an':anom_mean, 'date':df_dates2.reset_index().iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    test_mean['bool'] = [int(n) for n in (np.abs(test_mean['an']) > 2 * std_all)] # & an < 0
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()
    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    if(len(test_mean[test_mean['cumsum'] == 5]) > 0):
        ix_mean = np.where(test_mean['cumsum'] == 5)[0][0]

In [None]:
# Run defo_break on every pixel series (1-D slices along axis 0 of
# `stacked2`) and unpack the result into `breaks` and `break_dates`.
# NOTE(review): defo_break is defined elsewhere; the meaning of the two
# outputs is inferred from their names - confirm.
breaks, break_dates = np.apply_along_axis(defo_break,
                                          0,
                                          stacked2,
                                          df_dates2,
                                          np.arange(1, 366, round(365/48)),
                                          ix_anom,
                                          h=2,
                                          rge=[0, 1],
                                          plot=False,
                                          dask='FFT')

In [None]:
# Show `breaks` as an image after casting it to float.
plt.imshow(breaks.astype(float))
plt.colorbar()

In [None]:
# Histogram (20 bins) of the break dates later than 1900-01-01.
after_1900 = break_dates > pd.to_datetime('1900-01-01')
plt.hist(break_dates[after_1900].ravel(), bins=20)
plt.tick_params(axis='x', rotation=45)
plt.ylabel('frequency (pixels)')
plt.show()

In [None]:
# Convert the break indices into day values and prepare four colour-bar
# ticks with their calendar dates.
df_dates_anom = df_dates2.reset_index().iloc[ix_anom:].reset_index()
# NOTE(review): get_days is defined elsewhere; presumably it maps a break
# index to a day offset from 2016-01-01 - confirm.
vfunc = np.vectorize(get_days(df_dates_anom))
days = vfunc(breaks)

# The largest unique value is dropped first (np.unique sorts NaN last, so
# this is the NaN entry when present).
values = np.sort(np.unique(days))[:-1]
values = np.linspace(np.min(values), np.max(values), 4)
# pd.Timedelta is used because `datetime` is bound to the datetime class by
# the notebook imports, which has no `timedelta` attribute.
dates = [pd.to_datetime('2016-01-01') + pd.Timedelta(days=float(v)) for v in values]

In [None]:
# Image of `days` with the colour range set to its NaN-aware min/max and the
# colour-bar ticks labelled with the prepared calendar dates.
fig, ax = plt.subplots()
day_min = np.nanmin(days)
day_max = np.nanmax(days)
f1 = ax.imshow(days, vmin=day_min, vmax=day_max)
cb = fig.colorbar(f1, ticks=values)
tick_labels = [d.strftime('%Y %m %d') for d in dates]
cb.ax.set_yticklabels(tick_labels)
fig.show()



In [None]:
# Overlay the 2019 spatial-mean NDVI observations (red crosses, by day of
# year) on the PhenKplot of the whole series.
sorted_dates2 = df_dates2.reset_index()
init = sorted_dates2[sorted_dates2['date'] > '2019-01-01'].index[0]
end = sorted_dates2[sorted_dates2['date'] < '2020-01-01'].index[-1]
series = np.apply_over_axes(np.nanmean, stacked2[init:end, :, :], (1 ,2))
# Work on a copy so that adding the column does not write into a slice of
# the original frame (SettingWithCopyWarning).
sub_dates = sorted_dates2.iloc[init:end].copy()
sub_dates['doy'] = sub_dates['date'].dt.dayofyear
# Use the same Landsat arrays (`stacked2`, `df_dates2`) as the overlay; the
# cell previously drew the plot from the Sentinel-2 `stacked`/`df_dates`
# left over from other cells, unlike the matching non-disturbed cell.
PhenKplot(np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel(), df_dates2['date'].values, np.arange(1, 366, round(365/48)), ylim=(0, 1), h=1)
plt.scatter(sub_dates['doy'], series, marker='x', color='red')
plt.show()

## Testing non-disturbed forests

### Select feature and create landsat ImageCollection

In [None]:
# Select one feature of `forested` and build the Landsat 5/7/8/9 NDVI
# collections that intersect it.
# NOTE(review): the id is taken from `ids_deforested` although the feature is
# looked up in `forested` - confirm this is intended.
fea_non = forested.filter(ee.Filter.eq('id', ids_deforested[0])).first()
geom_non = fea_non.geometry()

l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(geom_non).map(mask).map(ndvil8)
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geom_non).map(mask).map(ndvil8)
l7 = (ee.ImageCollection("LANDSAT/LE07/C02/T1_L2")
      .filterBounds(geom_non).map(mask).map(ndvil57)
      .filterDate('2000-01-01', '2022-05-01'))
l5 = (ee.ImageCollection("LANDSAT/LT05/C02/T1_L2")
      .filterBounds(geom_non).map(mask).map(ndvil57)
      .filterDate('2000-01-01', '2022-05-01'))

# Merged NDVI series, sorted by acquisition time, 2000-01-01 .. 2022-05-01.
merged_non = l9.merge(l8).merge(l7).merge(l5)
landsat = merged_non.select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-05-01')

### How many tiles?


In [None]:
# Distinct WRS_PATH values present in the merged collection.
paths = landsat.aggregate_array('WRS_PATH').distinct().getInfo()
paths

### Let's map the selected feature and the mean Landsat composites for 2016 and 2021

In [None]:
# Interactive map with the selected feature and two Landsat false-colour
# (SR_B5, SR_B4, SR_B3) mean composites: Landsat 8 for 2016 and Landsat 9 for
# 2021. The layer names now describe those composites; they used to carry
# Sentinel-2 labels ('s2_2018', 's2_2021').
# NOTE(review): `id` and `map` shadow Python builtins; the names are kept
# because other cells may refer to them.
geo_non = ee.Feature(fea_non)
id = geo_non.getMapId()
l8_2016 = l8.filterDate('2016-01-01', '2017-01-01').mean().getMapId({'bands':['SR_B5', 'SR_B4', 'SR_B3'], 'min': 0, 'max': 0.3})
l8_2021 = l9.filterDate('2021-01-01', '2022-01-01').mean().getMapId({'bands':['SR_B5', 'SR_B4', 'SR_B3'], 'min': 0, 'max': 0.3})
# Earth Engine returns [lon, lat]; folium expects [lat, lon].
centroid = geo_non.centroid().getInfo()['geometry']['coordinates'][::-1]
map = folium.Map(location=centroid, zoom_start=16)
folium.TileLayer(
    tiles=l8_2016['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='l8_2016',
  ).add_to(map)
folium.TileLayer(
    tiles=l8_2021['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='l9_2021',
  ).add_to(map)
folium.TileLayer(
    tiles=id['tile_fetcher'].url_format,
    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
    overlay=True,
    name='feature',
  ).add_to(map)
map.add_child(folium.LayerControl())
map

## Plotting mean of sampled pixel series

In [None]:
# Sample every image of `landsat` with `sample(fea_non)` and scatter the
# returned NDVI values against their dates (None where 'ndvi' is missing).
data = landsat.map(sample(fea_non)).getInfo()
dates, ndvi = [], []
for feat in data['features']:
    props = feat['properties']
    dates.append(pd.to_datetime(props['date']))
    ndvi.append(props['ndvi'] if 'ndvi' in props else None)
plt.scatter(dates, ndvi)
plt.grid()

### Creating a stacked numpy array and parameters for further analysis (breaking points from 2016-01-01)

In [None]:
# Download the whole collection as one multi-band rectangle around the feature
# (geometry buffered by 50); pixels without data are returned as -99.
stack_non = landsat.toBands().toFloat().sampleRectangle(fea_non.geometry().buffer(50), defaultValue=-99.).getInfo()

In [None]:
# Turn the downloaded rectangle into a chronologically ordered array with
# time on axis 0, and locate the first image after 2016-01-01.
imgs_non = list(stack_non['properties'])
# The acquisition date is the first 8-digit run found in each band name.
dates_imgs_non = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs_non]
df_dates_non = pd.DataFrame(data={'date':dates_imgs_non, 'ix':range(len(dates_imgs_non))})
df_dates_non = df_dates_non.sort_values('date')
reorder_ix_non = df_dates_non['ix'].values
reorder_imgs_non = [imgs_non[k] for k in reorder_ix_non]
stacked_non = np.stack([stack_non['properties'][band] for band in reorder_imgs_non])
# Negative values are treated as missing; this also covers the -99 fill.
stacked_non[stacked_non < 0] = np.nan
sorted_dates_non = df_dates_non.reset_index()
ix_anom_non = sorted_dates_non[sorted_dates_non['date'] > '2016-01-01'].index[0]

# Anomalies single pixel

In [None]:
# NaN-aware standard deviation of the series at pixel [5, 5], followed by a
# PhenAnoma run on that series; the first element of its result is kept.
# The two index pairs split the series at `ix_anom_non`.
# NOTE(review): PhenAnoma is defined elsewhere; presumably the first pair is
# the anomaly period and the second the reference period - confirm.
std_non = np.nanstd(stacked_non[:, 5, 5])
anom_non = PhenAnoma(stacked_non[:, 5, 5],
                     df_dates_non['date'].values,
                     np.arange(1, 366, round(365/48)),
                     [ix_anom_non, len(df_dates_non)],
                     [0, ix_anom_non], h=1,
                     rge=[0, 1],
                     plot=True,
                     dask='multiprocessing')[0]

## Any breaking point?

In [None]:
# Pair the single-pixel anomalies with their dates, flag those whose magnitude
# exceeds twice `std_non`, and mark the start of the first run whose counter
# reaches 5.
anomaly_dates_non = df_dates_non.reset_index().iloc[ix_anom_non: len(df_dates_non)]['date']
test_non = pd.DataFrame(data={'an':anom_non, 'date':anomaly_dates_non})
test_non = test_non.dropna(subset=['an'])
test_non['bool'] = [int(flag) for flag in (np.abs(test_non['an']) > 2 * std_non)]
# A new group starts at every non-flagged row; cumcount is the position
# inside the group.
run_id_non = test_non['bool'].eq(0).cumsum()
test_non['cumsum'] = test_non.groupby(run_id_non).cumcount().tolist()
test_non = test_non.reset_index()
plt.scatter(test_non['date'], test_non['an'])
plt.axhline(0, color='red')
run_hits_non = np.where(test_non['cumsum'] == 5)[0]
if len(run_hits_non) > 0:
    ix_non = run_hits_non[0]
    # Four rows back from the hit.
    plt.axvline(test_non.iloc[ix_non-4]['date'])

# Anomalies for aggregated series

In [None]:
# NaN-aware standard deviation of the whole stack, followed by a PhenAnoma run
# on the spatially averaged series; the first element of its result is kept.
# NOTE(review): the all-polygon loop computes its threshold scale from the
# images before `ix_anom` only, while this one uses every date - confirm
# which is intended.
std_all_non = np.nanstd(stacked_non)
anom_mean_non = PhenAnoma(np.apply_over_axes(np.nanmean, stacked_non, (1, 2)).ravel(),
                          df_dates_non['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom_non, len(df_dates_non)],
                          [0, ix_anom_non], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]

## Any breaking point?

In [None]:
# Pair the spatially averaged anomalies with their dates, flag those whose
# magnitude exceeds twice `std_all_non`, and mark the start of the first run
# whose counter reaches 5.
anomaly_dates_non = df_dates_non.reset_index().iloc[ix_anom_non: len(df_dates_non)]['date']
test_mean_non = pd.DataFrame(data={'an':anom_mean_non, 'date':anomaly_dates_non})
test_mean_non = test_mean_non.dropna(subset=['an'])
test_mean_non['bool'] = [int(flag) for flag in (np.abs(test_mean_non['an']) > 2 * std_all_non)]
# A new group starts at every non-flagged row; cumcount is the position
# inside the group.
run_id_mean_non = test_mean_non['bool'].eq(0).cumsum()
test_mean_non['cumsum'] = test_mean_non.groupby(run_id_mean_non).cumcount().tolist()
test_mean_non = test_mean_non.reset_index()
plt.scatter(test_mean_non['date'], test_mean_non['an'])
plt.axhline(0, color='red')
run_hits_mean_non = np.where(test_mean_non['cumsum'] == 5)[0]
if len(run_hits_mean_non) > 0:
    ix_mean_non = run_hits_mean_non[0]
    # Four rows back from the hit.
    plt.axvline(test_mean_non.iloc[ix_mean_non-4]['date'])

## Array of structural breaks

In [None]:
# Run defo_break on every pixel series (1-D slices along axis 0 of
# `stacked_non`) and unpack the result into `breaks_non` and `break_dates_non`.
# NOTE(review): defo_break is defined elsewhere; the meaning of the two
# outputs is inferred from their names - confirm.
breaks_non, break_dates_non = np.apply_along_axis(defo_break,
                                                  0,
                                                  stacked_non,
                                                  df_dates_non,
                                                  np.arange(1, 366, round(365/48)),
                                                  ix_anom_non,
                                                  h=2,
                                                  rge=[0, 1],
                                                  plot=False,
                                                  dask='FFT')

In [None]:
# Show `breaks_non` as an image after casting it to float.
plt.imshow(breaks_non.astype(float))
plt.colorbar()

## Histogram of breaking dates

In [None]:
# Histogram (20 bins) of the break dates later than 1900-01-01.
after_1900_non = break_dates_non > pd.to_datetime('1900-01-01')
plt.hist(break_dates_non[after_1900_non].ravel(), bins=20)
plt.tick_params(axis='x', rotation=45)
plt.ylabel('frequency (pixels)')
plt.show()

## Array of structural breaks and dates associated

In [None]:
# Convert the break indices of the non-disturbed feature into day values and
# map them with a colour bar labelled in calendar dates.
df_dates_anom_non = df_dates_non.reset_index().iloc[ix_anom_non:].reset_index()
# NOTE(review): get_days is defined elsewhere; presumably it maps a break
# index to a day offset from 2016-01-01 - confirm.
vfunc_non = np.vectorize(get_days(df_dates_anom_non))
# Use the function built for THIS feature; the cell previously called the
# stale `vfunc` left over from another cell (different date table).
days_non = vfunc_non(breaks_non)

# The largest unique value is dropped first (np.unique sorts NaN last, so
# this is the NaN entry when present).
values_non = np.sort(np.unique(days_non))[:-1]
values_non = np.linspace(np.min(values_non), np.max(values_non), 4)
# pd.Timedelta is used because `datetime` is bound to the datetime class by
# the notebook imports, which has no `timedelta` attribute.
dates_non = [pd.to_datetime('2016-01-01') + pd.Timedelta(days=float(v)) for v in values_non]

fig_non, ax_non = plt.subplots()
f1_non = ax_non.imshow(days_non, vmin=np.nanmin(days_non), vmax=np.nanmax(days_non))
cb_non = fig_non.colorbar(f1_non, ticks=values_non)
cb_non.ax.set_yticklabels([d.strftime('%Y %m %d') for d in dates_non])
fig_non.show()


In [None]:
# Overlay the 2021 spatial-mean NDVI observations (red crosses, by day of
# year) on the PhenKplot of the whole non-disturbed series.
sorted_dates_non = df_dates_non.reset_index()
init = sorted_dates_non[sorted_dates_non['date'] > '2021-01-01'].index[0]
end = sorted_dates_non[sorted_dates_non['date'] < '2022-01-01'].index[-1]
series = np.apply_over_axes(np.nanmean, stacked_non[init:end, :, :], (1 ,2))
# Work on a copy so that adding the column does not write into a slice of
# the original frame (SettingWithCopyWarning).
sub_dates = sorted_dates_non.iloc[init:end].copy()
sub_dates['doy'] = sub_dates['date'].dt.dayofyear
PhenKplot(np.apply_over_axes(np.nanmean, stacked_non, (1, 2)).ravel(), df_dates_non['date'].values, np.arange(1, 366, round(365/48)), ylim=(0, 1), h=1)
plt.scatter(sub_dates['doy'], series, marker='x', color='red')
plt.show()

# Running for all polygons

In [None]:
# Mount Google Drive at /content/drive (Colab only); the figures and rasters
# in this notebook are written under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

## non-disturbance

In [None]:
# Run the aggregated-series break test on every polygon in `ids_forested`.
# For each polygon the Landsat NDVI stack is downloaded, averaged in space,
# passed to PhenAnoma, and the polygon is flagged 1 in `deforested` when the
# run counter of the thresholded anomalies reaches 5, else 0.
# NOTE(review): `deforested` (and `ids`) are rebound to plain lists here,
# which hides the `deforested` FeatureCollection used by earlier cells.
ids, deforested = [], []
for n in ids_forested[:]:
    print(n)
    ids.append(n)
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-05-01')
    paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        # Several WRS paths cover the polygon: images of every additional
        # path are passed through `resample` with the first image of the
        # running collection, then merged into it.
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', int(paths[0])))
        for z in paths[1:]:
            landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', int(z))).map(resample(landsat1.first()))
            landsat1 = landsat1.merge(landsat2)
        landsat = landsat1
    # One band per image; pixels without data are returned as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the first 8-digit run found in each band name.
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date':dates_imgs2, 'ix':range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2])
    # Negative values are treated as missing; this also covers the -99 fill.
    stacked2[stacked2 < 0] = np.nan
    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel())
    plt.title(n)
    plt.grid()
    plt.show()
    # Position, in chronological order, of the first image after 2016-01-01.
    sorted_dates2 = df_dates2.reset_index()
    ix_anom = sorted_dates2[sorted_dates2['date'] > '2016-01-01'].index[0]
    # Threshold scale: standard deviation of all pixels before `ix_anom`.
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    anom_mean = PhenAnoma(np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel(),
                            df_dates2['date'].values,
                            np.arange(1, 366, round(365/48)),
                            [ix_anom, len(df_dates2)],
                            [0, ix_anom], h=1,
                            rge=[0, 1],
                            plot=True,
                            dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an':anom_mean, 'date':sorted_dates2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    # Flag anomalies whose magnitude exceeds twice `std_all`.
    test_mean['bool'] = [int(flag) for flag in (np.abs(test_mean['an']) > 2 * std_all)]
    # A new group starts at every non-flagged row; cumcount is the position
    # inside the group.
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    # NOTE(review): the run counter is computed before positive anomalies are
    # dropped, so `ix_mean-4` below is a position in the filtered table and
    # may not be the first row of the run - confirm this is acceptable.
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()
    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    if len(test_mean[test_mean['cumsum'] == 5]) > 0:
        ix_mean = np.where(test_mean['cumsum'] == 5)[0][0]
        plt.axvline(test_mean.iloc[ix_mean-4]['date'])
        plt.show()
        deforested.append(1)
    else:
        plt.show()
        deforested.append(0)
    print(n, deforested[-1])





In [None]:
def sample_fea(fea):
    """
    Build a per-image sampling function bound to the geometry of a feature.

    Parameters:
    fea (ee.Feature): Feature whose geometry is used as the sampling region.

    Returns:
    function: A callable taking an image and returning the first sample as an
        ee.Feature with a 'date' property (acquisition date, 'YYYY-MM-dd').
    """
    def _sample_image(img):
        # Constant -999 image, overwritten by the image values wherever they are >= -1.
        filled = ee.Image(-999).where(img.gte(-1), img)
        # Sample over the feature geometry (30 is passed as the second positional argument).
        samples = ee.FeatureCollection(filled.sample(fea.geometry(), 30))
        acquisition_day = ee.Date(img.get('system:time_start')).format('YYYY-MM-dd')
        return ee.Feature(samples.first()).set('date', acquisition_day)
    return _sample_image


# Complaint points asset, excluding the record at latitude -33.1199321483453.
denuncias_subset = (
    ee.FeatureCollection('users/ignaciofuentessanroman/defo_points')
    .filter(ee.Filter.neq('Latitud', -33.1199321483453))
)
# Values matched against the 'Field_1' property in the cells below.
ids = [
    1753, 1745, 1366, 1808, 1357, 2288, 101, 1730,
    1722, 2180, 1613, 2322, 1750, 2250, 1607, 1449,
    1814, 1823, 1027, 1515, 1360, 1220, 2183,
]

In [None]:
# Break detection on single-point NDVI series for the complaint points,
# starting at position 11 of `ids`.
for n in ids[11:]:
    print(n)
    fea = denuncias_subset.filter(ee.Filter.eq('Field_1', n)).first()
    fecha = ee.Date(fea.get('FechaIngre')).format('YYYY-MM-dd').getInfo()
    # Attempt 1 uses every WRS path over the point. If any step of that attempt
    # raises, attempt 2 repeats the analysis on the first reported WRS path only.
    for single_path in (False, True):
        try:
            l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
            l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
            l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
            l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
            landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
            if single_path:
                tiles = landsat.aggregate_array('WRS_PATH').distinct().getInfo()
                landsat = landsat.filter(ee.Filter.eq('WRS_PATH', tiles[0]))

            # One sampled feature per image; 'ndvi' is absent when nothing was sampled.
            data = landsat.map(sample(fea)).getInfo()
            dates = [pd.to_datetime(f['properties']['date']) for f in data['features']]
            ndvi = [f['properties'].get('ndvi') for f in data['features']]
            df_dates = pd.DataFrame(data={'date': dates, 'ix': range(len(dates)), 'ndvi': ndvi})
            df_dates = df_dates.sort_values('date')
            ordered = df_dates.reset_index()

            # Position of the first observation after 2016-01-01: everything
            # before it is the reference period, the rest is tested for anomalies.
            ix_anom = ordered[ordered['date'] > '2016-01-01'].index[0]
            std_all = df_dates.iloc[:ix_anom]['ndvi'].std()
            anom_mean = PhenAnoma(df_dates['ndvi'].values,
                                  df_dates['date'].values,
                                  np.arange(1, 366, round(365/48)),
                                  [ix_anom, len(df_dates)],
                                  [0, ix_anom], h=1,
                                  rge=[0, 1],
                                  plot=True,
                                  dask='multiprocessing')[0]
            test_mean = pd.DataFrame(data={'an': anom_mean, 'date': ordered.iloc[ix_anom: len(df_dates)]['date']})
            test_mean = test_mean.dropna(subset=['an'])
            # 1 where |anomaly| exceeds twice the reference-period standard deviation.
            test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
            # Running count of consecutive exceedances since the last non-exceeding row.
            test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
            test_mean = test_mean[test_mean['an'] < 0]
            test_mean = test_mean.reset_index()

            plt.scatter(df_dates['date'], df_dates['ndvi'])
            hits = np.where(test_mean['cumsum'] == 5)[0]
            if len(hits) > 0:
                ix_mean = hits[0]
                # Step back four rows towards the start of the run; clamp at 0 so a
                # negative position cannot wrap around to the end of the frame.
                break_date = test_mean.iloc[max(ix_mean - 4, 0)]['date']
                plt.axvline(break_date)
                plt.show()
                # Only report a date when a break was found in THIS iteration
                # (no reuse of a stale or undefined ix_mean).
                print(n, break_date)
            else:
                plt.show()
        except Exception as err:
            # `except Exception` instead of a bare `except` so that kernel
            # interrupts (KeyboardInterrupt) still stop the loop.
            if single_path:
                raise
            print('{}: run over all WRS paths failed ({!r}); retrying with the first path only'.format(n, err))
        else:
            break


In [None]:
# Break detection on single-point NDVI series for the complaint points from
# position 16 of `ids`, keeping only scenes of WRS path 233 (hard-coded).
for n in ids[16:]:
    print(n)
    fea = denuncias_subset.filter(ee.Filter.eq('Field_1', n)).first()
    fecha = ee.Date(fea.get('FechaIngre')).format('YYYY-MM-dd').getInfo()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
    landsat = landsat.filter(ee.Filter.eq('WRS_PATH', 233))

    # One sampled feature per image; 'ndvi' is absent when nothing was sampled.
    data = landsat.map(sample(fea)).getInfo()
    dates = [pd.to_datetime(f['properties']['date']) for f in data['features']]
    ndvi = [f['properties'].get('ndvi') for f in data['features']]
    df_dates = pd.DataFrame(data={'date': dates, 'ix': range(len(dates)), 'ndvi': ndvi})
    df_dates = df_dates.sort_values('date')
    ordered = df_dates.reset_index()

    # Position of the first observation after 2016-01-01: everything before it
    # is the reference period, the rest is tested for anomalies.
    ix_anom = ordered[ordered['date'] > '2016-01-01'].index[0]
    std_all = df_dates.iloc[:ix_anom]['ndvi'].std()
    anom_mean = PhenAnoma(df_dates['ndvi'].values,
                          df_dates['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates)],
                          [0, ix_anom], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': ordered.iloc[ix_anom: len(df_dates)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    # 1 where |anomaly| exceeds twice the reference-period standard deviation.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    # Running count of consecutive exceedances since the last non-exceeding row.
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()

    plt.scatter(df_dates['date'], df_dates['ndvi'])
    hits = np.where(test_mean['cumsum'] == 5)[0]
    if len(hits) > 0:
        ix_mean = hits[0]
        # Step back four rows towards the start of the run; clamp at 0 so a
        # negative position cannot wrap around to the end of the frame.
        break_date = test_mean.iloc[max(ix_mean - 4, 0)]['date']
        plt.axvline(break_date)
        plt.show()
        print(n, break_date)
        del ix_mean
    else:
        plt.show()



In [None]:
# Display the entries of `ids` from position 13 onward (cell output only; nothing is assigned).
ids[13:]

In [None]:
# Display the entries of `ids` from position 7 onward (cell output only; nothing is assigned).
ids[7:]

### How many of each one?

In [None]:
# Percentage of entries per class in `deforested`:
# index 0 -> label 0 (no break detected), index 1 -> label 1 (break detected).
# np.bincount returns exactly one count per label; np.histogram with its default
# 10 bins spread the two labels over a 10-element array and re-centred the bins
# when only one label was present, making the output ambiguous.
np.bincount(np.asarray(deforested, dtype=int), minlength=2) / len(deforested) * 100

### Using LandTrendr as well

In [None]:
# Classify the polygons of `forested` from position 96 of `ids_forested` onward:
# 1 = break detected, 0 = no break. Results accumulate in `ids` / `deforested`.
# NOTE(review): a later cell calls `deforested.filter(...)`, i.e. expects
# `deforested` to be an Earth Engine collection; rebinding the name to a list
# here breaks that cell unless the collection is redefined in between.
ids, deforested = [], []
for n in ids_forested[96:]:
    print(n)
    ids.append(n)
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-05-01')
    paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        # Several WRS paths cover the polygon: keep the first path as is and pass
        # the images of every other path through `resample`, using the first
        # image of the reference collection as template, before merging.
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', int(paths[0])))
        for z in paths[1:]:
            landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', int(z))).map(resample(landsat1.first()))
            landsat1 = landsat1.merge(landsat2)
        landsat = landsat1

    # One band per image; pixels without data come back as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the 8-digit YYYYMMDD token of each band name
    # (raw string: '\d' is an invalid escape in a normal string literal).
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2])
    # Negative values (including the -99 fill) are treated as missing.
    stacked2[stacked2 < 0] = np.nan
    # Spatial mean NDVI per date, computed once and reused for plot and model.
    mean_ndvi = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()

    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], mean_ndvi)
    plt.title(n)
    plt.grid()
    plt.show()

    # Position of the first observation after 2016-01-01: everything before it
    # is the reference period, the rest is tested for anomalies.
    ordered2 = df_dates2.reset_index()
    ix_anom = ordered2[ordered2['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    anom_mean = PhenAnoma(mean_ndvi,
                          df_dates2['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates2)],
                          [0, ix_anom], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': ordered2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    # 1 where |anomaly| exceeds twice the reference-period standard deviation.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    # Running count of consecutive exceedances since the last non-exceeding row.
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()

    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    hits = np.where(test_mean['cumsum'] == 5)[0]
    if len(hits) > 0:
        ix_mean = hits[0]
        # Step back four rows towards the start of the run; clamp at 0 so a
        # negative position cannot wrap around to the end of the frame.
        plt.axvline(test_mean.iloc[max(ix_mean - 4, 0)]['date'])
        plt.show()
        deforested.append(1)
    else:
        plt.show()
        deforested.append(0)
    print(n, deforested[-1])

In [None]:
# Classify the polygons of `forested` from position 96 of `ids_forested` onward:
# 1 = break detected, 0 = no break. Results accumulate in `ids` / `deforested`.
# NOTE(review): a later cell calls `deforested.filter(...)`, i.e. expects
# `deforested` to be an Earth Engine collection; rebinding the name to a list
# here breaks that cell unless the collection is redefined in between.
ids, deforested = [], []
for n in ids_forested[96:]:
    print(n)
    ids.append(n)
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-05-01')
    paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        # Several WRS paths cover the polygon: keep the first path as is and pass
        # the images of every other path through `resample`, using the first
        # image of the reference collection as template, before merging.
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', int(paths[0])))
        for z in paths[1:]:
            landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', int(z))).map(resample(landsat1.first()))
            landsat1 = landsat1.merge(landsat2)
        landsat = landsat1

    # One band per image; pixels without data come back as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the 8-digit YYYYMMDD token of each band name
    # (raw string: '\d' is an invalid escape in a normal string literal).
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2])
    # Negative values (including the -99 fill) are treated as missing.
    stacked2[stacked2 < 0] = np.nan
    # Spatial mean NDVI per date, computed once and reused for plot and model.
    mean_ndvi = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()

    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], mean_ndvi)
    plt.title(n)
    plt.grid()
    plt.show()

    # Position of the first observation after 2016-01-01: everything before it
    # is the reference period, the rest is tested for anomalies.
    ordered2 = df_dates2.reset_index()
    ix_anom = ordered2[ordered2['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    anom_mean = PhenAnoma(mean_ndvi,
                          df_dates2['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates2)],
                          [0, ix_anom], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': ordered2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    # 1 where |anomaly| exceeds twice the reference-period standard deviation.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    # Running count of consecutive exceedances since the last non-exceeding row.
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()

    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    hits = np.where(test_mean['cumsum'] == 5)[0]
    if len(hits) > 0:
        ix_mean = hits[0]
        # Step back four rows towards the start of the run; clamp at 0 so a
        # negative position cannot wrap around to the end of the frame.
        plt.axvline(test_mean.iloc[max(ix_mean - 4, 0)]['date'])
        plt.show()
        deforested.append(1)
    else:
        plt.show()
        deforested.append(0)
    print(n, deforested[-1])

In [None]:
# Scratch arithmetic (evaluates to 101); nothing is assigned.
103 -2

## Deforested

In [None]:
# Classify every polygon of the `deforested` collection:
# 1 = break detected, 0 = no break. Results accumulate in `ids2` / `deforested2`.
ids2, deforested2 = [], []
for i, n in enumerate(ids_deforested[:]):
    print(n)
    ids2.append(n)
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-05-01')
    # NOTE(review): from loop position 60 onward the WRS paths are used in the
    # order returned by Earth Engine instead of sorted; the reason is not
    # visible in this cell.
    if i >= 60:
        paths = landsat.aggregate_array('WRS_PATH').distinct().getInfo()
    else:
        paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        # Several WRS paths cover the polygon: keep the first path as is and pass
        # the images of every other path through `resample`, using the first
        # image of the reference collection as template, before merging.
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', int(paths[0])))
        for z in paths[1:]:
            landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', int(z))).map(resample(landsat1.first()))
            landsat1 = landsat1.merge(landsat2)
        landsat = landsat1

    # One band per image; pixels without data come back as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the 8-digit YYYYMMDD token of each band name
    # (raw string: '\d' is an invalid escape in a normal string literal).
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2])
    # Negative values (including the -99 fill) are treated as missing.
    stacked2[stacked2 < 0] = np.nan
    # Spatial mean NDVI per date, computed once and reused for plot and model.
    mean_ndvi = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()
    # df_dates2['ndvi'] = mean_ndvi
    # df_dates2[['date', 'ndvi']].to_csv('/content/drive/MyDrive/disturbed/defo_{}.csv'.format(n))

    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], mean_ndvi)
    plt.title(n)
    plt.grid()
    plt.show()

    # Position of the first observation after 2016-01-01: everything before it
    # is the reference period, the rest is tested for anomalies.
    ordered2 = df_dates2.reset_index()
    ix_anom = ordered2[ordered2['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    anom_mean = PhenAnoma(mean_ndvi,
                          df_dates2['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates2)],
                          [0, ix_anom], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': ordered2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    # 1 where |anomaly| exceeds twice the reference-period standard deviation.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    # Running count of consecutive exceedances since the last non-exceeding row.
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()

    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    hits = np.where(test_mean['cumsum'] == 5)[0]
    if len(hits) > 0:
        ix_mean = hits[0]
        # Step back four rows towards the start of the run; clamp at 0 so a
        # negative position cannot wrap around to the end of the frame.
        plt.axvline(test_mean.iloc[max(ix_mean - 4, 0)]['date'])
        plt.show()
        deforested2.append(1)
    else:
        plt.show()
        deforested2.append(0)
    print(n, deforested2[-1])

In [None]:
# Classify the polygons of the `deforested` collection from position 80 of
# `ids_deforested` onward: 1 = break detected, 0 = no break.
# Results accumulate in `ids2` / `deforested2`.
ids2, deforested2 = [], []
for i, n in enumerate(ids_deforested[80:]):
    print(n)
    ids2.append(n)
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-07-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-07-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-07-01')
    # NOTE(review): `i` counts from the start of the slice, so this threshold is
    # position 60 of `ids_deforested[80:]`. From there on the WRS paths are used
    # in the order returned by Earth Engine instead of sorted; the reason is not
    # visible in this cell.
    if i >= 60:
        paths = landsat.aggregate_array('WRS_PATH').distinct().getInfo()
    else:
        paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        # Several WRS paths cover the polygon: keep the first path as is and pass
        # the images of every other path through `resample`, using the first
        # image of the reference collection as template, before merging.
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', int(paths[0])))
        for z in paths[1:]:
            landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', int(z))).map(resample(landsat1.first()))
            landsat1 = landsat1.merge(landsat2)
        landsat = landsat1

    # One band per image; pixels without data come back as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the 8-digit YYYYMMDD token of each band name
    # (raw string: '\d' is an invalid escape in a normal string literal).
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2])
    # Negative values (including the -99 fill) are treated as missing.
    stacked2[stacked2 < 0] = np.nan
    # Spatial mean NDVI per date, computed once and reused for plot and model.
    mean_ndvi = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()

    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], mean_ndvi)
    plt.title(n)
    plt.grid()
    plt.show()

    # Position of the first observation after 2016-01-01: everything before it
    # is the reference period, the rest is tested for anomalies.
    ordered2 = df_dates2.reset_index()
    ix_anom = ordered2[ordered2['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    anom_mean = PhenAnoma(mean_ndvi,
                          df_dates2['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates2)],
                          [0, ix_anom], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': ordered2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    # 1 where |anomaly| exceeds twice the reference-period standard deviation.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    # Running count of consecutive exceedances since the last non-exceeding row.
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()

    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    hits = np.where(test_mean['cumsum'] == 5)[0]
    if len(hits) > 0:
        ix_mean = hits[0]
        # Step back four rows towards the start of the run; clamp at 0 so a
        # negative position cannot wrap around to the end of the frame.
        plt.axvline(test_mean.iloc[max(ix_mean - 4, 0)]['date'])
        plt.show()
        deforested2.append(1)
    else:
        plt.show()
        deforested2.append(0)
    print(n, deforested2[-1])

In [None]:
# Re-run the detection for five hand-picked polygon ids of the `forested`
# collection and print the detected break date, if any. Nothing is appended to
# the result lists in this cell.
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
for i, n in enumerate(ids):
    print(n)
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-07-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-07-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-07-01')
    # NOTE(review): with the five ids above `i` never reaches 60, so the paths
    # are always sorted here; the branch is kept as found.
    if i >= 60:
        paths = landsat.aggregate_array('WRS_PATH').distinct().getInfo()
    else:
        paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        # Several WRS paths cover the polygon: keep the first path as is and pass
        # the images of every other path through `resample`, using the first
        # image of the reference collection as template, before merging.
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', int(paths[0])))
        for z in paths[1:]:
            landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', int(z))).map(resample(landsat1.first()))
            landsat1 = landsat1.merge(landsat2)
        landsat = landsat1

    # One band per image; pixels without data come back as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the 8-digit YYYYMMDD token of each band name
    # (raw string: '\d' is an invalid escape in a normal string literal).
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2])
    # Negative values (including the -99 fill) are treated as missing.
    stacked2[stacked2 < 0] = np.nan
    # Spatial mean NDVI per date, computed once and reused for plot and model.
    mean_ndvi = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()

    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], mean_ndvi)
    plt.title(n)
    plt.grid()
    plt.show()

    # Position of the first observation after 2016-01-01: everything before it
    # is the reference period, the rest is tested for anomalies.
    ordered2 = df_dates2.reset_index()
    ix_anom = ordered2[ordered2['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    anom_mean = PhenAnoma(mean_ndvi,
                          df_dates2['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates2)],
                          [0, ix_anom], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': ordered2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    # 1 where |anomaly| exceeds twice the reference-period standard deviation.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    # Running count of consecutive exceedances since the last non-exceeding row.
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()

    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    hits = np.where(test_mean['cumsum'] == 5)[0]
    if len(hits) > 0:
        ix_mean = hits[0]
        # Step back four rows towards the start of the run; clamp at 0 so a
        # negative position cannot wrap around to the end of the frame.
        break_date = test_mean.iloc[max(ix_mean - 4, 0)]['date']
        plt.axvline(break_date)
        plt.show()
        print(break_date)
    else:
        plt.show()
    print(n)

In [None]:

# Re-run the detection for the ids currently held in `ids`, this time looking
# them up in the `deforested` collection, and print the detected break date,
# if any. Nothing is appended to the result lists in this cell.
for i, n in enumerate(ids):
    print(n)
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-07-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(fea.geometry()).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-07-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-07-01')
    # NOTE(review): from loop position 60 onward the WRS paths are used in the
    # order returned by Earth Engine instead of sorted; the reason is not
    # visible in this cell.
    if i >= 60:
        paths = landsat.aggregate_array('WRS_PATH').distinct().getInfo()
    else:
        paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        # Several WRS paths cover the polygon: keep the first path as is and pass
        # the images of every other path through `resample`, using the first
        # image of the reference collection as template, before merging.
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', int(paths[0])))
        for z in paths[1:]:
            landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', int(z))).map(resample(landsat1.first()))
            landsat1 = landsat1.merge(landsat2)
        landsat = landsat1

    # One band per image; pixels without data come back as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the 8-digit YYYYMMDD token of each band name
    # (raw string: '\d' is an invalid escape in a normal string literal).
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2])
    # Negative values (including the -99 fill) are treated as missing.
    stacked2[stacked2 < 0] = np.nan
    # Spatial mean NDVI per date, computed once and reused for plot and model.
    mean_ndvi = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()

    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], mean_ndvi)
    plt.title(n)
    plt.grid()
    plt.show()

    # Position of the first observation after 2016-01-01: everything before it
    # is the reference period, the rest is tested for anomalies.
    ordered2 = df_dates2.reset_index()
    ix_anom = ordered2[ordered2['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    anom_mean = PhenAnoma(mean_ndvi,
                          df_dates2['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates2)],
                          [0, ix_anom], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': ordered2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])
    # 1 where |anomaly| exceeds twice the reference-period standard deviation.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    # Running count of consecutive exceedances since the last non-exceeding row.
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()

    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    hits = np.where(test_mean['cumsum'] == 5)[0]
    if len(hits) > 0:
        ix_mean = hits[0]
        # Step back four rows towards the start of the run; clamp at 0 so a
        # negative position cannot wrap around to the end of the frame.
        break_date = test_mean.iloc[max(ix_mean - 4, 0)]['date']
        plt.axvline(break_date)
        plt.show()
        print(break_date)
    else:
        plt.show()
    print(n)

### Scores

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


# Hard-coded label vectors for scoring (0 = no break, 1 = break).
# `obs`: 100 zeros followed by 100 ones.
# `pred`: for the first 100 entries 67 zeros then 33 ones; for the last 100
# entries 78 ones then 22 zeros.
# NOTE(review): presumably these counts come from the classification runs
# above; confirm before reusing.
pred = np.repeat([0, 1, 1, 0], [67, 33, 78, 22])
obs = np.repeat([0, 1], [100, 100])

In [None]:
# Precision of `pred` against `obs` (observations passed first, predictions second).
precision_score(obs, pred)

In [None]:
# Recall of `pred` against `obs` (observations passed first, predictions second).
recall_score(obs, pred)

In [None]:
# F1 score of `pred` against `obs` (observations passed first, predictions second).
f1_score(obs, pred)

In [None]:
# Overall accuracy of `pred` against `obs` (observations passed first, predictions second).
accuracy_score(obs, pred)

## Running for regions V-VII

In [None]:
# Earth Engine asset used as the study-area boundary in the cells below.
boundary = ee.FeatureCollection('users/ignisfausto/regions5_7')

In [None]:
# Fetch the coordinate arrays of the boundary geometry to the client.
coords = boundary.geometry().coordinates().getInfo()

In [None]:
# Bounding box from the first element of `coords`.
# NOTE(review): assumes coords[0] is a single ring of (x, y) pairs; confirm
# for multi-part geometries.
_ring = np.array(coords[0])
xmin, ymin = _ring[:, 0].min(), _ring[:, 1].min()
xmax, ymax = _ring[:, 0].max(), _ring[:, 1].max()
xmin, xmax, ymin, ymax

In [None]:
def generateGrid(xmin, ymin, xmax, ymax, dx, dy, marginx, marginy):
    """
    Builds a collection of rectangular cells tiling a bounding box.

    Parameters:
    xmin, ymin (number): Origin of the first cell.
    xmax, ymax (number): Upper limits; cell origins stop at xmax - 0.9 * dx
        and ymax - 0.9 * dy.
    dx, dy (number): Cell size and step between consecutive cell origins.
    marginx, marginy (number): Amount added on every side of each cell.

    Returns:
    ee.FeatureCollection: One rectangle feature per (x, y) origin pair, built
        in 'EPSG:4326' with the third constructor argument set to False.
    """
    x_limit = ee.Number(xmax).subtract(ee.Number(dx).multiply(0.9))
    y_limit = ee.Number(ymax).subtract(ee.Number(dy).multiply(0.9))
    x_origins = ee.List.sequence(xmin, x_limit, dx)
    y_origins = ee.List.sequence(ymin, y_limit, dy)

    def _column(x):
        # All cells sharing the same x origin.
        def _cell(y):
            west = ee.Number(x).subtract(marginx)
            east = ee.Number(x).add(ee.Number(dx)).add(marginx)
            south = ee.Number(y).subtract(marginy)
            north = ee.Number(y).add(ee.Number(dy)).add(marginy)
            corners = ee.List([west, south, east, north])
            rectangle = ee.Algorithms.GeometryConstructors.Rectangle(corners, 'EPSG:4326', False)
            return ee.Feature(rectangle)
        return y_origins.map(_cell)

    # List of per-column lists, flattened into a single list of features.
    all_cells = x_origins.map(_column).flatten()
    return ee.FeatureCollection(all_cells)


def parse_id(fea):
    """
    Stores the feature's system id, parsed as a number, in an 'id' property.

    Parameters:
    fea (ee.Feature): The feature to annotate.

    Returns:
    ee.Feature: The feature with the numeric 'id' property set.
    """
    numeric_id = ee.Number.parse(fea.id())
    return fea.set('id', numeric_id)

In [None]:
# 0.06 x 0.06 cells without margin, tagged with a numeric id and restricted
# to the cells that intersect the study-area boundary.
grid = (
    generateGrid(xmin, ymin, xmax, ymax, 0.06, 0.06, 0, 0)
    .map(parse_id)
    .filterBounds(boundary)
)

In [None]:
# Interactive map: satellite basemap with the grid drawn on top.
mapid = ee.FeatureCollection(grid).getMapId({'opacity':0.1})
# Coordinate order is reversed before handing it to folium
# (presumably (lon, lat) from GEE -> (lat, lon) for folium).
centroid = boundary.geometry().centroid().coordinates().getInfo()[::-1]
map = folium.Map(location=centroid, zoom_start=7)

_layer_attr = 'Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>'
_layers = (
    ('satellite', 'https://server.arcgisonline.com/ArcGIS/rest/services/World_Imagery/MapServer/tile/{z}/{y}/{x}'),
    ('grid', mapid['tile_fetcher'].url_format),
)
for _layer_name, _layer_tiles in _layers:
    folium.TileLayer(
        tiles=_layer_tiles,
        attr=_layer_attr,
        overlay=True,
        name=_layer_name,
    ).add_to(map)

map.add_child(folium.LayerControl())
map

In [None]:
# Number of grid cells left after filtering by the boundary.
grid.size().getInfo()

In [None]:
# Fetch the whole grid to the client; later cells iterate over vectors['features'].
vectors = grid.getInfo()

In [None]:
# NDVI collections for the first grid cell, one per Landsat sensor.
_first_cell_geom = grid.first().geometry()
l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(_first_cell_geom).map(mask).map(ndvil8)
l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(_first_cell_geom).map(mask).map(ndvil8)
# Landsat 7 and 5 are limited to 2000-01-01 .. 2022-05-01 before merging.
l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(_first_cell_geom).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(_first_cell_geom).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')

# Single time-ordered NDVI series across all sensors, 2000-01-01 .. 2022-06-01.
_all_sensors = l9.merge(l8).merge(l7).merge(l5)
landsat = _all_sensors.select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')

In [None]:
# Distinct WRS paths present in the merged collection (fetched to the client).
paths = landsat.aggregate_array('WRS_PATH').distinct().getInfo()
paths

In [None]:
# Keep only the images acquired on the first WRS path listed in `paths`.
landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', paths[0]))
# landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 233)).map(resample(landsat1.first()))

# With the second path disabled above, the working collection is just landsat1.
landsat_again = landsat1

In [None]:
# Turn every image into one band, clip to the first grid cell and download the
# pixel arrays of that cell; -99 is passed as sampleRectangle's default value.
stack = landsat_again.toBands().toFloat().clip(grid.first()).sampleRectangle(grid.first().geometry(), defaultValue=-99.).getInfo()

In [None]:
# Band names returned by sampleRectangle; each contains the acquisition date
# as an 8-digit YYYYMMDD group, which is parsed out here.
imgs = list(stack['properties'])
# Raw string: '\d' in a normal string literal is an invalid escape sequence.
dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
# 'ix' remembers each band's original position so arrays can be reordered by date.
df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
df_dates = df_dates.sort_values('date')
# Position (in date order) of the first image after 2016-01-01.
_dates_sorted = df_dates.reset_index()
ix_anom = _dates_sorted[_dates_sorted['date'] > '2016-01-01'].index[0]
reorder_ix = df_dates['ix'].values
reorder_imgs = [imgs[k] for k in reorder_ix]
# Stack the per-image arrays in chronological order along axis 0. The float cast
# guarantees NaN can be written even if every downloaded value parsed as an int.
stacked = np.stack([stack['properties'][band] for band in reorder_imgs]).astype(float)
# Non-positive values (including the -99 default) are treated as missing.
stacked[stacked <= 0] = np.nan

In [None]:
# Shape of the stacked array; axis 0 indexes the images (added by np.stack).
stacked.shape

In [None]:
# Apply defo_break to every 1-D slice taken along axis 0 of `stacked`
# (one slice per pixel). Sampling days run from 1 to 365 in steps of
# round(365/48) = 8.
_doy_grid = np.arange(1, 366, round(365/48))
_defo_kwargs = dict(h=2, rge=[0, 1], plot=False, dask='FFT')
breaks, break_dates = np.apply_along_axis(
    defo_break, 0, stacked, df_dates, _doy_grid, ix_anom, **_defo_kwargs)

In [None]:
# Inspect the first output of defo_break for every pixel.
breaks

In [None]:
# Dates from position ix_anom onwards, indexed by the original band position
# 'ix'; get_days builds the element-wise function applied to `breaks`.
_from_anom = df_dates.reset_index().iloc[ix_anom:]
df_dates_anom = _from_anom.set_index('ix')
vfunc = np.vectorize(get_days(df_dates_anom))
days = vfunc(breaks)

In [None]:
# Save the result for the first grid cell, named after the cell's 'id' property.
_first_cell_id = grid.first().get('id').getInfo()
_out_path = '/content/drive/MyDrive/npphen/breaks_{}.npy'.format(_first_cell_id)
np.save(_out_path, days)

In [None]:
# 'id' property of the first grid cell.
grid.first().get('id').getInfo()

In [None]:
# Run the break detection for every grid cell except the first one and save
# one array per cell, named after the feature id.
for n in vectors['features'][1:]:
    # Build the cell geometry once instead of once per use.
    geom = ee.Geometry(n['geometry'])
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')

    # Path 1 is the reference; path 233 images are passed through `resample`
    # built from the first path-1 image.
    # NOTE(review): both paths are hard-coded; a cell with no path-1 imagery
    # leaves landsat1 empty - confirm every cell is covered by path 1.
    landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', 1))
    landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', 233)).map(resample(landsat1.first()))

    landsat_again = landsat1.merge(landsat2)
    stack = landsat_again.toBands().toFloat().clip(geom).sampleRectangle(geom, defaultValue=-99.).getInfo()

    # Parse the 8-digit date out of every band name and sort bands by date.
    imgs = list(stack['properties'])
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Position (in date order) of the first image after 2016-01-01.
    dates_sorted = df_dates.reset_index()
    ix_anom = dates_sorted[dates_sorted['date'] > '2016-01-01'].index[0]
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    # Float cast guarantees NaN can be written into the array.
    stacked = np.stack([stack['properties'][band] for band in reorder_imgs]).astype(float)
    stacked[stacked <= 0] = np.nan

    # defo_break is applied to every 1-D slice along axis 0 (one per pixel).
    breaks, break_dates = np.apply_along_axis(defo_break,
                                              0,
                                              stacked,
                                              df_dates,
                                              np.arange(1, 366, round(365/48)),
                                              ix_anom,
                                              h=2,
                                              rge=[0, 1],
                                              plot=False,
                                              dask='FFT')
    df_dates_anom = dates_sorted.iloc[ix_anom:].set_index('ix')
    vfunc = np.vectorize(get_days(df_dates_anom))
    days = vfunc(breaks)
    np.save('/content/drive/MyDrive/npphen/breaks_{}.npy'.format(n['id']), days)


In [None]:
# Inspect the first grid-cell feature of the client-side copy of the grid.
vectors['features'][0]

In [None]:
# Feature ids to plot; the next cells look them up in `forested`.
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']

In [None]:
# Quick look at PhenKplot for the first two forested ids.
fig, axs = plt.subplots(5, 1)
for i, n in enumerate(ids[:2]):
    print(n)
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    geom = fea.geometry()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-05-01')

    # With several WRS paths, the lowest-numbered path is the reference and
    # images from the other paths are passed through `resample` before merging.
    paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', int(paths[0])))
        for z in paths[1:]:
            landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', int(z))).map(resample(landsat1.first()))
            landsat1 = landsat1.merge(landsat2)
        landsat = landsat1

    stack = landsat.toBands().toFloat().sampleRectangle(geom, defaultValue=-99.).getInfo()

    # Parse the 8-digit date out of every band name and sort bands by date.
    imgs2 = list(stack['properties'])
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date':dates_imgs2, 'ix':range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    # Float cast guarantees NaN can be written into the array.
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2]).astype(float)
    # Negative values (including the -99 default) are treated as missing.
    stacked2[stacked2 < 0] = np.nan

    # One NDVI value per date: mean over the two spatial axes.
    # NOTE(review): this stores PhenKplot's return value in the axes array,
    # replacing the Axes handle created by plt.subplots - confirm intent.
    axs[i] = PhenKplot(np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel(), df_dates2['date'].values, np.arange(1, 366, round(365/48)), ylim=(0.3, 1), h=2)
fig.show()

In [None]:
# Five-panel figure (A-E) of the NDVI phenology density for the ids in `ids`.
labs = ['A', 'B', 'C', 'D', 'E']
fig, axes = plt.subplots(5, 1, sharex=True, figsize=(4,10))
for i, n in enumerate(ids[:]):
    print(n)
    # The last panel uses feature ID 370 from `final` instead of the forested id.
    if i == 4:
        fea = final.filter(ee.Filter.eq('ID', 370)).first()
    else:
        fea = forested.filter(ee.Filter.eq('id', n)).first()
    geom = fea.geometry()

    # Use only the WRS path with the most Landsat 8 images over the feature
    # (first key wins on ties, as in a scan over the histogram keys).
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geom)
    paths = l8.aggregate_histogram('WRS_PATH').getInfo()
    path = max(paths, key=paths.get)
    on_path = ee.Filter.eq('WRS_PATH', float(path))
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(geom).filter(on_path).map(mask).map(ndvil8)
    l8 = l8.filter(on_path).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(geom).filter(on_path).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(geom).filter(on_path).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-06-01')

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2022-06-01')
    stack = landsat.toBands().toFloat().sampleRectangle(geom, defaultValue=-99.).getInfo()

    # Parse the 8-digit date out of every band name and sort bands by date.
    imgs2 = list(stack['properties'])
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date':dates_imgs2, 'ix':range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    # Float cast guarantees NaN can be written into the array.
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2]).astype(float)
    # Negative values (including the -99 default) are treated as missing.
    stacked2[stacked2 < 0] = np.nan

    # One NDVI value per date: mean over the two spatial axes.
    x, y, cumdensity, maxy = PhenKplot(np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel(), df_dates2['date'].values, np.arange(1, 366, round(365/48)), ylim=(0.3, 1), h=1)

    ax = axes[i]
    ax.set_xlim(1, 365)
    ax.set_ylim(0.3, 1)
    cfset = ax.contourf(x, y, cumdensity, cmap='terrain')
    cset = ax.contour(x, y, cumdensity, levels=[0, 0.5, 0.75, 0.9, 0.95], colors='k')
    # maxy is drawn against day of year 1..365 as a red line.
    cline = ax.plot(np.arange(1,366), maxy, linewidth=1.5, color='r')
    ax.clabel(cset, inline=1, fontsize=10)
    # Only the bottom panel carries the shared x-axis label.
    if i == 4:
        ax.set_xlabel('Day of the year', fontsize=14)
    ax.set_ylabel('NDVI', fontsize=14)
    ax.tick_params(axis='both', labelsize=12)
    # Panel letter, placed left of and above the axes area (data coordinates).
    ax.text(-90, 1.05, labs[i], fontsize=16, weight='bold')
fig.savefig('/content/drive/MyDrive/npphen_und2.png', dpi=300, bbox_inches='tight')
fig.show()



In [None]:
# Five-panel figure (F-J) of the NDVI phenology density for deforested ids,
# using imagery before 2016-01-01 only.
ids = ['0000000000000000000c', '0000000000000000000a', '00000000000000000048', '00000000000000000029', '00000000000000000033']
labs = ['F', 'G', 'H', 'I', 'J']
fig, axes = plt.subplots(5, 1, sharex=True, figsize=(4,10))
for i, n in enumerate(ids[:]):
    print(n)
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    geom = fea.geometry()
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil8)
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(geom).map(mask).map(ndvil57).filterDate('2000-01-01', '2022-05-01')
    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2016-01-01')

    paths = np.sort(landsat.aggregate_array('WRS_PATH').distinct().getInfo())
    if len(paths) > 1:
        # Path 233 is the reference for every panel except index 3, where
        # path 1 is; the other path's images are passed through `resample`.
        # NOTE(review): both path numbers are hard-coded - confirm they are
        # the only paths covering these features.
        ref_path, other_path = (233, 1) if i != 3 else (1, 233)
        landsat1 = landsat.filter(ee.Filter.eq('WRS_PATH', ref_path))
        landsat2 = landsat.filter(ee.Filter.eq('WRS_PATH', other_path)).map(resample(landsat1.first()))
        landsat_again = landsat1.merge(landsat2)
        landsat = landsat_again

    stack = landsat.toBands().toFloat().sampleRectangle(geom, defaultValue=-99.).getInfo()

    # Parse the 8-digit date out of every band name and sort bands by date.
    imgs2 = list(stack['properties'])
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs2]
    df_dates2 = pd.DataFrame(data={'date':dates_imgs2, 'ix':range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    # Float cast guarantees NaN can be written into the array.
    stacked2 = np.stack([stack['properties'][band] for band in reorder_imgs2]).astype(float)
    # Negative values (including the -99 default) are treated as missing.
    stacked2[stacked2 < 0] = np.nan

    # One NDVI value per date: mean over the two spatial axes.
    # NOTE(review): PhenKplot gets ylim=(0, 1) while the axes are limited to
    # 0.3-1 below - confirm this difference is intended.
    x, y, cumdensity, maxy = PhenKplot(np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel(), df_dates2['date'].values, np.arange(1, 366, round(365/48)), ylim=(0, 1), h=1)

    ax = axes[i]
    ax.set_xlim(1, 365)
    ax.set_ylim(0.3, 1)
    cfset = ax.contourf(x, y, cumdensity, cmap='terrain')
    cset = ax.contour(x, y, cumdensity, levels=[0, 0.5, 0.75, 0.9, 0.95], colors='k')
    # maxy is drawn against day of year 1..365 as a red line.
    cline = ax.plot(np.arange(1,366), maxy, linewidth=1.5, color='r')
    ax.clabel(cset, inline=1, fontsize=10)
    # Only the bottom panel carries the shared x-axis label.
    if i == 4:
        ax.set_xlabel('Day of the year', fontsize=14)
    ax.set_ylabel('NDVI', fontsize=14)
    ax.tick_params(axis='both', labelsize=12)
    # Panel letter, placed left of and above the axes area (data coordinates).
    ax.text(-90, 1.05, labs[i], fontsize=16, weight='bold')
fig.savefig('/content/drive/MyDrive/npphen_defo.png', dpi=300, bbox_inches='tight')
fig.show()

In [None]:
def getS2_CLOUD_PROBABILITY(geo):
    """
    Pairs Sentinel-2 images with their cloud-probability images.

    Parameters:
    geo (ee.Geometry): Area used to filter both collections.

    Returns:
    ee.ImageCollection: Sentinel-2 images over `geo` with
    CLOUDY_PIXEL_PERCENTAGE below 20, each with the bands of the
    cloud-probability image sharing its 'system:index' appended.
    """
    s2 = (ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
          .filterBounds(geo)
          .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 20)))
    cloud_prob = ee.ImageCollection("COPERNICUS/S2_CLOUD_PROBABILITY").filterBounds(geo)
    same_index = ee.Filter.equals(leftField='system:index',
                                  rightField='system:index')
    # The inner join keeps only images present in both collections.
    pairs = ee.Join.inner().apply(primary=s2, secondary=cloud_prob, condition=same_index)

    def merge_pair(pair):
        return ee.Image(pair.get('primary')).addBands(pair.get('secondary'))

    return ee.ImageCollection(pairs.map(merge_pair))


def projectShadows(cloudMask, sunAzimuth, offset):
    """
    Shifts a cloud mask along a direction derived from the sun azimuth.

    Parameters:
    cloudMask (ee.Image): Mask image to displace.
    sunAzimuth: Sun azimuth in degrees.
    offset: Multiplier applied to the unit displacement.

    Returns:
    ee.Image: `cloudMask` with its projection translated by the displacement.

    Notes:
    - The angle used is the azimuth converted to radians plus pi/2.
    - Its cosine and sine are scaled by 15, rounded, then multiplied by `offset`.
    """
    factor = ee.Number(offset)
    quarter_turn = ee.Number(0.5).multiply(np.pi)
    angle = ee.Number(sunAzimuth).multiply(np.pi).divide(180.0).add(quarter_turn)
    shift_x = angle.cos().multiply(15.0).round().multiply(factor)
    shift_y = angle.sin().multiply(15.0).round().multiply(factor)
    moved_proj = cloudMask.projection().translate(shift_x, shift_y)
    return cloudMask.changeProj(cloudMask.projection(), moved_proj)


def scale(img):
    """
    Divides selected bands by 10000 and keeps the remaining listed bands as is.

    Parameters:
    img (ee.Image): Image holding the Sentinel-2 bands and 'probability'.

    Returns:
    The rescaled image with all properties of `img` copied over.

    Notes:
    - Rescaled bands: B2, B3, B4, B5, B6, B7, B8, B8A, B11, B12.
    - Bands appended unchanged: B1, B9, B10, probability.
    """
    rescaled_names = ['B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B11', 'B12']
    untouched_names = ['B1', 'B9', 'B10', 'probability']
    rescaled = img.select(rescaled_names).divide(10000)
    combined = rescaled.addBands(img.select(untouched_names))
    return combined.copyProperties(img, img.propertyNames())


def masking(img):
    """
    Scales a Sentinel-2 image, adds NDVI and masks clouds and projected shadows.

    Parameters:
    img (ee.Image): Sentinel-2 image that includes the 'probability' band.

    Returns:
    The scaled image plus an 'ndvi' band (normalized difference of B8 and
    B4), masked and carrying the properties named in `img`.

    Notes:
    - Clouds are pixels with 'probability' >= 30.
    - Shadows are the cloud mask displaced by projectShadows and widened to
      every pixel closer than 5 to it (square root of fastDistanceTransform).
    - NOTE(review): the displacement multiplier is the property
      MEAN_INCIDENCE_ZENITH_ANGLE_B10 - confirm this is the intended offset.
    """
    prop_names = img.propertyNames()
    scaled = ee.Image(scale(img))

    cloud = scaled.select('probability').gte(30)
    shadow = projectShadows(cloud,
                            scaled.get('MEAN_SOLAR_AZIMUTH_ANGLE'),
                            img.get('MEAN_INCIDENCE_ZENITH_ANGLE_B10'))
    near_shadow = shadow.fastDistanceTransform().sqrt().lt(5)

    # A pixel is kept only when it is neither cloud nor near a shadow.
    keep = cloud.Not().multiply(near_shadow.Not())

    ndvi = scaled.normalizedDifference(['B8', 'B4']).rename('ndvi')
    with_ndvi = scaled.addBands(ndvi)
    return with_ndvi.updateMask(keep).copyProperties(scaled, prop_names)


def getNDVI(img):
    """
    Computes the normalized difference of bands B8 and B4.

    Parameters:
    img (ee.Image): Image containing bands 'B8' and 'B4'.

    Returns:
    The normalized-difference image with the properties of `img` copied over.
    """
    index = img.normalizedDifference(['B8', 'B4'])
    return index.copyProperties(img, img.propertyNames())


def set_date(date):
    """
    Builds a function that stamps a fixed 'date' property on its argument.

    Parameters:
    date: Value to store under the 'date' property.

    Returns:
    function: Takes an object with a `set` method and returns the result of
    calling `set('date', date)` on it.
    """
    return lambda i: i.set('date', date)


def set_date2(img):
    """
    Stores the image's acquisition date as a 'date' property.

    Parameters:
    img (ee.Image): Image with a 'system:time_start' property.

    Returns:
    The image with 'date' set to 'system:time_start' formatted as YYYY-MM-dd.
    """
    acquired = ee.Date(img.get('system:time_start'))
    return img.set('date', acquired.format('YYYY-MM-dd'))


def sampleS2(geo):
    """
    Builds a mapper that reduces an image to its mean over a feature.

    Parameters:
    geo (ee.Feature): Feature whose geometry defines the region.

    Returns:
    function: Maps an image to a geometry-less ee.Feature with 'date'
    (YYYY-MM-dd of 'system:time_start') and 'ndvi' (first value of the mean
    reduction over the region at scale 10).
    """
    def inner(img):
        stats = img.reduceRegion('mean', geo.geometry(), scale=10)
        props = {
            'date': ee.Date(img.get('system:time_start')).format('YYYY-MM-dd'),
            'ndvi': stats.values().get(0),
        }
        return ee.Feature(None, props)
    return inner


In [None]:
# Sentinel-2 anomaly analysis for selected forested ids: plots the anomalies
# and the NDVI series, and marks/prints a break date when one is detected.
ids = ['00000000000000000060', '0000000000000000002c', '0000000000000000004a', '00000000000000000001', '00000000000000000005']
for i, n in enumerate(ids[:]):
    print(n)
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking)
    # Keep the images of a single MGRS tile (the first one listed).
    tiles = S2.aggregate_array('MGRS_TILE').distinct().getInfo()
    S2 = S2.filter(ee.Filter.eq('MGRS_TILE', tiles[0]))
    stack = S2.select('ndvi').toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()

    # Parse the 8-digit date out of every band name and sort bands by date.
    imgs = list(stack['properties'])
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    # Float cast guarantees NaN can be written into the array.
    stacked = np.stack([stack['properties'][band] for band in reorder_imgs]).astype(float)
    # Non-positive values (including the -99 default) are treated as missing.
    stacked[stacked <= 0] = np.nan

    # One NDVI value per date (mean over the two spatial axes) and its
    # interquartile range, which is the anomaly threshold scale below.
    series = np.apply_over_axes(np.nanmean, stacked, (1, 2)).ravel()
    iqr = np.nanquantile(series, 0.75) - np.nanquantile(series, 0.25)
    # Position (in date order) of the first image after 2016-01-01.
    dates_sorted = df_dates.reset_index()
    ix_anom = dates_sorted[dates_sorted['date'] > '2016-01-01'].index[0]

    anom_mean = PhenAnoma(series,
                          df_dates['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates)],
                          [0, len(df_dates)], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an':anom_mean, 'date':dates_sorted.iloc[ix_anom: len(df_dates)]['date'], 'ndvi':series})
    test_mean = test_mean.drop_duplicates(subset='date')
    test_mean = test_mean.dropna(subset=['an'])
    test_mean = test_mean.reset_index()

    # 'bool' flags anomalies larger than 1.25 * IQR in absolute value;
    # 'cumsum' counts consecutive flagged rows since the last unflagged one.
    test_mean['bool'] = (np.abs(test_mean['an']) > 1.25 * iqr).astype(int)
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()

    # 5-observation moving average of the NDVI first difference.
    test_mean['diff'] = test_mean['ndvi'].diff(1)
    test_mean['ma'] = test_mean['diff'].rolling(5).mean()

    # Candidates: decreasing NDVI within a run of at least 3 flagged anomalies.
    # reset_index stores the row label of test_mean in column 'level_0'
    # because an 'index' column already exists.
    test_mean1 = test_mean[(test_mean['ma'] < 0) & (test_mean['cumsum'] >= 3)]
    test_mean1 = test_mean1.reset_index()
    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    plt.axhline(-iqr)
    plt.axhline(iqr)

    break_date = None
    if len(test_mean1) > 0:
        if test_mean1.iloc[0]['an'] > iqr:
            # First candidate is a positive anomaly: take the row with the
            # steepest average NDVI decrease as the break.
            print('b')
            ix_mean = test_mean1['ma'].idxmin()
            ix_mean = test_mean1.loc[ix_mean]['level_0']
            # Report the same date that is drawn; the printed date previously
            # used ix_mean-4 while the plotted line used ix_mean.
            break_date = test_mean.loc[ix_mean]['date']
        else:
            # Otherwise step back 4 rows from the first candidate.
            ix_mean = test_mean1.iloc[0]['level_0']
            break_date = test_mean.loc[ix_mean-4]['date']
        plt.axvline(break_date)
    plt.show()

    # NDVI series with the detected break, if any.
    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates['date'], series)
    if break_date is not None:
        plt.axvline(break_date)
    plt.title(n)
    plt.grid()
    plt.show()
    if break_date is not None:
        print(break_date)
    # Free the large arrays before the next feature is downloaded.
    del stacked, stack


In [None]:
# Inspect the candidate rows left from the last loop iteration.
test_mean1

In [None]:
# Same inspection as the previous cell.
test_mean1

In [None]:
# Row label of the smallest moving-average NDVI difference among the candidates.
test_mean1['ma'].idxmin()

In [None]:
# Row found by stepping back 'cumsum' rows from the first candidate.
# NOTE(review): this uses the 'index' column as a label into test_mean; in the
# loop above the matching label is stored in 'level_0' - confirm which is meant.
test_mean.loc[test_mean1.iloc[0]['index']-test_mean.loc[test_mean1.iloc[0]['index']]['cumsum']]

In [None]:
# Sentinel-2 anomaly check for every forested id: plots the anomalies and the
# NDVI series, marking a break date when one is detected.
for i, n in enumerate(ids_forested[:]):
    print(n)
    fea = forested.filter(ee.Filter.eq('id', n)).first()
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking)
    # Keep the images of a single MGRS tile (the first one listed).
    tiles = S2.aggregate_array('MGRS_TILE').distinct().getInfo()
    S2 = S2.filter(ee.Filter.eq('MGRS_TILE', tiles[0]))
    stack = S2.select('ndvi').toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()

    # Parse the 8-digit date out of every band name and sort bands by date.
    imgs = list(stack['properties'])
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    # Float cast guarantees NaN can be written into the array.
    stacked = np.stack([stack['properties'][band] for band in reorder_imgs]).astype(float)
    # Negative values (including the -99 default) are treated as missing.
    stacked[stacked < 0] = np.nan
    # One NDVI value per date (mean over the two spatial axes), computed once.
    series = np.apply_over_axes(np.nanmean, stacked, (1, 2)).ravel()

    # Position (in date order) of the first image after 2016-01-01.
    dates_sorted = df_dates.reset_index()
    ix_anom = dates_sorted[dates_sorted['date'] > '2016-01-01'].index[0]
    # Spread of all pixel values from ix_anom onwards; scales the threshold.
    std_all = np.nanstd(stacked[ix_anom:, :, :])
    anom_mean = PhenAnoma(series,
                          df_dates['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates)],
                          [0, len(df_dates)], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an':anom_mean, 'date':dates_sorted.iloc[ix_anom: len(df_dates)]['date']})
    test_mean = test_mean.dropna(subset=['an'])

    # 'bool' flags anomalies larger than 1.5 * std_all in absolute value;
    # 'cumsum' counts consecutive flagged rows since the last unflagged one.
    test_mean['bool'] = (np.abs(test_mean['an']) > 1.5 * std_all).astype(int)
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    # Not used further in this cell; kept in case other cells inspect it.
    kmeans = KMeans(n_clusters=2).fit(test_mean['an'].values.reshape(-1, 1))
    # Compute the Otsu threshold once instead of once per row.
    otsu = filters.threshold_otsu(test_mean['an'].values)
    indices = np.where(test_mean['an'] > otsu)[0]
    # 'thresh' is 1 for anomalies at or below the Otsu threshold, else 0.
    test_mean['thresh'] = np.where(test_mean['an'] > otsu, 0, 1)
    test_mean = test_mean.reset_index()
    # Candidates: low anomalies after more than 2 consecutive flagged rows.
    test_mean1 = test_mean[(test_mean['thresh'] == 1) & (test_mean['cumsum'] > 2)]

    break_date = None
    if len(test_mean1) > 0:
        # The break is placed one observation before the first candidate.
        ix = test_mean1.head(1).index[0]
        break_date = test_mean.loc[ix-1]['date']
        plt.axvline(break_date)
    plt.show()

    # NDVI series with the detected break, if any.
    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates['date'], series)
    if break_date is not None:
        plt.axvline(break_date)
    plt.title(n)
    plt.grid()
    plt.show()
    print(n)

In [None]:
# Forested ids from position 85 onwards.
ids_forested[85:]

In [None]:
# Sentinel-2 anomaly check for every deforested id: plots the anomalies and
# the NDVI series, marking a break date when one is detected.
for i, n in enumerate(ids_deforested[:]):
    print(n)
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking)
    # Keep the images of a single MGRS tile (the first one listed).
    tiles = S2.aggregate_array('MGRS_TILE').distinct().getInfo()
    S2 = S2.filter(ee.Filter.eq('MGRS_TILE', tiles[0]))
    stack = S2.select('ndvi').toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()

    # Parse the 8-digit date out of every band name and sort bands by date.
    imgs = list(stack['properties'])
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', band)[0], format='%Y%m%d') for band in imgs]
    df_dates = pd.DataFrame(data={'date':dates_imgs, 'ix':range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    # Float cast guarantees NaN can be written into the array.
    stacked = np.stack([stack['properties'][band] for band in reorder_imgs]).astype(float)
    # Negative values (including the -99 default) are treated as missing.
    stacked[stacked < 0] = np.nan
    # One NDVI value per date (mean over the two spatial axes), computed once.
    series = np.apply_over_axes(np.nanmean, stacked, (1, 2)).ravel()

    # Position (in date order) of the first image after 2016-01-01.
    dates_sorted = df_dates.reset_index()
    ix_anom = dates_sorted[dates_sorted['date'] > '2016-01-01'].index[0]
    # Spread of all pixel values from ix_anom onwards; scales the threshold.
    std_all = np.nanstd(stacked[ix_anom:, :, :])
    anom_mean = PhenAnoma(series,
                          df_dates['date'].values,
                          np.arange(1, 366, round(365/48)),
                          [ix_anom, len(df_dates)],
                          [0, len(df_dates)], h=1,
                          rge=[0, 1],
                          plot=True,
                          dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an':anom_mean, 'date':dates_sorted.iloc[ix_anom: len(df_dates)]['date']})
    test_mean = test_mean.dropna(subset=['an'])

    # 'bool' flags anomalies larger than 1.5 * std_all in absolute value;
    # 'cumsum' counts consecutive flagged rows since the last unflagged one.
    test_mean['bool'] = (np.abs(test_mean['an']) > 1.5 * std_all).astype(int)
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    # Not used further in this cell; kept in case other cells inspect it.
    kmeans = KMeans(n_clusters=2).fit(test_mean['an'].values.reshape(-1, 1))
    # Compute the Otsu threshold once instead of once per row.
    otsu = filters.threshold_otsu(test_mean['an'].values)
    indices = np.where(test_mean['an'] > otsu)[0]
    # 'thresh' is 1 for anomalies at or below the Otsu threshold, else 0.
    test_mean['thresh'] = np.where(test_mean['an'] > otsu, 0, 1)
    test_mean = test_mean.reset_index()
    # Candidates: low anomalies after more than 1 consecutive flagged row.
    test_mean1 = test_mean[(test_mean['thresh'] == 1) & (test_mean['cumsum'] > 1)]

    break_date = None
    if len(test_mean1) > 0:
        # The break is placed one observation before the first candidate.
        ix = test_mean1.head(1).index[0]
        break_date = test_mean.loc[ix-1]['date']
        plt.axvline(break_date)
    plt.show()

    # NDVI series with the detected break, if any.
    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates['date'], series)
    if break_date is not None:
        plt.axvline(break_date)
    plt.title(n)
    plt.grid()
    plt.show()
    print(n)

In [None]:
# Display the polygon ids from position 94 onward; this is the same slice
# that the following loop iterates over.
ids_deforested[94:]

In [None]:
for i, n in enumerate(ids_deforested[94:]):
    # Per-polygon workflow:
    #   1. sample the Sentinel-2 NDVI stack over the polygon (single MGRS tile),
    #   2. compute anomalies of the spatial-mean NDVI with PhenAnoma,
    #   3. flag the first observation that is below the Otsu threshold and
    #      belongs to a run of large anomalies, and plot the result.
    print(n)
    fea = deforested.filter(ee.Filter.eq('id', n)).first()
    S2 = getS2_CLOUD_PROBABILITY(fea.geometry()).filterDate('2016-01-01', '2022-06-01').map(masking)
    # Restrict to the first MGRS tile returned so that every image in the
    # stack comes from the same tile.
    tiles = S2.aggregate_array('MGRS_TILE').distinct().getInfo()
    S2 = S2.filter(ee.Filter.eq('MGRS_TILE', tiles[0]))
    # One band per image; pixels outside the image footprint come back as -99.
    stack = S2.select('ndvi').toBands().toFloat().sampleRectangle(fea.geometry(), defaultValue=-99.).getInfo()
    imgs = list(stack['properties'])
    # The acquisition date is the first 8-digit run in each band name.
    dates_imgs = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs]
    df_dates = pd.DataFrame(data={'date': dates_imgs, 'ix': range(len(dates_imgs))})
    df_dates = df_dates.sort_values('date')
    # Reorder the sampled bands chronologically.
    reorder_ix = df_dates['ix'].values
    reorder_imgs = [imgs[k] for k in reorder_ix]
    stacked = np.stack([stack['properties'][name] for name in reorder_imgs])
    # Negative values (including the -99 fill) are treated as missing.
    stacked[stacked < 0] = np.nan

    # Position (in chronological order) of the first image after 2016-01-01.
    dates_sorted = df_dates.reset_index()
    ix_anom = dates_sorted[dates_sorted['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked[ix_anom:, :, :])
    # Spatial mean per date, computed once and reused for the plots below.
    mean_ndvi = np.apply_over_axes(np.nanmean, stacked, (1, 2)).ravel()
    # A copy is passed so that the plotted series cannot be affected by the
    # call (PhenAnoma is defined elsewhere; whether it mutates its input is
    # not visible here).
    anom_mean = PhenAnoma(mean_ndvi.copy(),
                            df_dates['date'].values,
                            np.arange(1, 366, round(365/48)),
                            [ix_anom, len(df_dates)],
                            [0, len(df_dates)], h=1,
                            rge=[0, 1],
                            plot=True,
                            dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': dates_sorted.iloc[ix_anom: len(df_dates)]['date']})
    test_mean = test_mean.dropna(subset=['an'])

    # 'bool' marks anomalies larger than 1.5 standard deviations; 'cumsum'
    # counts how many rows have elapsed since the last unmarked row.
    test_mean['bool'] = (np.abs(test_mean['an']) > 1.5 * std_all).astype(int)
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()
    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    # kmeans is not used inside the loop; it is kept because later cells read
    # kmeans.cluster_centers_.
    kmeans = KMeans(n_clusters=2).fit(test_mean['an'].values.reshape(-1, 1))
    # The Otsu threshold is computed once instead of once per row.
    otsu_thresh = filters.threshold_otsu(test_mean['an'].values)
    indices = np.where(test_mean['an'] > otsu_thresh)[0]
    # 'thresh' is 1 for anomalies at or below the Otsu threshold, else 0.
    test_mean['thresh'] = (test_mean['an'] <= otsu_thresh).astype(int)
    test_mean = test_mean.reset_index()
    test_mean1 = test_mean[(test_mean['thresh'] == 1) & (test_mean['cumsum'] > 1)]

    # The change is dated at the observation just before the first flagged
    # row; change_date stays None when nothing is flagged.
    change_date = None
    if len(test_mean1) > 0:
        ix = test_mean1.head(1).index[0]
        change_date = test_mean.loc[ix-1]['date']
        plt.axvline(change_date)
    plt.show()

    # Second figure: the raw spatial-mean NDVI series with the same marker.
    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates['date'], mean_ndvi)
    if change_date is not None:
        plt.axvline(change_date)
    plt.title(n)
    plt.grid()
    plt.show()
    print(n)

In [None]:
# Alternative detector based on the 15-step difference of the anomaly series.
# Uses anom_mean, df_dates, ix_anom and std_all left by the last loop iteration.
test_mean = pd.DataFrame(data={'an':anom_mean, 'date':df_dates.reset_index().iloc[ix_anom: len(df_dates)]['date']})
test_mean = test_mean.drop_duplicates(subset='date', keep='first')
test_mean = test_mean.dropna(subset=['an'])
test_mean = test_mean.reset_index()

# Difference between each anomaly and the one 15 observations earlier; the
# first 15 rows are NaN and therefore never flagged.
test_mean['diff'] = test_mean['an'].diff(15)
test_mean['booldiff'] = (np.abs(test_mean['diff']) > 1.8 * std_all).astype(int)
test_mean['cumsumdiff'] = test_mean.groupby(test_mean['booldiff'].eq(0).cumsum()).cumcount().tolist()
plt.scatter(test_mean['date'], test_mean['diff'])

# Keep only the flagged rows (test_mean is intentionally overwritten, as in
# the original cell).
test_mean = test_mean[test_mean['booldiff'] == 1]
if test_mean.empty:
    # Nothing exceeded the threshold: there is no date to mark.
    date = None
else:
    # axvline expects a scalar x, so take the first flagged date as a scalar
    # rather than a one-row Series.
    date = test_mean['date'].iloc[0]
    plt.axvline(date)

In [None]:
# Display the threshold applied to the 15-step anomaly difference
# (1.8 times the standard deviation stored in std_all).
1.8 * std_all

In [None]:
# Anomaly series with the Otsu threshold (horizontal line) and the date of
# row `ix` (vertical line).
plt.scatter(x=test_mean['date'], y=test_mean['an'])
otsu_level = filters.threshold_otsu(test_mean['an'].to_numpy())
plt.axhline(y=otsu_level)
plt.axvline(x=test_mean.loc[ix, 'date'])


In [None]:
# Spatial-mean NDVI per date (mean over axis 1, then over axis 2), with the
# previously selected `date` marked by a vertical line.
roi_mean_ndvi = np.apply_over_axes(np.nanmean, stacked, (1, 2)).reshape(-1)
plt.scatter(x=df_dates['date'], y=roi_mean_ndvi)
plt.axvline(x=date)

In [None]:
# Plot the anomalies and, when the two k-means cluster centres are further
# apart than 1.5 standard deviations, mark the observation that follows the
# last above-threshold row with 'cumsum' > 1.
plt.scatter(test_mean['date'], test_mean['an'])
plt.axhline(0, color='red')
if np.abs(np.diff(kmeans.cluster_centers_, axis=0)) > 1.5 * std_all:
    otsu_thresh = filters.threshold_otsu(test_mean['an'].values)
    # Row positions of anomalies above the Otsu threshold.
    indices = np.where(test_mean['an'] > otsu_thresh)[0]
    # Work with positions throughout: the original took an index label and
    # used it with .iloc, which is only correct for a default 0..n-1 index.
    candidates = indices[test_mean['cumsum'].to_numpy()[indices] > 1]
    # Draw the marker only when a candidate exists and it has a successor.
    if candidates.size and candidates[-1] + 1 < len(test_mean):
        sub = candidates[-1]
        plt.axvline(test_mean.iloc[sub+1]['date'])
    plt.show()

In [None]:
# Display the mean of the fitted k-means cluster centres.
np.mean(kmeans.cluster_centers_)

In [None]:

# Fit a two-cluster k-means on the anomaly values and report the centres
# next to the reference standard deviation.
kmeans = KMeans(n_clusters=2).fit(test_mean[['an']].to_numpy())
print(kmeans.cluster_centers_, std_all)
# Evaluated but not displayed (only the last expression of a cell is shown).
np.abs(np.diff(kmeans.cluster_centers_, axis=0)) > 1.5 * std_all
# Position of the first anomaly below the Otsu threshold.
ix = np.flatnonzero(test_mean['an'] < filters.threshold_otsu(test_mean['an'].to_numpy()))[0]
kmeans.inertia_

In [None]:
# NOTE(review): this cell was an unterminated `if (` statement (a SyntaxError).
# It is reduced to the comparison that was actually written so the cell parses.
# TODO confirm whether 1.7 was meant to be scaled by std_all, as the sibling
# cells do with 1.5 * std_all.
np.abs(np.diff(kmeans.cluster_centers_, axis=0)) > 1.7



In [None]:
# Anomaly series as a line, with the date at row position `ix` marked.
plt.plot(test_mean['date'], test_mean['an'])
plt.axvline(x=test_mean['date'].iloc[ix])


In [None]:
# Display the distinct MGRS tile ids present in the current S2 collection.
S2.aggregate_array('MGRS_TILE').distinct().getInfo()

In [None]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score


# Hard-coded tallies: 200 samples, the first 100 observed as class 0 and the
# last 100 as class 1; predictions are given as run lengths per block.
pred = np.repeat([0, 1, 0, 1], [43, 57, 53, 47])
obs = np.repeat([0, 1], [100, 100])
# Printed in the order: precision, recall, F1, accuracy.
scores = [metric(obs, pred) for metric in (precision_score, recall_score, f1_score, accuracy_score)]
print(*scores)

In [None]:
# Load the validation polygons asset and fetch the 'ID' property of every
# feature as a client-side list (getInfo performs the request).
reference = ee.FeatureCollection('users/ignisfausto/PoligonosValidacionConsolidados_sIncendios')
reference_ids = reference.aggregate_array('ID').getInfo()

In [None]:
for i, n in enumerate(reference_ids):
    # Per validation polygon: build a 2000-2022 Landsat 5/7/8/9 NDVI stack,
    # compute anomalies of the spatial-mean NDVI after 2016-01-01 with
    # PhenAnoma, and mark the start of a sustained run of large anomalies.
    print(n)
    fea = reference.filter(ee.Filter.eq('ID', n)).first()
    geom = fea.geometry()
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geom)
    # Keep only the WRS path with the most Landsat-8 scenes over the polygon
    # (the first such path in case of a tie).
    paths = l8.aggregate_histogram('WRS_PATH').getInfo()
    path = max(paths, key=paths.get)
    same_path = ee.Filter.eq('WRS_PATH', float(path))
    l8 = l8.filter(same_path).map(mask).map(ndvil8)
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(geom).filter(same_path).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(geom).filterDate('2000-01-01', '2023-01-01').filter(same_path).map(mask).map(ndvil57)
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(geom).filterDate('2000-01-01', '2023-01-01').filter(same_path).map(mask).map(ndvil57)

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2023-01-01')
    # One band per image; pixels outside the image footprint come back as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(geom, defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the first 8-digit run in each band name.
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs2]
    df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    # Reorder the sampled bands chronologically.
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][name] for name in reorder_imgs2])
    # Negative values (including the -99 fill) are treated as missing.
    stacked2[stacked2 < 0] = np.nan

    # Spatial mean per date, computed once for both the plot and PhenAnoma.
    mean_ndvi2 = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()
    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], mean_ndvi2)
    plt.title(n)
    plt.grid()
    plt.show()

    # Position (in chronological order) of the first image after 2016-01-01;
    # std_all is the standard deviation of all valid pixels before it.
    dates_sorted2 = df_dates2.reset_index()
    ix_anom = dates_sorted2[dates_sorted2['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    # NOTE(review): the two index pairs are presumably the anomaly window and
    # the reference window of PhenAnoma - confirm against its definition.
    anom_mean = PhenAnoma(mean_ndvi2,
                            df_dates2['date'].values,
                            np.arange(1, 366, round(365/48)),
                            [ix_anom, len(df_dates2)],
                            [0, ix_anom], h=1,
                            rge=[0, 1],
                            plot=True,
                            dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': dates_sorted2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])

    # 'bool' marks anomalies larger than 2 standard deviations; 'cumsum'
    # counts how many rows have elapsed since the last unmarked row.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()

    # Locate the run start BEFORE dropping the non-negative anomalies. Once
    # rows are removed, "4 positions earlier" no longer points at the start of
    # the run and can even become a negative position that wraps to the end
    # of the frame. A row with cumsum == 5 always sits at position >= 5, so
    # the offset below cannot go negative.
    run_hits = np.flatnonzero((test_mean['cumsum'].to_numpy() == 5) & (test_mean['an'].to_numpy() < 0))
    change_date = test_mean['date'].iloc[run_hits[0] - 4] if run_hits.size else None

    # Only negative anomalies are plotted and kept in test_mean.
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()
    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    if change_date is not None:
        plt.axvline(change_date)
        plt.show()
        print(change_date)
    else:
        plt.show()
    print(n)

In [None]:
for i, n in enumerate(reference_ids[202:]):
    # Same workflow as the path-only loop, resumed from position 202 of
    # reference_ids and additionally restricted to a single WRS row.
    print(n)
    fea = reference.filter(ee.Filter.eq('ID', n)).first()
    geom = fea.geometry()
    l8 = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2").filterBounds(geom)
    # Keep only the WRS path and row with the most Landsat-8 scenes over the
    # polygon (the first such value in case of a tie).
    paths = l8.aggregate_histogram('WRS_PATH').getInfo()
    rows = l8.aggregate_histogram('WRS_ROW').getInfo()
    path = max(paths, key=paths.get)
    row = max(rows, key=rows.get)
    same_path = ee.Filter.eq('WRS_PATH', float(path))
    same_row = ee.Filter.eq('WRS_ROW', float(row))
    l8 = l8.filter(same_path).filter(same_row).map(mask).map(ndvil8)
    l9 = ee.ImageCollection("LANDSAT/LC09/C02/T1_L2").filterBounds(geom).filter(same_path).filter(same_row).map(mask).map(ndvil8)
    l7 = ee.ImageCollection("LANDSAT/LE07/C02/T1_L2").filterBounds(geom).filterDate('2000-01-01', '2023-01-01').filter(same_path).filter(same_row).map(mask).map(ndvil57)
    l5 = ee.ImageCollection("LANDSAT/LT05/C02/T1_L2").filterBounds(geom).filterDate('2000-01-01', '2023-01-01').filter(same_path).filter(same_row).map(mask).map(ndvil57)

    landsat = l9.merge(l8).merge(l7).merge(l5).select('ndvi').sort('system:time_start').filterDate('2000-01-01', '2023-01-01')
    # One band per image; pixels outside the image footprint come back as -99.
    stack = landsat.toBands().toFloat().sampleRectangle(geom, defaultValue=-99.).getInfo()
    imgs2 = list(stack['properties'])
    # The acquisition date is the first 8-digit run in each band name.
    dates_imgs2 = [pd.to_datetime(re.findall(r'\d{8}', name)[0], format='%Y%m%d') for name in imgs2]
    df_dates2 = pd.DataFrame(data={'date': dates_imgs2, 'ix': range(len(dates_imgs2))})
    df_dates2 = df_dates2.sort_values('date')
    # Reorder the sampled bands chronologically.
    reorder_ix2 = df_dates2['ix'].values
    reorder_imgs2 = [imgs2[k] for k in reorder_ix2]
    stacked2 = np.stack([stack['properties'][name] for name in reorder_imgs2])
    # Negative values (including the -99 fill) are treated as missing.
    stacked2[stacked2 < 0] = np.nan

    # Spatial mean per date, computed once for both the plot and PhenAnoma.
    mean_ndvi2 = np.apply_over_axes(np.nanmean, stacked2, (1, 2)).ravel()
    fig = plt.figure(figsize=(12, 4))
    plt.scatter(df_dates2['date'], mean_ndvi2)
    plt.title(n)
    plt.grid()
    plt.show()

    # Position (in chronological order) of the first image after 2016-01-01;
    # std_all is the standard deviation of all valid pixels before it.
    dates_sorted2 = df_dates2.reset_index()
    ix_anom = dates_sorted2[dates_sorted2['date'] > '2016-01-01'].index[0]
    std_all = np.nanstd(stacked2[:ix_anom, :, :])
    # NOTE(review): the two index pairs are presumably the anomaly window and
    # the reference window of PhenAnoma - confirm against its definition.
    anom_mean = PhenAnoma(mean_ndvi2,
                            df_dates2['date'].values,
                            np.arange(1, 366, round(365/48)),
                            [ix_anom, len(df_dates2)],
                            [0, ix_anom], h=1,
                            rge=[0, 1],
                            plot=True,
                            dask='multiprocessing')[0]
    test_mean = pd.DataFrame(data={'an': anom_mean, 'date': dates_sorted2.iloc[ix_anom: len(df_dates2)]['date']})
    test_mean = test_mean.dropna(subset=['an'])

    # 'bool' marks anomalies larger than 2 standard deviations; 'cumsum'
    # counts how many rows have elapsed since the last unmarked row.
    test_mean['bool'] = (np.abs(test_mean['an']) > 2 * std_all).astype(int)
    test_mean['cumsum'] = test_mean.groupby(test_mean['bool'].eq(0).cumsum()).cumcount().tolist()

    # Locate the run start BEFORE dropping the non-negative anomalies. Once
    # rows are removed, "4 positions earlier" no longer points at the start of
    # the run and can even become a negative position that wraps to the end
    # of the frame. A row with cumsum == 5 always sits at position >= 5, so
    # the offset below cannot go negative.
    run_hits = np.flatnonzero((test_mean['cumsum'].to_numpy() == 5) & (test_mean['an'].to_numpy() < 0))
    change_date = test_mean['date'].iloc[run_hits[0] - 4] if run_hits.size else None

    # Only negative anomalies are plotted and kept in test_mean.
    test_mean = test_mean[test_mean['an'] < 0]
    test_mean = test_mean.reset_index()
    plt.scatter(test_mean['date'], test_mean['an'])
    plt.axhline(0, color='red')
    if change_date is not None:
        plt.axvline(change_date)
        plt.show()
        print(change_date)
    else:
        plt.show()
    print(n)

In [None]:
# Polygons whose NAME is 'tala', split by whether their 'cobDesde' property
# equals 'Plantacion Forestal'.
tala = reference.filter(ee.Filter.eq('NAME', 'tala'))
lc = tala.aggregate_array('cobDesde').distinct().getInfo()
print(lc)

from_plantation = ee.Filter.eq('cobDesde', 'Plantacion Forestal')
tala_bosque = tala.filter(from_plantation.Not())
tala_planta = tala.filter(from_plantation)

# Fetch and print the ID lists of both subsets (non-plantation first).
ids_tala_bosque = tala_bosque.aggregate_array('ID').getInfo()
ids_tala_planta = tala_planta.aggregate_array('ID').getInfo()
print(ids_tala_bosque, ids_tala_planta)

In [None]:
# Polygons whose NAME is 'estable', split by whether their 'cobDesde'
# property equals 'Plantacion Forestal'.
und = reference.filter(ee.Filter.eq('NAME', 'estable'))
lc = und.aggregate_array('cobDesde').distinct().getInfo()
print(lc)

from_plantation = ee.Filter.eq('cobDesde', 'Plantacion Forestal')
und_bosque = und.filter(from_plantation.Not())
und_planta = und.filter(from_plantation)

# Fetch and print the ID lists of both subsets (non-plantation first).
ids_und_bosque = und_bosque.aggregate_array('ID').getInfo()
ids_und_planta = und_planta.aggregate_array('ID').getInfo()
print(ids_und_bosque, ids_und_planta)