# Computing functions

In [1]:
def get_valid_data(eopatch, max_cloud_coverage=0.2):
    """ Build per-superpixel NDVI time series with cloudy observations masked out

    For every timestamp the cloud mask and the NDVI are averaged over each
    superpixel; observations whose mean cloud coverage is not strictly below
    `max_cloud_coverage` are replaced by NaN.

    Inputs :
        - eopatch : eopatch from the area of interest. The function reads
                    mask_timeless['SUPER_PIXELS'], mask['CLM'],
                    data['NDVI_STANDARD'] and timestamp.
        - max_cloud_coverage : mean cloud-mask value of a superpixel from which an
                    observation is discarded. Default is 0.2.
    Returns :
        - valid_data : array of shape (n_timestamps, n_superpixels) with NaN values
                    for cloudy observations. Columns follow the sorted superpixel ids.
        - dates : array of dates formatted as 'YYYY-MM-DD'
    """

    # superpixel to which each pixel belongs (int64 so that the offsets added
    # below cannot overflow a narrow mask dtype)
    superpixels = np.asarray(eopatch.mask_timeless['SUPER_PIXELS']).astype(np.int64)

    # distinct superpixel ids, sorted
    superpixel_ids = np.unique(superpixels)
    n_superpixels = superpixel_ids.size

    # number of dates
    n_timestamps = len(eopatch.timestamp)

    # temporal superpixels : one distinct label per (timestamp, superpixel) pair.
    # The stride spans the whole id range so that two timestamps can never share
    # a label, even when the ids are not exactly 0..n_superpixels-1.
    stride = int(superpixel_ids.max()) - int(superpixel_ids.min()) + 1
    offsets = np.arange(n_timestamps, dtype=np.int64) * stride
    temporal_superpixels = offsets.reshape((-1,) + (1,) * superpixels.ndim) + superpixels

    # labels to aggregate, ordered timestamp by timestamp; built directly instead
    # of running np.unique over the full label volume
    label_index = (offsets[:, np.newaxis] + superpixel_ids[np.newaxis, :]).ravel()

    def superpixel_means(values):
        # mean of `values` over each (timestamp, superpixel) label
        means = ndimage.mean(values, labels=temporal_superpixels, index=label_index)
        return means.reshape((n_timestamps, n_superpixels))

    # mean of cloud coverage of each superpixel
    mean_clm_superpixels = superpixel_means(eopatch.mask['CLM'])

    # mean of ndvi of each superpixel
    mean_ndvi_superpixels = superpixel_means(eopatch.data['NDVI_STANDARD'])

    # Apply filter : set to NaN every data that has too much cloud coverage
    valid_data = np.where(mean_clm_superpixels < max_cloud_coverage, mean_ndvi_superpixels, np.nan)

    # Get dates from time line
    dates = np.array([date.strftime('%Y-%m-%d') for date in eopatch.timestamp])

    return valid_data, dates

In [2]:
def get_forest_time_series(eopatch, shapefile, filter_percentage=0.8):
    """ Select the superpixels that are mostly covered by forest
    Inputs :
        - eopatch : eopatch from the area of interest
        - shapefile : path to the forest shapefile
        - filter_percentage : minimum share of a superpixel's area that must be covered by forest
                              for the superpixel to be kept (strict comparison). Default is 0.8 (80%).
    Returns :
        - time_series_forest : rows of eopatch.vector_timeless['SUPER_PIXELS'] whose forest share
                               exceeds filter_percentage
        - ts_indices_preserved : sorted unique 'VALUE' of the kept superpixels, as int64
    """
    # load the forest polygons and reproject them to the CRS of the eopatch bounding box
    forest_gdf = gpd.read_file(shapefile)
    forest_gdf['geometry'] = forest_gdf['geometry'].to_crs(eopatch.bbox.crs.epsg)

    # clip the forest polygons to the bounding box of the area of interest
    aoi_box = shp.box(*eopatch.bbox)
    forest_gdf['geometry'] = forest_gdf['geometry'].intersection(aoi_box)

    # drop the polygons lying outside the area of interest, then merge the rest into one geometry
    inside_aoi = ~forest_gdf['geometry'].is_empty
    merged_forest = forest_gdf[inside_aoi].dissolve()
    forest_geometry = list(merged_forest.geometry)[0]

    # superpixel polygons and the share of each one that is covered by forest
    superpixel_polygons = eopatch.vector_timeless['SUPER_PIXELS']
    forest_area = superpixel_polygons.intersection(forest_geometry).area
    forest_share = forest_area / superpixel_polygons.area

    # only keep superpixels that contain more than @filter_percentage of forest
    time_series_forest = superpixel_polygons[forest_share > filter_percentage]

    # indices of the time series preserved
    ts_indices_preserved = np.unique(time_series_forest['VALUE']).astype('int64')

    # print the percentage of time series preserved
    n_superpixels = np.unique(eopatch.mask_timeless['SUPER_PIXELS']).size
    print(round(len(ts_indices_preserved)/n_superpixels * 100, 2), "% of time series preserved")

    return time_series_forest, ts_indices_preserved

In [5]:
def set_bfast_params(valid_data, dates, ts_indices_preserved, end_training, start_monitor, end_monitor, k=3, freq=365, trend=False, hfrac=0.25, level=0.05):
    """ Configure a BFASTMonitor model and prepare the time series it will be fitted on
    Inputs :
        - valid_data : the times series (timestamps x superpixels), NaN for invalid observations.
                       The array is not modified.
        - dates : the list of dates (anything whose str() is an ISO formatted date)
        - ts_indices_preserved : time series indices that were preserved after forest filtering
        - end_training : date of the end of training period
        - start_monitor : start of monitoring date
        - end_monitor : end of monitoring date
        - k : The number of harmonic terms. Default is 3.
        - freq : The frequency for the seasonal model in days. Default is 365.
        - trend : Whether a tend offset term shall be used or not. Default is False.
        - hfrac : Float in the interval [0,1] specifying the bandwidth relative to the sample size in the MOSUM/ME monitoring processes
        - level : Significance level of the monitoring (and ROC, if selected) procedure, i.e., probability of type I error.
    Returns :
        - model : the BFASTMonitor object, ready to be executed.
        - valid_data_f2 : int16 time series restricted to the preserved superpixels and to the
                          period considered, with a trailing axis of size 1
        - dates_f : list of dates corresponding to the period considered
    """
    # change date format to datetime
    dates = [datetime.fromisoformat(str(date)) for date in dates]

    # set NaN values to 0, on a copy so that the caller's array keeps its NaN
    filled_data = np.where(np.isnan(valid_data), 0, valid_data)

    # BFASTMonitor model (fitted later on)
    model = BFASTMonitor(
                start_monitor,
                freq=freq,
                k=k,
                hfrac=hfrac,
                trend=trend,
                level=level,
                backend='python',
                verbose=1,
                device_id=0,
            )

    # rescale so that the largest absolute value maps to the int16 maximum (32767);
    # scaling to 32768 or by the signed maximum would overflow int16
    int16_max = np.iinfo(np.int16).max
    peak = np.abs(filled_data).max()
    scale = int16_max / peak if peak > 0 else 1.0

    # change of type
    valid_data_int = (filled_data * scale).astype(np.int16)

    # add third dimension to make it look like an image
    valid_data_int = valid_data_int[..., np.newaxis]

    # first date
    start_hist = dates[0]

    # crop data from start to end date of monitoring
    # (crop_data_dates is defined outside this block; presumably it keeps the
    # observations between start and end - verify against its implementation)
    valid_data_f, dates_f = crop_data_dates(valid_data_int, dates, start=start_hist - timedelta(days=1), end=end_monitor)

    # filter of data
    valid_data_f2 = valid_data_f[:, ts_indices_preserved, :]

    # dates indices : first date not before the end of training, then first date
    # not before the start of monitoring (len(dates) when there is none)
    n_dates = len(dates)
    ind_end_train = next((idx for idx, date in enumerate(dates) if date >= end_training), n_dates)
    ind_start_monitor = next((idx for idx in range(ind_end_train, n_dates) if dates[idx] >= start_monitor), n_dates)

    # filter dates : drop the observations between training and monitoring.
    # A slice is used so that bounds beyond the cropped data are simply ignored.
    gap = slice(ind_end_train, ind_start_monitor)
    valid_data_f2 = np.delete(valid_data_f2, gap, 0)
    del dates_f[gap]

    return model, valid_data_f2, dates_f

In [6]:
def execute_bfast(model, data, dates, n_chunks=5, nan_values=0):
    """ execute bfast on time series
    Inputs :
        - model : the BFASTMonitor object parametered.
        - data : time series
        - dates : list of dates
        - n_chunks : number of chunk to divide the job. Default is 5.
        - nan_values : specified the NaN value used in the array data. Default is 0.
    Returns (in this order) :
        - breaks : list of breaks that have been computed. Returns the index of the breakpoint date. -1 means no break in the time serie
        - magnitudes : values median of the difference between the data and the model prediction in the monitoring period
        - means : returns the mean values of the individual MOSUM processes
    """
    import contextlib

    # silence everything printed while fitting; the context managers restore
    # stdout and close the devnull handle even if fit() raises
    with open(os.devnull, "w") as devnull, contextlib.redirect_stdout(devnull):
        # execute bfast
        model.fit(data, dates, n_chunks=n_chunks, nan_value=nan_values)

    # return outputs
    return model.breaks, model.magnitudes, model.means

In [7]:
def organise_results(time_series, dates, start_monitor, breaks, magnitudes):
    """ organises the results in a GeoDataFrame
    Inputs :
        - time_series : dataframe of the time series polygons, with a 'VALUE' column
        - dates : list of dates
        - start_monitor : start of monitoring date
        - breaks : the breaks computed (one index per time series, negative when there is no break)
        - magnitudes : the magnitudes computed, of shape (n_time_series, 1)
    Returns :
        - results : time_series merged on 'VALUE' with the columns 'breakpoint' (date of the break,
                    NaT when there is none), 'magnitude' and 'norm_mag' (magnitude divided by the
                    largest magnitude for positive values and by the absolute value of the lowest
                    magnitude for negative ones)
    """

    # get the index of monitoring start (len(dates) when no date reaches it)
    start_monitor_index = next(
        (idx for idx, date in enumerate(dates) if date >= start_monitor),
        len(dates),
    )

    # datetime format
    def to_date(break_index):
        # negative values flag the absence of a break; index 0 is a valid break
        # located on the first monitored date
        if break_index < 0:
            return np.datetime64("NaT")
        return dates[start_monitor_index + int(break_index)]

    # breaks and magnitudes are assumed to follow the sorted unique 'VALUE' order
    output_df = pd.DataFrame({
        'VALUE': np.unique(time_series['VALUE']).astype('int64'),
        'breakpoint': [to_date(b) for b in np.ravel(breaks)],
        'magnitude': np.squeeze(magnitudes, axis=1),
    })

    results = time_series.merge(output_df, on='VALUE')

    # compute normalized magnitude; .loc is used instead of chained indexing,
    # which may write to a temporary copy and leave the column untouched
    magnitude = results['magnitude']
    positive = magnitude > 0
    negative = magnitude < 0

    results['norm_mag'] = 0.0
    results.loc[positive, 'norm_mag'] = magnitude[positive] / magnitude.max()
    results.loc[negative, 'norm_mag'] = magnitude[negative] / abs(magnitude.min())

    return results

In [8]:
def group_by_breakpoints(results):
    """ Aggregate the results per breakpoint date
    Inputs :
        - results : GeoDataFrame containing the results ('breakpoint', 'VALUE' and 'magnitude' columns)
    Returns :
        - group : one row per breakpoint date with the columns 'breakpoint', 'VALUE_count',
                  'magnitude_min', 'magnitude_max', 'magnitude_mean' and 'magnitude_median'
    """

    # statistics computed for each breakpoint date
    aggregations = {'VALUE' : 'count', 'magnitude': ['min', 'max', 'mean', 'median']}
    group = results.groupby(by='breakpoint', as_index=False).agg(aggregations)

    # flatten the two-level column names : (name, stat) -> 'name_stat', (name, '') -> 'name'
    flat_names = []
    for name, stat in group.columns.to_flat_index():
        flat_names.append(f"{name}_{stat}" if stat else name)
    group.columns = flat_names

    return group

In [9]:
def bfast_dynamic(valid_data, dates, ts_indices_preserved, end_train, first_window, time_series=None):
    """ Applies BFAST each time the window is shifted by one date
    Inputs :
        - valid_data : the times series
        - dates : the list of dates, as sorted 'YYYY-MM-DD' strings (they are compared as strings)
        - ts_indices_preserved : time series indices that were preserved after forest filtering
        - end_train : date of the end of training period
        - first_window : first and last date of first monitoring period
        - time_series : forest time series dataframe passed to organise_results. When None (default),
                        the notebook-level variable `time_series_forest` is used.
    Returns :
        - breaks_list : list of breaks computed for each BFAST execution
        - magnitudes_list : list of magnitudes computed for each BFAST execution
        - results : list of results computed for each BFAST execution
        - window_dates : list of last date of the monitoring window for each BFAST execution (needed for plotting afterwards)
    """

    # fall back on the notebook-level forest superpixels when none are given
    if time_series is None:
        time_series = time_series_forest

    # window bounds in the same string format as `dates` (computed once)
    first_date_window = first_window[0].strftime('%Y-%m-%d')
    last_date_window = first_window[1].strftime('%Y-%m-%d')
    last_date_index = len(dates)-1

    # find the first date index in dates
    first_date_window_index = 0
    while dates[first_date_window_index] < first_date_window:
        first_date_window_index += 1

    # find the last date window index in dates
    last_date_window_index = last_date_index
    while dates[last_date_window_index] > last_date_window:
        last_date_window_index -= 1

    window_dates = dates[last_date_window_index:]

    breaks_list = []
    magnitudes_list = []
    results = []

    # progress bar setting : one step per window position, both ends included
    with tqdm(total=last_date_index - last_date_window_index + 1) as pbar:
        # loop applying BFAST
        while last_date_window_index <= last_date_index:

            # changing date format
            start_monitor = datetime.fromisoformat(str(dates[first_date_window_index]))
            end_monitor = datetime.fromisoformat(str(dates[last_date_window_index]))

            # set params (default BFAST parameters)
            bfast_model, valid_data_f, dates_f = set_bfast_params(valid_data, dates, ts_indices_preserved, end_train, start_monitor, end_monitor)

            # execute BFAST
            breaks, magnitudes, means = execute_bfast(bfast_model, valid_data_f, dates_f)

            # append results
            breaks_list.append(breaks)
            magnitudes_list.append(magnitudes)
            results.append(organise_results(time_series, dates_f, start_monitor, breaks, magnitudes))

            # increase by increment of 1 the window's edge indices (shift window to next date)
            first_date_window_index += 1
            last_date_window_index += 1

            # update progress bar
            pbar.update(1)

    return breaks_list, magnitudes_list, results, window_dates

# Plotting functions

In [10]:
def plot_dep(aoi, name, basemap='OSM', shapefile=''):
    """ Draws the area of interest over its department, with an optional basemap
    Inputs :
        - aoi : the area of interest in BBOX format
        - name : name of the department (matched against the 'nom' column of the shapefile).
                 Lot or Var in this Notebook depending on the use case
        - basemap : 'OSM' -> Open Street Map basemap. Default
                    'GP' -> Géoportail basemap
                    any other value -> no basemap
        - shapefile : French department shapefile
    """
    # figure on which everything is drawn
    figure, axis = plt.subplots(figsize=(10,10))

    # departments, expressed in the CRS of the area of interest
    dep_gdf = gpd.read_file(shapefile)
    dep_gdf.geometry = dep_gdf.geometry.to_crs(aoi.crs.epsg)

    # outline of the first department carrying the requested name
    department = dep_gdf[dep_gdf.nom==name].iloc[[0]]
    department.plot(ax=axis, alpha=0.3, color=None, edgecolor='k', linewidth = 1)

    # area of interest, in red
    aoi_gdf = gpd.GeoDataFrame(geometry=[aoi.geometry], crs=aoi.crs.pyproj_crs())
    aoi_gdf.plot(ax=axis, alpha=0.3, color='red', edgecolor='r', linewidth=3)

    # background tiles
    if basemap=='OSM':
        cx.add_basemap(ax=axis, crs=aoi.crs.epsg, source=cx.providers.OpenStreetMap.Mapnik)
    elif basemap=='GP':
        cx.add_basemap(ax=axis, crs=aoi.crs.epsg, source=cx.providers.GeoportailFrance.orthos)

In [1]:
def plot_forest_sp(sp):
    """ Draws the forest superpixels in red over the Géoportail orthophoto basemap
    Inputs :
        - sp : forest superpixels dataframe
    """

    figure, axis = plt.subplots(figsize=(15, 10))

    # semi-transparent red polygons with a thin black outline
    polygon_style = dict(alpha=0.5, color='red', edgecolor='k', linewidth=1)
    sp.geometry.plot(ax=axis, **polygon_style)

    # background tiles, in the CRS of the superpixels
    cx.add_basemap(ax=axis, crs=sp.crs, source=cx.providers.GeoportailFrance.orthos)

In [5]:
def plot_magnitudes(results, time_series):
    """ Plots the normalized magnitude of each superpixel
    Inputs :
        - results : DataFrame where the result of BFAST have been stored (column 'norm_mag' is plotted)
        - time_series : forest time series dataframe (its boundaries are drawn in grey)
    """

    _, ax = plt.subplots(ncols=1, figsize=(15,10))

    # narrow axes on the right of the map, hosting the colorbar.
    # The divider is created once; a second, discarded divider is not needed.
    cax = make_axes_locatable(ax).append_axes('right', size='2%', pad=0.1)

    # magnitudes, then superpixel outlines, then the orthophoto basemap
    results.plot(ax=ax, column='norm_mag', cmap=cmaps.RdYlGn, legend=True, cax=cax)
    time_series.geometry.boundary.plot(ax=ax, color=None, edgecolor='grey', linewidth=0.2)
    cx.add_basemap(ax=ax, crs=time_series.crs, source=cx.providers.GeoportailFrance.orthos)

In [4]:
def plot_breakpoints(breakpoint_df, func='median'):
    """ Plots the breakpoints per dates and displays intensity changes (bar plot)
    Inputs :
        - breakpoint_df : DataFrame where the breakpoints have been grouped by dates
                          (columns 'breakpoint', 'VALUE_count' and 'magnitude_<func>')
        - func : 'median', 'mean', 'min', or 'max'. Function applied to the magnitudes of time series detected abnormal at same date. Useful for the color gradient of the plot.
    """

    # prepare colormap : magnitudes rescaled to [0, 1]
    magnitudes = breakpoint_df['magnitude_'+func]
    min_ = magnitudes.min()
    span = magnitudes.max() - min_
    if span != 0:
        colormap = (magnitudes - min_)/span
    else:
        # all magnitudes are equal (e.g. a single breakpoint date) : use one
        # color for every bar instead of dividing by zero
        colormap = magnitudes * 0.0

    # plot graph
    fig, ax = plt.subplots(figsize=(12, 8))
    sn.barplot(ax=ax, x=breakpoint_df['breakpoint'], y=breakpoint_df['VALUE_count'], palette=plt.cm.Blues_r(colormap))
    plt.xticks(rotation=90)
    plt.tight_layout()
    ax.set(xlabel='Dates', ylabel='Number of breakpoints')

In [1]:
def plot_high_changing_sectors(eopatch, breakpoint_df, start_date, end_date, filenames, i, path, results_df=None):
    """ Plots high changing sectors by date of breakpoint appearance. Useful to build a GIF
    Inputs :
        - eopatch : eopatch of the area of interest
        - breakpoint_df : DataFrame where the breakpoints have been grouped by dates
        - start_date : start of monitoring date
        - end_date : end of breakpoint consideration date
        - filenames : list of filenames of all the plots to build the GIF (updated in place)
        - i : the file number
        - path : path where to save the plot (the file name '<i>.png' is appended as is)
        - results_df : GeoDataFrame of the BFAST results, with a 'breakpoint' column. When None
                       (default), the notebook-level variable `results` is used.
    Returns :
        - filenames : list of filenames updated
    """

    # fall back on the notebook-level results when none are given
    if results_df is None:
        results_df = results

    palette_size = len(breakpoint_df.breakpoint)

    # build red palette : regularly spaced colors of the Reds colormap.
    # The divisor and the step are clamped to 1 so that a single breakpoint date
    # or a very long list of dates cannot yield a division by zero or a zero step.
    n_colors = plt.cm.Reds.N
    step = max(round(n_colors / max(palette_size - 1, 1)) - 1, 1)
    red_palette = [clr.rgb2hex(plt.cm.Reds(color_index)) for color_index in range(0, n_colors, step)]

    start_date = start_date.strftime('%Y-%m-%d')
    end_date = end_date.strftime('%Y-%m-%d')

    # filter all superpixels detected abnormal between start and end date
    breakpoints = breakpoint_df.query("@start_date <= breakpoint <= @end_date")

    # plot
    fig, ax = plt.subplots(figsize=(12, 8))
    eopatch.vector_timeless['SUPER_PIXELS'].geometry.boundary.plot(ax=ax, color=None, edgecolor='black', linewidth=0.1)
    list_bp = list(breakpoints['breakpoint'])
    # one red per selected breakpoint date, from the lightest one
    cmap = clr.ListedColormap(red_palette[:len(list_bp)])
    results_df.query('breakpoint in @list_bp').plot(ax=ax, column='breakpoint', categorical=True, cmap=cmap, legend=True)
    cx.add_basemap(ax=ax, crs=eopatch.bbox.crs.epsg, source=cx.providers.GeoportailFrance.orthos)

    # create name file and add it to the list
    filename = path + f'{i}.png'
    filenames.append(filename)

    # save frame
    plt.savefig(filename)
    plt.close()

    return filenames

In [None]:
def live_breaks(data, date, filenames, i, path, max_bp, figsize=(7,5), title='Breaks detected over time'):
    """ Saves one frame of the curve of breaks detected over time. Useful to build a GIF
    Inputs :
        - data : list of the number of breaks detected over every sliding window
        - date : new date added to the sliding window (string, appended to the title)
        - filenames : list of filenames of all the plots to build the GIF (updated in place)
        - i : the file number
        - path : path where to save the plot (the file name '<i>.png' is appended as is)
        - max_bp : value used to set the top of the y axis, rounded up to the next multiple of 1000
        - figsize : figure size. Default (7,5)
        - title : title of the plot
    Returns :
        - filenames : list of filenames updated
    """
    # refresh the notebook output
    clear_output(wait=True)

    # register the frame before drawing it
    frame_file = path + f'{i}.png'
    filenames.append(frame_file)

    # curve of the breaks detected so far
    plt.figure(figsize=figsize)
    plt.plot(data)

    # the title recalls the new date considered in the sliding window
    frame_title = title + ' (' + date + ')'
    plt.title(frame_title)
    plt.grid(True)

    # same y axis ceiling on every frame
    y_axis_top = ceil(max_bp/1000)*1000
    plt.ylim(top=y_axis_top)

    plt.xlabel('Time')
    plt.ylabel('Number of breaks detected')

    # write the frame to disk and release the figure
    plt.savefig(frame_file)
    plt.close()

    return filenames

In [None]:
def live_mag(data, date, filenames, i, path, figsize=(7,5), title='', time_series=None):
    """ Plots magnitude of each superpixel. Useful to build a GIF
    Inputs :
        - data : one of the results obtained after applying bfast_dynamic (its 'norm_mag' column is plotted)
        - date : new date added to the sliding window (string, appended to the title)
        - filenames : list of filenames of all the plots to build the GIF (updated in place)
        - i : the file number
        - path : path where to save the plot (the file name '<i>.png' is appended as is)
        - figsize : figure size. Default (7,5)
        - title : title of the plot. NOTE(review): accepted but currently not used, the title
                  is always 'New date =' followed by the date - confirm the intent.
        - time_series : forest time series dataframe whose boundaries are drawn. When None (default),
                        the notebook-level variable `time_series_forest` is used.
    Returns :
        - filenames : list of filenames updated
    """
    # fall back on the notebook-level forest superpixels when none are given
    if time_series is None:
        time_series = time_series_forest

    clear_output(wait=True)

    # plot : superpixel outlines, basemap, then magnitudes with their colorbar
    fig, ax = plt.subplots(figsize=figsize)
    cax = make_axes_locatable(ax).append_axes('right', size='2%', pad=0.1)
    time_series.geometry.boundary.plot(ax=ax, color=None, edgecolor='grey', linewidth=0.2)
    cx.add_basemap(ax=ax, crs=time_series.crs, source=cx.providers.GeoportailFrance.orthos)
    data.plot(ax=ax, column='norm_mag', cmap=cmaps.RdYlGn, legend=True, cax=cax)

    # create file and add it to the list
    filename = path + f'{i}.png'
    filenames.append(filename)

    # save frame
    # NOTE(review): plt.title targets the current axes, which may be the colorbar
    # axes created above rather than the map - confirm the intended placement.
    plt.title('New date =' + date)
    plt.savefig(filename)
    plt.close()

    return filenames