# Get Features 

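For each tile folder, fit the periodic model to the time series and write the resulting feature rasters to a `features` subfolder.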


In [None]:
dir_ = '/Users/arthurcalvi/Data/species/validation/tiles'

from utils import load_folder, fit_periodic_function, get_aspect, postprocess_cloud_mask, calculate_slope_with_dates
from datetime import datetime
import rasterio
import os
import numpy as np
from tqdm import tqdm

# For every tile folder: load the rgb / crswir / qa stacks, build a per-date
# weight mask, fit the periodic model and write the features as GeoTIFFs.

error_files = []
force = True

# QA values that get flagged (weight 0) before the mask is post-processed.
# NOTE(review): presumably the cloud/shadow-like classes of the QA band -
# confirm against the QA product specification.
flagged_qa_values = (1, 3, 6, 8, 9, 10, 11)

for folder in tqdm(os.listdir(dir_)):
    path = os.path.join(dir_, folder)
    if not os.path.isdir(path):
        continue

    # Skip tiles that already have a 'features' folder unless recomputation is forced.
    if os.path.exists(os.path.join(path, 'features')) and not force:
        continue

    try:
        print(f'Processing {folder}...')
        # load data; the date is the part of each rgb filename before the first '_'
        dates = [datetime.strptime(filename.split('_')[0], '%Y-%m-%d') for filename in os.listdir(os.path.join(path, 'rgb'))]
        dates.sort()
        rgb = load_folder(os.path.join(path, 'rgb'))
        crswir = load_folder(os.path.join(path, 'crswir'))
        qa = load_folder(os.path.join(path, 'qa'))
        red = rgb[:, 0, :, :]

        # Read the DEM band and its raster profile in one go and close the
        # dataset afterwards; the profile is the template for every output file.
        dir_dem = os.path.join(path, 'dl_lc', 'dem.tif')
        with rasterio.open(dir_dem) as src:
            dem = src.read(1)
            profile = src.profile
        aspect = get_aspect(dem)

        new_qa = []
        for frame in tqdm(qa):
            frame_ = np.zeros_like(frame)
            frame_[np.isin(frame, flagged_qa_values)] = 1
            # apply postprocess cloud mask
            frame_ = postprocess_cloud_mask(frame_, 5, 25)
            new_qa.append(frame_)

        qa_mask = np.array(new_qa)
        # Invert: flagged samples get weight 0, the others weight 1.
        expanded_mask = 1 - qa_mask
        # NOTE(review): if the QA frames are integer-typed, the 0.001 weight
        # assigned below would be silently truncated to 0 - keep the mask float.
        if not np.issubdtype(expanded_mask.dtype, np.floating):
            expanded_mask = expanded_mask.astype(float)

        # do not take into account disturbed dates
        K = 6
        slopes = np.array([
            calculate_slope_with_dates(red, dates, i, K)
            for i in tqdm(range(rgb.shape[0]))
        ])

        # The flag is cumulative: once a pixel's |slope| exceeds 10 at some date
        # (from the 13th date on), it is down-weighted for all later dates too.
        zero_mask = np.zeros_like(expanded_mask[0], dtype=bool)
        for i in range(12, len(dates)):
            zero_mask |= np.abs(slopes[i]) > 10
            expanded_mask[i, zero_mask] = 0.001

        # compute features
        amplitude_map_r, phase_map_r, offset_map_r = fit_periodic_function(red, expanded_mask, dates)
        amplitude_map_crswir, phase_map_crswir, offset_map_crswir = fit_periodic_function(crswir, expanded_mask, dates)

        # Same epsilon as the ablation cell, so that pixels whose three
        # channels sum to zero do not produce NaN/inf in the ratio.
        epsilon = 1e-10
        rcc = red / (red + rgb[:, 1, :, :] + rgb[:, 2, :, :] + epsilon)
        amplitude_map_rcc, phase_map_rcc, offset_map_rcc = fit_periodic_function(rcc, expanded_mask, dates)

        # write dem/aspect and the features of each channel as separate .tif
        # files in a new folder called 'features'
        folder_path = os.path.join(path, 'features')
        os.makedirs(folder_path, exist_ok=True)

        # (file name, bands in write order, dtype declared in the output profile)
        outputs = [
            ('elevation_aspect.tif', [dem, aspect], aspect.dtype),
            ('r_APO.tif', [amplitude_map_r, phase_map_r, offset_map_r], amplitude_map_r.dtype),
            ('crswir_APO.tif', [amplitude_map_crswir, phase_map_crswir, offset_map_crswir], amplitude_map_crswir.dtype),
            ('rcc_APO.tif', [amplitude_map_rcc, phase_map_rcc, offset_map_rcc], amplitude_map_rcc.dtype),
        ]
        for filename, bands, dtype in outputs:
            # Per-file copy of the DEM profile; only band count and dtype differ.
            out_profile = {**profile, 'count': len(bands), 'dtype': dtype}
            with rasterio.open(os.path.join(folder_path, filename), 'w', **out_profile) as dst:
                dst.write(np.stack(bands, axis=0))

    except Exception as e:
        # Best effort: report the failing tile, remember it and move on.
        print(f'Error processing {folder} : {e}')
        error_files.append(folder)

if error_files:
    print(f"Errors occurred in the following files: {error_files}")


# Ablation study 

In [None]:
import os
from datetime import datetime, timedelta
import numpy as np
import rasterio
from tqdm import tqdm
from scipy.interpolate import interp1d
from utils import load_folder, fit_periodic_function, get_aspect, postprocess_cloud_mask, calculate_slope_with_dates

import numpy as np
import pandas as pd
from datetime import datetime, timedelta

def resample_time_series(data: np.ndarray, dates: list[datetime], resample_step_days: int = 28) -> tuple[np.ndarray, list[datetime]]:
    """Resample a (time, height, width) stack onto a regular time grid.

    The new grid starts at ``dates[0]`` and advances by ``resample_step_days``
    until ``dates[-1]`` is passed. Values on the grid are obtained by linear
    interpolation in time between the original observations.

    Args:
        data: Array of shape ``(len(dates), height, width)``.
        dates: Acquisition date of each slice of ``data``; expected to be
            sorted, since the first and last entries delimit the new grid.
        resample_step_days: Spacing of the new grid, in days.

    Returns:
        A tuple ``(resampled_data, new_dates)`` where ``resampled_data`` has
        shape ``(len(new_dates), height, width)`` and ``new_dates`` is the
        list of grid dates.
    """
    print('Resampling time series...')
    new_dates = pd.date_range(start=dates[0], end=dates[-1], freq=f'{resample_step_days}D')

    # One column per pixel so that pandas interpolates every pixel at once.
    n_samples, height, width = data.shape
    df = pd.DataFrame(data.reshape(n_samples, -1), index=pd.to_datetime(dates))

    # reindex() refuses duplicate labels; keep the first observation of a date.
    df = df[~df.index.duplicated(keep='first')]

    # Interpolate on the union of the original and the new dates, then keep
    # only the new ones. Reindexing straight onto the new grid would throw
    # away every observation that does not fall exactly on a grid date, and
    # method='time' (unlike 'linear') accounts for the irregular spacing.
    df_resampled = (
        df.reindex(df.index.union(new_dates))
        .interpolate(method='time', limit_direction='both')
        .loc[new_dates]
    )

    resampled_data = df_resampled.to_numpy().reshape(len(new_dates), height, width)

    # Return plain datetimes, as the annotation promises and as the
    # non-resampled code path provides.
    return resampled_data, list(new_dates.to_pydatetime())

# Example usage:
# resampled_data, new_dates = resample_time_series(data, dates)


def compute_features(folder_path: str, data, mask, dates, suffix: str, profile) -> None:
    """Fit the periodic model and save amplitude, phase and offset as one GeoTIFF.

    The three maps returned by ``fit_periodic_function`` are written, in the
    order amplitude, phase, offset, as the bands of ``APO_{suffix}.tif``.

    Args:
        folder_path: Directory in which the output file is created.
        data: Time series passed through to ``fit_periodic_function``.
        mask: Mask/weights passed through to ``fit_periodic_function``.
        dates: Dates passed through to ``fit_periodic_function``.
        suffix: Label used in the log message and in the output file name.
        profile: Raster profile used as a template for the output file.
            It is not modified.
    """
    print(f'Computing features for {suffix}...')
    amplitude_map, phase_map, offset_map = fit_periodic_function(data, mask, dates)

    # Build a copy instead of updating in place: the caller reuses the same
    # profile object for every call.
    out_profile = {**profile, 'count': 3, 'dtype': amplitude_map.dtype}
    with rasterio.open(os.path.join(folder_path, f'APO_{suffix}.tif'), 'w', **out_profile) as dst:
        dst.write(np.stack([amplitude_map, phase_map, offset_map], axis=0))

def process_folder(folder: str, dir_: str, force: bool) -> None:
    """Compute the ablation-study features of one tile folder.

    For each weighting/resampling condition and each index (R, CRSWIR, RCC)
    an ``APO_{index}_{condition}.tif`` file is written to the tile's
    ``features`` subfolder via ``compute_features``.

    Any exception is caught and printed, and the folder name is appended to
    the module-level ``error_files`` list, so one bad tile does not stop the
    run.

    Args:
        folder: Name of the tile folder inside ``dir_``.
        dir_: Directory that contains the tile folders.
        force: When False, a tile that already has a ``features`` subfolder
            is skipped.
    """
    path = os.path.join(dir_, folder)
    if not os.path.isdir(path):
        return
    if os.path.exists(os.path.join(path, 'features')) and not force:
        return

    # Context echoed by the error handler. Bound up front so that a failure
    # before the condition loop (e.g. while loading) cannot raise a second
    # error inside the handler and hide the real one.
    suffix = resample = use_cloud_mask = use_disturbance_mask = None
    mask = data = None
    dates = []

    try:
        print(f'Processing {folder}...')
        # The date is the part of each rgb filename before the first '_'.
        dates = [datetime.strptime(filename.split('_')[0], '%Y-%m-%d') for filename in os.listdir(os.path.join(path, 'rgb'))]
        dates.sort()
        rgb = load_folder(os.path.join(path, 'rgb'))
        crswir = load_folder(os.path.join(path, 'crswir'))
        qa = load_folder(os.path.join(path, 'qa'))
        red = rgb[:, 0, :, :]

        # Only the DEM's raster profile is needed here (as the template for
        # the output files); the context manager closes the dataset.
        dir_dem = os.path.join(path, 'dl_lc', 'dem.tif')
        with rasterio.open(dir_dem) as src:
            profile = src.profile

        # Process QA mask: flag the listed QA values, post-process, then
        # invert so flagged samples get weight 0 and the others weight 1.
        # NOTE(review): presumably the cloud/shadow-like QA classes - confirm.
        flagged_qa_values = (1, 3, 6, 8, 9, 10, 11)
        new_qa = []
        for frame in tqdm(qa):
            frame_ = np.zeros_like(frame)
            frame_[np.isin(frame, flagged_qa_values)] = 1
            new_qa.append(postprocess_cloud_mask(frame_, 5, 25))
        qa_mask = 1 - np.array(new_qa)

        # Calculate slopes for disturbance detection
        K = 6
        slopes = np.array([calculate_slope_with_dates(red, dates, i, K) for i in tqdm(range(rgb.shape[0]))])

        # NOTE(review): if the QA frames are integer-typed, the 0.001 weight
        # below would be silently truncated to 0 - keep this mask float.
        if np.issubdtype(qa_mask.dtype, np.floating):
            expanded_mask_with_disturbances = qa_mask.copy()
        else:
            expanded_mask_with_disturbances = qa_mask.astype(float)

        # The flag is cumulative: once a pixel's |slope| exceeds 10 at some
        # date (from the 13th on), it stays down-weighted for later dates.
        zero_mask = np.zeros_like(qa_mask[0], dtype=bool)
        for i in range(12, len(dates)):
            zero_mask |= np.abs(slopes[i]) > 10
            expanded_mask_with_disturbances[i, zero_mask] = 0.001

        # Folder for features
        folder_path = os.path.join(path, 'features')
        os.makedirs(folder_path, exist_ok=True)

        # The index series do not depend on the condition: build them once.
        # epsilon prevents a division by zero where the three channels sum to 0.
        epsilon = 1e-10
        denominator = red + rgb[:, 1, :, :] + rgb[:, 2, :, :]
        series = {
            'R': red,
            'CRSWIR': crswir,
            'RCC': red / (denominator + epsilon),
        }

        # (suffix, resample, use_cloud_mask, use_disturbance_mask)
        conditions = [
            ("resampled_no_weights", True, False, False),
            ("no_resample_no_weights", False, False, False),
            ("no_resample_cloud_weights", False, True, False),
            ("no_resample_cloud_disturbance_weights", False, True, True)
        ]

        for suffix, resample, use_cloud_mask, use_disturbance_mask in conditions:
            if use_disturbance_mask:
                mask = expanded_mask_with_disturbances
            elif use_cloud_mask:
                mask = qa_mask
            else:
                mask = np.ones_like(qa_mask)

            # The mask is the same for all three indices of a condition.
            mask_ = resample_time_series(mask, dates)[0] if resample else mask

            for index, data in series.items():
                if resample:
                    data_, dates_ = resample_time_series(data, dates)
                else:
                    data_, dates_ = data, dates

                compute_features(folder_path, data_, mask_, dates_, f'{index}_{suffix}', profile)

    except Exception as e:
        print(f'Error processing {folder} : {e}')
        print(suffix, resample, use_cloud_mask, use_disturbance_mask)
        if mask is not None and data is not None:
            print(mask.shape, data.shape, len(dates))
        error_files.append(folder)

dir_ = '/Users/arthurcalvi/Data/species/validation/tiles'
error_files = []  # process_folder appends the name of every folder that raised
force = True  # passed to process_folder; False skips folders that already have 'features'

# Run the ablation on every entry of the tiles directory, one after another.
tile_entries = os.listdir(dir_)
for folder in tqdm(tile_entries):
    process_folder(folder, dir_, force)

# Summarise the failures once the whole directory has been visited.
if len(error_files) > 0:
    print(f"Errors occurred in the following files: {error_files}")
