In [None]:
import os
from tqdm import tqdm 
from datetime import datetime
# Scan every tile folder, measure the span (in years) between its first and last
# acquisition, and plot the distribution of those spans.
dir_ = '/Users/arthurcalvi/Data/species/validation/tiles'
ts_length = []
force = True  # not read in this cell; kept because it is a notebook-level global
for folder in tqdm(os.listdir(dir_)):
    path = os.path.join(dir_, folder)
    if not os.path.isdir(path):
        continue
    # Acquisition dates are the 'YYYY-MM-DD' prefix (before the first '_') of each file name in <tile>/rgb.
    dates = sorted(
        datetime.strptime(filename.split('_')[0], '%Y-%m-%d')
        for filename in os.listdir(os.path.join(path, 'rgb'))
    )
    if not dates:
        # An empty rgb folder has no time span; skip it instead of failing on dates[-1].
        continue
    #compute length of the time series (in years)
    time_series_length = (dates[-1] - dates[0]).days / 365.25
    ts_length.append(time_series_length)

#histogram of the time series length
import matplotlib.pyplot as plt
plt.hist(ts_length)
    

We need to compute the features for fixed time-series lengths of 1, 2 and 3 years.

In [None]:
import os
from datetime import datetime, timedelta
import numpy as np
import rasterio
from tqdm import tqdm
from scipy.interpolate import interp1d
from utils import load_folder, fit_periodic_function, get_aspect, postprocess_cloud_mask, calculate_slope_with_dates

import numpy as np
import pandas as pd
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

def resample_time_series(data: np.ndarray, dates: list[datetime], resample_step_days: int = 28) -> tuple[np.ndarray, pd.DatetimeIndex]:
    """Resample a (time, height, width) stack onto a regular time grid.

    The grid starts at ``dates[0]`` and advances by ``resample_step_days`` days
    without going past ``dates[-1]``. Every pixel is linearly interpolated in
    time between its surrounding observations.

    Args:
        data: Array of shape (n_samples, height, width); frame ``i`` is the
            observation acquired at ``dates[i]``.
        dates: Acquisition dates, one per frame. ``dates[0]`` and ``dates[-1]``
            are used as the bounds of the grid.
        resample_step_days: Spacing of the output grid, in days.

    Returns:
        ``(resampled_data, new_dates)`` where ``resampled_data`` has shape
        (len(new_dates), height, width) and ``new_dates`` is the pandas
        ``DatetimeIndex`` of the grid.
    """
    print('Resampling time series...')
    new_dates = pd.date_range(start=dates[0], end=dates[-1], freq=f'{resample_step_days}D')

    # Flatten the spatial axes so that every pixel becomes one DataFrame column.
    n_samples, height, width = data.shape
    df = pd.DataFrame(data.reshape(n_samples, -1), index=pd.to_datetime(dates))

    # reindex() refuses duplicated labels: keep the first frame of each date.
    df = df.loc[~df.index.duplicated(keep='first')].sort_index()

    # Reindexing straight onto the grid would drop every observation that does
    # not fall exactly on a grid date. Interpolate on the union of observed and
    # grid dates instead, weighting by elapsed time, then keep the grid rows.
    combined_index = df.index.union(new_dates)
    interpolated = df.reindex(combined_index).interpolate(method='time', limit_direction='both')
    df_resampled = interpolated.reindex(new_dates).values

    # Restore the spatial layout.
    resampled_data = df_resampled.reshape(len(new_dates), height, width)

    return resampled_data, new_dates

# Example usage:
# resampled_data, new_dates = resample_time_series(data, dates)


def compute_features(folder_path: str, data, mask, dates, suffix: str, profile):
    """Fit the periodic model and write its three maps to a 3-band GeoTIFF.

    Args:
        folder_path: Directory receiving the output file.
        data: Time series handed to ``fit_periodic_function``.
        mask: Per-sample weights/mask handed to ``fit_periodic_function``.
        dates: Dates associated with ``data``.
        suffix: Appended to the file name, producing ``APO_<suffix>.tif``.
        profile: Rasterio profile used as the template of the output file.
            It is not modified.

    The bands are written in the order amplitude, phase, offset, as float16.
    """
    print(f'Computing features for {suffix}...')
    amplitude_map, phase_map, offset_map = fit_periodic_function(data, mask, dates)

    bands = np.stack([amplitude_map, phase_map, offset_map], axis=0).astype(np.float16)

    # Work on a copy: the caller reuses the same profile object for every
    # output, so it must not be altered here.
    out_profile = dict(profile)
    out_profile.update(count=3, dtype=bands.dtype)
    with rasterio.open(os.path.join(folder_path, f'APO_{suffix}.tif'), 'w', **out_profile) as dst:
        dst.write(bands)

def _stretch_to_signed(x):
    """Return 2 * (x - 0.5); shared by all the indices stored with that encoding."""
    return 2 * (x - 0.5)


def _rgb_to_unit(x):
    """Divide uint16 input by 65535; hand back any other dtype untouched."""
    if x.dtype == np.uint16:
        return x / 65535
    return x


# Maps an index name to the function applied to its stack right after loading.
inverse_dfunc = {'rgb': _rgb_to_unit}
inverse_dfunc.update(
    dict.fromkeys(('ndvi', 'gndvi', 'ndwi', 'ndmi', 'nbr', 'ndre'), _stretch_to_signed)
)
# for EVI, apply the inverse of the entire transformation
inverse_dfunc['evi'] = lambda x: _stretch_to_signed(x) / 2.5
# scale back crswir
inverse_dfunc['crswir'] = lambda x: x * 5

def process_folder(folder: str, dir_: str, force: bool, indices: "list[str] | None" = None):
    """Compute the APO feature rasters of one tile under four weighting/resampling setups.

    For every requested index and every condition (resampled without weights,
    raw without weights, raw with cloud weights, raw with cloud + disturbance
    weights) one ``APO_<index>_<condition>.tif`` file is written to
    ``<dir_>/<folder>/features``.

    Args:
        folder: Name of the tile folder inside ``dir_``.
        dir_: Directory containing the tile folders.
        force: When False, a tile that already has a ``features`` directory is
            skipped.
        indices: Names of the indices to process. Each name must be a
            sub-folder of the tile and a key of ``inverse_dfunc``, except
            ``'r'``, ``'g'`` and ``'b'`` which select one band of the ``rgb``
            stack. ``'rgb'`` is expanded to those three bands. Defaults to
            ``['rgb', 'crswir', 'ndvi', 'gndvi', 'evi', 'nbr']``.

    Returns:
        None. Any exception raised while processing is printed and the folder
        name is appended to the module-level ``error_files`` list.
    """
    path = os.path.join(dir_, folder)
    if not os.path.isdir(path):
        return

    folder_path = os.path.join(path, 'features')
    if os.path.exists(folder_path) and not force:
        return

    if indices is None:
        indices = ['rgb', 'crswir', 'ndvi', 'gndvi', 'evi', 'nbr']
    rgb_bands = {'r': 0, 'g': 1, 'b': 2}

    try:
        print(f'Processing {folder}...')
        # Acquisition dates are the 'YYYY-MM-DD' prefix of the rgb file names.
        dates = sorted(
            datetime.strptime(filename.split('_')[0], '%Y-%m-%d')
            for filename in os.listdir(os.path.join(path, 'rgb'))
        )

        # The rgb stack is always needed: its first band drives the disturbance detection.
        rgb = inverse_dfunc['rgb'](load_folder(os.path.join(path, 'rgb')))
        dict_indices = {
            index: inverse_dfunc[index](load_folder(os.path.join(path, index)))
            for index in indices
            if index != 'rgb' and index not in rgb_bands
        }
        qa = load_folder(os.path.join(path, 'qa'))

        # One (name, 3-D series) entry per output; 'rgb' is split per band because
        # the fitting/resampling code works on (time, height, width) stacks.
        series = []
        for index in indices:
            if index == 'rgb':
                series.extend((name, rgb[:, band, :, :]) for name, band in rgb_bands.items())
            elif index in rgb_bands:
                series.append((index, rgb[:, rgb_bands[index], :, :]))
            else:
                series.append((index, dict_indices[index]))

        # Process QA mask: pixels whose QA class is listed below are flagged,
        # the flag map is post-processed, then inverted so flagged pixels carry 0.
        flagged_classes = [1, 3, 6, 8, 9, 10, 11]
        new_qa = []
        for frame in tqdm(qa):
            flagged = np.isin(frame, flagged_classes).astype(frame.dtype)
            new_qa.append(postprocess_cloud_mask(flagged, 5, 25))
        qa_mask = 1 - np.array(new_qa)

        # Calculate slopes for disturbance detection
        K = 6
        slopes = np.array([
            calculate_slope_with_dates(rgb[:, 0], dates, i, K)
            for i in tqdm(range(rgb.shape[0]))
        ])

        # The 0.001 weight below would be truncated to 0 in an integer array,
        # so the disturbance-aware mask is held as floating point.
        weight_dtype = qa_mask.dtype if np.issubdtype(qa_mask.dtype, np.floating) else np.float32
        expanded_mask_with_disturbances = qa_mask.astype(weight_dtype)
        disturbed = np.zeros(qa_mask[0].shape, dtype=bool)
        # NOTE(review): the start offset is 8 while K is 6 — confirm the intended value.
        for i in range(8, len(slopes)):
            # Once a pixel is flagged it stays down-weighted for all later dates.
            disturbed |= np.abs(slopes[i]) > 10
            expanded_mask_with_disturbances[i, disturbed] = 0.001

        # Folder for features
        os.makedirs(folder_path, exist_ok=True)

        # The DEM raster provides the template profile of the outputs.
        with rasterio.open(os.path.join(path, 'dem.tif')) as src:
            profile = src.profile

        # (suffix, resample, use_cloud_mask, use_disturbance_mask)
        conditions = [
            ("resampled_no_weights", True, False, False),
            ("no_resample_no_weights", False, False, False),
            ("no_resample_cloud_weights", False, True, False),
            ("no_resample_cloud_disturbance_weights", False, True, True)
        ]

        for suffix, resample, use_cloud_mask, use_disturbance_mask in conditions:
            # The mask only depends on the condition, so it is prepared once
            # rather than once per index.
            if use_disturbance_mask:
                mask = expanded_mask_with_disturbances
            elif use_cloud_mask:
                mask = qa_mask
            else:
                mask = np.ones_like(qa_mask)

            if resample:
                mask_, _ = resample_time_series(mask, dates)
            else:
                mask_ = mask

            for name, data in series:
                if resample:
                    data_, dates_ = resample_time_series(data, dates)
                else:
                    data_, dates_ = data, dates

                compute_features(folder_path, data_.astype(np.float16), mask_, dates_, f'{name}_{suffix}', profile)

    except Exception as e:
        # Batch boundary: report the failure and carry on with the next tile.
        print(f'Error processing {folder} : {e}')
        error_files.append(folder)

# Batch driver: run process_folder on every entry of the tiles directory and
# report the tiles that raised an error.
dir_ = '/Users/arthurcalvi/Data/species/validation/tiles'
force = True
error_files = []  # process_folder appends the name of each failing tile here

tile_names = os.listdir(dir_)
for folder in tqdm(tile_names):
    process_folder(folder, dir_, force)

if error_files:
    print(f"Errors occurred in the following files: {error_files}")


In [None]:
import os
import pandas as pd 
from datetime import datetime, timedelta
import numpy as np
import rasterio
from tqdm import tqdm
from utils import load_folder, fit_periodic_function, get_aspect, postprocess_cloud_mask, calculate_slope_with_dates
from datetime import datetime, timedelta

import warnings
warnings.filterwarnings('ignore')

def resample_time_series(data: np.ndarray, dates: list[datetime], resample_step_days: int = 28) -> tuple[np.ndarray, pd.DatetimeIndex]:
    """Resample a (time, height, width) stack onto a regular time grid.

    The grid starts at ``dates[0]`` and advances by ``resample_step_days`` days
    without going past ``dates[-1]``. Every pixel is linearly interpolated in
    time between its surrounding observations.

    Args:
        data: Array of shape (n_samples, height, width); frame ``i`` is the
            observation acquired at ``dates[i]``.
        dates: Acquisition dates, one per frame. ``dates[0]`` and ``dates[-1]``
            are used as the bounds of the grid.
        resample_step_days: Spacing of the output grid, in days.

    Returns:
        ``(resampled_data, new_dates)`` where ``resampled_data`` has shape
        (len(new_dates), height, width) and ``new_dates`` is the pandas
        ``DatetimeIndex`` of the grid.
    """
    print('Resampling time series...')
    new_dates = pd.date_range(start=dates[0], end=dates[-1], freq=f'{resample_step_days}D')

    # Flatten the spatial axes so that every pixel becomes one DataFrame column.
    n_samples, height, width = data.shape
    df = pd.DataFrame(data.reshape(n_samples, -1), index=pd.to_datetime(dates))

    # reindex() refuses duplicated labels: keep the first frame of each date.
    df = df.loc[~df.index.duplicated(keep='first')].sort_index()

    # Reindexing straight onto the grid would drop every observation that does
    # not fall exactly on a grid date. Interpolate on the union of observed and
    # grid dates instead, weighting by elapsed time, then keep the grid rows.
    combined_index = df.index.union(new_dates)
    interpolated = df.reindex(combined_index).interpolate(method='time', limit_direction='both')
    df_resampled = interpolated.reindex(new_dates).values

    # Restore the spatial layout.
    resampled_data = df_resampled.reshape(len(new_dates), height, width)

    return resampled_data, new_dates

def compute_features(folder_path: str, data, mask, dates, suffix: str, profile):
    """Fit the periodic model and write its three maps to a 3-band GeoTIFF.

    Args:
        folder_path: Directory receiving the output file.
        data: Time series handed to ``fit_periodic_function``.
        mask: Per-sample weights/mask handed to ``fit_periodic_function``.
        dates: Dates associated with ``data``.
        suffix: Appended to the file name, producing ``APO_<suffix>.tif``.
        profile: Rasterio profile used as the template of the output file.
            It is not modified.

    The bands are written in the order amplitude, phase, offset, and the file
    is declared with the dtype of the amplitude map.
    """
    print(f'Computing features for {suffix}...')
    amplitude_map, phase_map, offset_map = fit_periodic_function(data, mask, dates)

    # Work on a copy: the caller reuses the same profile object for every
    # output, so it must not be altered here.
    out_profile = dict(profile)
    out_profile.update(count=3, dtype=amplitude_map.dtype)
    with rasterio.open(os.path.join(folder_path, f'APO_{suffix}.tif'), 'w', **out_profile) as dst:
        dst.write(np.stack([amplitude_map, phase_map, offset_map], axis=0))

def process_folder(folder: str, dir_: str, force: bool, disable: bool = True):
    """Compute the APO feature rasters of one tile for 1-, 2- and 3-year windows.

    For each condition (resampled without weights, raw without weights, raw
    with cloud weights, raw with cloud + disturbance weights), each window
    length and each of the R, G, B and CRSWIR series, one
    ``APO_<index>_<condition>_<N>Y.tif`` file is written to
    ``<dir_>/<folder>/features``. A ``processed.marker`` file is created there
    once every output has been written.

    Args:
        folder: Name of the tile folder inside ``dir_``.
        dir_: Directory containing the tile folders.
        force: When False, a tile that already has its marker file is skipped.
        disable: Forwarded to ``tqdm`` to silence the progress bars.

    Returns:
        None. Any exception raised while processing is printed and the folder
        name is appended to the module-level ``error_files`` list.
    """
    path = os.path.join(dir_, folder)
    if not os.path.isdir(path):
        return

    folder_path = os.path.join(path, 'features')
    # Check if folder has already been processed
    marker_file = os.path.join(folder_path, 'processed.marker')
    if os.path.exists(marker_file) and not force:
        print(f'Skipping {folder} as it has already been processed.')
        return

    try:
        print(f'Processing {folder}...')
        # Acquisition dates are the 'YYYY-MM-DD' prefix of the rgb file names.
        dates = sorted(
            datetime.strptime(filename.split('_')[0], '%Y-%m-%d')
            for filename in os.listdir(os.path.join(path, 'rgb'))
        )
        rgb = load_folder(os.path.join(path, 'rgb'))
        crswir = load_folder(os.path.join(path, 'crswir'))
        qa = load_folder(os.path.join(path, 'qa'))

        dir_dem = os.path.join(path, 'dl_lc', 'dem.tif')

        # Process QA mask: pixels whose QA class is listed below are flagged,
        # the flag map is post-processed, then inverted so flagged pixels carry 0.
        flagged_classes = [1, 3, 6, 8, 9, 10, 11]
        new_qa = []
        for frame in tqdm(qa, disable=disable):
            flagged = np.isin(frame, flagged_classes).astype(frame.dtype)
            new_qa.append(postprocess_cloud_mask(flagged, 5, 25))
        qa_mask = 1 - np.array(new_qa)

        # Calculate slopes for disturbance detection
        K = 6
        slopes = np.array([
            calculate_slope_with_dates(rgb[:, 0], dates, i, K)
            for i in tqdm(range(rgb.shape[0]), disable=disable)
        ])

        # The 0.001 weight below would be truncated to 0 in an integer array,
        # so the disturbance-aware mask is held as floating point.
        weight_dtype = qa_mask.dtype if np.issubdtype(qa_mask.dtype, np.floating) else np.float32
        expanded_mask_with_disturbances = qa_mask.astype(weight_dtype)
        disturbed = np.zeros(qa_mask[0].shape, dtype=bool)
        for i in range(6, len(slopes)):
            # Once a pixel is flagged it stays down-weighted for all later dates.
            disturbed |= np.abs(slopes[i]) > 10
            expanded_mask_with_disturbances[i, disturbed] = 0.001

        # Folder for features
        os.makedirs(folder_path, exist_ok=True)

        # The DEM raster provides the template profile of the outputs.
        with rasterio.open(dir_dem) as src:
            profile = src.profile

        # (suffix, resample, use_cloud_mask, use_disturbance_mask)
        conditions = [
            ("resampled_no_weights", True, False, False),
            ("no_resample_no_weights", False, False, False),
            ("no_resample_cloud_weights", False, True, False),
            ("no_resample_cloud_disturbance_weights", False, True, True)
        ]

        year_lengths = [1, 2, 3]

        channels = {
            'R': rgb[:, 0, :, :],
            'G': rgb[:, 1, :, :],
            'B': rgb[:, 2, :, :],
            'CRSWIR': crswir,
        }

        for suffix, resample, use_cloud_mask, use_disturbance_mask in conditions:
            # The full-length mask only depends on the condition.
            if use_disturbance_mask:
                mask = expanded_mask_with_disturbances
            elif use_cloud_mask:
                mask = qa_mask
            else:
                mask = np.ones_like(qa_mask)

            for year_length in year_lengths:
                # Keep the acquisitions of the first `year_length` years (365-day years).
                # NOTE(review): a tile whose series is shorter than the window still
                # yields an `<N>Y` product built from all available dates — confirm
                # this is intended.
                end_date = dates[0] + timedelta(days=365 * year_length)
                segment_indices = [i for i, date in enumerate(dates) if date < end_date]

                if len(segment_indices) < 8:
                    continue  # Skip if there is not enough data

                segment_dates = [dates[i] for i in segment_indices]
                print(f'Processing {folder} for {year_length} years between {segment_dates[0]} and {segment_dates[-1]}...')

                mask_segment = mask[segment_indices, :, :]
                if resample:
                    mask_segment, _ = resample_time_series(mask_segment, segment_dates)

                for index, data in channels.items():
                    data_segment = data[segment_indices, :, :]
                    if resample:
                        data_segment, segment_dates_ = resample_time_series(data_segment, segment_dates)
                    else:
                        segment_dates_ = segment_dates

                    compute_features(folder_path, data_segment, mask_segment, segment_dates_, f'{index}_{suffix}_{year_length}Y', profile)

        # Create marker file to indicate that the folder has been processed
        with open(marker_file, 'w') as f:
            f.write('Processed')

    except Exception as e:
        # Batch boundary: report the failure and carry on with the next tile.
        print(f'Error processing {folder} : {e}')
        error_files.append(folder)

# Batch driver: run process_folder on every entry of the tiles directory,
# skipping tiles that already carry their marker file, and report the tiles
# that raised an error.
dir_ = '/Users/arthurcalvi/Data/species/validation/tiles'
force = False
error_files = []  # process_folder appends the name of each failing tile here

tile_names = os.listdir(dir_)
for folder in tqdm(tile_names):
    process_folder(folder, dir_, force)

if error_files:
    print(f"Errors occurred in the following files: {error_files}")
