In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from utils import load_and_preprocess_table_data

# Load the preprocessed pixel table for the selected experiment configuration
config = "no_resample_cloud_disturbance_weights_3Y"
data = load_and_preprocess_table_data(config)

# Translate the GRECO region and data-source columns through the lookup
# tables provided by utils
from utils import mapping_real_greco, mapping_source

for column, lookup in (
    ('greco_region', mapping_real_greco),
    ('source', mapping_source),
):
    data[column] = data[column].map(lookup)

# Seaborn style shared by every panel
sns.set(style="whitegrid")

# Combined figure: 3 rows x 2 columns of panels
fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(12, 16))

# Shared panel styling: logarithmic y-axis and no top/right spines
def configure_plot(ax):
    """Switch `ax` to a log-scaled y-axis and hide its top and right spines."""
    ax.set(yscale='log')
    # top=True and right=True are seaborn's despine defaults
    sns.despine(ax=ax)

import os

# Colour per phenology code: 1 = Deciduous (red), 2 = Evergreen (green)
phen_palette = {1: 'red', 2: 'green'}
phen_legend = {'title': 'Phénologie', 'labels': ['Deciduous', 'Evergreen']}

# x-axis category orders, computed once and reused by several panels.
# NaN (a value absent from the mapping) is dropped first: sorted() cannot
# compare NaN with strings and would raise TypeError.
greco_order = sorted(data['greco_region'].dropna().unique())
source_order = sorted(data['source'].dropna().unique())


def _count_panel(ax, x, title, order, hue=None, palette=None, legend=None, rotate_labels=True):
    """Draw one pixel-count panel on `ax` from the module-level `data` table.

    Parameters
    ----------
    ax : matplotlib Axes receiving the plot.
    x : name of the column counted along the x-axis.
    title : panel title.
    order : category order along the x-axis.
    hue : optional column used to split each bar group.
    palette : optional seaborn palette forwarded to countplot.
    legend : optional dict of keyword arguments for ``ax.legend``; when None
        the legend created by seaborn (if any) is left untouched.
    rotate_labels : rotate the x tick labels by 45 degrees when True.
    """
    sns.countplot(data=data, x=x, hue=hue, ax=ax, palette=palette, order=order, alpha=0.75)
    if rotate_labels:
        # tick_params rotates the labels without re-assigning label texts,
        # which is the approach matplotlib recommends over set_xticklabels
        ax.tick_params(axis='x', labelrotation=45)
    ax.set_xlabel(None)
    ax.set_ylabel('Nombre de Pixels')
    ax.set_title(title)
    if legend is not None:
        ax.legend(**legend)
    configure_plot(ax)


# Number of pixels per GRECO region
_count_panel(axes[0, 0], 'greco_region', 'Nombre de Pixels par Région GRECO', greco_order)

# Number of pixels per data source
_count_panel(axes[0, 1], 'source', 'Nombre de Pixels par Source de Données', source_order)

# Number of pixels per data source within each GRECO region
_count_panel(
    axes[1, 0], 'greco_region', 'Nombre de Pixels par Source de Données et Région GRECO',
    greco_order, hue='source', legend={'title': 'Source'},
)

# Number of pixels per phenology class within each GRECO region
_count_panel(
    axes[1, 1], 'greco_region', 'Nombre de Pixels par Phénologie et Région GRECO',
    greco_order, hue='phen', palette=phen_palette, legend=phen_legend,
)

# Overall phenology distribution
_count_panel(
    axes[2, 0], 'phen', 'Distribution Globale des Phénologies',
    [1, 2], palette=phen_palette, rotate_labels=False,
)
# Pin the two categorical tick positions before relabelling them
axes[2, 0].set_xticks([0, 1])
axes[2, 0].set_xticklabels(['Deciduous', 'Evergreen'])
axes[2, 0].set_ylim(1, 1e6)

# Phenology split per data source
_count_panel(
    axes[2, 1], 'source', 'Répartition des Phénologies par Source',
    source_order, hue='phen', palette=phen_palette, legend=phen_legend,
)

plt.tight_layout()

# Make sure the output folder exists, and save before plt.show(): on
# non-inline backends the figure may be released once it has been shown.
os.makedirs('images', exist_ok=True)
fig.savefig(f'images/{config}_dataset_analysis.png', dpi=300, bbox_inches='tight')

plt.show()


# Figure for the article

In [None]:
# Load the GRECO regions and the sampling tiles used by the map figures
import geopandas as gpd

# Load the eco-region polygons, key each one by the first character of its
# `codeser`, merge polygons sharing that key, drop the first merged group
# (iloc[1:]) and reproject to EPSG:2154
greco = gpd.read_file('/Users/arthurcalvi/Data/eco-regions/France/ser_l93_new/ser_l93_new.dbf')
greco = (
    greco.assign(greco=greco['codeser'].map(lambda code: code[0]))
    .dissolve(by='greco', aggfunc='first')
    .reset_index()
    .iloc[1:]
    .to_crs('EPSG:2154')
)

# Load the sampling tiles and keep only those whose index appears in data['tile_id']
result_gdf = gpd.read_parquet("/Users/arthurcalvi/Data/species/validation/val_train_tiles.parquet")
result_gdf = result_gdf.loc[result_gdf.index.isin(data['tile_id'].astype(int))]
print(result_gdf.crs)

In [None]:
# Row counts per GRECO region within each phenology class
data.groupby('phen')['greco_region'].value_counts()

In [None]:
import matplotlib.pyplot as plt
import contextily as ctx
import geopandas as gpd
import os 

fig, ax = plt.subplots(figsize=(7, 7))

# Eco-regions as a faint background, coloured by their NomSER value (tab20), with a legend
greco.plot(column='NomSER', cmap='tab20', alpha=0.25, edgecolor='white', legend=True, ax=ax)

# Sampling tiles in solid black, reprojected to the CRS of the regions
result_gdf.to_crs(greco.crs).plot(color='black', edgecolor='black', ax=ax)

# Hide the axis frame and ticks (no basemap is drawn in this cell)
ax.set_axis_off()

# Render the figure
plt.tight_layout()
plt.show()

# Saving is left disabled here; a later cell writes images/validation_data.png
# os.makedirs('images', exist_ok=True)
# fig.savefig('images/validation_data.png', dpi=300)


In [None]:
import rasterio
from utils import load_folder, normalize

# Imported here so the cell no longer relies on `os` leaking from an earlier cell
import os

dir_ = '/Users/arthurcalvi/Data/species/validation/tiles/'

# Entries of the tiles folder. The previous os.listdir() call had no argument
# and therefore listed the current working directory instead of dir_.
tiles = os.listdir(dir_)

# To preview every tile, run the steps below for each entry of
# [t for t in tiles if os.path.isdir(os.path.join(dir_, t))].

# Single tile shown as an example
tile = 'tile_79_20200102_20241230_Garrigues_validation'
path = os.path.join(dir_, tile)
rgb = load_folder(os.path.join(path, 'rgb'))

# First .tif file found in the tile's reference_species folder
path_reference = os.path.join(path, 'reference_species')
path_reference_species = os.path.join(path_reference, [x for x in os.listdir(path_reference) if x.endswith('.tif')][0])

# Read band 3 of the reference raster; the context manager closes the dataset
# (it was previously left open)
with rasterio.open(path_reference_species) as src:
    raster = src.read(3)

# Background: fifth item returned by load_folder, first axis moved last,
# normalized and brightened by a factor 3
plt.imshow(3*normalize(rgb[4].transpose(1,2,0)), interpolation='none')
# Overlay: reference band, half transparent, drawn with matplotlib's default
# colormap (no custom red/green colormap is built in this cell)
plt.imshow(raster.squeeze(), interpolation='none', alpha=0.5)
plt.grid(False)
plt.axis('off')
plt.colorbar()
plt.show()

In [None]:
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import rasterio
import os
from utils import load_and_preprocess_table_data, load_folder, normalize, mapping_real_greco, mapping_source, greco_regions_fr_en
from matplotlib.colors import ListedColormap
import numpy as np
from mpl_toolkits.axes_grid1.inset_locator import inset_axes, mark_inset

# Map panel: eco-regions, tile footprints and region labels
def plot_map(ax, greco, result_gdf):
    """Draw the eco-regions and the sampled tiles on `ax` and return `ax`.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    greco : GeoDataFrame of regions; must hold a 'color' column and a geometry.
    result_gdf : GeoDataFrame of tile footprints, drawn white with a black edge.

    Each region is annotated at its centroid with its index label, a one-entry
    legend ("tiles") is added in the upper-left corner and the axis is hidden.
    """
    # NOTE(review): `column='color'` asks geopandas to colour-map the values of
    # that column rather than use them as literal colours — confirm this is
    # the intent (literal colours would be passed through `color=`).
    greco.plot(column='color', alpha=0.25, edgecolor='white', legend=False, ax=ax)
    result_gdf.plot(color='white', edgecolor='black', ax=ax)

    # Label every region with its index at the polygon centroid
    for region_id, region in greco.iterrows():
        center = region.geometry.centroid
        ax.text(center.x, center.y, region_id, color='black', fontsize=12, fontdict={'weight': 'bold'}, ha='center')

    # Legend entry for the tiles, built from a proxy rectangle
    tile_proxy = plt.Rectangle((0, 0), 1, 1, color='white', ec='black', lw=1, alpha=0.75)
    ax.legend([tile_proxy], ['tiles'], loc='upper left', frameon=False)
    ax.set_axis_off()
    return ax

# Function to plot the grouped (side-by-side, not stacked) horizontal bar plot
def plot_bar(ax, data, region_mapping):
    """Plot pixel counts per region number and phenology class on `ax`.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    data : table with 'greco_region' and 'phen' columns. It is not modified:
        the region numbers are computed on a small working copy instead of
        being written back as a 'region_num' column on the caller's table.
    region_mapping : dict mapping the values of 'greco_region' to region numbers.

    Returns
    -------
    The same `ax`, with one horizontal bar group per region number on a
    logarithmic x-axis.
    """
    # Work on a two-column copy so the caller's DataFrame is left untouched
    counts_input = data[['phen']].assign(region_num=data['greco_region'].map(region_mapping))
    bar_data = counts_input.groupby(['region_num', 'phen']).size().unstack(fill_value=0)

    # One colour per 'phen' column, in column order; the legend labels below
    # presume the first column is deciduous and the second evergreen
    # (codes 1 and 2 — TODO confirm).
    bar_data.plot(kind='barh', stacked=False, ax=ax, color=['red', 'green'], logx=True, alpha=0.75)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    ax.set_xlabel('Number of Pixels')
    ax.set_ylabel('Greco Region')
    ax.legend(['Deciduous', 'Evergreen'])
    # Grid lines only along the (log) count axis
    ax.yaxis.grid(False)
    ax.xaxis.grid(True, linestyle='--', color='grey', alpha=0.7)
    return ax

# Function to plot the RGB image with deciduous and evergreen pixels
def plot_rgb(ax, dir_, tile):
    """Show one tile's RGB image with its reference classes overlaid and return `ax`.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    dir_ : folder containing one sub-folder per tile.
    tile : name of the tile sub-folder; it must contain an 'rgb' folder and a
        'reference_species' folder holding a '.tif' file whose name starts
        with 'tile'.

    Raises
    ------
    IndexError
        If no matching '.tif' file exists in 'reference_species'.
    """
    path = os.path.join(dir_, tile)
    rgb = load_folder(os.path.join(path, 'rgb'))
    path_reference = os.path.join(path, 'reference_species')
    path_reference_species = os.path.join(path_reference, [x for x in os.listdir(path_reference) if x.endswith('.tif') and x.startswith('tile')][0])

    # Read band 3 as float; the context manager closes the dataset, which was
    # previously left open.
    with rasterio.open(path_reference_species) as src:
        raster = src.read(3).astype(float)
    # Zeros become NaN so that imshow leaves those pixels unpainted
    raster[raster == 0] = np.nan

    # Three-entry colormap over the value range 0-2: 1 -> red, 2 -> green
    cmap = ListedColormap(['none', 'red', 'green'])
    norm = plt.Normalize(0, 2)

    # Background: fifth item returned by load_folder, first axis moved last,
    # normalized and brightened by a factor 2
    ax.imshow(2 * normalize(rgb[4].transpose(1, 2, 0)), interpolation='none')
    ax.imshow(raster.squeeze(), cmap=cmap, interpolation='none', alpha=0.75, norm=norm)
    ax.grid(False)
    ax.axis('off')

    # Add legend with rectangles and white text
    legend_handles = [
        plt.Rectangle((0, 0), 1, 1, color='red', ec='white', lw=1, alpha=0.75),
        plt.Rectangle((0, 0), 1, 1, color='green', ec='white', lw=1, alpha=0.75)
    ]
    legend_labels = ['Deciduous', 'Evergreen']
    legend = ax.legend(handles=legend_handles, labels=legend_labels, loc='lower right', frameon=False)
    for text in legend.get_texts():
        text.set_color('white')

    return ax

# Load the pixel table and translate region / source values through the utils lookups
config = "no_resample_cloud_disturbance_weights_3Y"
data = load_and_preprocess_table_data(config)
data['greco_region'] = data['greco_region'].map(mapping_real_greco)
data['source'] = data['source'].map(mapping_source)

# Eco-regions: key each polygon by the first character of `codeser`, merge
# polygons sharing that key, drop the first merged group and reproject
greco = gpd.read_file('/Users/arthurcalvi/Data/eco-regions/France/ser_l93_new/ser_l93_new.dbf')
greco['greco'] = greco.codeser.apply(lambda x: x[0])
greco = greco.dissolve(by='greco', aggfunc='first')
greco = greco.reset_index().iloc[1:].to_crs('EPSG:2154')
# NomSER uses spaces where the keys of mapping_real_greco use underscores
greco['greco_name'] = greco['NomSER'].map({k.replace('_', ' '): v for k, v in mapping_real_greco.items()})

# One tab20 colour (RGBA tuple) per region, stored in a 'color' column
cmap = plt.get_cmap('tab20')
colors = [cmap(i % 20) for i in range(len(greco))]
greco['color'] = colors

# Sampling tiles in the CRS of the regions, restricted to the tiles present in
# the pixel table. `.copy()` makes the filtered frame independent, so adding a
# column below does not trigger pandas' SettingWithCopyWarning.
result_gdf = gpd.read_parquet("/Users/arthurcalvi/Data/species/validation/val_train_tiles.parquet").to_crs(greco.crs)
result_gdf = result_gdf[result_gdf.index.isin(data['tile_id'].astype(int))].copy()
result_gdf['greco_region'] = result_gdf['NomSER'].map(mapping_real_greco)

# Region name -> region number (the index label of the region in `greco`)
region_mapping = dict(zip(greco['greco_name'], greco.index))

# Figure layout: 9 x 12 grid; the map takes the left 8 columns, the bar plot and
# the RGB example share the right 4 columns, the last row holds the region legend
fig = plt.figure(figsize=(12, 9))
gs = gridspec.GridSpec(9, 12, figure=fig)

# Panel A: map of the regions and sampled tiles
ax_map = fig.add_subplot(gs[:8, :8])
plot_map(ax_map, greco, result_gdf)
ax_map.set_title('A) Tile sampling', loc='center', fontsize=12, pad=10, fontweight='bold')

# Region-number legend in its own (hidden) axis along the bottom row
ax_legend = fig.add_subplot(gs[8, :])
ax_legend.axis('off')

# One "<number>: <English name>" entry per region.
# NOTE(review): every marker is white, so only the text is visible; the
# per-region colours stored in greco['color'] are not used here — confirm
# this is intended.
region_legend = [f"{num}: {greco_regions_fr_en[name]}" for name, num in zip(greco['greco_name'], greco.index)]
handles = [plt.Line2D([0], [0], color='white', marker='o', linestyle='None', markersize=10) for _ in range(len(greco))]
ax_legend.legend(handles=handles, labels=region_legend, loc='center', ncol=4, frameon=False)

# Panel B: pixel counts per region and phenology class
ax_bar = fig.add_subplot(gs[:4, 8:])
plot_bar(ax_bar, data, region_mapping)
ax_bar.set_title('B) Distribution of pixels across regions', loc='center', fontsize=12, pad=10, fontweight='bold')

# Panel C: RGB image of one tile with the reference classes overlaid
ax_rgb = fig.add_subplot(gs[4:8, 8:])
plot_rgb(ax_rgb, '/Users/arthurcalvi/Data/species/validation/tiles/', 'tile_79_20200102_20241230_Garrigues_validation')
ax_rgb.set_title('C) Reference data example', loc='center', fontsize=12, pad=10, fontweight='bold')

plt.tight_layout()

# Make sure the output folder exists, and save before plt.show(): on
# non-inline backends the figure may be released once it has been shown.
os.makedirs('images', exist_ok=True)
fig.savefig('images/validation_data.png', dpi=300, bbox_inches='tight')

plt.show()


In [None]:
# Region names in the row order of `greco`
greco['greco_name'].to_list()