# Essais pour enlever les gros bâtiments des masques BDTOPO

## Packages et chargement des données

In [1]:
# Enable IPython's autoreload extension; mode 2 reloads all modules before
# each cell is executed, so edits to the project sources are picked up live.
%load_ext autoreload
%autoreload 2

In [2]:
# Reload the autoreload extension (it is already loaded by the first cell).
%reload_ext autoreload

In [137]:
import sys
sys.path.append('../src')
from satellite_image import SatelliteImage
from utils import *
from plot_utils import *
from mappings import *
from labeler import (RIL_BDTOPOLabeler, BDTOPOLabeler, RILLabeler)
from labeled_satellite_image import SegmentationLabeledSatelliteImage

In [5]:
import s3fs
import matplotlib.pyplot as plt
import re
import pandas as pd

In [76]:
from scipy.ndimage import label
import geopandas as gpd
from rasterio.features import rasterize, shapes
from shapely.geometry import Polygon, box
from sklearn.cluster import KMeans

In [6]:
# NOTE(review): `update_storage_access`, `get_environment` and `get_root_path`
# come from a star import (presumably `utils`); their behaviour is not visible
# here. Presumably the first one refreshes the storage credentials — confirm.
update_storage_access()

# Project environment description: bucket name plus remote/local data paths.
environment = get_environment()

# NOTE(review): `root_path` is not used by any cell shown in this notebook.
root_path = get_root_path()

bucket = environment["bucket"]

# Guyane 2022: remote ("sources") and local ("local-path") Pleiades locations.
path_s3_pleiades_data_2022_guyane = environment["sources"]["PLEIADES"][2022]["guyane"]
path_local_pleiades_data_2022_guyane = environment["local-path"]["PLEIADES"][2022]["guyane"]

# Guyane 2022: local and remote BDTOPO locations.
path_local_bdtopo_data_guyane = environment["local-path"]["BDTOPO"][2022]["guyane"]
path_s3_bdtopo_data_guyane = environment["sources"]["BDTOPO"][2022]["guyane"]

# Martinique 2022: same four locations.
path_s3_pleiades_data_2022_martinique = environment["sources"]["PLEIADES"][2022]["martinique"]
path_local_pleiades_data_2022_martinique = environment["local-path"]["PLEIADES"][2022]["martinique"]

path_local_bdtopo_data_martinique = environment["local-path"]["BDTOPO"][2022]["martinique"]
path_s3_bdtopo_data_martinique = environment["sources"]["BDTOPO"][2022]["martinique"]

# S3 filesystem client pointing at the SSP Cloud MinIO endpoint.
fs = s3fs.S3FileSystem(client_kwargs={"endpoint_url": "https://minio.lab.sspcloud.fr"})



In [122]:
# Recursively download the Guyane 2022 Pleiades images from the bucket into
# the matching local directory (one level above the notebook folder).
fs.download(
        rpath=f"{bucket}/{path_s3_pleiades_data_2022_guyane}",
        lpath=f"../{path_local_pleiades_data_2022_guyane}",
        recursive=True)

# Same for the Guyane BDTOPO data.
fs.download(
        rpath=f"{bucket}/{path_s3_bdtopo_data_guyane}",
        lpath=f"../{path_local_bdtopo_data_guyane}",
        recursive=True
)

In [7]:
# Recursively download the Martinique 2022 Pleiades images from the bucket
# into the matching local directory.
fs.download(
        rpath=f"{bucket}/{path_s3_pleiades_data_2022_martinique}",
        lpath=f"../{path_local_pleiades_data_2022_martinique}",
        recursive=True)

In [8]:
# Recursively download the Martinique BDTOPO data from the bucket into the
# matching local directory.
fs.download(
        rpath=f"{bucket}/{path_s3_bdtopo_data_martinique}",
        lpath=f"../{path_local_bdtopo_data_martinique}",
        recursive=True
)

## Image et chargement du RIL et de BDTOPO 

In [132]:
# Mont Baduel: look up which Guyane tile contains this point.
# NOTE(review): mid-notebook star import; `find_image_of_point` presumably
# comes from `image_utils`. `coord_gps = True` presumably means the point is
# given as GPS latitude/longitude — confirm against the helper.
from image_utils import *
find_image_of_point([4.918313, -52.303234], '../data/PLEIADES/2022/GUYANE', coord_gps = True)

In [148]:
# Guyane tile used below (the commented-out path is an alternative tile).
#filename = '../data/PLEIADES/2022/GUYANE/ORT_2022072050325085_0353_0545_U22N_16Bits.jp2'
filename = '../data/PLEIADES/2022/GUYANE/ORT_2022072050325085_0355_0544_U22N_16Bits.jp2'
# Reference date handed to the image loader and to the labelers.
# NOTE(review): `date` is not imported explicitly in the cells shown;
# presumably it leaks in through one of the star imports — confirm.
date2022 = date.fromisoformat('2022-01-01')

In [149]:
# Load the Guyane tile as a 4-band SatelliteImage for departement 973.
image = SatelliteImage.from_raster(
        filename,
        date = date2022, 
        n_bands = 4,
        dep = '973'
    )
# normalize() is called without storing its return value.
# NOTE(review): presumably it rescales the pixel values in place — confirm.
image.normalize()

In [150]:
# RIL labeler for departement 973 (Guyane) at the reference date.
labeler_RIL = RILLabeler(date2022, '973')

In [153]:
# BDTOPO labeler for departement 973 (Guyane) at the reference date.
labeler_BDTOPO = BDTOPOLabeler(date2022, '973')

In [430]:
# RIL labeler for departement 972 (Martinique) at the reference date.
labeler_RIL_1 = RILLabeler(date2022, '972')

In [431]:
# BDTOPO labeler for departement 972 (Martinique) at the reference date.
labeler_BDTOPO_1 = BDTOPOLabeler(date2022, '972')

## Exploration des colonnes du RIL pour voir si elles permettent d'identifier les bidonvilles

In [375]:
# List the columns available in the RIL labeling data.
labeler_RIL.labeling_data.columns

In [381]:
# Distinct values of the "ea_type" column over the whole RIL dataset.
type_ril = list(labeler_RIL.labeling_data["ea_type"])
np.unique(type_ril)

In [310]:
# Reproject the RIL geometries to the image CRS when the two differ.
# Note: this overwrites the geometry column held by the labeler itself.
if labeler_RIL.labeling_data.crs != image.crs:
    labeler_RIL.labeling_data.geometry = labeler_RIL.labeling_data.geometry.to_crs(
        image.crs
    )

# Filtering geometries from RIL
# `.cx` selects by bounding box: only geometries intersecting the image
# extent are kept; `.copy()` detaches the selection from the labeler's data.
xmin, ymin, xmax, ymax = image.bounds
patch = labeler_RIL.labeling_data.cx[xmin:xmax, ymin:ymax].copy()

# Buffer every geometry with the labeler's own buffer size and cap style.
patch.geometry = patch.geometry.buffer(
    labeler_RIL.buffer_size, cap_style=labeler_RIL.cap_style
)

In [312]:
# Distinct "ea_type" values among the RIL entries falling inside the image.
type_ril = list(patch["ea_type"])
np.unique(type_ril)

### On affiche le masque RIL uniquement pour les bâtiments type HABITATION

In [313]:
# Keep only the RIL entries whose "ea_type" is 'HABIT'.
patch1 = patch[patch["ea_type"] == 'HABIT']

In [314]:
# Burn the selected RIL geometries into a mask laid out on the image grid
# (1 inside a geometry, 0 elsewhere); when the selection is empty, fall back
# to an all-zero array of the same height and width.
if not patch1.empty:
    rasterized = rasterize(
        patch1.geometry,
        out_shape=image.array.shape[1:],
        transform=image.transform,
        fill=0,
        default_value=1,
        all_touched=True,
        out=None,
        dtype=None,
    )
else:
    rasterized = np.zeros(image.array.shape[1:])

In [315]:
# Show the first three bands of the image (moved to channel-last order) with
# the rasterized mask overlaid semi-transparently.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(np.transpose(image.array, (1, 2, 0))[:,:,:3])
ax.imshow(rasterized, alpha=0.3)

## Exploration des colonnes de BDTOPO pour voir si elles permettent d'identifier les bidonvilles -> filtrage sur la hauteur des bâtiments

In [320]:
# Display the BDTOPO labeling data.
labeler_BDTOPO.labeling_data

In [323]:
# Distinct values of the 'USAGE1' column over the whole BDTOPO dataset.
type_bdtopo = list(labeler_BDTOPO.labeling_data['USAGE1'])
np.unique(type_bdtopo)

In [326]:
# Reproject the BDTOPO geometries to the image CRS when the two differ.
# Note: this overwrites the geometry column held by the labeler itself.
if labeler_BDTOPO.labeling_data.crs != image.crs:
    labeler_BDTOPO.labeling_data.geometry = labeler_BDTOPO.labeling_data.geometry.to_crs(
        image.crs
    )

# Filtering geometries from BDTOPO: keep those intersecting the image extent.
# Note: `patch` is reused here and replaces the RIL patch built earlier.
xmin, ymin, xmax, ymax = image.bounds
patch = labeler_BDTOPO.labeling_data.cx[xmin:xmax, ymin:ymax].copy()


### On prend les bâtiments étiquetés comme Indifférencié ou Résidentiel

In [337]:
# Split the BDTOPO patch by the two 'USAGE1' values of interest.
# Note: `patch1` is reused and replaces the RIL 'HABIT' subset built earlier.
patch1 = patch[patch['USAGE1'] == 'Indifférencié'] 
patch11 = patch[patch['USAGE1'] == 'Résidentiel']

In [334]:
# Stack the two usage subsets into a single frame with a fresh 0..n-1 index.
patch2 = pd.concat([patch1, patch11], ignore_index=True)

In [396]:
# Burn the selected BDTOPO geometries into a mask laid out on the image grid
# (1 inside a geometry, 0 elsewhere); when the selection is empty, fall back
# to an all-zero array of the same height and width.
if not patch2.empty:
    rasterized = rasterize(
        patch2.geometry,
        out_shape=image.array.shape[1:],
        transform=image.transform,
        fill=0,
        default_value=1,
        all_touched=True,
        out=None,
        dtype=None,
    )
else:
    rasterized = np.zeros(image.array.shape[1:])

In [398]:
# List the columns available on the selected BDTOPO buildings.
patch2.columns

### Clustering sur la colonne hauteur pour trouver sur quelle hauteur seuiller

In [401]:
# Prepare the data for clustering: building heights as a one-feature column.
# NOTE(review): KMeans rejects NaN values; presumably 'HAUTEUR' has none for
# this patch — confirm before reusing on other tiles.
X = patch2['HAUTEUR'].values.reshape(-1, 1)

# Initialise KMeans with 4 clusters and a fixed seed for reproducibility.
kmeans = KMeans(n_clusters=4, n_init = 'auto', random_state=0)

# Fit the clustering on the building heights.
kmeans.fit(X)

# Get the cluster label of each building.
labels = kmeans.predict(X)

# Store the cluster labels in a new 'cluster' column of the GeoDataFrame.
patch2['cluster'] = labels

In [402]:
# Map the selected buildings, coloured by their height cluster.
fig, ax = plt.subplots(figsize=(10, 10))
patch2.plot(ax=ax, column='cluster', cmap='jet', legend = True)

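On choisit comme seuil la hauteur maximale du 2e cluster (étiquette `1`). Attention : les étiquettes de KMeans ne sont pas ordonnées par hauteur, il faut donc vérifier sur la carte ci-dessus que ce cluster est bien celui voulu.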

In [403]:
# Largest height found in the cluster labelled 1.
# NOTE(review): KMeans label numbers are arbitrary, not sorted by height;
# check on the plot that label 1 is the intended group.
max(patch2[patch2['cluster'] == 1]['HAUTEUR'])

In [404]:
# Keep the buildings whose 'HAUTEUR' is at most 7.0.
# NOTE(review): 7.0 is presumably the value displayed by the previous cell,
# copied by hand — confirm, and update it if the clustering changes.
patch_petite_hab = patch2[patch2['HAUTEUR'] <= 7.0]

In [405]:
# Map the buildings that passed the height threshold.
fig, ax = plt.subplots(figsize=(10, 10))
patch_petite_hab.plot(ax=ax)

## Lecture des masques Label Studio

In [302]:
# Build a mask from a Label Studio export archive.
# NOTE(review): what the helper writes or returns is not visible here; the
# arguments select task "1", the 'Petites habitations' label, the export zip
# and the output name.
from gestion_label_studio import creer_mask_from_label_studio_export
creer_mask_from_label_studio_export(
    num_task = "1",
    type_label = 'Petites habitations',
    emplacement_zip = "../labelstudio.zip",
    name_output = "bibi"
)



## Tests masque fusion RIL_BDTOPO (pas bon masque mais on garde les essais)

In [126]:
# Combined RIL + BDTOPO labeler for departement 973 (Guyane).
labeler_RIL_BDTOPO = RIL_BDTOPOLabeler(date2022, '973')

In [135]:
# Segmentation mask produced by the combined labeler for the Guyane tile.
mask_RIL_BDTOPO = labeler_RIL_BDTOPO.create_segmentation_label(image)

In [136]:
# Overlay the combined RIL + BDTOPO mask on the first three image bands.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(np.transpose(image.array, (1, 2, 0))[:,:,:3])
ax.imshow(mask_RIL_BDTOPO, alpha=0.3)

Il y a un gros problème : les bidonvilles disparaissent. En effet, dans le RIL, un bidonville est repéré par un seul point ; il est donc difficile de récupérer tous les polygones BDTOPO du bidonville à partir de ce seul point RIL.

In [407]:
# Martinique tile used for the second series of tests, with its own
# reference date.
# NOTE(review): `date` is not imported explicitly in the cells shown;
# presumably it comes from a star import — confirm.
filename_1 = '../data/PLEIADES/2022/MARTINIQUE/ORT_2022_0711_1619_U20N_8Bits.jp2'
date_1 = date.fromisoformat('2022-01-01')

In [408]:
# Load the Martinique tile as a 3-band SatelliteImage for departement 972.
image_1 = SatelliteImage.from_raster(
        filename_1,
        date = date_1, 
        n_bands = 3,
        dep = '972'
    )
# normalize() is called without storing its return value.
# NOTE(review): presumably it rescales the pixel values in place — confirm.
image_1.normalize()

In [11]:
# Display the Martinique tile using bands 0, 1 and 2.
image_1.plot([0,1,2])

In [None]:
# Combined RIL + BDTOPO labeler for departement 972 (Martinique).
labeler_RIL_BDTOPO_1 = RIL_BDTOPOLabeler(date_1, '972')

In [None]:
# Segmentation mask produced by the combined labeler for the Martinique tile.
mask_RIL_BDTOPO_1 = labeler_RIL_BDTOPO_1.create_segmentation_label(image_1)

In [111]:
# Overlay the combined mask on the first three bands of the Martinique tile.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(np.transpose(image_1.array, (1, 2, 0))[:,:,:3])
ax.imshow(mask_RIL_BDTOPO_1, alpha=0.3)

## Tests de clustering sur la taille des amas de pixels des polygones du masque BDTOPO (ne peut pas marcher mais on garde les essais)

In [432]:
# BDTOPO-only segmentation mask for the Martinique tile.
mask_BDTOPO_1 = labeler_BDTOPO_1.create_segmentation_label(image_1)

In [433]:
height, width = image_1.array.shape[1:]

# Vectorise the BDTOPO mask: build one polygon per region yielded by
# `shapes`, in order to obtain a GeoDataFrame from them.
polygon_list = []
for shape in list(shapes(mask_BDTOPO_1)):
    # Each item is a (geometry, value) pair; only the first coordinate ring
    # of the geometry is used to build the polygon.
    polygon = Polygon(shape[0]["coordinates"][0])
    # Skip any region covering more than 85% of the image (presumably the
    # background region).
    if polygon.area > 0.85 * height * width:
        continue
    polygon_list.append(polygon)

gdf = gpd.GeoDataFrame(geometry=polygon_list)

In [440]:
# Compute the size (area) of each polygon.
gdf['polygon_size'] = gdf.geometry.area

# Prepare the data for clustering: areas as a one-feature column.
X = gdf['polygon_size'].values.reshape(-1, 1)

# Initialise KMeans with 5 clusters and a fixed seed for reproducibility.
kmeans = KMeans(n_clusters=5, n_init = 'auto', random_state=0)

# Fit the clustering on the polygon sizes.
kmeans.fit(X)

# Get the cluster label of each polygon.
labels = kmeans.predict(X)

# Store the cluster labels in a new 'cluster' column of the GeoDataFrame.
gdf['cluster'] = labels

In [441]:
# Draw the polygons, coloured by size cluster, over the first three bands of
# the Martinique tile.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(np.transpose(image_1.array, (1, 2, 0))[:,:,:3])
gdf.plot(column='cluster', cmap='jet', ax=ax)

On prend comme seuil la taille maximale du premier cluster (étiquette `0`).

In [442]:
# Largest polygon size found in the cluster labelled 0.
# NOTE(review): KMeans label numbers are arbitrary, not sorted by size;
# check on the plot that label 0 is the intended group.
max(gdf[gdf['cluster'] == 0]['polygon_size'])

In [443]:
# Keep the polygons whose area is at most the chosen threshold.
# The area column created above is named 'polygon_size'; the notebook-level
# `gdf` has no 'size' column, so filtering on it raised a KeyError.
# NOTE(review): 2142.0 is presumably the value displayed by the previous
# cell, copied by hand — confirm, and update it if the clustering changes.
gdf_petite_hab = gdf[gdf['polygon_size'] <= 2142.0]

In [444]:
# Draw the polygons that passed the size threshold over the first three
# bands of the Martinique tile.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(np.transpose(image_1.array, (1, 2, 0))[:,:,:3])
gdf_petite_hab.plot(column='cluster', cmap='jet', ax=ax)

In [189]:
# BDTOPO-only segmentation mask for the Guyane tile.
mask_BDTOPO = labeler_BDTOPO.create_segmentation_label(image)

In [213]:
def BD_TOPO_petite_hab(image, mask_BDTOPO, max_size=2142.0):
    """Keep only the small connected components of a BDTOPO building mask.

    The mask is vectorised with ``rasterio.features.shapes``; every foreground
    component whose outline area is at most ``max_size`` is kept, and the
    survivors are burned back into a raster of the image's height and width.

    Args:
        image: Object exposing ``array`` laid out as (bands, height, width);
            only its spatial shape is used.
        mask_BDTOPO: 2D mask, non-zero where BDTOPO has a building.
            Presumably the same height and width as ``image`` — confirm
            against the caller.
        max_size: Largest component area to keep. No transform is passed to
            ``shapes``, so areas are in pixel units. Defaults to 2142.0, the
            threshold that was previously hard-coded.

    Returns:
        2D array of shape ``image.array.shape[1:]`` holding 1 on the kept
        components and 0 elsewhere (all zeros when nothing is kept).
    """
    out_shape = image.array.shape[1:]

    # `shapes` yields a (geometry, value) pair for every connected region,
    # background included. Only foreground regions are kept: otherwise small
    # background pockets enclosed by buildings would pass the size threshold
    # and be burned as buildings.
    # Only the first ring of each geometry is used, so interior holes are
    # ignored when computing the area and when burning the polygon back.
    polygon_list = [
        Polygon(geometry["coordinates"][0])
        for geometry, value in shapes(mask_BDTOPO)
        if value != 0
    ]
    if not polygon_list:
        return np.zeros(out_shape)

    gdf = gpd.GeoDataFrame(geometry=polygon_list)

    # Area of each component.
    gdf['polygon_size'] = gdf.geometry.area

    # Threshold on the component area.
    gdf_petite_hab = gdf[gdf['polygon_size'] <= max_size]
    if gdf_petite_hab.empty:
        return np.zeros(out_shape)

    # Rasterize the kept geometries into a numpy array. No transform is given
    # because the polygons are already expressed in pixel coordinates.
    return rasterize(
        gdf_petite_hab.geometry,
        out_shape=out_shape,
        fill=0,
        out=None,
        all_touched=True,
        default_value=1,
        dtype=None,
    )

In [214]:
# Apply the size filter to the Guyane BDTOPO mask.
mask_filtred = BD_TOPO_petite_hab(image, mask_BDTOPO)

In [215]:
# Overlay the size-filtered mask on the first three bands of the Guyane tile.
fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(np.transpose(image.array, (1, 2, 0))[:,:,:3])
ax.imshow(mask_filtred, alpha=0.3)

Ça ne marche pas : les polygones des bidonvilles englobent parfois de gros amas d'habitations de fortune et ont donc une grande taille d'amas.