In [1]:
import leafmap
from tqdm import tqdm
import datetime
import geopandas as gpd
import pandas as pd
import os
from pathlib import Path

# Switch into 'src' unless the notebook is already running from there,
# presumably so the relative '../metadata/...' paths used in later cells resolve.
if Path.cwd().name != 'src':
    os.chdir('./src')

In [2]:
# Load the semicolon-separated table of events and their dates
events_df = pd.read_csv('../metadata/dateEventi.csv', sep=';')

# Map every 'Aligned name' entry to the 'date' found on the same row
event2date = dict(zip(events_df['Aligned name'], events_df['date']))

In [4]:
# Fetch the collection names exposed by leafmap's Maxar helper; the bare name
# on the last line makes the notebook display the result.
all_collections = leafmap.maxar_collections()
all_collections

['BayofBengal-Cyclone-Mocha-May-23',
 'Emilia-Romagna-Italy-flooding-may23',
 'Gambia-flooding-8-11-2022',
 'Hurricane-Fiona-9-19-2022',
 'Hurricane-Ian-9-26-2022',
 'Hurricane-Idalia-Florida-Aug23',
 'Iceland-Volcano_Eruption-Dec-2023',
 'India-Floods-Oct-2023',
 'Indonesia-Earthquake22',
 'Japan-Earthquake-Jan-2024',
 'Kahramanmaras-turkey-earthquake-23',
 'Kalehe-DRC-Flooding-5-8-23',
 'Libya-Floods-Sept-2023',
 'Marshall-Fire-21-Update',
 'Maui-Hawaii-fires-Aug-23',
 'McDougallCreekWildfire-BC-Canada-Aug-23',
 'Morocco-Earthquake-Sept-2023',
 'NWT-Canada-Aug-23',
 'Nepal-Earthquake-Apr-2015',
 'Nepal-Earthquake-Nov-2023',
 'New-Zealand-Flooding23',
 'SmokeHouseCreek-Wildfires-Texas-Mar24',
 'Sudan-flooding-8-22-2022',
 'afghanistan-earthquake22',
 'cyclone-emnati22',
 'ghana-explosion22',
 'kentucky-flooding-7-29-2022',
 'pakistan-flooding22',
 'shovi-georgia-landslide-8Aug23',
 'southafrica-flooding22',
 'tonga-volcano21',
 'volcano-indonesia21',
 'yellowstone-flooding22']

### Funzioni per controllare se le cartelle sono full

In [12]:
def get_true_img_set(collection_id, child_id):
    """Return the set of quadkeys listed for one child of a collection.

    Reads ``<collection_id>.geojson`` from the local metadata folder, keeps the
    rows whose ``catalog_id`` equals ``child_id`` and collects their
    ``quadkey`` values.

    Args:
        collection_id: Collection name; also the stem of the geojson file.
        child_id: Value matched against the ``catalog_id`` column.

    Returns:
        set: Distinct ``quadkey`` values of the matching rows.
    """
    geojson_path = os.path.join('../metadata/from_github_maxar_metadata/datasets/', collection_id + '.geojson')
    items = gpd.read_file(geojson_path)
    belongs_to_child = items['catalog_id'] == child_id
    return set(items.loc[belongs_to_child, 'quadkey'].values)

### Funzioni per downloadare eventi

In [5]:
def get_pre_post_gdf_local(collection_id, event2date = event2date, local_gdf = True):
    """Split a collection's items into pre-event and post-event GeoDataFrames.

    Args:
        collection_id: Collection name; key into ``event2date`` and, when
            ``local_gdf`` is true, stem of the local geojson metadata file.
        event2date: Mapping from collection name to event date.
        local_gdf: If true, read the items from the local geojson metadata;
            otherwise query every child collection through leafmap.

    Returns:
        tuple: ``(pre_gdf, post_gdf)`` -- the rows whose ``datetime`` is before
        the event date and the rows at or after it. ``(None, None)`` when the
        collection has no entry in ``event2date`` or the remote query yields
        no child collections.
    """
    # Retrieve the event date
    try:
        event_date = event2date[collection_id] #'2023-10-04'
    except KeyError:
        # Only a missing key means "unknown event"; other errors should surface.
        print("ERROR: Event date not found!!!")
        return None, None

    # Create the geodataframe
    if local_gdf:
        gdf = gpd.read_file(os.path.join('../metadata/from_github_maxar_metadata/datasets/', collection_id + '.geojson'))
    else:
        # Collect one frame per child and concatenate once, so that every
        # child contributes its items (not just the last one iterated).
        child_frames = [
            leafmap.maxar_items(
                collection_id = collection_id,
                child_id = child_id,
                return_gdf=True,
                assets=['visual'],
            )
            for child_id in tqdm(leafmap.maxar_child_collections(collection_id))
        ]
        if not child_frames:
            # pd.concat rejects an empty list; report it like the other failure.
            print("ERROR: No items found!!!")
            return None, None
        gdf = pd.concat(child_frames)

    # Split the geodataframe around the event date
    pre_gdf = gdf[gdf['datetime'] < event_date]
    post_gdf = gdf[gdf['datetime'] >= event_date]

    print('Collection_id:',collection_id,'\nEvent date:', event_date)

    # Rows for which neither comparison holds would end up in neither half.
    if pre_gdf.shape[0] + post_gdf.shape[0] == gdf.shape[0]:
        print("OK: All items are accounted for\n")
    else:
        print("ERROR: Some items are missing!!!\n")

    print("pre_gdf", pre_gdf.shape)
    print("post_gdf", post_gdf.shape)

    return pre_gdf, post_gdf

def download_event(collection_id, out_dir_root = "/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/"):
    """Download the pre- and post-event 'visual' assets of one collection.

    The items come from ``get_pre_post_gdf_local``; each half is handed to
    ``leafmap.maxar_download`` with the output folder
    ``<out_dir_root>/<collection_id>/pre/`` or ``.../post/``.

    Args:
        collection_id: Name of the collection to download.
        out_dir_root: Folder under which the per-collection folders live.

    Returns:
        None. Nothing is downloaded when the pre/post split is unavailable.
    """
    pre_gdf, post_gdf = get_pre_post_gdf_local(collection_id)
    if pre_gdf is None or post_gdf is None:
        return

    # 'pre' is processed before 'post'; the trailing "" keeps the final separator.
    for phase, phase_gdf in (('pre', pre_gdf), ('post', post_gdf)):
        asset_urls = phase_gdf['visual'].to_list()
        target_dir = os.path.join(out_dir_root, collection_id, phase, "")
        leafmap.maxar_download(asset_urls, out_dir = target_dir)
    


In [None]:
# Display the event-name -> date mapping built from the CSV.
event2date

In [7]:
# Download the pre- and post-event imagery of a single collection, using the
# default output root of download_event.
download_event('New-Zealand-Flooding23')

Collection_id: New-Zealand-Flooding23 
Event date: 2023-01-27
OK: All items are accounted for

pre_gdf (23, 20)
post_gdf (14, 20)
/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/New-Zealand-Flooding23/pre/10300100DE4D9300/213311212301.tif already exists. Skipping...
/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/New-Zealand-Flooding23/pre/10300100DE4D9300/213311212303.tif already exists. Skipping...
/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/New-Zealand-Flooding23/pre/10300100DE4D9300/213311212310.tif already exists. Skipping...
/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/New-Zealand-Flooding23/pre/10300100DE4D9300/213311212311.tif already exists. Skipping...
/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/New-Zealand-Flooding23/pre/10300100DE4D9300/213311212312.tif already exists. Skipping...
/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/New-Zealand-Flooding23/pre/10300100DE4D9300/213311212313.tif already ex

### Funzioni non utilizzate per downloadare eventi: cella con tutte le funzioni per il download

In [35]:
def isChildCorrectlyDownloaded_(collection_id, child_id, out_dir_root = "/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/"):
    """Report whether ``child_id`` appears among both the pre and post items.

    The check only looks at the ``catalog_id`` values of the two frames
    returned by ``get_pre_post_gdf``; ``out_dir_root`` is accepted but not
    used, so nothing on disk is inspected.

    NOTE(review): ``get_pre_post_gdf`` is not defined in the visible cells
    (only ``get_pre_post_gdf_local`` is) -- confirm where it comes from.

    Args:
        collection_id: Name of the collection to look up.
        child_id: Catalog id searched for in both frames.
        out_dir_root: Unused.

    Returns:
        True if ``child_id`` is present in both frames, False otherwise, and
        None when the pre/post split is unavailable.
    """
    pre_gdf, post_gdf = get_pre_post_gdf(collection_id)
    if pre_gdf is None or post_gdf is None:
        return

    pre_ids = pre_gdf['catalog_id'].unique()
    post_ids = post_gdf['catalog_id'].unique()

    # Guard clause: bail out as soon as the child is missing from either side.
    if not (child_id in pre_ids and child_id in post_ids):
        print('Child not correctly downloaded')
        return False

    print('Child correctly downloaded')
    return True

def get_pre_post_gdf_for_n_children_(collection_id, n = 1, event2date = event2date):
    """Build the pre/post split from at most ``n`` child collections.

    Args:
        collection_id: Collection name; key into ``event2date``.
        n: Maximum number of child collections to query.
        event2date: Mapping from collection name to event date.

    Returns:
        tuple: ``(pre_gdf, post_gdf)`` -- the rows whose ``datetime`` is before
        the event date and the rows at or after it. ``(None, None)`` when the
        collection has no entry in ``event2date`` or no child was queried.
    """
    # Retrieve the event date
    try:
        event_date = event2date[collection_id] #'2023-10-04'
    except KeyError:
        print("ERROR: Event date not found!!!")
        return None, None

    # Create the geodataframe from the first n children
    child_frames = []
    for child_id in tqdm(leafmap.maxar_child_collections(collection_id)):
        if len(child_frames) == n:
            break
        # TODO: the original had an unfinished `if` here (a syntax error),
        # annotated "check if the child is entirely downloaded". That check is
        # still not implemented; every child is queried.
        child_frames.append(
            leafmap.maxar_items(
                collection_id = collection_id,
                child_id = child_id,
                return_gdf=True,
                assets=['visual'],
            )
        )

    if not child_frames:
        # pd.concat rejects an empty list (e.g. n == 0 or no children).
        print("ERROR: No items found!!!")
        return None, None
    gdf = pd.concat(child_frames)

    # The error path returns a 2-tuple, so the success path must as well.
    pre_gdf = gdf[gdf['datetime'] < event_date]
    post_gdf = gdf[gdf['datetime'] >= event_date]
    return pre_gdf, post_gdf

def download_event_(collection_id):
    """Download the pre- and post-event 'visual' assets of one collection.

    Same flow as ``download_event`` but with a fixed output root and with the
    split taken from ``get_pre_post_gdf``. An early exit for collections whose
    folder already exists was left disabled by the author.

    NOTE(review): ``get_pre_post_gdf`` is not defined in the visible cells
    (only ``get_pre_post_gdf_local`` is) -- confirm where it comes from.

    Args:
        collection_id: Name of the collection to download.

    Returns:
        None. Nothing is downloaded when the pre/post split is unavailable.
    """
    out_dir_root = "/nfs/projects/overwatch/maxar-segmentation/maxar-open-data/"

    pre_gdf, post_gdf = get_pre_post_gdf(collection_id)
    if pre_gdf is None or post_gdf is None:
        return

    # 'pre' is processed before 'post'; the trailing "" keeps the final separator.
    for phase, phase_gdf in (('pre', pre_gdf), ('post', post_gdf)):
        asset_urls = phase_gdf['visual'].to_list()
        target_dir = os.path.join(out_dir_root, collection_id, phase, "")
        leafmap.maxar_download(asset_urls, out_dir = target_dir)

def reorganize_(collection_id):
    """Print the catalog ids found among the pre- and post-event items.

    NOTE(review): ``get_pre_post_gdf`` is not defined in the visible cells
    (only ``get_pre_post_gdf_local`` is) -- confirm where it comes from.

    Args:
        collection_id: Name of the collection to inspect.

    Returns:
        None. Nothing is printed when the pre/post split is unavailable.
    """
    pre_gdf, post_gdf = get_pre_post_gdf(collection_id)
    # Same guard as the sibling functions: a (None, None) result would
    # otherwise fail with a TypeError on the column lookup below.
    if pre_gdf is None or post_gdf is None:
        return

    pre_childs = pre_gdf['catalog_id'].unique()
    post_childs = post_gdf['catalog_id'].unique()

    print('Pre-event images:', pre_childs, '\nPost-event images:', post_childs)