In [19]:
import leafmap
from tqdm import tqdm
import datetime
import geopandas as gpd
import pandas as pd
import os

In [20]:
# Load the event table; the file is semicolon-delimited.
events_df = pd.read_csv('dateEventi.csv', sep=';')

# Lookup table: event name ('Aligned name' column) -> event date ('date'
# column). If a name appears on several rows, the last row wins.
event2date = dict(zip(events_df['Aligned name'], events_df['date']))

In [21]:
# Ask leafmap for the available Maxar collections and show the result as the
# cell output; these names are what the helpers below take as `collection_id`.
all_collections = leafmap.maxar_collections()
all_collections

['BayofBengal-Cyclone-Mocha-May-23',
 'Emilia-Romagna-Italy-flooding-may23',
 'Gambia-flooding-8-11-2022',
 'Hurricane-Fiona-9-19-2022',
 'Hurricane-Ian-9-26-2022',
 'Hurricane-Idalia-Florida-Aug23',
 'India-Floods-Oct-2023',
 'Indonesia-Earthquake22',
 'Kahramanmaras-turkey-earthquake-23',
 'Kalehe-DRC-Flooding-5-8-23',
 'Libya-Floods-Sept-2023',
 'Marshall-Fire-21-Update',
 'Maui-Hawaii-fires-Aug-23',
 'McDougallCreekWildfire-BC-Canada-Aug-23',
 'Morocco-Earthquake-Sept-2023',
 'NWT-Canada-Aug-23',
 'New-Zealand-Flooding22',
 'New-Zealand-Flooding23',
 'Sudan-flooding-8-22-2022',
 'afghanistan-earthquake22',
 'cyclone-emnati22',
 'ghana-explosion22',
 'kentucky-flooding-7-29-2022',
 'pakistan-flooding22',
 'shovi-georgia-landslide-8Aug23',
 'southafrica-flooding22',
 'tonga-volcano21',
 'volcano-indonesia21',
 'yellowstone-flooding22']

### Funzioni per controllare se le cartelle sono full

In [22]:
def get_true_img_set(collection_id, child_id):
    """Return the set of quadkeys recorded for one child of a collection.

    Reads ``from_github_maxar_metadata/datasets/<collection_id>.geojson``,
    keeps the rows whose ``catalog_id`` equals ``child_id`` and collects the
    ``quadkey`` values of those rows into a set.
    """
    metadata_path = os.path.join('from_github_maxar_metadata/datasets/', collection_id + '.geojson')
    collection_gdf = gpd.read_file(metadata_path)
    belongs_to_child = collection_gdf['catalog_id'] == child_id
    return set(collection_gdf.loc[belongs_to_child, 'quadkey'].values)

### Funzioni per downloadare eventi

In [17]:
def get_pre_post_gdf_local(collection_id, event2date = event2date, local_gdf = True):
    """Split the items of a collection into pre-event and post-event frames.

    Parameters
    ----------
    collection_id : str
        Name of the collection. It is the key looked up in ``event2date`` and
        the stem of the local ``.geojson`` metadata file.
    event2date : dict
        Mapping from collection name to event date. The default is bound when
        this cell is executed, so re-run the cell after rebuilding the mapping.
    local_gdf : bool
        If True, read the metadata from
        ``from_github_maxar_metadata/datasets/<collection_id>.geojson``.
        If False, query every child collection through leafmap and
        concatenate the per-child results.

    Returns
    -------
    tuple
        ``(pre_gdf, post_gdf)``: rows whose ``datetime`` is before the event
        date, and rows whose ``datetime`` is on or after it. ``(None, None)``
        is returned when the collection has no known event date or when the
        remote query yields no child collections.
    """
    # Retrieve the event date. Only a missing key means "unknown event";
    # any other error is a real problem and is allowed to propagate.
    try:
        event_date = event2date[collection_id] #'2023-10-04'
    except KeyError:
        print("ERROR: Event date not found!!!")
        return None, None

    # Create the geodataframe
    if local_gdf:
        gdf = gpd.read_file(os.path.join('from_github_maxar_metadata/datasets/', collection_id + '.geojson'))
    else:
        # Collect one frame per child and concatenate once at the end, so
        # that every child contributes (not only the last one fetched).
        child_gdfs = []
        for child_id in tqdm(leafmap.maxar_child_collections(collection_id)):
            child_gdfs.append(
                leafmap.maxar_items(
                    collection_id = collection_id,
                    child_id = child_id,
                    return_gdf=True,
                    assets=['visual'],
                )
            )
        if not child_gdfs:
            print("ERROR: No items found for this collection!!!")
            return None, None
        gdf = pd.concat(child_gdfs)

    # Split the geodataframe around the event date
    pre_gdf = gdf[gdf['datetime'] < event_date]
    post_gdf = gdf[gdf['datetime'] >= event_date]

    print('Collection_id:',collection_id,'\nEvent date:', event_date)

    # Sanity check: a row lands in neither half only if both comparisons are
    # False for it (presumably a missing datetime -- TODO confirm).
    if pre_gdf.shape[0] + post_gdf.shape[0] == gdf.shape[0]:
        print("OK: All items are accounted for\n")
    else:
        print("ERROR: Some items are missing!!!\n")

    print("pre_gdf", pre_gdf.shape)
    print("post_gdf", post_gdf.shape)

    return pre_gdf, post_gdf

def download_event(collection_id, out_dir_root = "/mnt/data2/vaschetti_data/maxar/"):
    """Download the 'visual' assets of an event into ``pre`` and ``post`` folders.

    The items are split with ``get_pre_post_gdf_local``; if that returns
    ``None`` for either half, nothing is downloaded. Otherwise the 'visual'
    column of each half is passed to ``leafmap.maxar_download`` with the
    output directory ``<out_dir_root>/<collection_id>/<pre|post>/``.
    """
    pre_gdf, post_gdf = get_pre_post_gdf_local(collection_id)
    if pre_gdf is None or post_gdf is None:
        return

    # Pre-event images first, then post-event images.
    for phase, phase_gdf in (('pre', pre_gdf), ('post', post_gdf)):
        visual_assets = phase_gdf['visual'].to_list()
        # The trailing "" makes the joined path end with a separator.
        target_dir = os.path.join(out_dir_root, collection_id, phase, "")
        leafmap.maxar_download(visual_assets, out_dir = target_dir)
    


In [23]:
# Show the event-name -> event-date mapping built from dateEventi.csv.
event2date

{'afghanistan-earthquake22': '2022-06-21',
 'BayofBengal-Cyclone-Mocha-May-23': '2023-05-23',
 'cyclone-emnati22': '2022-02-22',
 'Emilia-Romagna-Italy-flooding-may23': '2023-05-23',
 'Gambia-flooding-8-11-2022': '2022-08-09',
 'Hurricane-Fiona-9-19-2022': '2022-09-19',
 'Hurricane-Ian-9-26-2022': '2022-09-26',
 'Hurricane-Idalia-Florida-Aug23': '2023-08-29',
 'India-Floods-Oct-2023': '2023-10-04',
 'Indonesia-Earthquake22': '2022-11-21',
 'Kahramanmaras-turkey-earthquake-23': '2023-02-06',
 'Kalehe-DRC-Flooding-5-8-23': '2023-05-08',
 'kentucky-flooding-7-29-2022': '2022-07-29',
 'Libya-Floods-Sept-2023': '2023-09-12',
 'Marshall-Fire-21-Update': '2022-01-01',
 'Maui-Hawaii-fires-Aug-23': '2023-08-08',
 'Morocco-Earthquake-Sept-2023': '2023-09-09',
 'New-Zealand-Flooding23': '2023-01-27',
 'NWT-Canada-Aug-23': '2023-08-16',
 'pakistan-flooding22': '2022-07-26',
 'shovi-georgia-landslide-8Aug23': '2023-08-05',
 'southafrica-flooding22': '2022-04-13',
 'Sudan-flooding-8-22-2022': '2022-

In [None]:
# Download the pre- and post-event images of one collection, using the
# default output root of download_event.
download_event('southafrica-flooding22')

### Funzioni non utilizzate per downloadare eventi: cella con tutte le funzioni per il download

In [35]:
def isChildCorrectlyDownloaded_(collection_id, child_id, out_dir_root = "/mnt/data2/vaschetti_data/maxar/"):
    """Report whether ``child_id`` appears in both the pre- and post-event items.

    Returns ``True`` when ``child_id`` is among the unique ``catalog_id``
    values of both frames, ``False`` otherwise, and ``None`` when the
    pre/post frames could not be built. A message is printed in the
    ``True``/``False`` cases. ``out_dir_root`` is accepted but not used.

    NOTE(review): ``get_pre_post_gdf`` is not defined in the visible part of
    the notebook (only ``get_pre_post_gdf_local`` is) -- confirm it exists.
    """
    pre_gdf, post_gdf = get_pre_post_gdf(collection_id)
    if pre_gdf is None or post_gdf is None:
        return

    pre_ids = pre_gdf['catalog_id'].unique()
    post_ids = post_gdf['catalog_id'].unique()

    in_both_halves = child_id in pre_ids and child_id in post_ids
    print('Child correctly downloaded' if in_both_halves else 'Child not correctly downloaded')
    return in_both_halves

def get_pre_post_gdf_for_n_children_(collection_id, n = 1, event2date = event2date):
    """Build pre-/post-event frames from the first ``n`` child collections.

    Parameters
    ----------
    collection_id : str
        Name of the collection; also the key looked up in ``event2date``.
    n : int
        Maximum number of child collections to query through leafmap.
    event2date : dict
        Mapping from collection name to event date. The default is bound when
        this cell is executed.

    Returns
    -------
    tuple
        ``(pre_gdf, post_gdf)``: rows whose ``datetime`` is before the event
        date, and rows whose ``datetime`` is on or after it. ``(None, None)``
        is returned when the event date is unknown or no child was fetched.
    """
    # Retrieve the event date; only a missing key counts as "not found".
    try:
        event_date = event2date[collection_id] #'2023-10-04'
    except KeyError:
        print("ERROR: Event date not found!!!")
        return None, None

    # Gather the items of at most n children, one frame per child.
    child_gdfs = []
    for child_id in tqdm(leafmap.maxar_child_collections(collection_id)):
        if len(child_gdfs) == n:
            break
        # TODO: the draft intended to check here whether the child is already
        # entirely downloaded, but the condition was never written.
        child_gdfs.append(
            leafmap.maxar_items(
                collection_id = collection_id,
                child_id = child_id,
                return_gdf=True,
                assets=['visual'],
            )
        )

    if not child_gdfs:
        print("ERROR: No items found for this collection!!!")
        return None, None
    gdf = pd.concat(child_gdfs)

    # Split around the event date, in the same way as get_pre_post_gdf_local.
    pre_gdf = gdf[gdf['datetime'] < event_date]
    post_gdf = gdf[gdf['datetime'] >= event_date]
    return pre_gdf, post_gdf

def download_event_(collection_id):
    """Download the 'visual' assets of an event into ``pre`` and ``post`` folders.

    Uses a fixed output root and does nothing when either half of the
    pre/post split is ``None``. A short-circuit for collections whose output
    folder already exists was sketched here earlier but is not active.

    NOTE(review): ``get_pre_post_gdf`` is not defined in the visible part of
    the notebook (only ``get_pre_post_gdf_local`` is) -- confirm it exists.
    """
    out_dir_root = "/mnt/data2/vaschetti_data/maxar/"

    pre_gdf, post_gdf = get_pre_post_gdf(collection_id)
    if pre_gdf is None or post_gdf is None:
        return

    # Pre-event images first, then post-event images.
    for phase, phase_gdf in (('pre', pre_gdf), ('post', post_gdf)):
        visual_assets = phase_gdf['visual'].to_list()
        # The trailing "" makes the joined path end with a separator.
        target_dir = os.path.join(out_dir_root, collection_id, phase, "")
        leafmap.maxar_download(visual_assets, out_dir = target_dir)

def reorganize_(collection_id):
    """Print the unique ``catalog_id`` values of the pre- and post-event items.

    NOTE(review): ``get_pre_post_gdf`` is not defined in the visible part of
    the notebook (only ``get_pre_post_gdf_local`` is) -- confirm it exists.
    There is no guard here for a ``None`` result from that call.
    """
    pre_gdf, post_gdf = get_pre_post_gdf(collection_id)

    # Unique ids of each half, pre-event first.
    pre_childs, post_childs = (
        half['catalog_id'].unique() for half in (pre_gdf, post_gdf)
    )

    print('Pre-event images:', pre_childs, '\nPost-event images:', post_childs)