# GEDI Data processing pipeline

In [2]:
# Convenient jupyter setup
%load_ext autoreload
%autoreload 2

## Pipeline-steps:

1. Download the zip-file
2. Unzip the file
3. For each granule in the unzipped file
    - Load the hdf5 file into a GEDI object
    - Extract the main data
    - Save the main data
    - Close the object (delete from RAM)
4. Upload file granule to GCloud
5. Remove that granule
6. Remove zipped file

### Step 1: Download the file

In [3]:
from src.constants import GEDI_L2A_PATH
from src.utils.unzip import unzip
from src.utils.os import list_content
import os
import pathlib
save_dir = GEDI_L2A_PATH/ "v002" / "amazon_basin"

In [25]:
import subprocess
%%time
save_dir = GEDI_L2A_PATH/ "v002" / "amazon_basin"

# change working directory
#os.chdir(save_dir)
# Download the data
#subprocess.call(["wget", "https://e4ftl01.cr.usgs.gov/ops/esir/52374.zip?1"])

UsageError: Line magic function `%%time` not found.


### Step 2: Unzip the file

In [5]:
zip_path = list_content(save_dir)[0]
order_number = zip_path.name.split('.')[0]
zip_number = zip_path.name.split("?")[1]
zip_path

PosixPath('/gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/52365.zip?1')

In [None]:
%%time
tmp_unzip_path = save_dir / f"unzip{order_number}-{zip_number}"
tmp_unzip_path.mkdir(parents=True, exist_ok=True)
unzip(zip_path, out_path=tmp_unzip_path, remove_archive_name=False)

### Step 3: Process the files in the zip

In [24]:
from src.data.gedi_granule import GediGranule
import geopandas as gpd
import pandas as pd

In [71]:
granules = 
granule_name = granule.file_path.name.split('.')[0]

In [70]:
granule = GediGranule(granule_path)

NameError: name 'granule_path' is not defined

In [57]:
%%time
# Gather data for whole granule into one table
granule_data = []
for beam in granule.iter_beams():
    granule_data.append(beam.main_data)
granule_data = pd.concat(granule_data, ignore_index=True)

CPU times: user 17.9 s, sys: 4.52 s, total: 22.4 s
Wall time: 24.4 s


In [68]:
%%time
save_name = save_dir / f"order{order_number}/{granule_name}.gpkg"
save_name.parent.mkdir(exist_ok=True, parents=True)
granule_data.to_file(save_name, driver="GPKG")

CPU times: user 1min 50s, sys: 7.15 s, total: 1min 57s
Wall time: 2min 12s


### Step 4: Upload granule to GCloud

In [None]:
#skip

### Step 5: Remove granule

In [69]:
# From RAM
del granule_data
del beam
del granule

In [72]:
import shutil
# From disk
shutil.rmtree(tmp_unzip_path)
shutil.rmtree(zip_path)

# Combined:

### Orders

In [3]:
orders = {
    52374: 83,  # order number, order length
    52375: 81,
    52376: 138,
    52377: 88,
    52378: 87,
    52379: 152,
    52380: 104,
    52381: 104,
    52382: 127
}

In [22]:
for order in orders.keys():
    download_path = pathlib.Path(save_dir) / f"download_{order}"
    download_path.mkdir(exist_ok=True, parents=True)
    
    with open(download_path / "README.txt", "w") as file:
        text = "\n".join([generate_link(order, zip_number) for zip_number in range(1, orders[order]+1)])
        file.write(text)

### Download script

In [4]:
import subprocess
import os
import threading
import concurrent
from tqdm.autonotebook import tqdm
from src.constants import GEDI_L2A_PATH

def generate_link(order_number, zip_number):
    return f"https://e4ftl01.cr.usgs.gov/ops/esir/{order_number}.zip?{zip_number}"

def wget_file(link):
    try:
        subprocess.call(["wget", link])
        return True
    except Exception as e:
        print(e)
        return False

def download_gedi_order(order_number, n_zips, max_threads, save_dir):
    
    # change working directory
    os.chdir(save_dir)
    
    # set up all links to download
    links = [generate_link(order_number, zip_number) for zip_number in range(1, n_zips+1)]
    results = {}
    
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_threads, 
                                               thread_name_prefix=f"Order_{order_number}") as executor:
        futures = {executor.submit(wget_file, link):link for link in links}
        
        for future in tqdm(concurrent.futures.as_completed(futures), total=len(links)):
            try:
                name = futures[future]
                was_successful = future.result()
                print(f"{name} successful: {was_successful}")
                results[name] = was_successful
            except Exception as e:
                print(e)
            
    return results

In [18]:
download_gedi_order(52374, 83, max_threads=10, save_dir=save_dir)

  0%|          | 0/83 [00:00<?, ?it/s]

AttributeError: 'bool' object has no attribute '_condition'

### Processing script

In [7]:
from src.data.gedi_processing_pipeline import process_gedi_l2a_zip, _extract_granule_data, logger
from tqdm.autonotebook import tqdm
save_dir = GEDI_L2A_PATH/ "v002" / "amazon_basin"
order_folder = save_dir / "download_52374"

In [8]:
zip_files = list(sorted(order_folder.glob("*.zip?*"), 
                        key=lambda path: int(path.name.split("?")[-1])))
#zip_files

In [None]:
for zip_file in tqdm(zip_files):
    process_gedi_l2a_zip(zip_file, save_dir)

  0%|          | 0/83 [00:00<?, ?it/s]

2021-05-31 21:52:53,273 DEBUG: Processing order zip 52374-1 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:34]


  0%|          | 0/7 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/wch4_w8j
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/wch4_w8j/README exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/README
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/notbndeo
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/notbndeo/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/h99tm2fx
Extracted file: /gws/nopw/j04/forecol/data/GEDI/

  0%|          | 0/6 [00:00<?, ?it/s]

2021-05-31 21:53:31,090 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 21:53:36,374 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]



This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.

  granule_data.to_feather(save_name)


2021-05-31 21:53:37,187 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432365567/processed_GEDI02_A_2019108093620_O01965_01_T05338_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 21:54:31,535 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432365567/processed_GEDI02_A_2019108093620_O01965_01_T05338_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 21:54:37,066 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432365568/processed_GEDI02_A_2019110215109_O02004_04_T03189_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 21:55:51,751 

  0%|          | 0/7 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/y8sduvtk
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/y8sduvtk/2432370683/processed_GEDI02_A_2019112201147_O02034_04_T01337_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/2432370683/processed_GEDI02_A_2019112201147_O02034_04_T01337_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/zdkt9wob
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/zdkt9wob/2432371896/processed_GEDI02_A_2019113205447_O02050_04_T03893_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/2432371896/processed_GEDI02_A_2019113205447_O02050_04_T03893_02_003_01_V002.h5
Temporary dir:

  0%|          | 0/7 [00:00<?, ?it/s]

2021-05-31 22:02:16,220 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/2432370683/processed_GEDI02_A_2019112201147_O02034_04_T01337_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:02:27,421 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/2432370683/processed_GEDI02_A_2019112201147_O02034_04_T01337_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 22:02:29,061 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-2/2432371896/processed_GEDI02_A_2019113205447_O02050_04_T03893_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:02:48,699 

  0%|          | 0/7 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/hgvloexr
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/hgvloexr/2432382366/processed_GEDI02_A_2019117064711_O02103_01_T01758_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/2432382366/processed_GEDI02_A_2019117064711_O02103_01_T01758_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/9w5m2xhf
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/9w5m2xhf/2432382989/processed_GEDI02_A_2019117190841_O02111_04_T04459_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/2432382989/processed_GEDI02_A_2019117190841_O02111_04_T04459_02_003_01_V002.h5
Temporary dir:

  0%|          | 0/7 [00:00<?, ?it/s]

2021-05-31 22:10:35,891 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/2432382366/processed_GEDI02_A_2019117064711_O02103_01_T01758_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:11:56,930 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/2432382366/processed_GEDI02_A_2019117064711_O02103_01_T01758_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 22:12:02,881 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-3/2432382989/processed_GEDI02_A_2019117190841_O02111_04_T04459_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:13:12,541 

  0%|          | 0/5 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/12shq1k1
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/12shq1k1/2432388640/processed_GEDI02_A_2019120181215_O02157_04_T05163_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/2432388640/processed_GEDI02_A_2019120181215_O02157_04_T05163_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/ht8c8qh3
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/ht8c8qh3/2432389146/processed_GEDI02_A_2019121050103_O02164_01_T00748_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/2432389146/processed_GEDI02_A_2019121050103_O02164_01_T00748_02_003_01_V002.h5
Temporary dir:

  0%|          | 0/5 [00:00<?, ?it/s]

2021-05-31 22:18:56,746 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/2432388640/processed_GEDI02_A_2019120181215_O02157_04_T05163_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:19:09,664 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/2432388640/processed_GEDI02_A_2019120181215_O02157_04_T05163_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 22:19:11,758 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-4/2432389146/processed_GEDI02_A_2019121050103_O02164_01_T00748_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:21:03,768 

  0%|          | 0/6 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/5gr7uovh
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/5gr7uovh/2432396674/processed_GEDI02_A_2019124040434_O02210_01_T00029_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/2432396674/processed_GEDI02_A_2019124040434_O02210_01_T00029_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/bl4hnue4
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/bl4hnue4/2432400166/processed_GEDI02_A_2019124162603_O02218_04_T01460_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/2432400166/processed_GEDI02_A_2019124162603_O02218_04_T01460_02_003_01_V002.h5
Temporary dir:

  0%|          | 0/6 [00:00<?, ?it/s]

2021-05-31 22:26:27,844 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/2432396674/processed_GEDI02_A_2019124040434_O02210_01_T00029_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:27:38,276 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/2432396674/processed_GEDI02_A_2019124040434_O02210_01_T00029_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 22:27:42,533 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-5/2432400166/processed_GEDI02_A_2019124162603_O02218_04_T01460_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:28:49,493 

  0%|          | 0/4 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/4aamnt87
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/4aamnt87/2432417775/processed_GEDI02_A_2019128021818_O02271_01_T02018_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/2432417775/processed_GEDI02_A_2019128021818_O02271_01_T02018_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/0fe4563k
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/0fe4563k/2432421596/processed_GEDI02_A_2019128143946_O02279_04_T04719_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/2432421596/processed_GEDI02_A_2019128143946_O02279_04_T04719_02_003_01_V002.h5
Temporary dir:

  0%|          | 0/4 [00:00<?, ?it/s]

2021-05-31 22:34:05,692 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/2432417775/processed_GEDI02_A_2019128021818_O02271_01_T02018_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:35:58,005 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/2432417775/processed_GEDI02_A_2019128021818_O02271_01_T02018_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 22:36:04,472 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-6/2432421596/processed_GEDI02_A_2019128143946_O02279_04_T04719_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:38:00,754 

  0%|          | 0/7 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/jq05i0h3
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/jq05i0h3/2432460367/processed_GEDI02_A_2019131012144_O02317_01_T05568_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/2432460367/processed_GEDI02_A_2019131012144_O02317_01_T05568_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/bmhgkb7t
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/bmhgkb7t/2432461478/processed_GEDI02_A_2019131134312_O02325_04_T01154_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/2432461478/processed_GEDI02_A_2019131134312_O02325_04_T01154_02_003_01_V002.h5
Temporary dir:

  0%|          | 0/7 [00:00<?, ?it/s]

2021-05-31 22:41:36,013 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/2432460367/processed_GEDI02_A_2019131012144_O02317_01_T05568_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:42:42,087 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/2432460367/processed_GEDI02_A_2019131012144_O02317_01_T05568_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 22:42:46,735 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-7/2432461478/processed_GEDI02_A_2019131134312_O02325_04_T01154_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:43:46,217 

  0%|          | 0/5 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/wzklvcgy
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/wzklvcgy/2432482199/processed_GEDI02_A_2019134233523_O02378_01_T03135_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/2432482199/processed_GEDI02_A_2019134233523_O02378_01_T03135_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/vdxwfnrp
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/vdxwfnrp/2432487097/processed_GEDI02_A_2019135115649_O02386_04_T01720_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/2432487097/processed_GEDI02_A_2019135115649_O02386_04_T01720_02_003_01_V002.h5
Temporary dir:

  0%|          | 0/5 [00:00<?, ?it/s]

2021-05-31 22:49:45,920 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/2432482199/processed_GEDI02_A_2019134233523_O02378_01_T03135_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:51:35,615 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/2432482199/processed_GEDI02_A_2019134233523_O02378_01_T03135_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 22:51:43,492 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-8/2432487097/processed_GEDI02_A_2019135115649_O02386_04_T01720_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:53:35,933 

  0%|          | 0/5 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/0m65din5
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/0m65din5/2432497566/processed_GEDI02_A_2019138110010_O02432_04_T05270_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/2432497566/processed_GEDI02_A_2019138110010_O02432_04_T05270_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/skxn8pvz
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/skxn8pvz/2432499419/processed_GEDI02_A_2019138214856_O02439_01_T00855_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/2432499419/processed_GEDI02_A_2019138214856_O02439_01_T00855_02_003_01_V002.h5
Temporary dir:

  0%|          | 0/5 [00:00<?, ?it/s]

2021-05-31 22:58:27,766 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/2432497566/processed_GEDI02_A_2019138110010_O02432_04_T05270_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 22:59:21,524 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/2432497566/processed_GEDI02_A_2019138110010_O02432_04_T05270_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 22:59:25,116 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-9/2432499419/processed_GEDI02_A_2019138214856_O02439_01_T00855_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 23:01:23,190 

  0%|          | 0/5 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/at5yhb23
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/at5yhb23/2432508975/processed_GEDI02_A_2019142200228_O02500_01_T05690_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/2432508975/processed_GEDI02_A_2019142200228_O02500_01_T05690_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/vaazbwjd
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/vaazbwjd/2432509119/processed_GEDI02_A_2019143082354_O02508_04_T01276_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/2432509119/processed_GEDI02_A_2019143082354_O02508_04_T01276_02_003_01_V002.h5
Temporar

  0%|          | 0/5 [00:00<?, ?it/s]

2021-05-31 23:07:01,140 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/2432508975/processed_GEDI02_A_2019142200228_O02500_01_T05690_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 23:08:26,553 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/2432508975/processed_GEDI02_A_2019142200228_O02500_01_T05690_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 23:08:32,694 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-10/2432509119/processed_GEDI02_A_2019143082354_O02508_04_T01276_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 23:10:03,1

  0%|          | 0/5 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/o4awrqob
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/o4awrqob/2432511624/processed_GEDI02_A_2019146073100_O02554_04_T01521_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/2432511624/processed_GEDI02_A_2019146073100_O02554_04_T01521_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/vyfa1tk9
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/vyfa1tk9/2432511914/processed_GEDI02_A_2019146182025_O02561_01_T04068_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/2432511914/processed_GEDI02_A_2019146182025_O02561_01_T04068_02_003_01_V002.h5
Temporar

  0%|          | 0/5 [00:00<?, ?it/s]

2021-05-31 23:14:35,566 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/2432511624/processed_GEDI02_A_2019146073100_O02554_04_T01521_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 23:16:36,778 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/2432511624/processed_GEDI02_A_2019146073100_O02554_04_T01521_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]
2021-05-31 23:16:44,637 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-11/2432511914/processed_GEDI02_A_2019146182025_O02561_01_T04068_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 23:17:33,5

  0%|          | 0/6 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-12/y4qvwxcw
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-12/y4qvwxcw/2432521900/processed_GEDI02_A_2019149172758_O02607_01_T04160_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-12/2432521900/processed_GEDI02_A_2019149172758_O02607_01_T04160_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-12/6zlcraqm
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-12/6zlcraqm/2432523768/processed_GEDI02_A_2019150055009_O02615_04_T01169_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-12/2432523768/processed_GEDI02_A_2019150055009_O02615_04_T01169_02_003_01_V002.h5
Temporar

  0%|          | 0/6 [00:00<?, ?it/s]

2021-05-31 23:22:07,330 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-12/2432521900/processed_GEDI02_A_2019149172758_O02607_01_T04160_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
2021-05-31 23:24:01,489 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-12/2432521900/processed_GEDI02_A_2019149172758_O02607_01_T04160_02_003_01_V002.h5 [in process_gedi_l2a_zip at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:53]


In [None]:
for zip_file in tqdm(zip_files):
    try:
        process_gedi_l2a_zip(zip_file, save_dir)
    except:
        print(zip_file)

In [34]:
%%time
order_number = zip_path.name.split(".")[0]
zip_number = zip_path.name.split("?")[1]
logger.debug("Processing order zip %s-%s", order_number, zip_number)

2021-05-31 21:31:14,633 DEBUG: Processing order zip 52374-1 [in <module> at <timed exec>:3]
CPU times: user 1.93 ms, sys: 0 ns, total: 1.93 ms
Wall time: 1.76 ms


In [35]:
%%time
# Step 2: unzipping
tmp_unzip_path = zip_path.parent / f"unzip{order_number}-{zip_number}"
tmp_unzip_path.mkdir(parents=True, exist_ok=True)
unzip(zip_path, out_path=tmp_unzip_path, remove_archive_name=False)

  0%|          | 0/7 [00:00<?, ?it/s]

Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/9gk7o0ec
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/9gk7o0ec/README exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/README
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/nic7f_zj
Extracted file: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/nic7f_zj/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5 exists: True
Moving file to /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5
Temporary dir: /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/na99llw6
Extracted file: /gws/nopw/j04/forecol/data/GEDI/

True

In [37]:
%%time
# Step 3: Find granules
granules = list(sorted(tmp_unzip_path.glob("*/*.h5")))
logger.debug("Found granules: %s", granules)

2021-05-31 21:32:36,900 DEBUG: Found granules: [PosixPath('/gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5'), PosixPath('/gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432365567/processed_GEDI02_A_2019108093620_O01965_01_T05338_02_003_01_V002.h5'), PosixPath('/gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432365568/processed_GEDI02_A_2019110215109_O02004_04_T03189_02_003_01_V002.h5'), PosixPath('/gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432366773/processed_GEDI02_A_2019111210128_O02019_04_T04474_02_003_01_V002.h5'), PosixPath('/gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432369840/processed_GEDI02_A_2019110092939_O01996_01_T03334_02_003_01_V002.h5'), PosixPath('/gws/nopw/j04/forecol/data/GEDI/level2A/v002/a

In [39]:
granule_path = granules[0]

In [41]:
%%time
granule_name = granule_path.name.split(".")[0]
save_name = save_dir / f"test.gpkg"

CPU times: user 41 µs, sys: 13 µs, total: 54 µs
Wall time: 61.5 µs


In [42]:
%%time
granule_data = _extract_granule_data(granule_path)

2021-05-31 21:33:58,869 DEBUG: Processing granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5 [in _extract_granule_data at /home/users/svm/Code/gedi_biomass_mapping/src/data/gedi_processing_pipeline.py:24]
CPU times: user 4.21 s, sys: 935 ms, total: 5.15 s
Wall time: 7.67 s


In [49]:
%%time
granule_name = granule_path.name.split(".")[0]
save_name = save_dir / f"test.gpkg"
logger.debug("Saving granule %s", granule_path)
save_name.parent.mkdir(exist_ok=True, parents=True)
granule_data.to_file(save_name, driver="GPKG")

2021-05-31 21:37:51,649 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5 [in <module> at <timed exec>:3]
CPU times: user 20.2 s, sys: 916 ms, total: 21.1 s
Wall time: 25.7 s


In [48]:
%%time
logger.debug("Saving granule %s", granule_path)
save_name.parent.mkdir(exist_ok=True, parents=True)
save_name = save_dir / f"test.geojson"
granule_data.to_file(save_name, driver="GeoJSON")

2021-05-31 21:37:26,917 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5 [in <module> at <timed exec>:1]
CPU times: user 21.9 s, sys: 1.01 s, total: 23 s
Wall time: 24.7 s


In [50]:
%%time
logger.debug("Saving granule %s", granule_path)
save_name = save_dir / f"test.geofeather"
save_name.parent.mkdir(exist_ok=True, parents=True)
granule_data.to_feather(save_name)

2021-05-31 21:38:17,437 DEBUG: Saving granule /gws/nopw/j04/forecol/data/GEDI/level2A/v002/amazon_basin/download_52374/unzip52374-1/2432361744/processed_GEDI02_A_2019112075017_O02026_01_T00059_02_003_01_V002.h5 [in <module> at <timed exec>:1]



This metadata specification does not yet make stability promises.  We do not yet recommend using this in a production setting unless you are able to rewrite your Parquet/Feather files.



CPU times: user 213 ms, sys: 15.2 ms, total: 228 ms
Wall time: 454 ms
