# Saving, exporting and downloading data

## Package imports

In [None]:
import sys
from pathlib import Path
import urllib.request
from IPython.display import display, clear_output

sys.path.append("..")
from pytreedb import db

## Import database

Define (local) MongoDB connection and import database from URL

In [None]:
# Local file the database is saved to (see `mydb.save()` further down).
mydbfile = "pytree.db"
# Test database hosted in the pytreedb GitHub repository.
db_url = "https://github.com/3dgeo-heidelberg/pytreedb/raw/main/data/test/data.db"
mydb = db.PyTreeDB(dbfile=mydbfile)
# NOTE(review): `overwrite=True` presumably replaces previously imported data
# instead of adding to it -- confirm against the pytreedb documentation.
mydb.import_db(db_url, overwrite=True)

Define the path to a folder to which all data will be exported.

In [None]:
# Root folder for everything this notebook exports.
out_dir = "../export"
# Create the folder together with any missing parents. `exist_ok=True` makes
# the cell safe to re-run and avoids the race between a separate existence
# check and the actual creation.
Path(out_dir).mkdir(parents=True, exist_ok=True)

## Save the database

With the `save` method, we can save the database to our local database file (`pytree.db`). 

In [None]:
# Without an argument, the database is written to its own file (`mydbfile`).
mydb.save()

We can also save the database to a new file, e.g., `new_pytree.db`.

In [None]:
# Passing a file name saves the database to that file, here "new_pytree.db".
mydb.save(f"new_{mydbfile}")

## Export as GeoJSON

Before exporting the data as GeoJSON, let's filter it first. We are only interested in trees for which TLS data is available and which have a DBH of at least 80 cm.

In [None]:
# Keep only trees that have TLS data and a DBH of at least 80 cm; both
# conditions must hold ("$and").
subset = mydb.query(
    {
        "$and": [
            {"properties.data.mode": "TLS"},
            {"properties.measurements.DBH_cm": {"$gte": 80}},
        ]
    }
)

Using the `export_data` method in combination with `get_ids`, we export the filtered trees as GeoJSON files to a subfolder `geojsons_subset` in our output folder. The method returns the paths of the files that were written.

In [None]:
# Restrict the export to the IDs of the trees matched by `subset`. The returned
# paths of the written files are shown as the cell output.
mydb.export_data(Path(out_dir) / "geojsons_subset", trees=mydb.get_ids(subset))

We can also export all trees by not providing a list of `trees`:

In [None]:
# Without `trees`, every tree in the database is exported; store the returned
# paths of the written files.
all_files_written = mydb.export_data(Path(out_dir) / "geojsons_all")

## Download LAZ point clouds

We also want to download the point clouds of the filtered trees.

Let's define some functions for downloading files from a URL.

In [None]:
def reporthook(count, block_size, total_size):
    """Print the download progress on a single, continuously updated line.

    Intended as the ``reporthook`` callback of ``urllib.request.urlretrieve``.

    :param count: number of blocks transferred so far
    :param block_size: size of one block in bytes
    :param total_size: total size of the file in bytes; zero or negative if
        the size is empty or unknown
    """
    transferred = count * block_size
    if total_size <= 0:
        # `urlretrieve` passes -1 when the server does not report a size and 0
        # for an empty file. A percentage is meaningless (or a division by
        # zero) in both cases, so report the transferred bytes instead.
        print("\r...{} bytes".format(transferred), end="")
        return
    # The last block is usually only partially filled, so cap at 100 %.
    percent = min(int(transferred * 100 / total_size), 100)
    print("\r...{}%".format(percent), end="")


def download_data(filename, url):
    """Download ``url`` to ``filename`` unless that file already exists.

    :param filename: target path of the downloaded file (``str`` or ``Path``);
        missing parent folders are created
    :param url: address to download the file from
    """
    target = Path(filename)
    if target.exists():
        print("File already exists. Great!", end="\r")
        return

    clear_output(wait=True)
    display(f"Downloading data from '{url}' to '{filename}'. Please wait ...")
    # Also create intermediate folders and tolerate an existing one.
    target.parent.mkdir(parents=True, exist_ok=True)

    # Download to a temporary sibling file and rename it only on success.
    # Otherwise an interrupted download would leave a truncated file behind
    # that the existence check above would accept as complete on the next run.
    partial = target.with_name(target.name + ".part")
    try:
        urllib.request.urlretrieve(url, partial, reporthook=reporthook)
    except BaseException:
        # Includes KeyboardInterrupt: clean up, then let the error propagate.
        if partial.exists():
            partial.unlink()
        raise
    partial.replace(target)
    display("Download finished")

We first create a list of download URLs, which are retrieved from the tree dictionaries. Then, we download the data to a `laz` subfolder in our output folder. The download will take a while.

In [None]:
download_links = mydb.get_pointcloud_urls(subset)
output_dir = Path(out_dir) / "laz"

# Initialise the counter so that the summary below also works when there is
# nothing to download (the loop variable would otherwise be undefined).
n_files = 0
for n_files, url in enumerate(download_links, start=1):
    # Name the local file after the last path component of the URL.
    download_data(output_dir / url.split("/")[-1], url)
clear_output(wait=True)
print(f"{n_files} files succesfully downloaded to {output_dir}")

## Export CSV files

We can also export the data as CSV files: one for general tree metrics and one for tree measurements (one row per measurement source).

In [None]:
# Write the CSV files for the filtered trees directly into the output folder.
mydb.convert_to_csv(Path(out_dir), trees=mydb.get_ids(subset))