# Download Classification and Manually Edit

In [None]:
import geopandas as gpd
import boto3
import os
from tools.addImageCalc import addImageCalc
from pathlib import Path

### User defines the validation site and the directory where files will be downloaded

In [None]:
# Specify exactly one of PLANET_ID or SITE_NAME and leave the other as ''.
# If a chip has more than one planet image, PLANET_ID must be the one specified.
PLANET_ID = '20210924_082025_48_2424'
SITE_NAME = ''
# Exactly one identifier may be set. Truthiness (rather than len()) also treats
# None as "not specified" instead of raising a TypeError.
assert bool(PLANET_ID) ^ bool(SITE_NAME), (
    "Specify exactly one of PLANET_ID or SITE_NAME; leave the other as ''"
)

### Open AWS session and read image and image_calc tables

In [None]:
# NOTE(review): AWS_NO_SIGN_REQUEST is presumably intended for GDAL-based readers
# (unsigned S3 access); the boto3 session below is built from a named profile
# instead — confirm this variable is still needed.
os.environ["AWS_NO_SIGN_REQUEST"] = "YES"

# Session created from the local 'saml-pub' AWS profile; both S3 handles below share it.
session = boto3.session.Session(profile_name='saml-pub')
s3 = session.resource('s3')  # resource handle, used to fetch the GeoJSON tables
s3_client = session.client('s3')  # client handle, used by downloadImage_calc for file downloads

In [None]:
# S3 bucket from which the image.geojson and image_calc.geojson tables are read
bucket_name = 'opera-calval-database-dswx'

In [None]:
# Fetch image.geojson from the bucket and parse the response body with geopandas
obj = s3.Object(bucket_name,'image.geojson')
image_table_data = obj.get()['Body']  # body of the S3 GET response
imageTable = gpd.read_file(image_table_data)

In [None]:
# Fetch image_calc.geojson from the bucket and parse the response body with geopandas.
# Note: `obj` and `image_table_data` are reused here and now refer to the image_calc table.
obj = s3.Object(bucket_name,'image_calc.geojson')
image_table_data = obj.get()['Body']  # body of the S3 GET response
image_calcs = gpd.read_file(image_table_data)

In [None]:
# Resolve whichever of SITE_NAME / PLANET_ID was left empty from the image table and
# report how many matches were found. If more than one planet image exists for a chip,
# ensure the printed Planet ID matches the planet image used to generate the classification.
temp = imageTable[['image_name', 'site_name']]
df_site2image = temp.set_index('site_name')
df_image2site = temp.set_index('image_name')
if not PLANET_ID:
    # A list-like row selector plus an explicit column always yields a Series with one
    # entry per matching row. A scalar label yields a DataFrame (no .tolist()) as soon
    # as the site has several planet images. Unknown labels still raise KeyError.
    values = df_site2image.loc[[SITE_NAME], 'image_name'].tolist()
    PLANET_ID = values[0]
    print(f'There was {len(values)} planet images for this chip')
else:
    values = df_image2site.loc[[PLANET_ID], 'site_name'].tolist()
    SITE_NAME = values[0]
    print(f'There were {len(values)} chips for this planet_image')

(SITE_NAME, PLANET_ID)

In [None]:
# Absolute local directory holding the classification file(s) for this planet image;
# it is created (with any missing parents) if it does not exist yet.
download_dir = Path.cwd() / f'planet_images_cropped/{PLANET_ID}'
download_dir.mkdir(parents=True, exist_ok=True)

In [None]:
def downloadImage_calc(row,download_dir):
    """Download every S3 object listed in the first record of *row*.

    The first entry of ``row.bucket`` names the bucket and the first entry of
    ``row.s3_keys`` is a comma-separated string of object keys. Each object is
    saved into *download_dir* under the last ``/``-separated component of its
    key, using the notebook-level ``s3_client``.
    """
    source_bucket = row.bucket.iloc[0]
    for s3_key in row.s3_keys.iloc[0].split(','):
        # Keep only the file name: everything after the final '/' of the key
        local_name = s3_key.rsplit('/', 1)[-1]
        s3_client.download_file(source_bucket, s3_key, str(download_dir / local_name))

### Search for specific image and classified image for the defined validation site

In [None]:
# List every classification recorded for the selected planet image
search = image_calcs.loc[image_calcs['image_name'] == PLANET_ID]
search.head(20)

In [None]:
# This cell selects the classification with the highest 'version' value (i.e. the most recent version).
# If every version is NaN, the classification with the most recent 'upload_date' is selected instead.
if search.empty:
    # Fail early with a readable message instead of an opaque error from max() on empty data
    raise ValueError(f'No classification found in the image_calc table for planet image {PLANET_ID!r}')

search_iter = search[search.version == search['version'].max()]
if search_iter.empty:
    # The max of an all-NaN column is NaN and NaN never compares equal, so nothing matched:
    # fall back to the most recent upload date.
    search_iter = search[search.upload_date.values == search.upload_date.values.max()]
# Keep exactly one row on both paths so the exported metadata describes the same
# classification that gets downloaded (only the first row's files are fetched).
search_iter = search_iter.iloc[[0]]
imagecalc_row = search_iter
imagecalc_row.head()

## Download Metadata 

The metadata file name is labeled with the classification version.

In [None]:
# Read the selected classification's name and version, then write its row to a
# GeoJSON metadata file whose name carries the planet ID and that version.
image_calc_name = imagecalc_row['image_calc_name'].iloc[0]
version = imagecalc_row['version'].iloc[0]
imagecalc_row.to_file(
    download_dir / f'metadata_{PLANET_ID}_v{version}.geojson',
    driver='GeoJSON',
)

### Download classified image to the specified directory

In [None]:
# Fetch the files of the selected classification into the local download directory
downloadImage_calc(row=imagecalc_row, download_dir=download_dir)