# Download Classification and Manually Edit

In [15]:
import geopandas as gpd
import boto3
import os
from tools.addImageCalc import addImageCalc
from pathlib import Path

### User defines the validation site and the directory where files will be downloaded

In [16]:
# Set exactly one of the two identifiers below and leave the other as ''.
# If the chip has more than one planet image, PLANET_ID is the one that must be set.
PLANET_ID = ''
SITE_NAME = '3_8'
# Exactly one of the two identifiers may be non-empty.
assert bool(PLANET_ID) != bool(SITE_NAME)

### Open AWS session and read image and image_calc tables

In [23]:
# Export AWS_NO_SIGN_REQUEST before any S3 access.
# NOTE(review): presumably this requests unsigned (anonymous) reads of the public
# bucket; confirm which library actually honors this variable.
os.environ.update({"AWS_NO_SIGN_REQUEST": "YES"})

# One shared session provides both the low-level client (used for file downloads)
# and the resource interface (used to read the database tables).
session = boto3.session.Session()
s3_client = session.client('s3')
s3 = session.resource('s3')

In [24]:
# Name of the S3 bucket from which the image and image_calc tables are read
bucket_name = 'opera-calval-database-dswx'

In [25]:
# Read the image table (image.geojson at the bucket root) into a GeoDataFrame.
obj = s3.Object(bucket_name,'image.geojson')
# The 'Body' of the GET response is handed directly to geopandas
image_table_data = obj.get()['Body']
imageTable = gpd.read_file(image_table_data)

In [26]:
# Read the image_calc table (image_calc.geojson at the bucket root) into a GeoDataFrame.
obj = s3.Object(bucket_name,'image_calc.geojson')
# NOTE: despite its name, image_table_data holds the image_calc.geojson body here
image_table_data = obj.get()['Body']
image_calcs = gpd.read_file(image_table_data)

In [27]:
# This cell resolves whichever of SITE_NAME / PLANET_ID was left empty and prints how many
# matches were found (planet images for the chip, or chips for the planet image).
# If more than one planet image is reported, ensure the printed Planet ID matches the
# planet image used to generate the classification.
temp = imageTable[['image_name', 'site_name']]
df_site2image = temp.set_index('site_name')
df_image2site = temp.set_index('image_name')
if not PLANET_ID:
    # Index with a *list* of labels and name the column explicitly: this always yields a
    # Series containing every match. A scalar label returns a DataFrame (which has no
    # .tolist()) as soon as the site has more than one image, which crashed this cell.
    values = df_site2image.loc[[SITE_NAME], 'image_name'].tolist()
    # The first match is used; set PLANET_ID explicitly if it is not the right one.
    PLANET_ID = values[0]
    print(f'There was {len(values)} planet images for this chip')
else:
    # Same list-of-labels lookup in the other direction (image name -> site name).
    values = df_image2site.loc[[PLANET_ID], 'site_name'].tolist()
    SITE_NAME = values[0]
    print(f'There were {len(values)} chips for this planet_image')

(SITE_NAME, PLANET_ID)

There was 1 planet images for this chip


('3_8', '20211010_135831_84_227e')

In [28]:
# Local directory (one per planet image) that holds the classification file(s);
# it is created, together with any missing parents, if it does not exist yet.
relative_dir = Path(f'planet_images_cropped/{PLANET_ID}')
download_dir = relative_dir.absolute()
download_dir.mkdir(parents=True, exist_ok=True)

In [29]:
def downloadImage_calc(row,download_dir):
    """Download every S3 object listed in an image_calc row into ``download_dir``.

    Parameters
    ----------
    row : single-row table (e.g. ``imagecalc_row``)
        Only the first row is read. Its ``bucket`` value names the S3 bucket and
        its ``s3_keys`` value is a comma-separated string of object keys.
    download_dir : pathlib.Path
        Local directory that receives the files. Each object is saved under the
        last ``/``-separated component of its key.

    Notes
    -----
    Uses the module-level ``s3_client``. Returns ``None``.
    """
    bucket = row.bucket.iloc[0]
    for key in row.s3_keys.iloc[0].split(','):
        if not key.strip():
            # A trailing or doubled comma produces a blank entry, which cannot
            # name an S3 object; skip it instead of failing the whole download.
            continue
        filename = key.split('/')[-1]
        s3_client.download_file(bucket, key, str(download_dir / filename))

### Search for specific image and classified image for the defined validation site

In [30]:
# List every classification recorded for the selected planet image (first 20 rows shown)
matches_image = image_calcs['image_name'] == PLANET_ID
search = image_calcs[matches_image]
search.head(20)

Unnamed: 0,bucket,calc_type,calculated_by,image_calc_name,image_name,notes,oversight_level,previous_name,processing_level,public,reviewed_by,s3_keys,upload_date,version,geometry
11,opera-calval-database-dswx,Supervised Classification,Alexander Handwerger,20211010_135831_84_227e_class,20211010_135831_84_227e,,,,Intermediate,True,,data/site/3_8/image/20211010_135831_84_227e/im...,20220809_091703,,"POLYGON ((-64.96109 -11.14220, -64.99808 -11.3..."
15,opera-calval-database-dswx,Manual classification,Alexander Handwerger,20211010_135831_84_227e_classification_v1,20211010_135831_84_227e,Supervised Classification using Semi-automatic...,,,Intermediate,True,,data/site/3_8/image/20211010_135831_84_227e/im...,20220916_160006,1.0,"POLYGON ((-64.81607 -11.29998, -64.81607 -11.2..."


In [31]:
#This cell selects the classification with the highest 'version' value (i.e. the most recent version)
latest = search[search['version'] == search['version'].max()]
if latest.empty:
    # Happens when no row matched the planet image or when every 'version' is missing
    # (NaN never compares equal). Fail with a clear message and leave `search` untouched.
    raise ValueError(f'No classification with a version value found for planet image {PLANET_ID!r}')
search = latest
# Keep the selection as a one-row table (double brackets) rather than a Series
imagecalc_row = search.iloc[[0]]
imagecalc_row.head()

Unnamed: 0,bucket,calc_type,calculated_by,image_calc_name,image_name,notes,oversight_level,previous_name,processing_level,public,reviewed_by,s3_keys,upload_date,version,geometry
15,opera-calval-database-dswx,Manual classification,Alexander Handwerger,20211010_135831_84_227e_classification_v1,20211010_135831_84_227e,Supervised Classification using Semi-automatic...,,,Intermediate,True,,data/site/3_8/image/20211010_135831_84_227e/im...,20220916_160006,1.0,"POLYGON ((-64.81607 -11.29998, -64.81607 -11.2..."


## Download Metadata 

The metadata file name is labeled with the classification version.

In [32]:
# Pull the name and version out of the selected classification row
image_calc_name = imagecalc_row['image_calc_name'].iloc[0]
version = imagecalc_row['version'].iloc[0]
# Write the row as GeoJSON next to the downloaded files, tagged with the version
metadata_path = download_dir / f'metadata_{PLANET_ID}_v{version}.geojson'
imagecalc_row.to_file(metadata_path, driver='GeoJSON')

### Download classified image to the specified directory

In [33]:
# Fetch every file referenced by the selected classification row into download_dir
downloadImage_calc(row=imagecalc_row, download_dir=download_dir)