# This notebook commits staged image_calc entries to the DSWx calval database

In [348]:
import boto3
import geopandas as gpd
import pandas as pd

In [384]:
# Bucket holding the database tables (image.geojson, image_calc.geojson) and data files.
bucket_name = 'opera-calval-database-dswx'
# Bucket where pending entries are staged before being committed to the database.
bucket_name_staging = f'{bucket_name}-staging'

In [638]:
# All S3 access in this notebook goes through the 'saml-pub' credentials profile.
session = boto3.Session(profile_name='saml-pub')

# Resource API (Bucket/Object helpers) and low-level client (used for delete_object).
s3, s3_client = session.resource('s3'), session.client('s3')

### Search for pending geojsons in the staging bucket

In [639]:
# Print the key of every pending image_calc entry in the staging bucket.
# One geojson file corresponds to one row waiting to be added to the table;
# the file name is built from the date and time at which the entry was staged.
bucket = s3.Bucket(bucket_name_staging)
pending_objects = bucket.objects.filter(Delimiter='/', Prefix='pending/')

for obj in pending_objects:
    print(obj.key)

pending/20221005_094901_imagecalc.geojson


In [641]:
# Key of the staged entry to commit. It is copied by hand from the list printed
# above, so that one entry is selected and processed at a time.
pending_key = 'pending/20221005_094901_imagecalc.geojson'

### Inspect the staged geojson row
The staged table row should always be generated by using the addImagecalc function found in /DSWx/tools/

In [642]:
# Download the selected staged geojson and parse it into a GeoDataFrame.
pending_gdf = gpd.read_file(s3.Object(bucket_name_staging, pending_key).get()['Body'])
pending_gdf.head()

Unnamed: 0,image_name,image_calc_name,calc_type,processing_level,oversight_level,calculated_by,reviewed_by,notes,public,water_stratum,bucket,s3_keys,upload_date,geometry
0,20211021_182217_09_2456,20211021_182217_09_2456_classification,Review,Intermediate,,Matthew Bonnema,Alexander Handwerger,Previous(Classified with SCP and mannually edi...,True,3,opera-calval-database-dswx-staging,pending/files/20221005_094901_imagecalc/classi...,20221005_094901,"POLYGON ((-119.47278 59.44326, -119.47278 59.4..."


In [643]:
# Optional manual overrides for the staged row. They are disabled (commented out);
# uncomment the relevant lines to overwrite these fields on pending_gdf before committing.
#pending_gdf['calculated_by'] = 'Karthik Venkataramani'
#pending_gdf['reviewed_by'] = 'Charlie Marshak'
#pending_gdf['calc_type'] = 'Manual Classification'
#pending_gdf.head()

### Read image and image_calc geojson tables from database

In [644]:
# Current image_calc table, as stored in the database bucket.
imagecalc_body = s3.Object(bucket_name, 'image_calc.geojson').get()['Body']
imagecalc_gdf = gpd.read_file(imagecalc_body)

# Current image table, used below to look up the site of the source image.
image_body = s3.Object(bucket_name, 'image.geojson').get()['Body']
image_gdf = gpd.read_file(image_body)

### Build some metadata fields and identify staged file keys 

In [645]:
# Identifiers of the staged row (only the first row of pending_gdf is used).
# NOTE: imagecalc_name is read before the version suffix is appended in the next
# cell; re-running this cell after that one would pick up the suffixed name.
source_image_name = pending_gdf.image_name.iloc[0]
imagecalc_name = pending_gdf.image_calc_name.iloc[0]

# Look up the site of the source image. Fail with an explicit message instead of
# an opaque IndexError when the image is missing from the image table.
site_matches = image_gdf.loc[image_gdf.image_name == source_image_name, 'site_name']
if site_matches.empty:
    raise ValueError('image_name: '+str(source_image_name)+' not found in image table; cannot determine site')
site = site_matches.iloc[0]

# Location of the staged files: source bucket and comma-separated list of keys.
src_bucket = pending_gdf.bucket.iloc[0]
src_keys = pending_gdf.s3_keys.iloc[0].split(',')
src_keys

['pending/files/20221005_094901_imagecalc/classification_reviewed_20211021_182217_09_2456_formatted.tif']

In [646]:
# This cell assigns a version number to the classification. If this is the first classification of a given planet
# image, the assigned version should be 0. Otherwise, it will increment on the latest version found in the database
search = imagecalc_gdf[imagecalc_gdf.image_name == source_image_name]
prev_version = -1  # stays -1 when no earlier version exists
if len(search) == 0:
    version = 0
    previous_name = None
    print('first entry into table for ID:'+source_image_name+' assigning version = 0')
else:
    try:
        # Compute the maximum once and reuse it for both the new version number
        # and the lookup of the row this entry supersedes.
        prev_version = search['version'].max()
        # int() raises ValueError when the maximum is NaN, i.e. when none of the
        # matching rows carries a version; that case is handled by the fallback.
        version = int(prev_version + 1)
        previous_name = search.loc[search['version'] == prev_version, 'image_calc_name'].iloc[0]
        print('assigning version based on maximum version in table. version = '+str(version))
    except (KeyError, TypeError, ValueError):
        # Only the expected failures are caught: missing 'version' column (KeyError),
        # non-numeric values (TypeError) or all-NaN versions (ValueError).
        # Anything else, including KeyboardInterrupt, propagates.
        version = int(len(search))
        previous_name = None
        print('could not read version from table. assigned based on number of matching table entries. verson = '+str(version))

# Record the version on the staged row; the image_calc_name gets a '_v<version>' suffix.
pending_gdf['image_calc_name'] = imagecalc_name+'_v'+str(version)
pending_gdf['version'] = version
pending_gdf['previous_name'] = previous_name
pending_gdf.head()

assigning version based on maximum version in table. version = 1


Unnamed: 0,image_name,image_calc_name,calc_type,processing_level,oversight_level,calculated_by,reviewed_by,notes,public,water_stratum,bucket,s3_keys,upload_date,geometry,version,previous_name
0,20211021_182217_09_2456,20211021_182217_09_2456_classification_v1,Review,Intermediate,,Matthew Bonnema,Alexander Handwerger,Previous(Classified with SCP and mannually edi...,True,3,opera-calval-database-dswx-staging,pending/files/20221005_094901_imagecalc/classi...,20221005_094901,"POLYGON ((-119.47278 59.44326, -119.47278 59.4...",1,20211021_182217_09_2456_classification_v0


### Commit staged image_calc to database
This code block copies the staged files to the database bucket and to the `complete/` folder in the staging bucket, adds the staged row to the image_calc table, and removes the entry from `pending/`.

In [647]:
# Destination "folder" in the database bucket for the files of this image_calc.
# NOTE(review): the path uses the un-versioned imagecalc_name, so files of different
# versions with the same file name would share a key - confirm this is intended.
s3_folder_path = 'data/site/'+site+'/image/'+source_image_name+'/image_calc/'+imagecalc_name+'/'

# 1) Copy every staged file to the database bucket and to complete/ in the staging bucket.
#    Nothing is deleted yet, so a failure further down leaves the staged entry intact.
s3_keys = []
for key in src_keys:
    new_key = s3_folder_path+key.split('/')[-1]
    complete_key = 'complete/'+'/'.join(key.split('/')[1:])
    s3_keys.append(new_key)
    s3.meta.client.copy({'Bucket':src_bucket,'Key':key}, bucket_name, new_key)
    s3.meta.client.copy({'Bucket':src_bucket,'Key':key}, src_bucket, complete_key)

# Point the staged row at the committed copies of the files.
pending_gdf['s3_keys'] = ','.join(s3_keys)
pending_gdf['bucket'] = bucket_name

# 2) Build the updated table. If a row with the same image_calc_name already exists it is
#    replaced: the filtered table is kept in imagecalc_base and actually used below.
new_imagecalc_name = pending_gdf.image_calc_name.iloc[0]
is_duplicate = imagecalc_gdf.image_calc_name == new_imagecalc_name
if is_duplicate.any():
    print('image_calc_name: '+new_imagecalc_name+' is already in image_calc table; replacing existing row')
    imagecalc_base = imagecalc_gdf[~is_duplicate]
else:
    print('Adding new row to table')
    imagecalc_base = imagecalc_gdf

# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
imagecalc_upd = pd.concat([imagecalc_base, pending_gdf], ignore_index=True)
# Drop rows without a name. notna() is required here: an element-wise '!= None'
# comparison is True for every row and therefore filters nothing.
imagecalc_upd = imagecalc_upd[imagecalc_upd.image_calc_name.notna()]

# 3) Write the updated table back to the database bucket.
imagecalc_upd_bytes = imagecalc_upd.to_json(drop_id=True).encode('UTF-8')
s3object = s3.Object(bucket_name,'image_calc.geojson')
s3object.put(Body=imagecalc_upd_bytes)

# 4) Only after the table has been written: remove the staged files and move the
#    staged geojson from pending/ to complete/.
for key in src_keys:
    s3_client.delete_object(Bucket=src_bucket, Key=key)

response = s3.meta.client.copy({'Bucket':src_bucket,'Key':pending_key}, src_bucket, 'complete/'+'/'.join(pending_key.split('/')[1:]))
response = s3_client.delete_object(Bucket=src_bucket, Key=pending_key)



Adding new row to table


### Check the updated image_calc table

In [611]:
# Re-read the table from the database bucket to confirm that the commit was written.
updated_table_body = s3.Object(bucket_name, 'image_calc.geojson').get()['Body']
new_imagecalc_gdf = gpd.read_file(updated_table_body)
new_imagecalc_gdf.head(100)

Unnamed: 0,bucket,calc_type,calculated_by,image_calc_name,image_name,notes,oversight_level,previous_name,processing_level,public,reviewed_by,s3_keys,upload_date,version,water_strata,water_stratum,geometry
0,opera-calval-database-dswx,Supervised Classification,Alexander Handwerger,20211021_133031_75_245a_class,20211021_133031_75_245a,,,,Intermediate,True,,data/site/3_4/image/20211021_133031_75_245a/im...,20220802_145632,,,,"POLYGON ((-64.30053 -33.07480, -64.34944 -33.2..."
1,,,,,,,,,,,,,,,,,
2,opera-calval-database-dswx,Manual Edit of Classification,Alexander Handwerger,20211021_133031_75_245a_class_edit,20211021_133031_75_245a,,,20211021_133031_75_245a_class,Intermediate,True,,data/site/3_4/image/20211021_133031_75_245a/im...,20220802_161902,,,,"POLYGON ((-64.30053 -33.07480, -64.34944 -33.2..."
3,opera-calval-database-dswx,Review,Alexander Handwerger,20211021_133031_75_245a_class_edit_review,20211021_133031_75_245a,,Reviewed-Complete,20211021_133031_75_245a_class_edit,Final,True,Matthew Bonnema,data/site/3_4/image/20211021_133031_75_245a/im...,20220803_104213,,,,"POLYGON ((-64.30053 -33.07480, -64.34944 -33.2..."
4,opera-calval-database-dswx,Supervised Classification,Alexander Handwerger,20210924_082025_48_2424_class,20210924_082025_48_2424,,,,Intermediate,True,,data/site/4_37/image/20210924_082025_48_2424/i...,20220805_102655,,,,"POLYGON ((28.20956 -9.14523, 28.17025 -9.34005..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
65,opera-calval-database-dswx,Review,Simran Sangha,20211028_134803_20_227a_classification_v1,20211028_134803_20_227a,"Previous(SCP with minimum distance scheme, and...",,20211028_134803_20_227a_classification_v0,Final,True,Charlie Marshak,data/site/1_5/image/20211028_134803_20_227a/im...,20220930_162123,1.0,,2.0,"POLYGON ((-56.21699 -31.40985, -56.21699 -31.3..."
66,opera-calval-database-dswx,Review,Simran Sangha,20211011_065101_82_2274_classification_v1,20211011_065101_82_2274,(CM): Fixed isolated single pixels labeled as ...,,20211011_065101_82_2274_classification_v0,Final,True,Charlie Marshak,data/site/4_1/image/20211011_065101_82_2274/im...,20220930_164836,1.0,,3.0,"POLYGON ((63.12334 54.24869, 63.12334 54.30270..."
67,opera-calval-database-dswx,Mannual classification,Matthew Bonnema,20210911_005129_82_106a_classification_v0,20210911_005129_82_106a,Classified with SCP and mannually editted usin...,,,Intermediate,True,,data/site/3_30/image/20210911_005129_82_106a/i...,20221003_132602,0.0,,3.0,"POLYGON ((131.11412 61.45039, 131.11412 61.505..."
68,opera-calval-database-dswx,Mannual classification,Matthew Bonnema,20211021_182217_09_2456_classification_v0,20211021_182217_09_2456,Classified with SCP and mannually editted usin...,,,Intermediate,True,,data/site/3_5/image/20211021_182217_09_2456/im...,20221003_145836,0.0,,3.0,"POLYGON ((-119.47278 59.44326, -119.47278 59.4..."
