In [1]:
import geopandas as gpd
import pandas as pd
import boto3
import os

In [2]:
bucket_name = 'opera-calval-database-dswx'
private_bucket_name = 'opera-calval-database-dswx-private'

In [3]:
local_planet_dir = './downloads/'

In [4]:
session = boto3.session.Session(profile_name='saml-pub')
s3 = session.resource('s3')
s3_client = session.client('s3')

In [5]:
collocation = gpd.read_file('Collocation_HLS_Planet_2021.geojson')


In [6]:
#test = collocation.head(100)
#for row in test.iterrows():
#    print(row[1].pl_ID)

In [7]:
fileList = os.listdir(local_planet_dir)
print(fileList)

['20210925_191708_94_2408_3B_AnalyticMS_SR_8b.tif', '20211027_151445_21_2231_3B_AnalyticMS_SR_8b.tif', '20211030_142613_41_227b_3B_AnalyticMS_SR_8b.tif', '20210926_072414_71_2233_3B_AnalyticMS_SR_8b.tif', '20210901_175205_71_2426_3B_AnalyticMS_SR_8b.tif', '20211014_152910_49_2459_3B_AnalyticMS_SR_8b.tif', '20211005_135201_42_227e_3B_AnalyticMS_SR_8b.tif', '20211028_045455_02_2459_3B_AnalyticMS_SR_8b.tif', '20210924_082025_48_2424_3B_AnalyticMS_SR_8b.tif', '20211011_155455_52_2262_3B_AnalyticMS_SR_8b.tif', '20211010_080351_47_2458_3B_AnalyticMS_SR_8b.tif', '20210906_183639_52_240c_3B_AnalyticMS_SR_8b.tif', '20210914_051809_10_2455_3B_AnalyticMS_SR_8b.tif', '20211023_034107_36_2223_3B_AnalyticMS_SR_8b.tif', '20211016_081255_90_2420_3B_AnalyticMS_SR_8b.tif', '20211008_054349_78_241d_3B_AnalyticMS_SR_8b.tif', '.DS_Store', '20211010_003854_61_241d_3B_AnalyticMS_SR_8b.tif', '20211008_073033_21_245c_3B_AnalyticMS_SR_8b.tif', '20210918_135626_99_245f_3B_AnalyticMS_SR_8b.tif', '20211025_085849_

In [None]:
#fileList = fileList[0:1]
print(fileList)
for file in fileList:
    if file[-3:] != 'tif':
        continue
    
    ID = file.split('_3B_Analytic')[0]
    print('Adding Image ID: '+ID)
    print('\tBuilding Image Table Entry')
    search = collocation[(collocation.pl_ID == ID)].sort_values('hls_pl_AOI_intersect',ascending=False)
    search = search.iloc[[0]]
    site = search.chip_ID.iloc[0]
    s3_key_image = 'data/'+site+'/'+ID+'/'+file
    new_image = search.drop(columns=['hls_Date','hls_Cloud_Cover','hls_Footprint','hls_AOI_intersect','hls_pl_AOI_intersect','chip_rID'])
    new_image = new_image.rename(columns = {'hls_ID':'collocated_dswx','pl_Date':'datetime','pl_ID':'image_name','pl_Cloud_Cover':'cloud_cover','pl_AOI_intersect':'site_coverage','chip_ID':'site_name'})
    new_image['s3_bucket'] = private_bucket_name
    new_image['s3_key_image'] = s3_key_image
    #print(new_image.head())
    obj = s3.Object(bucket_name,'image.geojson')
    image_data = obj.get()['Body']
    image_gdf = gpd.read_file(image_data)
    #print(image_gdf)
    if 'image_name' in image_gdf.columns:
        if len(image_gdf[image_gdf.image_name==new_image.image_name.iloc[0]]) != 0:
            print('\timage_name: '+new_image.image_name.iloc[0]+' is already in image table')
            continue
    
    print('\tUploading File')
    result = s3_client.list_objects_v2(Bucket=private_bucket_name, Prefix=s3_key_image)
    if 'Contents' in result:
        print("\tImage file already exists in bucket")
    else:
        response = s3_client.upload_file(local_planet_dir+file,private_bucket_name,s3_key_image)
    
    print('\tUpdating Image Table')
    
    image_gdf_upd = image_gdf.append(new_image,ignore_index=True)
    image_upd_bytes = bytes(image_gdf_upd.to_json(drop_id=True).encode('UTF-8'))
    s3object = s3.Object(bucket_name,'image.geojson')
    s3object.put(Body=image_upd_bytes)
    print('\tCompleted')

['20210925_191708_94_2408_3B_AnalyticMS_SR_8b.tif', '20211027_151445_21_2231_3B_AnalyticMS_SR_8b.tif', '20211030_142613_41_227b_3B_AnalyticMS_SR_8b.tif', '20210926_072414_71_2233_3B_AnalyticMS_SR_8b.tif', '20210901_175205_71_2426_3B_AnalyticMS_SR_8b.tif', '20211014_152910_49_2459_3B_AnalyticMS_SR_8b.tif', '20211005_135201_42_227e_3B_AnalyticMS_SR_8b.tif', '20211028_045455_02_2459_3B_AnalyticMS_SR_8b.tif', '20210924_082025_48_2424_3B_AnalyticMS_SR_8b.tif', '20211011_155455_52_2262_3B_AnalyticMS_SR_8b.tif', '20211010_080351_47_2458_3B_AnalyticMS_SR_8b.tif', '20210906_183639_52_240c_3B_AnalyticMS_SR_8b.tif', '20210914_051809_10_2455_3B_AnalyticMS_SR_8b.tif', '20211023_034107_36_2223_3B_AnalyticMS_SR_8b.tif', '20211016_081255_90_2420_3B_AnalyticMS_SR_8b.tif', '20211008_054349_78_241d_3B_AnalyticMS_SR_8b.tif', '.DS_Store', '20211010_003854_61_241d_3B_AnalyticMS_SR_8b.tif', '20211008_073033_21_245c_3B_AnalyticMS_SR_8b.tif', '20210918_135626_99_245f_3B_AnalyticMS_SR_8b.tif', '20211025_085849_

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


	Completed
Adding Image ID: 20211027_151445_21_2231
	Building Image Table Entry
	Uploading File
	Image file already exists in bucket
	Updating Image Table


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


	Completed
Adding Image ID: 20211030_142613_41_227b
	Building Image Table Entry
	Uploading File
	Updating Image Table


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


	Completed
Adding Image ID: 20210926_072414_71_2233
	Building Image Table Entry
	Uploading File
	Updating Image Table


of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  sort=sort,


	Completed
Adding Image ID: 20210901_175205_71_2426
	Building Image Table Entry
	Uploading File


In [15]:
obj = s3.Object(bucket_name,'image.geojson')
image_data = obj.get()['Body']
image_gdf = gpd.read_file(image_data)
image_gdf.head()

Unnamed: 0,cloud_cover,collocated_dswx,datetime,image_name,instrument,provider,resolution,s3_bucket,s3_key_image,site_coverage,site_name,timeDelta_days,geometry
0,0.0,HLS.S30.T10VEK.2021268T192141.v2.0,2021-09-25T19:17:08,20210925_191708_94_2408,PSB.SD,planetscope,3.0,opera-calval-database-dswx,data/3_42/20210925_191708_94_2408/20210925_191...,84.975663,3_42,0.007904,"POLYGON ((-122.94132 57.97664, -123.03517 57.7..."


In [27]:
print(new_image.image_name.iloc[0])
if len(image_gdf[image_gdf.image_name==new_image.image_name.iloc[0]]) != 0:
    print('\timage_name: '+new_image.image_name.iloc[0]+' is already in image table')


20210925_191708_94_2408
	image_name: 20210925_191708_94_2408 is already in image table
