In [54]:
import datacube
import numpy as np
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt

# Open a Datacube handle with default settings (no arguments are passed).
dc = datacube.Datacube()

# Use the eager `find_datasets` (returns a list) instead of `find_datasets_lazy`:
# the lazy variant hands back a one-shot iterator, which cannot be indexed with
# `dss[0]` and is exhausted after a single pass, so later cells that index and
# then iterate `dss` only work on a fresh kernel when it is a real list.
# NOTE: this keeps every matching dataset record in memory at once.
dss = dc.find_datasets(product="ga_s2am_ard_3")

In [91]:
# List the attribute names exposed on the first dataset's metadata object, to
# see which fields are available to pull into the table built further down.
first_dataset_metadata = dss[0].metadata
dir(first_dataset_metadata)

['cloud_cover',
 'creation_dt',
 'creation_time',
 'crs_raw',
 'dataset_maturity',
 'eo_gsd',
 'eo_sun_azimuth',
 'eo_sun_elevation',
 'fmask_clear',
 'fmask_cloud_shadow',
 'fmask_snow',
 'fmask_water',
 'format',
 'gqa_abs_iterative_mean_x',
 'gqa_abs_iterative_mean_xy',
 'gqa_abs_iterative_mean_y',
 'gqa_abs_x',
 'gqa_abs_xy',
 'gqa_abs_y',
 'gqa_cep90',
 'gqa_iterative_mean_x',
 'gqa_iterative_mean_xy',
 'gqa_iterative_mean_y',
 'gqa_iterative_stddev_x',
 'gqa_iterative_stddev_xy',
 'gqa_iterative_stddev_y',
 'gqa_mean_x',
 'gqa_mean_xy',
 'gqa_mean_y',
 'gqa_stddev_x',
 'gqa_stddev_xy',
 'gqa_stddev_y',
 'grid_spatial',
 'id',
 'instrument',
 'label',
 'lat',
 'lon',
 'measurements',
 'platform',
 'product_family',
 'region_code',
 's2cloudless_clear',
 's2cloudless_cloud',
 'sentinel_datastrip_id',
 'sentinel_product_name',
 'sentinel_tile_id',
 'sources',
 'time']

In [130]:
# Metadata fields to extract from every dataset (one table column each).
fields = ("region_code", "s2cloudless_cloud", "gqa_mean_xy")

# One record (dict) per dataset, keyed by the field names above.
metadata_records = [
    {name: dataset.metadata.fields[name] for name in fields}
    for dataset in dss
]
metadata_df = pd.DataFrame(metadata_records)

metadata_df

Unnamed: 0,region_code,s2cloudless_cloud,gqa_mean_xy
0,49JFM,2.713082,0.24
1,49JFM,0.523513,0.25
2,49JFM,0.759414,0.36
3,49JFM,76.479262,
4,49JFM,0.662456,67.36
...,...,...,...
383127,56KPC,0.000000,
383128,56KPC,44.120842,
383129,56KPC,97.620657,
383130,56KPC,71.813482,


In [133]:
# Index the per-dataset table by tile code (`region_code`) so the groupby and
# join steps further down can key on it.
# Guarded so the cell is safe to re-run: once `region_code` has been moved into
# the index it is no longer a column, and an unguarded `set_index` would raise
# KeyError on the second execution.
if "region_code" in metadata_df.columns:
    metadata_df = metadata_df.set_index("region_code")

In [113]:
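# Superseded alternative, kept for reference only: it builds one single-row
# DataFrame per dataset and concatenates them, indexed by 'Name'. The
# `pd.DataFrame(...)` construction in the earlier In [130] cell replaces it.
# metadata_df = pd.concat([pd.DataFrame(data={'Name': [i.metadata.region_code], 
#                                             'gqa_mean_xy': [i.metadata.gqa_mean_xy],
#                                             's2cloudless_cloud': [i.metadata.s2cloudless_cloud]}) for i in dss]).set_index('Name')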

In [134]:
# Flag datasets whose `gqa_mean_xy` value is missing (NaN/None); the boolean
# column is averaged per tile later to give a "fraction invalid".
gqa_is_missing = metadata_df["gqa_mean_xy"].isna()
metadata_df["gqa_invalid"] = gqa_is_missing

In [137]:
# Per-tile fraction of datasets with a missing GQA value (mean of a boolean column).
metadata_df_summary = metadata_df.groupby('region_code')[["gqa_invalid"]].mean()

# Per-tile median `gqa_mean_xy`, restricted to datasets with s2cloudless_cloud < 50
# (presumably a cloud percentage, given the 0-100 range of the displayed values —
# TODO confirm). Tiles with no qualifying dataset end up as NaN after index alignment.
low_cloud_df = metadata_df.query("s2cloudless_cloud < 50")
median_gqa_by_region = low_cloud_df.groupby('region_code')["gqa_mean_xy"].median()
metadata_df_summary["gqa_mean_xy"] = median_gqa_by_region

In [138]:
# Display the per-tile summary: `gqa_invalid` is the fraction of datasets with a
# missing `gqa_mean_xy`; `gqa_mean_xy` is the median over datasets with
# s2cloudless_cloud < 50.
metadata_df_summary

Unnamed: 0_level_0,gqa_invalid,gqa_mean_xy
region_code,Unnamed: 1_level_1,Unnamed: 2_level_1
49JFM,0.220859,31.395
49JFN,0.732218,45.965
49JGH,0.990698,0.585
49JGJ,0.057018,0.590
49JGK,0.170259,51.010
...,...,...
56KMU,0.108696,0.610
56KNA,1.000000,
56KNB,1.000000,
56KPB,1.000000,


In [46]:
# Load the Sentinel-2 MGRS tile grid from the DEA data server and index it by
# tile `Name`, so it can be joined against the per-tile summary.
mgrs_grid_url = 'https://data.dea.ga.gov.au/derivative/ga_s2_mgrs_tile_grid.geojson'
s2_mgrs = gpd.read_file(mgrs_grid_url)
s2_mgrs = s2_mgrs.set_index('Name')

In [47]:
# Preview the tile grid: one geometry per tile, indexed by tile `Name`.
s2_mgrs

Unnamed: 0_level_0,geometry
Name,Unnamed: 1_level_1
49GCJ,"MULTIPOLYGON Z (((108.41593 -46.02435 0.00000,..."
49GCK,"MULTIPOLYGON Z (((108.45688 -45.12497 0.00000,..."
49GCL,"MULTIPOLYGON Z (((108.49595 -44.22542 0.00000,..."
49GCM,"MULTIPOLYGON Z (((108.53322 -43.32625 0.00000,..."
49GCN,"MULTIPOLYGON Z (((108.56882 -42.42636 0.00000,..."
...,...
57LTJ,"MULTIPOLYGON Z (((156.26397 -9.93968 0.00000, ..."
57LTK,"MULTIPOLYGON Z (((156.27113 -9.03605 0.00000, ..."
57LTL,"MULTIPOLYGON Z (((156.27758 -8.13291 0.00000, ..."
57MTM,"MULTIPOLYGON Z (((156.28333 -7.22918 0.00000, ..."


In [139]:
# Attach the per-tile GQA summary to the tile geometries (left join on the
# index, so tiles without any summary row keep NaN attributes) and write the
# result out.
# The driver is passed explicitly: older geopandas releases default to
# "ESRI Shapefile" regardless of the file extension, which would silently write
# a shapefile under a `.geojson` name.
s2_mgrs_gqa = s2_mgrs.join(metadata_df_summary)
s2_mgrs_gqa.to_file('s2_mgrs_gqa.geojson', driver='GeoJSON')

## Load GCPs

In [None]:
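# One-off setup: uncomment to extract Fix_QA_points.zip into GCP_points/ before
# running the cells below.
# !unzip Fix_QA_points.zip -d GCP_points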

In [33]:
import pandas as pd
import geopandas as gpd
import glob


In [15]:
# Collect every .txt file two directory levels below GCP_points/Fix_QA_points/.
# glob returns matches in arbitrary (filesystem-dependent) order, so sort them
# to keep the concatenation order, and hence the exported file, reproducible.
all_files = sorted(glob.glob('GCP_points/Fix_QA_points/*/*/*.txt'))

# Drop any path containing the substring 'old' anywhere in it.
# NOTE(review): this is a plain substring test on the whole path, so it would
# also exclude names such as "folder" or "gold" — confirm that is acceptable.
all_files_clean = [path for path in all_files if 'old' not in path]

In [29]:
# Read each GCP file as a header-less CSV and stack them into a single table.
gcp_frames = []
for gcp_path in all_files_clean:
    gcp_frames.append(pd.read_csv(gcp_path, header=None))
dfs = gcp_frames

# Row labels are carried over from each file unchanged (no re-indexing).
df = pd.concat(dfs)

# The files are expected to have exactly two columns; they are used as x/y
# coordinates when the points are built (presumably lon/lat — TODO confirm).
df.columns = ['x', 'y']

In [40]:
# Turn the x/y columns into point geometries tagged as EPSG:4326 and write them out.
# The driver is passed explicitly: older geopandas releases default to
# "ESRI Shapefile" regardless of the file extension, which would silently write
# a shapefile under a `.geojson` name.
gcp_geometry = gpd.points_from_xy(x=df.x, y=df.y)
gcp_points = gpd.GeoDataFrame(geometry=gcp_geometry, crs='EPSG:4326')
gcp_points.to_file('gcp_points.geojson', driver='GeoJSON')