## Convert the SpaceNet Dataset to STAC-Compliant Items

In [1]:
import rasterio
import shapely
import sys,os,os.path

# The rasterio Python package expects SSL certificates at the CentOS location,
# so point CURL_CA_BUNDLE at the CA bundle path below instead.
# NOTE(review): presumably required for the s3:// / HTTPS reads used later in
# this notebook -- confirm on the target machine.
os.environ['CURL_CA_BUNDLE']='/etc/ssl/certs/ca-certificates.crt'

In [3]:
from spacenet_stac.stac_item import spacenetStacItem

In [4]:
%%time
# Build a STAC item for the AOI_2 Vegas MUL-PanSharpen raster, print it, and
# save it as JSON.

# S3 location of the raster; used for the "self" link and the raster asset href.
spacenetPath = "s3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/AOI_2_Vegas_MUL-PanSharpen_Cloud.tif"
# Local path with the same file name; passed to spacenetStacItem as rasterPath.
spacenetPath2 = "/raid/nfs/data/Datasets/CosmiQ_SpaceNet_Src/AOI_2_Vegas/srcData/rasterData/AOI_2_Vegas_MUL-PanSharpen_Cloud.tif"
# Local XML metadata file; passed to spacenetStacItem as imdPath.
xmlTest = '/raid/nfs/data/Datasets/CosmiQ_SpaceNet_Src/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-P2AS-055649178030_01_P001.XML'

# Links are keyed by relation name; each entry repeats the relation in "rel".
linkDict = {"self": {"rel":"self",
                    "href":spacenetPath},
            "collection": {"rel": "collection",
                          "href": "s3://spacenet-stac/AOI_2_Catalog.json"}
           }

# Assets all point at S3 objects: the raster itself, its metadata XML, and a
# browse JPEG.
# NOTE(review): xmlTest and idStr use the "P2AS" product name while the asset
# hrefs below use "M2AS" -- confirm the mix is intended.
spacenetCogPath = spacenetPath
spacenetIMD = 's3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-M2AS-055649178030_01_P001.XML'
thumbNailPath = 's3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-M2AS-055649178030_01_P001-BROWSE.JPG'
assetDict = {"MUL-PanSharpen": {"href": spacenetCogPath},
         "imd": {"href": spacenetIMD},
         "thumbnail": {"href": thumbNailPath}
         }
stac_Item = spacenetStacItem(rasterPath=spacenetPath2, 
                             provider='DigitalGlobe', 
                             license="CC 4.0 BY-SA", 
                            idStr='15OCT22183656-P2AS-055649178030_01_P001', 
                             assetDict=assetDict, 
                             imdPath=xmlTest, 
                             links=linkDict)


# Show the assembled item, then persist it to the working directory.
print(stac_Item.stac_item)
stac_Item.write_toJSON("/raid/nfs/workingDir/AOI_2_Vegas_-stac_v1.json")

  profile = dataset.profile


{'type': 'Polygon', 'coordinates': [[[-115.307518, 36.263959], [-115.307518, 36.121278], [-115.152608, 36.121278], [-115.152608, 36.263959], [-115.307518, 36.263959]]]}
{'id': '15OCT22183656-P2AS-055649178030_01_P001', 'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-115.307518, 36.263959], [-115.307518, 36.121278], [-115.152608, 36.121278], [-115.152608, 36.263959], [-115.307518, 36.263959]]]}, 'properties': {'eo:sun_azimuth': '1.640000000000000e+02', 'eo:cloud_cover': '0.000000000000000e+00', 'eo:off_nadir': '1.680000000000000e+01', 'eo:azimuth': '1.506000000000000e+02', 'eo:platform': 'WV03', 'eo:sun_elevation': '4.150000000000000e+01', 'eo:gsd': '3.330000000000000e-01', 'eo:crs': '', 'dg:catalog_id': '10400100137F4900', 'dg:platform': 'WV03', 'dg:product_level': 'LV2A', 'datetime': '2015-10-22T18:36:56.065137Z', 'provider': 'DigitalGlobe', 'license': 'CC 4.0 BY-SA'}, 'assets': {'MUL-PanSharpen': {'href': 's3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/AOI

#https://github.com/radiantearth/stac-spec/blob/dev/json-spec/examples/digitalglobe-sample.json
    

Reference example of a DigitalGlobe STAC item: https://github.com/radiantearth/stac-spec/blob/dev/json-spec/examples/digitalglobe-sample.json

In [76]:
import xml.etree.ElementTree as ET
import json

def iterate_OverXML(root, recursionTagList=None, metaDataStruct=None):
    """Flatten the children of an XML element into a ``{tag: text}`` dict.

    Each direct child of ``root`` is stored as ``child.tag -> child.text``.
    Children whose tag is listed in ``recursionTagList`` are not stored
    themselves; their own children are flattened into the same dict instead.
    A tag that occurs more than once keeps the value seen last.

    Args:
        root: Iterable of element-like objects exposing ``tag`` and ``text``
            (e.g. an ``xml.etree.ElementTree.Element``).
        recursionTagList: Tags to descend into. ``None`` means no recursion.
        metaDataStruct: Optional dict to fill in place. ``None`` starts from
            a fresh dict on every call.

    Returns:
        The dict that was filled (``metaDataStruct`` itself when one was
        passed in).
    """
    # A fresh container per call: a ``{}`` default would be created once and
    # shared, so tags from one XML file would leak into the next parse.
    if recursionTagList is None:
        recursionTagList = ()
    if metaDataStruct is None:
        metaDataStruct = {}

    for child in root:
        if child.tag in recursionTagList:
            # Nested results are written into the same dict, in place.
            iterate_OverXML(child, recursionTagList, metaDataStruct)
        else:
            metaDataStruct[child.tag] = child.text

    return metaDataStruct
            

class spacenetStacItem:
    """Assemble a STAC item dictionary for a single raster.

    Constructing an instance opens ``rasterPath`` with rasterio to derive the
    item geometry and, when ``imdPath`` is given, parses that XML metadata
    file into the item's ``properties``. The assembled item is kept in
    ``self.stac_item`` and can be saved with ``write_toJSON``.
    """

    def __init__(self, rasterPath, provider, license, idStr, assetDict, imdPath=None, links=None):
        """Build ``self.stac_item``.

        Args:
            rasterPath: Path opened with ``rasterio.open`` to compute the
                geometry.
            provider: Stored under ``properties["provider"]``.
            license: Stored under ``properties["license"]``.
            idStr: Item identifier, stored under ``"id"``.
            assetDict: Stored unchanged under ``"assets"``.
            imdPath: Optional path to the XML metadata file. When falsy, no
                ``"properties"`` entry is added.
            links: Stored unchanged under ``"links"``; defaults to a new
                empty list per instance.
        """
        self.rasterPath = rasterPath
        self.provider = provider
        self.license = license
        self.id = idStr
        self.stac_item = {
            "id": idStr,
            "type": "Feature",
            "geometry": self.calcGeometry(),
        }

        if imdPath:
            self.stac_item['properties'] = self.createProperties_EO(imdPath)

        self.stac_item['assets'] = assetDict
        # A fresh list per instance; a shared ``[]`` default would end up
        # inside every item created without explicit links.
        self.stac_item['links'] = [] if links is None else links

    def calcGeometry(self):
        """Return the raster's valid-data shape as GeoJSON in EPSG:4326.

        Every shape extracted from the dataset mask is transformed and
        printed; the last one is stored in ``self.geometry`` and returned.
        NOTE(review): if the mask produces several shapes, only the last is
        kept -- confirm that is the intended footprint.

        Raises:
            ValueError: If the mask yields no shapes at all.
        """
        # Imported here so the class does not rely on some other notebook
        # cell having imported these rasterio submodules first.
        import rasterio
        import rasterio.warp
        from rasterio import features

        geom = None
        with rasterio.open(self.rasterPath) as dataset:
            # Read the dataset's valid data mask as a ndarray.
            mask = dataset.dataset_mask()
            # Extract feature shapes and values from the array.
            for shape, _val in features.shapes(mask, transform=dataset.transform):
                # Transform shapes from the dataset's own coordinate
                # reference system to EPSG:4326.
                geom = rasterio.warp.transform_geom(
                    dataset.crs, 'EPSG:4326', shape, precision=6)

                # Print GeoJSON shapes to stdout.
                print(geom)

        if geom is None:
            raise ValueError(
                "no shapes found in dataset mask of {!r}".format(self.rasterPath))

        self.geometry = geom

        return geom

    def createAssetList(self):
        """Placeholder; not implemented."""
        pass

    def createProperties_EO(self, imdPath):
        """Parse the XML file at ``imdPath`` into the item properties dict.

        The flattened XML tags are kept in ``self.metaDataStruct`` and the
        resulting properties in ``self.eoDict``, which is also returned.
        """
        root = ET.parse(imdPath).getroot()
        # Pass an explicit empty dict so tags from a previously parsed file
        # can never carry over into this item.
        self.metaDataStruct = iterate_OverXML(
            root, recursionTagList=['IMD', 'IMAGE'], metaDataStruct={})
        self.eoDict = self.processMetaData_To_Properties(
            self.metaDataStruct, self.provider, self.license)

        return self.eoDict

    def processMetaData_To_Properties(self, metaDataStruct, provider, license):
        """Map flattened metadata tags to ``eo:``/``dg:`` property keys.

        Values are copied from ``metaDataStruct`` unchanged (no type
        conversion); ``"eo:crs"`` is always an empty string.

        Raises:
            KeyError: If a tag read below is missing from ``metaDataStruct``.
        """
        eoDict = {
            "eo:sun_azimuth": metaDataStruct['MEANSUNAZ'],
            "eo:cloud_cover": metaDataStruct['CLOUDCOVER'],
            "eo:off_nadir": metaDataStruct['MEANOFFNADIRVIEWANGLE'],
            "eo:azimuth": metaDataStruct['MEANSATAZ'],
            "eo:platform": metaDataStruct['SATID'],
            "eo:sun_elevation": metaDataStruct['MEANSUNEL'],
            "eo:gsd": metaDataStruct['MEANCOLLECTEDGSD'],
            "eo:crs": '',
            "dg:catalog_id": metaDataStruct['CATID'],
            "dg:platform": metaDataStruct['SATID'],
            "dg:product_level": metaDataStruct['PRODUCTLEVEL'],
            "datetime": metaDataStruct['FIRSTLINETIME'],
            "provider": provider,
            "license": license
            }

        return eoDict

    def write_toJSON(self, filename):
        """Write ``self.stac_item`` to ``filename`` as indented JSON."""
        with open(filename, 'w') as fp:
            json.dump(self.stac_item, fp, indent=2)
        
        
        

In [67]:
%%time
from rasterio import features
# Build a STAC item for the AOI_2 Vegas MUL-PanSharpen raster, print it, and
# save it as JSON.

# S3 location of the raster; used for the "self" link and the raster asset href.
spacenetPath = "s3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/AOI_2_Vegas_MUL-PanSharpen_Cloud.tif"
# Local path with the same file name; passed to spacenetStacItem as rasterPath.
spacenetPath2 = "/raid/nfs/data/Datasets/CosmiQ_SpaceNet_Src/AOI_2_Vegas/srcData/rasterData/AOI_2_Vegas_MUL-PanSharpen_Cloud.tif"
# Local XML metadata file; passed to spacenetStacItem as imdPath.
xmlTest = '/raid/nfs/data/Datasets/CosmiQ_SpaceNet_Src/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-P2AS-055649178030_01_P001.XML'

# Links are keyed by relation name; each entry repeats the relation in "rel".
linkDict = {"self": {"rel":"self",
                    "href":spacenetPath},
            "collection": {"rel": "collection",
                          "href": "s3://spacenet-stac/AOI_2_Catalog.json"}
           }

# Assets all point at S3 objects: the raster itself, its metadata XML, and a
# browse JPEG.
# NOTE(review): xmlTest and idStr use the "P2AS" product name while the asset
# hrefs below use "M2AS" -- confirm the mix is intended.
spacenetCogPath = spacenetPath
spacenetIMD = 's3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-M2AS-055649178030_01_P001.XML'
thumbNailPath = 's3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-M2AS-055649178030_01_P001-BROWSE.JPG'
assetDict = {"MUL-PanSharpen": {"href": spacenetCogPath},
         "imd": {"href": spacenetIMD},
         "thumbnail": {"href": thumbNailPath}
         }
stac_Item = spacenetStacItem(rasterPath=spacenetPath2, provider='DigitalGlobe', license="CC 4.0 BY-SA", 
                              idStr='15OCT22183656-P2AS-055649178030_01_P001', assetDict=assetDict, imdPath=xmlTest, links=linkDict)


# Show the assembled item, then persist it to the working directory.
print(stac_Item.stac_item)
stac_Item.write_toJSON("/raid/nfs/workingDir/AOI_2_Vegas_-stac_v1.json")



{'type': 'Polygon', 'coordinates': [[[-115.307518, 36.263959], [-115.307518, 36.121278], [-115.152608, 36.121278], [-115.152608, 36.263959], [-115.307518, 36.263959]]]}
CPU times: user 39.6 s, sys: 22.9 s, total: 1min 2s
Wall time: 1min 2s


In [75]:
%%time
from rasterio import features
# Build a STAC item for the AOI_5 Khartoum MUL-PanSharpen raster, print it,
# and save it as JSON.

# S3 location of the raster; used for the "self" link and the raster asset href.
spacenetPath = "s3://spacenet-dataset/AOI_5_Khartoum/srcData/rasterData/AOI_5_Khartoum_MUL-PanSharpen_Cloud.tif"
# Local path with the same file name; passed to spacenetStacItem as rasterPath.
spacenetPath2 = "/raid/nfs/data/Datasets/CosmiQ_SpaceNet_Src/AOI_5_Khartoum/srcData/rasterData/AOI_5_Khartoum_MUL-PanSharpen_Cloud.tif"
# Local XML metadata file; passed to spacenetStacItem as imdPath.
xmlTest = '/raid/nfs/data/Datasets/CosmiQ_SpaceNet_Src/AOI_5_Khartoum/srcData/rasterData/Khartoum_Imagery/15APR13081815-M2AS-055649178020_01_P001.XML'

# Links are keyed by relation name; each entry repeats the relation in "rel".
linkDict = {"self": {"rel":"self",
                    "href":spacenetPath},
            "collection": {"rel": "collection",
                          "href": "s3://spacenet-stac/AOI_5_Catalog.json"}
           }

# Assets all point at S3 objects: the raster itself, its metadata XML, and a
# browse JPEG.
spacenetCogPath = spacenetPath
spacenetIMD = 's3://spacenet-dataset/AOI_5_Khartoum/srcData/rasterData/Khartoum_Imagery/15APR13081815-M2AS-055649178020_01_P001.XML'
thumbNailPath = 's3://spacenet-dataset/AOI_5_Khartoum/srcData/rasterData/Khartoum_Imagery/15APR13081815-M2AS-055649178020_01_P001-BROWSE.JPG'
assetDict = {"MUL-PanSharpen": {"href": spacenetCogPath},
         "imd": {"href": spacenetIMD},
         "thumbnail": {"href": thumbNailPath}
         }
stac_Item = spacenetStacItem(rasterPath=spacenetPath2, provider='DigitalGlobe', license="CC 4.0 BY-SA", 
                              idStr='15APR13081815-M2AS-055649178020_01_P001', assetDict=assetDict, imdPath=xmlTest, links=linkDict)


# Show the assembled item, then persist it to the working directory. The
# output file is named after the Khartoum AOI this item describes.
print(stac_Item.stac_item)
stac_Item.write_toJSON("/raid/nfs/workingDir/AOI_5_Khartoum_-stac_v1.json")



{'type': 'Polygon', 'coordinates': [[[32.419148, 15.871988], [32.419148, 15.436591], [32.568782, 15.436591], [32.568782, 15.871988], [32.419148, 15.871988]]]}
{'id': '15APR13081815-M2AS-055649178020_01_P001', 'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[32.419148, 15.871988], [32.419148, 15.436591], [32.568782, 15.436591], [32.568782, 15.871988], [32.419148, 15.871988]]]}, 'properties': {'eo:sun_azimuth': '1.035000000000000e+02', 'eo:cloud_cover': '0.000000000000000e+00', 'eo:off_nadir': '2.430000000000000e+01', 'eo:azimuth': '1.143000000000000e+02', 'eo:platform': 'WV03', 'eo:sun_elevation': '6.650000000000000e+01', 'eo:gsd': '1.447000000000000e+00', 'eo:crs': '', 'dg:catalog_id': '104001000A6A1E00', 'dg:platform': 'WV03', 'dg:product_level': 'LV2A', 'datetime': '2015-04-13T08:18:08.557519Z', 'provider': 'DigitalGlobe', 'license': 'CC 4.0 BY-SA'}, 'assets': {'MUL-PanSharpen': {'href': 's3://spacenet-dataset/AOI_5_Khartoum/srcData/rasterData/AOI_5_Khartoum_MUL-P

In [71]:
# Re-print whatever item `stac_Item` currently holds and write it to the
# Vegas output file.
# NOTE(review): `stac_Item` is a notebook global set by the most recently run
# item-building cell; make sure it is the Vegas item before running this.
print(stac_Item.stac_item)
stac_Item.write_toJSON("/raid/nfs/workingDir/AOI_2_Vegas_-stac_v1.json")

{'id': '15OCT22183656-P2AS-055649178030_01_P001', 'type': 'Feature', 'geometry': {'type': 'Polygon', 'coordinates': [[[-115.307518, 36.263959], [-115.307518, 36.121278], [-115.152608, 36.121278], [-115.152608, 36.263959], [-115.307518, 36.263959]]]}, 'properties': {'eo:sun_azimuth': '1.640000000000000e+02', 'eo:cloud_cover': '0.000000000000000e+00', 'eo:off_nadir': '1.680000000000000e+01', 'eo:azimuth': '1.506000000000000e+02', 'eo:platform': 'WV03', 'eo:sun_elevation': '4.150000000000000e+01', 'eo:gsd': '3.330000000000000e-01', 'eo:crs': '', 'dg:catalog_id': '10400100137F4900', 'dg:platform': 'WV03', 'dg:product_level': 'LV2A', 'datetime': '2015-10-22T18:36:56.065137Z', 'provider': 'DigitalGlobe', 'license': 'CC 4.0 BY-SA'}, 'assets': {'MUL-PanSharpen': {'href': 's3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/AOI_2_Vegas_MUL-PanSharpen_Cloud.tif'}, 'imd': {'href': 's3://spacenet-dataset/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-M2AS-055649178030_01_P001.XML'}, '

In [12]:
# Standalone footprint computation for the raster at `spacenetPath2`.
# `profile`, `mask` and the last `geom` remain as notebook globals afterwards
# (`profile` is displayed in a separate cell).
# NOTE(review): `features` comes from `from rasterio import features` in
# another cell, and `rasterio.warp` is not imported explicitly in any visible
# cell -- confirm both are loaded before running this.
with rasterio.open(spacenetPath2) as dataset:
    profile = dataset.profile
    # Read the dataset's valid data mask as a ndarray.
    mask = dataset.dataset_mask()
    # Extract feature shapes and values from the array.
    for geom, val in features.shapes(
            mask, transform=dataset.transform):

        # Transform shapes from the dataset's own coordinate
        # reference system to EPSG:4326.
        geom = rasterio.warp.transform_geom(
            dataset.crs, 'EPSG:4326', geom, precision=6)

        # Print GeoJSON shapes to stdout.
        print(geom)



{'type': 'Polygon', 'coordinates': [[[-115.307518, 36.263959], [-115.307518, 36.121278], [-115.152608, 36.121278], [-115.152608, 36.263959], [-115.307518, 36.263959]]]}
CPU times: user 37.1 s, sys: 49.7 s, total: 1min 26s
Wall time: 1min 28s


In [11]:
from rasterio import features


In [8]:
%%time


IndentationError: unexpected indent (<unknown>, line 2)

In [3]:
rasterio.__version__

'1.0b1'

In [3]:
src.profile

NameError: name 'src' is not defined

In [7]:
profile

{'driver': 'GTiff', 'dtype': 'uint16', 'nodata': None, 'width': 57374, 'height': 52845, 'count': 8, 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(2.700000010326571e-06, 0.0, -115.30751760000003,
       0.0, -2.6999999973548978e-06, 36.2639592), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'deflate', 'interleave': 'pixel'}

In [20]:
import xml.etree.ElementTree as ET

# Parse the Vegas P2AS metadata XML and keep its root element for interactive
# inspection in the following cells.
tree = ET.parse('/raid/nfs/data/Datasets/CosmiQ_SpaceNet_Src/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-P2AS-055649178030_01_P001.XML')
root = tree.getroot()

In [14]:
root.tag

'isd'

In [39]:
import xml.etree.ElementTree as ET
xmlTest = '/raid/nfs/data/Datasets/CosmiQ_SpaceNet_Src/AOI_2_Vegas/srcData/rasterData/Vegas_Imagery/15OCT22183656-P2AS-055649178030_01_P001.XML'
def iterate_OverXML(root, recursionTagList=None, metaDataStruct=None):
    """Flatten the children of an XML element into a ``{tag: text}`` dict.

    Each direct child of ``root`` is stored as ``child.tag -> child.text``.
    Children whose tag is listed in ``recursionTagList`` are not stored
    themselves; their own children are flattened into the same dict instead.
    A tag that occurs more than once keeps the value seen last.

    Args:
        root: Iterable of element-like objects exposing ``tag`` and ``text``
            (e.g. an ``xml.etree.ElementTree.Element``).
        recursionTagList: Tags to descend into. ``None`` means no recursion.
        metaDataStruct: Optional dict to fill in place. ``None`` starts from
            a fresh dict on every call.

    Returns:
        The dict that was filled (``metaDataStruct`` itself when one was
        passed in).
    """
    # A fresh container per call: a ``{}`` default would be created once and
    # shared, so tags from one XML file would leak into the next parse.
    if recursionTagList is None:
        recursionTagList = ()
    if metaDataStruct is None:
        metaDataStruct = {}

    for child in root:
        if child.tag in recursionTagList:
            # Nested results are written into the same dict, in place.
            iterate_OverXML(child, recursionTagList, metaDataStruct)
        else:
            metaDataStruct[child.tag] = child.text

    return metaDataStruct
            
# Parse the metadata XML at `xmlTest` and flatten the root's children into a
# tag -> text dict, descending into IMD and IMAGE elements.
tree = ET.parse(xmlTest)
root = tree.getroot()
metaDataStruct = iterate_OverXML(root, recursionTagList=['IMD', 'IMAGE'])
print(metaDataStruct)
# Bare expression so the notebook also displays the dict as the cell result.
metaDataStruct

{'VERSION': '28.0', 'GENERATIONTIME': '2016-08-26T22:41:55.000000Z', 'PRODUCTORDERID': '055649178030_01_P001', 'PRODUCTCATALOGID': 'A0100102027B8500', 'CHILDCATALOGID': '20300102027B8400', 'IMAGEDESCRIPTOR': 'Standard2A', 'BANDID': 'P', 'PANSHARPENALGORITHM': 'None', 'NUMROWS': '52845', 'NUMCOLUMNS': '57374', 'PRODUCTLEVEL': 'LV2A', 'PRODUCTTYPE': 'Standard', 'NUMBEROFLOOKS': '1', 'RADIOMETRICLEVEL': 'Corrected', 'RADIOMETRICENHANCEMENT': 'Off', 'BITSPERPIXEL': '16', 'COMPRESSIONTYPE': 'None', 'OUTPUTFORMAT': 'GeoTIFF', 'BAND_P': '\n\t\t\t', 'SATID': 'WV03', 'MODE': 'FullSwath', 'SCANDIRECTION': 'Forward', 'CATID': '10400100137F4900', 'FIRSTLINETIME': '2015-10-22T18:36:56.065137Z', 'AVGLINERATE': '2.000007000000000e+04', 'EXPOSUREDURATION': '5.000000000000000e-05', 'MINCOLLECTEDROWGSD': '3.370000000000000e-01', 'MAXCOLLECTEDROWGSD': '3.370000000000000e-01', 'MEANCOLLECTEDROWGSD': '3.370000000000000e-01', 'MINCOLLECTEDCOLGSD': '3.290000000000000e-01', 'MAXCOLLECTEDCOLGSD': '3.3000000000

{'VERSION': '28.0',
 'GENERATIONTIME': '2016-08-26T22:41:55.000000Z',
 'PRODUCTORDERID': '055649178030_01_P001',
 'PRODUCTCATALOGID': 'A0100102027B8500',
 'CHILDCATALOGID': '20300102027B8400',
 'IMAGEDESCRIPTOR': 'Standard2A',
 'BANDID': 'P',
 'PANSHARPENALGORITHM': 'None',
 'NUMROWS': '52845',
 'NUMCOLUMNS': '57374',
 'PRODUCTLEVEL': 'LV2A',
 'PRODUCTTYPE': 'Standard',
 'NUMBEROFLOOKS': '1',
 'RADIOMETRICLEVEL': 'Corrected',
 'RADIOMETRICENHANCEMENT': 'Off',
 'BITSPERPIXEL': '16',
 'COMPRESSIONTYPE': 'None',
 'OUTPUTFORMAT': 'GeoTIFF',
 'BAND_P': '\n\t\t\t',
 'SATID': 'WV03',
 'MODE': 'FullSwath',
 'SCANDIRECTION': 'Forward',
 'CATID': '10400100137F4900',
 'FIRSTLINETIME': '2015-10-22T18:36:56.065137Z',
 'AVGLINERATE': '2.000007000000000e+04',
 'EXPOSUREDURATION': '5.000000000000000e-05',
 'MINCOLLECTEDROWGSD': '3.370000000000000e-01',
 'MAXCOLLECTEDROWGSD': '3.370000000000000e-01',
 'MEANCOLLECTEDROWGSD': '3.370000000000000e-01',
 'MINCOLLECTEDCOLGSD': '3.290000000000000e-01',
 'MAXC

In [24]:
# Sample provider and license strings.
# NOTE(review): presumably meant as the `provider` / `license` arguments of
# processMetaData_To_Properties defined below; this cell never passes them.
provider='DigitalGlobe'
license="'CC 4.0 SA' '(C) COPYRIGHT 2016 DigitalGlobe, Inc., Longmont CO USA 80503'"

def processMetaData_To_Properties(metaDataStruct, provider, license):
    """Map flattened metadata tags to ``eo:``/``dg:`` property keys.

    Values are copied from ``metaDataStruct`` unchanged (no type conversion).
    ``"eo:crs"`` is set to an empty string, matching the method of the same
    name on ``spacenetStacItem``.

    Args:
        metaDataStruct: Dict of metadata tag -> value containing every tag
            read below.
        provider: Stored under ``"provider"``.
        license: Stored under ``"license"``.

    Returns:
        A new dict of item properties.

    Raises:
        KeyError: If a tag read below is missing from ``metaDataStruct``.
    """
    eoDict = {
        "eo:sun_azimuth": metaDataStruct['MEANSUNAZ'],
        "eo:cloud_cover": metaDataStruct['CLOUDCOVER'],
        "eo:off_nadir": metaDataStruct['MEANOFFNADIRVIEWANGLE'],
        "eo:azimuth": metaDataStruct['MEANSATAZ'],
        "eo:platform": metaDataStruct['SATID'],
        "eo:sun_elevation": metaDataStruct['MEANSUNEL'],
        "eo:gsd": metaDataStruct['MEANCOLLECTEDGSD'],
        # No CRS value is derived from the metadata; left empty.
        "eo:crs": '',
        "dg:catalog_id": metaDataStruct['CATID'],
        "dg:platform": metaDataStruct['SATID'],
        "dg:product_level": metaDataStruct['PRODUCTLEVEL'],
        "datetime": metaDataStruct['FIRSTLINETIME'],
        "provider": provider,
        "license": license
        }

    return eoDict

In [27]:
# Print the tag, attribute dict and text of each direct child of `imgChild`.
# NOTE(review): `imgChild` is not defined in any visible cell; judging by the
# printed tags it looks like the IMD element of the metadata XML -- confirm.
for child in imgChild:
    print(child.tag, child.attrib, child.text)

VERSION {} 28.0
GENERATIONTIME {} 2016-08-26T22:41:55.000000Z
PRODUCTORDERID {} 055649178030_01_P001
PRODUCTCATALOGID {} A0100102027B8500
CHILDCATALOGID {} 20300102027B8400
IMAGEDESCRIPTOR {} Standard2A
BANDID {} P
PANSHARPENALGORITHM {} None
NUMROWS {} 52845
NUMCOLUMNS {} 57374
PRODUCTLEVEL {} LV2A
PRODUCTTYPE {} Standard
NUMBEROFLOOKS {} 1
RADIOMETRICLEVEL {} Corrected
RADIOMETRICENHANCEMENT {} Off
BITSPERPIXEL {} 16
COMPRESSIONTYPE {} None
OUTPUTFORMAT {} GeoTIFF
BAND_P {} 
			
IMAGE {} 
			
MAP_PROJECTED_PRODUCT {} 
			


In [30]:
root[0][0].tag

'VERSION'

'11'

In [5]:
5000/2

2500.0

In [6]:
5000/4

1250.0

In [7]:
512/500

1.024

In [8]:
5000/512

9.765625