## Convert the MVS Dataset Hosted on SpaceNet to Be STAC-Compliant

In [None]:
import rasterio
import shapely
import sys,os,os.path

# The rasterio Python package expects SSL certs in the CentOS location, so point curl at this system's CA bundle
os.environ['CURL_CA_BUNDLE']='/etc/ssl/certs/ca-certificates.crt'

In [None]:
from stac_tools.stac_item import spacenetStacItem


# Display labels for the image-type codes.
imtypeDict = {
    'P1BS': "Pan Band 1B",
    'A1BS': "SWIR Band 1B",
    "M1BS": "Multi-Spectral 1B",
}

# Display labels for the file extensions.
imextDict = {
    '.tar': "Meta Data Archive",
    '.NTF': 'NITF',
    '.rm': "RM metadata file",
    '.tif': "COG",
    '.vrt': "vrt",
}


def write_assetDict(df, imextDict=imextDict, imtypeDict=imtypeDict):
    """Build the asset mapping for one item from a file-summary DataFrame.

    Rows whose ``ext`` is ``'.vrt'`` are left out. Every other row becomes an
    entry keyed by its ``basename`` that holds the row's ``s3loc`` as
    ``href``, its raw extension as ``type`` and a display ``name`` joined
    from the two label tables.

    Args:
        df: DataFrame with ``basename``, ``s3loc``, ``ext`` and ``imtype``
            columns.
        imextDict: Extension -> label table.
        imtypeDict: Image-type code -> label table.

    Returns:
        dict mapping each basename to its asset description.

    Raises:
        KeyError: If a non-``.vrt`` row has an ``imtype`` or ``ext`` that is
            missing from the label tables.
    """
    assetDict = {}
    for _, record in df.iterrows():
        if record['ext'] == '.vrt':
            continue
        asset_key = record['basename']
        assetDict[asset_key] = {
            "href": record['s3loc'],
            "type": record['ext'],
            "name": imtypeDict[record['imtype']] + " " + imextDict[record['ext']],
        }
    return assetDict
    



    
    
def write_linkDict(stac_path, catalog_link="", collection_link=""):
    """Assemble the link mapping for an item.

    A ``self`` entry pointing at ``stac_path`` is always present. A
    ``collection`` entry and then a ``catalog`` entry are added when the
    corresponding argument is not the empty string.

    Args:
        stac_path: href stored in the ``self`` link.
        catalog_link: href of the catalog link, or ``""`` to leave it out.
        collection_link: href of the collection link, or ``""`` to leave it out.

    Returns:
        dict keyed by relation name; each value is ``{"rel": ..., "href": ...}``.
    """
    linkDict = {"self": {"rel": "self", "href": stac_path}}

    # Insertion order is kept as: self, collection, catalog.
    optional_links = (("collection", collection_link), ("catalog", catalog_link))
    for relation, target in optional_links:
        if target != "":
            linkDict[relation] = {"rel": relation, "href": target}

    return linkDict




def writeStac_Item(out_file, assetDF, stac_path, cog_path, thumbnail_path, imd_path=None, vrtPath=None, catalog_path='', collection_path='', idStr=None):
    """Create a SpaceNet STAC item for one image and write it to ``out_file``.

    The item's links come from ``write_linkDict`` and its assets from
    ``write_assetDict``; both are handed to ``spacenetStacItem`` together
    with the raster, IMD and VRT paths, and the result is serialised with
    ``write_toJSON``.

    Args:
        out_file: Path of the JSON file to write.
        assetDF: DataFrame of the files belonging to this item, in the form
            ``write_assetDict`` expects.
        stac_path: href used for the item's ``self`` link.
        cog_path: Raster path passed to ``spacenetStacItem`` as ``rasterPath``.
        thumbnail_path: Accepted for interface compatibility; this function
            does not use it.
        imd_path: Path passed on as ``imdPath``. When ``idStr`` is empty the
            item id is this path's file name without its extension.
            ``None`` (the default) is forwarded as an empty list.
        vrtPath: Path passed on as ``vrtPath``. ``None`` (the default) is
            forwarded as an empty list.
        catalog_path: href of the catalog link, or ``''`` for none.
        collection_path: href of the collection link, or ``''`` for none.
        idStr: Item id. When empty or ``None`` it is derived from ``imd_path``.

    Returns:
        The ``spacenetStacItem`` instance that was written.

    Raises:
        TypeError: If ``idStr`` is empty and ``imd_path`` is not a path-like
            value the id could be derived from.
    """
    # None replaces the former shared ``[]`` defaults; downstream code still
    # receives a list, but a fresh one on every call.
    if imd_path is None:
        imd_path = []
    if vrtPath is None:
        vrtPath = []

    linkDict = write_linkDict(stac_path, catalog_link=catalog_path, collection_link=collection_path)

    assetDict = write_assetDict(assetDF)

    if not idStr:
        # os.path.basename would raise an opaque TypeError on a list here;
        # raise the same exception type with an actionable message instead.
        if not isinstance(imd_path, (str, bytes, os.PathLike)):
            raise TypeError(
                "writeStac_Item needs either idStr or a path-like imd_path "
                "to derive the item id from; got imd_path={!r}".format(imd_path)
            )
        idStr = os.path.splitext(os.path.basename(imd_path))[0]

    stac_Item = spacenetStacItem(rasterPath=cog_path,
                                 provider='DigitalGlobe',
                                 license="Commercial satellite imagery in the MVS benchmark data set was provided courtesy of DigitalGlobe.",
                                 idStr=idStr,
                                 assetDict=assetDict,
                                 imdPath=imd_path,
                                 vrtPath=vrtPath,
                                 links=linkDict)

    stac_Item.write_toJSON(out_file)

    return stac_Item


In [None]:
## Collect imagery Summary

from os import walk
import os

import pandas as pd

mypath = '/raid/nfs/data/Datasets/CosmiQ_General_Study/MVS_Dataset/WV3/'

# Walk the imagery tree, recording every file path and, in parallel, the name
# of the directory it was found in.
f = []
ftype = []
for dirpath, dirnames, filenames in walk(mypath):
    folder_name = os.path.basename(dirpath)
    print(folder_name)
    f += [os.path.join(dirpath, name) for name in filenames]
    ftype += [folder_name] * len(filenames)

# Parallel columns of the summary table, one entry per file in f.
idstr_list, imtype_list = [], []
basename_list, ext_list, sloc_list = [], [], []
cell_list = {}  # left empty by this cell

for file in f:
    basename = os.path.basename(file)
    # File names are split on '-': field 0 is used as the image id and
    # field 1 as the image type. A name without '-' raises IndexError.
    basesplit = basename.split('-')
    idStr, imType = basesplit[0], basesplit[1]

    idstr_list.append(idStr)
    imtype_list.append(imType)
    basename_list.append(basename)
    ext_list.append(os.path.splitext(file)[1])
    # Swap the local dataset root for the S3 prefix to get the s3loc value.
    sloc_list.append(file.replace('/raid/nfs/data/Datasets/CosmiQ_General_Study/MVS_Dataset/', 's3://spacenet-dataset/mvs_dataset/'))

dataList = {
    'idstr': idstr_list,
    "imtype": imtype_list,
    "basename": basename_list,
    "ext": ext_list,
    "filepath": f,
    "s3loc": sloc_list,
}
df = pd.DataFrame(dataList)

In [None]:
# NOTE(review): localSave is not referenced by any cell visible in this notebook;
# the item-writing loop joins its output paths onto a different hard-coded directory.
localSave = "/raid/nfs/workingDir/spacenet-stac/mvs-dataset/"
# Select the fourth distinct image id (presumably a sample for interactive testing).
idstr = df['idstr'].unique()[3]

In [None]:
from tqdm import tqdm

# Directory the per-image STAC item JSON files are written to.
item_dir = "/raid/nfs/workingDir/dlindenbaum/spacenet-stac/spacenet-stac/mvs-dataset/"

# Values shared by every item.
catalog_path = "../mvs-dataset.json"
collection_path = "../spacenet-collections/spacenet-WV3-1B.json"
thumbnail_path = ''
imd_path = []

# Paths of the item files that were written successfully.
itemList = []

# For every image id write one item per image type: the pan band (P1BS)
# first, then the multi-spectral band (M1BS).
for idstr in tqdm(df['idstr'].unique()):
    print(idstr)
    tmpDF = df[df['idstr']==idstr]

    for imgType in ('P1BS', 'M1BS'):
        # Computed before the try so the error report always names the
        # item that actually failed.
        stac_path = "{}_{}.json".format(idstr, imgType)
        try:
            assetDF = tmpDF[tmpDF['imtype']==imgType]
            out_file = os.path.join(item_dir, stac_path)
            print(out_file)
            # The .vrt file serves as both the raster source and the vrtPath;
            # .values[0] raises IndexError when this id/type has no .vrt row.
            cog_path = assetDF[assetDF['ext']=='.vrt']['filepath'].values[0]
            vrt_path = cog_path

            writeStac_Item(out_file, assetDF, stac_path, cog_path, thumbnail_path, imd_path, vrt_path, catalog_path, collection_path, idstr)
            itemList.append(out_file)
        except Exception as err:
            # Best effort: report the failure and carry on with the next
            # item. Catching Exception rather than everything keeps
            # KeyboardInterrupt able to stop the run.
            print("ERROR: {}".format(stac_path))
            print("    {}: {}".format(type(err).__name__, err))








## Get Pan DataFrame

## Read PanVRT

## Process EO Tags

In [None]:
df['idstr'].unique()

In [None]:
# Catalog-level metadata for the dataset; a "links" entry is added to it in a
# later cell before the dictionary is written out as JSON.
catalog_Dict = {
    'name': 'Multi-View Stereo Dataset',
    "description": r"The availability of public multiple view stereo (MVS) benchmark datasets has been instrumental in enabling research to advance the state of the art in the field and to apply and customize methods to real-world problems. In this work, we provide a public benchmark data set for multiple view stereo applied to 3D outdoor scene mapping using commercial satellite imagery. This data set includes DigitalGlobe WorldView-3 panchromatic and multispectral images of a 100 square kilometer area near San Fernando, Argentina. We also provide 20cm airborne lidar ground truth data for a 20 square kilometer subset of this area and performance analysis software to assess accuracy and completeness metrics. Commercial satellite imagery is provided courtesy of DigitalGlobe, and ground truth lidar is provided courtesy of IARPA.This data supported the IARPA Multi-View Stereo 3D Mapping Challenge and is now made publicly available with no restrictions to support continued research. JHU/APL does not plan to maintain an online benchmark leaderboard, but we welcome your feedback and would love to hear about what you’re doing with the data and include your published results on this page.  SpaceNet is hosting the Multi-View Stereo 3D Mapping dataset in the spacenet repository to ensure easy access to the data.",
    "license": {
        "name": "Commercial satellite imagery in the MVS benchmark data set was provided courtesy of DigitalGlobe.",
    },
    "contact": {
        "name": "SpaceNet Team",
        "email": "@dlindenbaum",
        "url": "http://spacenetchallenge.github.io",
    },
    "formats": ["geotiff", "cog", "NITF"],
    "keywords": ["aerial", "machine-learning", "deep learning", "Stereo", "LIDAR"],
    "homepage": "https://spacenetchallenge.github.io/datasets/mvs_summary.html",
    # Where the data is hosted.
    "provider": {
        "scheme": "s3",
        "region": "us-east-1",
        "requesterPays": "false",
    },
}



In [None]:
import json

# The catalog links to itself first, then to one item file per entry in
# itemList (referenced by file name under mvs-dataset/).
linkList = [{'rel': "self", "href": "mvs-dataset.json"}]
linkList.extend(
    {'rel': "item", "href": "mvs-dataset/{}".format(os.path.basename(item_path))}
    for item_path in itemList
)

catalog_Dict["links"] = linkList

# Serialise the finished catalog.
with open("../spacenet-stac/mvs-dataset.json", 'w') as fp:
    json.dump(catalog_Dict, fp)



#https://github.com/radiantearth/stac-spec/blob/dev/json-spec/examples/digitalglobe-sample.json
    

Reference example: https://github.com/radiantearth/stac-spec/blob/dev/json-spec/examples/digitalglobe-sample.json

In [None]:
from os import walk
import os

import pandas as pd

# This cell repeats the "Collect imagery Summary" cell from earlier in the
# notebook and rebuilds the same summary DataFrame.
mypath = '/raid/nfs/data/Datasets/CosmiQ_General_Study/MVS_Dataset/WV3/'

# Walk the imagery tree, recording every file path and, in parallel, the name
# of the directory it was found in.
f = []
ftype = []
for dirpath, dirnames, filenames in walk(mypath):
    folder_name = os.path.basename(dirpath)
    print(folder_name)
    f += [os.path.join(dirpath, name) for name in filenames]
    ftype += [folder_name] * len(filenames)

# Parallel columns of the summary table, one entry per file in f.
idstr_list, imtype_list = [], []
basename_list, ext_list, sloc_list = [], [], []
cell_list = {}  # left empty by this cell

for file in f:
    basename = os.path.basename(file)
    # File names are split on '-': field 0 is used as the image id and
    # field 1 as the image type. A name without '-' raises IndexError.
    basesplit = basename.split('-')
    idStr, imType = basesplit[0], basesplit[1]

    idstr_list.append(idStr)
    imtype_list.append(imType)
    basename_list.append(basename)
    ext_list.append(os.path.splitext(file)[1])
    # Swap the local dataset root for the S3 prefix to get the s3loc value.
    sloc_list.append(file.replace('/raid/nfs/data/Datasets/CosmiQ_General_Study/MVS_Dataset/', 's3://spacenet-dataset/mvs_dataset/'))

dataList = {
    'idstr': idstr_list,
    "imtype": imtype_list,
    "basename": basename_list,
    "ext": ext_list,
    "filepath": f,
    "s3loc": sloc_list,
}
df = pd.DataFrame(dataList)

In [None]:
print(df.columns)
print(df.shape)
df.head()

In [None]:
itemList

In [None]:
# Print the image-type codes that occur in the summary table, then declare
# the label table for them.
print(df['imtype'].unique())
imtypeDict = {'P1BS': "Pan Band 1B",
              'A1BS': "SWIR Band 1B",
              "M1BS": "Multi-Spectral 1B"
             }
# Same for the file extensions.
# NOTE(review): unlike the table declared next to write_assetDict, this one has
# no '.vrt' entry. Rebinding the name here does not alter the default that
# write_assetDict captured in its signature when it was defined.
print(df['ext'].unique())
imextDict = {'.tar': "Meta Data Archive",
             '.NTF': 'NITF',
             '.rm' : "RM metadata file",
             '.tif': "COG"
            }

In [None]:
for idstr in df['idstr'].unique():
    tmpDF = df[df['idstr']==idstr]
    

In [None]:
tmpDF = df[df['idstr']==df['idstr'].unique()[0]]
print(tmpDF['filepath'].values[0])

In [None]:
idstr = df['idstr'].unique()[3]

In [None]:
# Rows of the summary table that belong to the currently selected image id.
tmpDF = df[df['idstr']==idstr]

# Create Pan Band Item
# Keep the P1BS rows and take the path of the first .vrt among them;
# .values[0] raises IndexError when the id has no P1BS .vrt row.
imDF = tmpDF[tmpDF['imtype']=='P1BS']
vrtFilePath = imDF[imDF['ext']=='.vrt']['filepath'].values[0]
print(vrtFilePath)
imDF.head()

## Get Pan DataFrame

## Read PanVRT

## Process EO Tags

In [None]:
src = rasterio.open(vrtFilePath)

In [None]:
src.meta
tags = src.tags()

In [None]:
tags['NITF_USE00A_SUN_AZ']

In [None]:
tags

In [None]:
src = rasterio.open("/raid/nfs/data/Datasets/CosmiQ_General_Study/MVS_Dataset/WV3/PAN/30JUN15WV031000015JUN30135323-P1BS-500497282080_01_P001_________AAE_0AAAAABPABP0.NTF")

In [None]:
tagtest = src.tags()

In [None]:
tagtest.keys()

In [None]:
tagtest

In [None]:
from tqdm import tqdm
import subprocess

# Directory that receives one VRT per NITF image.
cogLoc = "/raid/nfs/data/Datasets/CosmiQ_General_Study/MVS_Dataset/WV3/COG/"

# Only the NITF files are converted.
smallDF = df[df['ext']=='.NTF']

# zip() has no length, so give tqdm the row count to get a real progress bar.
for filepath, basename in tqdm(zip(smallDF['filepath'].values, smallDF['basename'].values), total=len(smallDF)):
    print(basename)
    vrt_out = os.path.join(cogLoc, os.path.splitext(basename)[0]+".vrt")
    # gdalwarp with -rpc and VRT output, written next to the other VRTs.
    cmd = ['gdalwarp', '-rpc', '-of', 'VRT', filepath, vrt_out]
    result = subprocess.run(cmd, stdout=subprocess.PIPE)
    # The loop stays best-effort, but a failed conversion is no longer silent.
    if result.returncode != 0:
        print("ERROR: gdalwarp exited with status {} for {}".format(result.returncode, basename))

In [None]:
df.head()

In [None]:
# Standard-library date/time types (the module is spelled `datetime`).
from datetime import datetime, date, time

In [None]:
# NOTE(review): date() requires year, month and day, so this call raises
# TypeError as written; date.today() or explicit arguments may have been
# intended — confirm.
d = date()

In [None]:
with rasterio.open("/raid/nfs/data/Datasets/CosmiQ_General_Study/MVS_Dataset/WV3/COG/01SEP15WV031000015SEP01135603-M1BS-500497284040_01_P001_________GA_E0AAAAAAKAAK0.vrt") as src:
    tags = src.tags()

In [None]:
tags

In [None]:
tags['NITF_STDIDC_ACQUISITION_DATE']

In [None]:
from datetime import datetime, date, time
# Parse a compact YYYYMMDDhhmmss stamp into a datetime ...
dt = datetime.strptime('20150901135603', "%Y%m%d%H%M%S")
# ... and show it in ISO 8601 form with 'T' between date and time.
dt.isoformat('T')