### Modis Download and Processing
Script designed to process Modis images and save them one by one.

1. Find vtiles/htiles/dates for each data point
2. define windows around them, cut image
3. save images 

Potentially intended to offload eventually to azure vms

In [110]:
import pandas as pd
import geojson as gsn
from pyproj import Proj
from osgeo import gdal

import tempfile
import wget
import math
import numpy as np
import matplotlib.pyplot as plt
import os

import rioxarray as rxr
from azure.storage.blob import ContainerClient

modis_account_name = 'modissa'
modis_container_name = 'modis-006'
modis_account_url = 'https://' + modis_account_name + '.blob.core.windows.net/'
modis_blob_root = modis_account_url + modis_container_name + '/'

# This file is provided by NASA; it indicates the lat/lon extents of each
# NOTE: this was from tutorial, not actually helpful because unprojected?

modis_tile_extents_url = modis_blob_root + 'sn_bound_10deg.txt'

temp_dir = os.path.join(tempfile.gettempdir(),'modis')
os.makedirs(temp_dir,exist_ok=True)
fn = os.path.join(temp_dir,modis_tile_extents_url.split('/')[-1])
# wget.download(modis_tile_extents_url, fn)


modis_container_client = ContainerClient(account_url=modis_account_url, 
                                         container_name=modis_container_name,
                                                  credential=None)

  0% [                                                                              ]     0 / 32585 25% [...................                                                           ]  8192 / 32585 50% [.......................................                                       ] 16384 / 32585 75% [..........................................................                    ] 24576 / 32585100% [..............................................................................] 32585 / 32585

### see above TODO


In [112]:
def lat_lon_to_modis_tile(lat,lon):
    '''converts lat lon to modis tiles but reconstructing grid and its projection'''
    
    CELLS = 2400
    VERTICAL_TILES = 18
    HORIZONTAL_TILES = 36
    EARTH_RADIUS = 6371007.181
    EARTH_WIDTH = 2 * math.pi * EARTH_RADIUS

    TILE_WIDTH = EARTH_WIDTH / HORIZONTAL_TILES
    TILE_HEIGHT = TILE_WIDTH
    CELL_SIZE = TILE_WIDTH / CELLS
    
    MODIS_GRID = Proj(f'+proj=sinu +R={EARTH_RADIUS} +nadgrids=@null +wktext')
    
    x, y = MODIS_GRID(lon, lat)
    h = (EARTH_WIDTH * .5 + x) / TILE_WIDTH
    v = -(EARTH_WIDTH * .25 + y - (VERTICAL_TILES - 0) * TILE_HEIGHT) / TILE_HEIGHT
    
    return int(h), int(v)


def list_blobs_in_folder(container_name,folder_name):
    """
    List all blobs in a virtual folder in an Azure blob container
    """
    
    files = []
    generator = modis_container_client.list_blobs(name_starts_with=folder_name)
    for blob in generator:
        files.append(blob.name)
    return files
        
    
def list_hdf_blobs_in_folder(container_name,folder_name):
    """"
    List .hdf files in a folder
    """
    
    files = list_blobs_in_folder(container_name,folder_name)
    files = [fn for fn in files if fn.endswith('.hdf')]
    return files

Ingest training + testing geodata and timestamps

Note: paths are currently absolute, but happy to make them work on both machines

In [15]:
path = "C:/Users/Matt/Documents/Python Scripts/SnowComp/dat/grid_cells.geojson"
with open(path) as f:
    gj = gsn.load(f)
print(len(gj['features']))

18130


Estimate centroids for lat_lon calculations by taking mean of points (not actual centroid because of projection and great circle distance?)

In [42]:
centroids = {} #cellid : centroid
tiles = {}


for cell in range(len(gj['features'])):
    assert len(gj['features'][cell]['geometry']['coordinates'][0]) == 5 #coordinates have repeat on fifth, make sure this is universal
    
    cell_id =gj['features'][cell]['properties']['cell_id']
    centroid = list(np.mean(
        gj['features'][cell]['geometry']['coordinates'][0][0:4],
        axis = 0)) #lazy centroid calculation
    centroids[cell_id] = centroid
    
    tiles[cell_id] = lat_lon_to_modis_tile(centroid[1], centroid[0])

### Reproject from SR-ORG:6974 to EPSG:4326

Note, maybe do with xarray, src ?

In [140]:
product = 'MCD43A4'
daynum = '2014236'
folder = product + '/' + '{:0>2d}/{:0>2d}'.format(8,5) + '/' + daynum

# Find all HDF files from this tile on this day
filenames = list_hdf_blobs_in_folder(modis_container_name,folder)
print('Found {} matching file(s):'.format(len(filenames)))
for fn in filenames:
    print(fn)
file_root = filenames.copy()
    
# Work with the first returned URL
blob_name = filenames[0]

# Download to a temporary file
url = modis_blob_root + blob_name

filename = os.path.join(temp_dir,blob_name.replace('/','_'))
if not os.path.isfile(filename):
    wget.download(url,filename)

Found 1 matching file(s):
MCD43A4/08/05/2014236/MCD43A4.A2014236.h08v05.006.2016153121230.hdf


In [156]:
file_root[0] = file_root[0].replace('.', '-').replace('/','-')

In [161]:
assert len(file_root) == 1 # future these will come in multiples, not currently handled
gdal.UseExceptions()

input_raster = gdal.Open(filename)
output_raster = "C:/Users/Matt/Pictures/"+file_root[0] + "-epsg4326"+".tif" #cutoff ending, reapply
warp = gdal.Warp(output_raster, input_raster,
                 options=gdal.WarpOptions(dstSRS='EPSG:4326', format ="GTiff"))
#                  options=gdal.WarpOptions(srcSRS ='SR-ORG:6974' , dstSRS='EPSG:4326'))
warp = None # Closes the files

RuntimeError: Input file C:\Users\Matt\AppData\Local\Temp\modis\MCD43A4_08_05_2014236_MCD43A4.A2014236.h08v05.006.2016153121230.hdf has no raster bands.

### Sanity check of image

In [119]:
# print(ds.x.min())
# print(ds.y.max())

<xarray.DataArray 'x' ()>
array(-11119273.54030874)
Coordinates:
    spatial_ref  int32 0
<xarray.DataArray 'y' ()>
array(4447570.42230874)
Coordinates:
    spatial_ref  int32 0


In [137]:
ds = rxr.open_rasterio(output_raster)

norm_value = 5000
r = ds['Nadir_Reflectance_Band1'].values.squeeze() / norm_value
g = ds['Nadir_Reflectance_Band4'].values.squeeze() / norm_value
b = ds['Nadir_Reflectance_Band3'].values.squeeze() / norm_value
rgb = np.dstack((r,g,b))

np.clip(rgb,0,1,rgb)
fig = plt.figure(frameon=False); ax = plt.Axes(fig,[0., 0., 1., 1.])
ax.set_axis_off(); fig.add_axes(ax)
plt.imshow(rgb);

RasterioIOError: C:/Users/Matt/AppData/Local/Temp/modis/MCD43A4_08_05_2014236_MCD43A4.A2014236.h08v05.006.2016153121230.epsg4326.hdf: No such file or directory