# This notebook is for calculating mineral indices using Landsat 8 data

In [1]:
%pylab notebook
import datacube
import xarray as xr
from datacube.storage import masking
from datacube.storage.masking import mask_to_dict
from matplotlib.backends.backend_pdf import PdfPages
from matplotlib import pyplot as plt
import matplotlib.dates
from IPython.display import display
import ipywidgets as widgets
import rasterio
from datacube.storage.storage import write_dataset_to_netcdf

Populating the interactive namespace from numpy and matplotlib


In [2]:
dc = datacube.Datacube(app='dc-show changes in annual mean NDVI values')

In [3]:
#### DEFINE SPATIOTEMPORAL RANGE AND BANDS OF INTEREST
#Use this to manually define an upper left/lower right coords
#Define temporal range
start_of_epoch = '2013-01-01'
end_of_epoch =  '2016-12-31'

#Define wavelengths/bands of interest, remove this kwarg to retrieve all bands
bands_of_interest = ['blue',
                     'green',
                     'red', 
                     'nir',
                     'swir1', 
                     'swir2'
                     ]
sensors = ['ls8'], #May 2013 to present 
query = {'time': (start_of_epoch, end_of_epoch),}


lat_max = -15.2
lat_min = -15.5
lon_max = 128.3
lon_min = 128.6  
query['x'] = (lon_min, lon_max)
query['y'] = (lat_max, lat_min)
query['crs'] = 'EPSG:4326'

In [4]:
print (query)

{'time': ('2013-01-01', '2016-12-31'), 'x': (128.6, 128.3), 'y': (-15.2, -15.5), 'crs': 'EPSG:4326'}


## PQ and Index preparation


In [5]:
##This may have been superseded 
#Group PQ by solar day to avoid idiosyncracies of N/S overlap differences in PQ algorithm performance
pq_albers_product = dc.index.products.get_by_name('ls8_pq_albers')
valid_bit = pq_albers_product.measurements['pixelquality']['flags_definition']['contiguous']['bits']

def pq_fuser(dest, src):
    valid_val = (1 << valid_bit)

    no_data_dest_mask = ~(dest & valid_val).astype(bool)
    np.copyto(dest, src, where=no_data_dest_mask)

    both_data_mask = (valid_val & dest & src).astype(bool)
    np.copyto(dest, src & dest, where=both_data_mask)

In [6]:
#Define which pixel quality artefacts you want removed from the results
mask_components = {'cloud_acca':'no_cloud',
'cloud_shadow_acca' :'no_cloud_shadow',
'cloud_shadow_fmask' : 'no_cloud_shadow',
'cloud_fmask' :'no_cloud',
'blue_saturated' : False,
'green_saturated' : False,
'red_saturated' : False,
'nir_saturated' : False,
'swir1_saturated' : False,
'swir2_saturated' : False,
'contiguous':True}

In [7]:
#Retrieve the NBAR and PQ data for sensor n
sensor_clean = {}
for sensor in sensors:
    sensor_nbar = dc.load(product= 'ls8_nbar_albers', group_by='solar_day', measurements = bands_of_interest,  **query)
    sensor_pq = dc.load(product= 'ls8_pq_albers', group_by='solar_day', fuse_func=pq_fuser, **query)
    crs = sensor_nbar.crs
    crswkt = sensor_nbar.crs.wkt
    affine = sensor_nbar.affine
    cloud_free = masking.make_mask(sensor_pq, **mask_components)
    good_data = cloud_free.pixelquality.loc[start_of_epoch:end_of_epoch]
    sensor_nbar = sensor_nbar.where(good_data)


In [8]:
sensor_nbar

<xarray.Dataset>
Dimensions:  (time: 78, x: 1336, y: 1355)
Coordinates:
  * time     (time) datetime64[ns] 2013-04-26T01:31:31 ...
  * y        (y) float64 -1.618e+06 -1.618e+06 -1.618e+06 -1.618e+06 ...
  * x        (x) float64 -4.007e+05 -4.007e+05 -4.007e+05 -4.007e+05 ...
Data variables:
    blue     (time, y, x) float64 509.0 535.0 511.0 481.0 474.0 506.0 523.0 ...
    green    (time, y, x) float64 709.0 730.0 700.0 652.0 665.0 707.0 711.0 ...
    red      (time, y, x) float64 920.0 952.0 907.0 862.0 870.0 921.0 935.0 ...
    nir      (time, y, x) float64 1.28e+03 1.299e+03 1.265e+03 1.22e+03 ...
    swir1    (time, y, x) float64 822.0 900.0 891.0 794.0 821.0 843.0 803.0 ...
    swir2    (time, y, x) float64 374.0 464.0 445.0 367.0 367.0 399.0 394.0 ...
Attributes:
    crs:      EPSG:3577

In [9]:
#Calculate annual average NDVI values
#but you'll need to update the code below here accordingly
annual_mean_iron_oxide_ratio = annual_iron_oxide_ratio.median(dim = 'time')

NameError: name 'annual_iron_oxide_ratio' is not defined

## Plotting the iron oxide ratio

In [10]:
iron_oxide_ratio = sensor_nbar.red/sensor_nbar.blue
iron_oxide_ratio_stats = iron_oxide_ratio.max(dim = 'time') #you can change .median to .min, .mean, .max
fig = plt.figure()
plt.title('Maximum  Iron Oxide ratio')
plt.imshow(iron_oxide_ratio_stats, vmin = 0, vmax = 3,
           extent=[iron_oxide_ratio_stats.coords['x'].min(), iron_oxide_ratio_stats.coords['x'].max(), 
                   iron_oxide_ratio_stats.coords['y'].min(), iron_oxide_ratio_stats.coords['y'].max()])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7f5382a9c780>

## Plotting the ferrous iron ratio

In [11]:
ferrous_iron = sensor_nbar.swir1/sensor_nbar.nir
ferrous_iron_stats = ferrous_iron.max(dim = 'time') #you can change .median to .min, .mean, .max
fig = plt.figure()
plt.title('Maximum Ferrous Iron ratio')
plt.imshow(ferrous_iron_stats, vmin = 0, vmax  = 2,
           extent=[iron_oxide_ratio_stats.coords['x'].min(), iron_oxide_ratio_stats.coords['x'].max(), 
                   iron_oxide_ratio_stats.coords['y'].min(), iron_oxide_ratio_stats.coords['y'].max()])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7f5382d8d668>

## Plotting the Clay NIR/SWIR ratio

In [12]:
clay_nirswir = sensor_nbar.nir/sensor_nbar.swir1
clay_nirswir_stats = clay_nirswir.median(dim = 'time') #you can change .median to .min, .mean, .max
fig = plt.figure()
plt.title('Maximum Clay NIR/SWIR ratio')
plt.imshow(clay_nirswir_stats, vmin = 0, vmax = 1.2,
           extent=[iron_oxide_ratio_stats.coords['x'].min(), iron_oxide_ratio_stats.coords['x'].max(), 
                   iron_oxide_ratio_stats.coords['y'].min(), iron_oxide_ratio_stats.coords['y'].max()])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7f50d14fcbe0>

## Plotting the Clay SWIR1/SWIR2 ratio

In [13]:
clay_swir1_swir2 = sensor_nbar.swir1/sensor_nbar.nir
clay_swir1_swir2_stats = clay_swir1_swir2.median(dim = 'time') #you can change .median to .min, .mean, .max
fig = plt.figure()
plt.title('Maximum Clay SWIR1/SWIR2 ratio')
plt.imshow(clay_swir1_swir2_stats, vmin = 0, vmax = 1.6,
           extent=[iron_oxide_ratio_stats.coords['x'].min(), iron_oxide_ratio_stats.coords['x'].max(), 
                   iron_oxide_ratio_stats.coords['y'].min(), iron_oxide_ratio_stats.coords['y'].max()])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7f508df9f7b8>

## Plotting the ferruginous regolith ratio

In [14]:
ferruginous_reg = sensor_nbar.nir/sensor_nbar.green
ferruginous_reg_stats = ferruginous_reg.median(dim = 'time') #you can change .median to .min, .mean, .max
fig = plt.figure()
plt.title('Maximum ferruginous regolith ratio')
plt.imshow(ferruginous_reg_stats, vmin = 0, vmax = 5,
           extent=[iron_oxide_ratio_stats.coords['x'].min(), iron_oxide_ratio_stats.coords['x'].max(), 
                   iron_oxide_ratio_stats.coords['y'].min(), iron_oxide_ratio_stats.coords['y'].max()])

<IPython.core.display.Javascript object>

<matplotlib.image.AxesImage at 0x7f504a8e6898>

## Save ratios to netCDF

In [15]:
#make a list to write the stats to netCDF
stats = [iron_oxide_ratio_stats,ferrous_iron_stats,clay_nirswir_stats,clay_swir1_swir2_stats,ferruginous_reg_stats]
stats_names = ['iron_oxide_ratio_stats','ferrous_iron_stats','clay_nirswir_stats','clay_swir1_swir2_stats','ferruginous_reg_stats']

In [16]:
#set up a loop to write each data array to dataset, add the crs to make a proper netcdf, then write our netcdf
for stat in range(len(stats)):
    #turn array into dataset so we can write the netcdf
    dataset = stats[stat].to_dataset(name=stats_names[stat])
    #grab our crs attributes to write a spatially-referenced netcdf
    dataset.attrs['crs'] =  sensor_nbar.crs
    try:
        write_dataset_to_netcdf(dataset, stats_names[stat]+'.nc')
    #complain if the file already exists but don't fail    
    except RuntimeError as err:
        print("RuntimeError: {0}".format(err))
    print('wrote ', stats_names[stat], ' to netCDF' )

RuntimeError: Storage Unit already exists: iron_oxide_ratio_stats.nc
wrote  iron_oxide_ratio_stats  to netCDF
RuntimeError: Storage Unit already exists: ferrous_iron_stats.nc
wrote  ferrous_iron_stats  to netCDF
wrote  clay_nirswir_stats  to netCDF
wrote  clay_swir1_swir2_stats  to netCDF
wrote  ferruginous_reg_stats  to netCDF


## convert netCDFs to geotiff 
### you can edit the (commented out) gdalwarp line to increase/decrease the compression, or simply run the following script to turn all your netcdf files into tiff files

In [17]:
#run this for each file - gdalwarp: resolution, output as geotiff, compress a bit,
#!gdalwarp -tr 25.0 25.0 -of GTiff -co "ZLEVEL=1" -co 'COMPRESS=DEFLATE' -co "PREDICTOR=3" [filename.nc] [filename.tif]
#or you could just do this instead:
!gdalwarp *.nc *.tif

Processing input file clay_nirswir_stats.nc.
Using internal nodata values (e.g. 9.96921e+36) for image clay_nirswir_stats.nc.
0...10...20...30...40...50...60...70...80...90...100 - done.
Processing input file clay_swir1_swir2_stats.nc.
Using internal nodata values (e.g. 9.96921e+36) for image clay_swir1_swir2_stats.nc.
0...10...20...30...40...50...60...70...80...90...100 - done.
Processing input file ferrous_iron_stats.nc.
Using internal nodata values (e.g. 9.96921e+36) for image ferrous_iron_stats.nc.
0...10...20...30...40...50...60...70...80...90...100 - done.
Processing input file ferruginous_reg_stats.nc.
Using internal nodata values (e.g. 9.96921e+36) for image ferruginous_reg_stats.nc.
0...10...20...30...40...50...60...70...80...90...100 - done.
Processing input file iron_oxide_ratio_stats.nc.
Using internal nodata values (e.g. 9.96921e+36) for image iron_oxide_ratio_stats.nc.
0...10...20...30...40...50...60...70...80...90...100 - done.
