# MODIS DATA PROCESSING by Szymon Moliński

The algorithm works as follows:
1. Read all hdf files in the given folder and store paths to them in the Python list.
2. Process those files: extract the LST bands from them, crop the bands to the Area of Interest
and save them in a given folder as tiff files.
3. Prepare empty dictionary.
Read the saved tiff files and calculate the mean value of the Land Surface Temperature for a given ID.
Append the data to the empty dictionary in the form:
{filename: {time_t0: value_0, time_t1: value_1, ..., time_tn: value_n}}.
4. Save created dictionary into a json file.

In [1]:
# import necessary libraries

import datetime
import json
import os
import subprocess

import numpy as np
import gdal

In [2]:
# Define functions and classes used in the script

# Data reading

def get_filelist(folder, infile='', file_ending=''):
    """Function lists the entries of a given directory, optionally narrowed by a phrase
    and/or by the ending of the name.
    
    INPUT:
    folder - directory where the search is performed,
    infile - phrase which must occur in the name of an entry; an empty value disables this filter,
    file_ending - last characters of the name (usually the file type abbreviation);
    an empty value disables this filter.
    
    OUTPUT:
    output_filelist - list of names (not full paths) from the folder which meet every
    active condition, in the order returned by os.listdir."""
    
    entries = os.listdir(folder)
    phrase_required = len(infile) > 0
    suffix_required = len(file_ending) > 0
    
    # No filter is active: every entry of the directory is returned.
    if not (phrase_required or suffix_required):
        return entries
    
    def is_match(name):
        # An entry is rejected as soon as one active filter fails.
        if phrase_required and infile not in name:
            return False
        if suffix_required and not name.endswith(file_ending):
            return False
        return True
    
    return [name for name in entries if is_match(name)]

# Data retrieving

def get_modis_subdataset(filename, subdataset_number):
    """Function gets path to the specific subdataset in the hdf file.
    
    INPUT:
    filename: path to the hdf file,
    subdataset_number: int index of the specific band in the hdf stack.
    
    OUTPUT:
    subdataset: full path (GDAL subdataset name) to the desired band of the MODIS product.
    
    RAISES:
    IOError: when GDAL returns no dataset for the given file,
    IndexError: when subdataset_number is outside the list of subdatasets.
    """
    dataset = gdal.Open(filename)
    # gdal.Open may hand back None instead of raising; fail with a clear message
    # rather than with an AttributeError on the next line.
    if dataset is None:
        raise IOError('GDAL could not open the file: {}'.format(filename))
    subdatasets = dataset.GetSubDatasets()
    # The dataset handle is no longer needed once the list of subdatasets is read.
    del dataset
    # Each entry is indexed with [0] to take the subdataset path.
    return subdatasets[subdataset_number][0]

# Date retrieving

def get_date_from_filename(fname: str):
    """Function derives julian date of the MODIS scene acquisition time.
    
    The name is split on dots. The date is taken from the part which directly follows
    a part containing 'MOD', provided that it has 8 characters and starts with 'A'.
    
    INPUT:
    fname: MODIS filename or path with the MODIS filename.
    
    OUTPUT:
    julian_date: string with the Julian date in the form YYYYDDD, or the integer 0
    when no such part is found (kept for backward compatibility - callers should check it).
    """
    fname_parts = fname.split('.')
    # Pair every part with its successor. The last part has no successor, so it is
    # never looked up past the end of the list (this used to raise IndexError).
    for part, next_part in zip(fname_parts, fname_parts[1:]):
        if 'MOD' in part and len(next_part) == 8 and next_part.startswith('A'):
            return next_part[1:]
    return 0


def convert_date(julian_date: str):
    """Function converts Julian Date in the format YYYYDDD into tuple with (year, month, day)
    
    INPUT:
    julian_date - string in the form YYYYDDD.
    
    OUTPUT:
    standard_date_tuple - tuple of the (year, month, day) as int
    
    RAISES:
    ValueError - when the string does not have 7 characters or cannot be parsed as YYYYDDD.
    """
    
    # Explicit check instead of an assert: asserts are removed when Python runs with -O,
    # and a shorter string could then be parsed into a wrong date.
    if len(julian_date) != 7:
        raise ValueError('Passed string must have 7 characters in the form of YYYYDDD')
    
    standard_date = datetime.datetime.strptime(julian_date, '%Y%j').date()
    return (standard_date.year, standard_date.month, standard_date.day)

# Data clipping

def clip_dataset(input_raster, input_mask, output_filename):
    """Function uses computer terminal to perform GDAL operation of clipping an image.
    
    The gdalwarp program is started with the -cutline, -crop_to_cutline and -dstalpha
    options and the function waits until it finishes.
    
    INPUT:
    input_raster: path to the raster file which will be clipped,
    input_mask: path to the vector file (polygon) used as a clipping mask,
    output_filename: path to the output file.
    
    OUTPUT:
    return_code: exit status of the gdalwarp process (by convention 0 means success),
    so that the caller is able to detect a failed clipping."""
    command = [
        'gdalwarp',
        '-cutline', input_mask,
        '-crop_to_cutline',
        '-dstalpha',
        input_raster,
        output_filename,
    ]
    # The arguments are passed as a list (no shell), so paths with spaces need no quoting.
    return subprocess.call(command)

# LST calculation

def calculate_mean_lst(input_raster):
    """Function calculates mean LST for a MODIS raster and returns it as a Python float.
    
    Only values greater than zero (after scaling) are taken into account.
    
    INPUT:
    input_raster: path to the input tiff image.
    
    OUTPUT:
    mean_value: mean of the positive, scaled values of the first band of the image.
    """
    # NOTE(review): presumably the scale factor of the MODIS LST product - confirm
    # against the product specification.
    scaling_factor = 0.02
    
    raster = gdal.Open(input_raster)
    raw_values = raster.ReadAsArray().astype(np.float32)
    del raster
    
    # Index 0 selects the first element along the first axis of the array.
    # NOTE(review): assumes a multi-band image (e.g. LST band + alpha band) - confirm.
    scaled_band = raw_values[0] * scaling_factor
    positive_values = scaled_band[scaled_band > 0]
    return float(np.mean(positive_values))

# Save dictionary into a JSON file

def dict_to_json(temperature_dict, json_path):
    """Function writes the dictionary into the given json file and prints a confirmation.
    
    INPUT:
    temperature_dict: dictionary with the values to store,
    json_path: path to the json file (the file is opened in the write mode).
    """
    with open(json_path, 'w') as target:
        json.dump(temperature_dict, target)
    
    confirmation = 'File {} saved successfully'.format(json_path)
    print(confirmation)

In [3]:
# Base variables
# Raw data: folder with the hdf files, phrase required in the file names and file endings
data_folder = 'raw/'
infile = 'h19v03'
hdf_file_end = '.hdf'
tiff_file_end = '.tiff'

# TIFF: index of every processed band in the hdf subdataset list,
# vector mask used for clipping and folder for the clipped images
subdatasets = dict(lst_day=0, lst_night=4)
vector = 'vector/warszawa.shp'
tiff_folder = 'processed/tiffs/'

# JSON: name and folder of the output file
output_filename = 'temperature_modis.json'
json_folder = 'processed/json/'

In [4]:
# Get the list of raw hdf files which contain the required phrase in their names
filelist = get_filelist(folder=data_folder, infile=infile, file_ending=hdf_file_end)

In [5]:
# First loop - every requested band of every raw hdf image is clipped and saved as a tiff

# Make sure that the output folder exists before anything is written into it.
os.makedirs(tiff_folder, exist_ok=True)

for key in subdatasets:
    for file in filelist:
        # Strip the file extension with splitext instead of assuming that
        # it is exactly four characters long.
        tiff_filename = key + '_' + os.path.splitext(file)[0] + tiff_file_end
        tiff_path = os.path.join(tiff_folder, tiff_filename)
        raw_file_path = os.path.join(data_folder, file)
        mod_subdataset = get_modis_subdataset(raw_file_path, subdatasets[key])
        clip_dataset(mod_subdataset, vector, tiff_path)

In [6]:
# Get the list of the processed tiff files

tiffs = get_filelist(folder=tiff_folder, infile=infile, file_ending=tiff_file_end)

In [7]:
# Build dictionary which simulates a JSON file:
# {subdataset name: {'year-month': mean LST value}}

data_dicts = {}

for subdataset in subdatasets:
    data_dicts[subdataset] = {}
    for tiff in tiffs:
        # Only the tiffs with the subdataset name in the file name belong here.
        if subdataset not in tiff:
            continue
        
        # Get date; 0 means that no date was found in the file name
        file_date = get_date_from_filename(tiff)
        if not file_date:
            print('File {} skipped: no acquisition date in the file name'.format(tiff))
            continue
        converted_date = convert_date(file_date)
        # NOTE(review): the key holds the year and the month only, so two scenes from
        # the same month overwrite each other - confirm that the input is monthly.
        date_key = str(converted_date[0]) + '-' + str(converted_date[1])
        
        # Get LST value
        mean_value = calculate_mean_lst(os.path.join(tiff_folder, tiff))
        
        # Update dictionary
        data_dicts[subdataset][date_key] = mean_value

In [8]:
# Save a new JSON file

# Opening a file in the write mode does not create missing folders, so create the
# output folder first.
os.makedirs(json_folder, exist_ok=True)
dict_to_json(data_dicts, os.path.join(json_folder, output_filename))

File processed/json/temperature_modis.json saved successfully
