In [None]:
import os
import tarfile
import rasterio
import numpy as np

# VIIRS tiles extraction
This script extracts the cloud-free tiles of high-resolution satellite images from the archives, which must first be downloaded from [Version 1 VIIRS Day/Night Band Nighttime Lights](https://ngdc.noaa.gov/eog/viirs/download_dnb_composites.html).

The process can take a while. You should let it run overnight if you need to extract every tile available on the NOAA website.

In [1]:
# Directory that must contain the downloaded archives; the tiles are extracted into it as well.
archives_dir = '../data/lightrasters_noaa/monthly/'

archnames = os.listdir(archives_dir)

print('fetched list of files to extract')

# For each archive, extract the first member whose name ends with
# 'avg_rade9h.tif' or contains 'orm_', and report archives with no such member.
for archname in archnames:
    # Skip non-.tgz entries and archives that have a '<name>.part' companion file
    # (presumably a download still in progress -- TODO confirm).
    if not archname.endswith('.tgz') or archname + '.part' in archnames:
        continue

    success = False
    with tarfile.open(os.path.join(archives_dir, archname), "r") as tar:
        for member in tar.getmembers():
            filename = os.path.basename(member.name)
            # Substring test instead of `find(...) is not -1`: comparing ints by
            # identity only works by accident of the interpreter's int caching.
            if filename.endswith("avg_rade9h.tif") or 'orm_' in filename:
                # NOTE(review): extraction uses the member's path as stored in the
                # archive; only run this on archives from a trusted source.
                tar.extract(member, archives_dir)
                print('extracted : ' + filename)
                success = True
                # only one tile per archive is wanted
                break
    if not success:
        print('failed to locate [...]avg_rade9h.tif or [...]orm_[...] in archive ' + archname)

print('done !')

fetched list of files to extract
done !


# Combine monthly tiles into yearly tiles
Since not every yearly average is available on [Version 1 VIIRS Day/Night Band Nighttime Lights](https://ngdc.noaa.gov/eog/viirs/download_dnb_composites.html), but the monthly tiles are, we need to compute the yearly composite ourselves. In any given month, some pixels have a value of 0 because there was no cloud-free observation. To avoid taking those outliers into account, we take the median of each pixel over all the months of a year instead of the mean. It's not perfect, but good enough.

Again, this process is very slow.

In [None]:
## PARAMETERS ##
# input directory (contains the monthly tiles)
monthly_dir = '../data/lightrasters_noaa/monthly/'

# output directory
yearly_dir = '../data/lightrasters_noaa/'

# width (and height) of the window : decrease it if you don't have enough memory available
window_size = 7500
################

# maps each (year, region) pair to the list of monthly tile names that belong to it
output_files = {}

for tif_name in os.listdir(monthly_dir):
    # only .tif files are considered
    if not tif_name.endswith('.tif'):
        continue

    # year and region are read at fixed offsets from the first '-' in the file name
    dash = tif_name.find('-')
    year = tif_name[dash + 1: dash + 5]
    region = tif_name[dash + 10: dash + 17]

    # group files by same year and region
    output_files.setdefault((year, region), []).append(tif_name)

print('fetched file names')

# For every (year, region) group, compute the per-pixel median of its monthly
# tiles, one window at a time, and write the result as a single yearly tile.
for (year, region), input_files in output_files.items():
    # Reuse the metadata of the first monthly tile for the output file.
    # The context manager closes the dataset even if reading its metadata fails.
    # NOTE(review): assumes every tile of a group has the same dimensions -- confirm.
    with rasterio.open(monthly_dir + input_files[0]) as reference:
        profile = reference.profile
        height, width = reference.height, reference.width

    # The data is written as float32 below, so declare that dtype in the output metadata.
    profile.update(dtype=rasterio.float32)

    # Output buffer, allocated directly as float32 (the dtype that is written),
    # which avoids a full-size float64 array plus a converted copy.
    raster = np.zeros((height, width), dtype=np.float32)

    for col_offset in range(0, width, window_size):
        # clamp the window to the raster so the read shape always matches the slice
        col_stop = min(col_offset + window_size, width)
        for row_offset in range(0, height, window_size):
            row_stop = min(row_offset + window_size, height)

            windows = []
            for input_file in input_files:
                with rasterio.open(monthly_dir + input_file) as src:
                    # window is given as ((row_start, row_stop), (col_start, col_stop))
                    windows.append(src.read(1, window=((row_offset, row_stop),
                                                       (col_offset, col_stop))))

            # compute the median of each pixel across the monthly windows, store it in the raster
            raster[row_offset:row_stop, col_offset:col_stop] = np.median(windows, axis=0)

    # write to output file
    with rasterio.open(yearly_dir + year + '_' + region + '.tif', 'w', **profile) as dst:
        dst.write(raster, 1)

    print('generated ' + year + '_' + region + '.tif')

print('done !')
