# IMERG Final Run Data V07 Downloader and Processor

This notebook downloads IMERGv07 final run data from the NASA server, processes the data to fit a specified geographical domain, and writes the output as GeoTIFF files.

### Steps:
1. Download files from the NASA server for a specified date range.
2. Process the downloaded files for the domain and scale them.
3. Save the processed data as GeoTIFF files.

In [1]:
# Import necessary libraries
import sys
import subprocess
import os
import datetime as DT
import osgeo.gdal as gdal
from osgeo.gdal import gdalconst
from osgeo.gdalconst import GA_ReadOnly
#from gdalconst import GA_ReadOnly
#from gdalconst import *
import numpy as np

Insert the email associated with the GPM account

In [2]:

server = 'https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis'
file_prefix = '3B-HHR-L.MS.MRG.3IMERG.'
file_suffix = '.V07B.30min.tif'
# Email associated to PMM account
email = 'vrobledodelgado@uiowa.edu'

In [4]:
def main(argv):
  # Domain coordinates for West Africa
  xmin = -21.4
  xmax = 30.4
  ymin = -2.9
  ymax = 33.1

# for initial and final date, the date of the water spike is taken,and then a 5 day buffer is given (so date starts 5 days before spike and finishes 5 days after
# And initial date then starts 3 months before that, to give data for EF5 warm up period
  # Initial Date
  year_i = 2020
  month_i = 10
  day_i = 8

  # Final Date
  year_f = 2020
  month_f = 10
  day_f = 11

  # loop through the file list and get each file
  initial_date = DT.datetime(year_i,month_i,day_i,0,0,0)
  final_date = DT.datetime(year_f,month_f,day_f,0,0,0)
  delta_time = DT.timedelta(minutes=30)

  # Loop through dates
  current_date = initial_date
  acumulador_30M = 0

  while (current_date < final_date):
    initial_time_stmp = current_date.strftime('%Y%m%d-S%H%M%S')

    final_time = current_date + DT.timedelta(minutes=29)
    final_time_stmp = final_time.strftime('E%H%M59')
    final_time_gridout = current_date + DT.timedelta(minutes=30)

    folder = current_date.strftime('%Y/%m/')

    date_stamp = initial_time_stmp + '-' + final_time_stmp + '.' + '{:04d}'.format(acumulador_30M)

    filename = folder + file_prefix + date_stamp + file_suffix
    print('Downloading ' + server + '/' + filename)
    try:
        # Download from NASA server
        get_file(filename)

        # Process file for domain and to fit EF5
        # Filename has final datestamp as it represents the accumulation upto that point in time
        gridOutName = 'imerg.' + final_time_gridout.strftime('%Y%m%d%H%M') + '.tif'
        local_filename = file_prefix + date_stamp + file_suffix
        NewGrid, nx, ny, gt, proj = processIMERG(local_filename,xmin,ymin,xmax,ymax)

        # Write out processed filename
        WriteGrid(gridOutName, NewGrid, nx, ny, gt, proj)
        os.remove(local_filename)
        os.remove('./OutTemp.tif')
    except Exception as e:
        print(e)
        print(filename)

    # Advance in time
    current_date = current_date + delta_time

    # If the day changes, reset acumulador_30M
    if (acumulador_30M < 1410):
      acumulador_30M = acumulador_30M + 30
    else:
      print("New day")
      acumulador_30M = 0

def get_file(filename):
    ''' Get the given file from jsimpsonhttps using curl. '''
    url = server + '/' + filename

    cmd = 'curl -sO -u ' + email + ':' + email + ' ' + url
    args = cmd.split()

    process = subprocess.Popen(args, stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    process.wait() # wait so this program doesn't end
                        # before getting all files

def ReadandWarp(gridFile,xmin,ymin,xmax,ymax):
    #Read grid and warp to domain grid
    #Assumes no reprojection is necessary, and EPSG:4326
    rawGridIn = gdal.Open(gridFile, GA_ReadOnly)

    # Adjust grid
    pre_ds = gdal.Translate('OutTemp.tif', rawGridIn, options="-co COMPRESS=Deflate -a_nodata 29999 -a_ullr -180.0 90.0 180.0 -90.0")

    gt = pre_ds.GetGeoTransform()
    proj = pre_ds.GetProjection()
    nx = pre_ds.GetRasterBand(1).XSize
    ny = pre_ds.GetRasterBand(1).YSize
    NoData = 29999
    pixel_size = gt[1]

    #Warp to model resolution and domain extents
    ds = gdal.Warp('', pre_ds, srcNodata=NoData, srcSRS='EPSG:4326', dstSRS='EPSG:4326', dstNodata='29999', format='VRT', xRes=pixel_size, yRes=-pixel_size, outputBounds=(xmin,ymin,xmax,ymax))

    WarpedGrid = ds.ReadAsArray()
    new_gt = ds.GetGeoTransform()
    new_proj = ds.GetProjection()
    new_nx = ds.GetRasterBand(1).XSize
    new_ny = ds.GetRasterBand(1).YSize

    return WarpedGrid, new_nx, new_ny, new_gt, new_proj

def WriteGrid(gridOutName, dataOut, nx, ny, gt, proj):
    #Writes out a GeoTIFF based on georeference information in RefInfo
    driver = gdal.GetDriverByName('GTiff')
    dst_ds = driver.Create(gridOutName, nx, ny, 1, gdal.GDT_Float32, ['COMPRESS=DEFLATE'])
    dst_ds.SetGeoTransform(gt)
    dst_ds.SetProjection(proj)
    dataOut.shape = (-1, nx)
    dst_ds.GetRasterBand(1).WriteArray(dataOut, 0, 0)
    dst_ds.GetRasterBand(1).SetNoDataValue(-9999.0)
    dst_ds = None

def processIMERG(local_filename,llx,lly,urx,ury):
  # Process grid
  # Read and subset grid
  NewGrid, nx, ny, gt, proj = ReadandWarp(local_filename,llx,lly,urx,ury)

  # Scale value
  NewGrid = NewGrid*0.1

  return NewGrid, nx, ny, gt, proj

if __name__ == '__main__':
    main(sys.argv)

Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S000000-E002959.0000.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S003000-E005959.0030.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S010000-E012959.0060.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S013000-E015959.0090.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S020000-E022959.0120.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S023000-E025959.0150.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S030000-E032959.0180.V07B.30min.tif
Downloading https://

ERROR 1: TIFFFetchDirectory:3B-HHR-L.MS.MRG.3IMERG.20201008-S193000-E195959.1170.V07B.30min.tif: Can not read TIFF directory count
ERROR 1: TIFFReadDirectory:Failed to read directory at offset 722294


Received a NULL pointer.
2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S193000-E195959.1170.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S200000-E202959.1200.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S203000-E205959.1230.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S210000-E212959.1260.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S213000-E215959.1290.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S220000-E222959.1320.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg/gis/2020/10/3B-HHR-L.MS.MRG.3IMERG.20201008-S223000-E225959.1350.V07B.30min.tif
Downloading https://jsimpsonhttps.pps.eosdis.nasa.gov/imerg

KeyboardInterrupt: 

If you are using Colab, use the following cell to compress all GPM files into a .zip. Then you can download the .zip file to your local machine.

In [4]:
import os
import shutil
from zipfile import ZipFile

# Create the GPM_precip folder if it doesn't already exist
output_folder = "GPM_precip"
if not os.path.exists(output_folder):
    os.makedirs(output_folder)
    print(f"Folder '{output_folder}' created.")

# Find and move all imerg*.tif files into the GPM_precip folder
for file in os.listdir("."):
    if file.startswith("imerg") and file.endswith(".tif"):
        shutil.move(file, os.path.join(output_folder, file))
        print(f"File '{file}' moved to the folder '{output_folder}'.")

# Compress the GPM_precip folder into a ZIP file
zip_filename = f"{output_folder}.zip"
with ZipFile(zip_filename, 'w') as zipf:
    for root, _, files in os.walk(output_folder):
        for file in files:
            file_path = os.path.join(root, file)
            arcname = os.path.relpath(file_path, output_folder)
            zipf.write(file_path, arcname)
            print(f"File '{file}' added to ZIP.")

print(f"Folder '{output_folder}' compressed into '{zip_filename}'.")


Folder 'GPM_precip' created.
File 'imerg.202010100530.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010101930.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010101530.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010101500.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010100300.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010102200.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010102300.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010100700.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010110000.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010100030.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010101730.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010100430.30minAccum.tif' moved to the folder 'GPM_precip'.
File 'imerg.202010100830.30minAccum.tif' moved to the folder 'GPM_preci