Script for downloading HiRISE RDR files from PDS
@author: Giacomo Nodjoumi - g.nodjoumi@jacobs-university.de

README

Before you start:

1) Download the HiRISE coverage files from https://ode.rsl.wustl.edu/mars/indextools.aspx
2) Extract them and import the shapefiles into GIS software
3) Search by filter or select by location, and create a new shapefile from the selection
4) Start the script and, when prompted, enter the output folder and the path to the filtered shapefile

In [4]:
import os
import pandas as pd
from tqdm import tqdm
from utils.GenUtils import chunk_creator, parallel_funcs, readGPKG
from utils.FileUtils import getFileUrl, getFile
import psutil
global dst_folder

In [6]:
def _read_or_build_url_list(gpkgDF, dst_folder, jobs):
    """Return the list of file URLs, reading the cached list or rebuilding it.

    The list is cached as ``file_urls.txt`` (one entry per line, no header)
    inside *dst_folder*. When that file cannot be read, the list is rebuilt
    from the ``FilesURL`` values of every row of *gpkgDF* whose ``ProductId``
    contains ``'RED'`` and written back to the cache.
    """
    url_list_path = os.path.join(dst_folder, 'file_urls.txt')
    try:
        return pd.read_csv(url_list_path, header=None)[0].tolist()
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        print(e)
        print('Download list not found, creating')

    # Single pass over the two columns instead of re-filtering the whole
    # frame once per product.
    download_urls = [
        url
        for product, url in zip(gpkgDF['ProductId'], gpkgDF['FilesURL'])
        if 'RED' in product
    ]

    file_urls = []
    with tqdm(total=len(download_urls),
              desc='Generating Images',
              unit='File') as pbar:
        for files in chunk_creator(download_urls, jobs):
            # NOTE(review): `ext` is not defined anywhere in the visible file;
            # unless another cell/module defines it, this call raises
            # NameError. Confirm the intended value before relying on it.
            # NOTE(review): the worker count is hard-coded to 2 here rather
            # than `jobs` - presumably deliberate; confirm.
            results = parallel_funcs(files, 2, getFileUrl, ext)
            file_urls.extend(results)
            # Advance by what was actually handled (the last chunk may be
            # shorter than the others).
            pbar.update(len(files))

    pd.DataFrame(file_urls).to_csv(url_list_path, index=False, header=False)
    return file_urls


def _append_results(proc_df, results):
    """Return *proc_df* with every item of *results* appended as new rows.

    Replaces ``DataFrame.append`` (removed in pandas 2.0). The return type of
    ``getFile`` is not visible here, so items that are not already DataFrames
    (e.g. a dict or a Series) are wrapped as a one-row frame, which is how
    ``append(..., ignore_index=True)`` treated them.
    """
    frames = [proc_df]
    for item in results:
        if isinstance(item, pd.DataFrame):
            frames.append(item)
        else:
            frames.append(pd.DataFrame([item]))
    return pd.concat(frames, ignore_index=True)


def main(gpkgDF):
    """Build the download list and fetch every file not yet processed.

    Reads the module-level ``dst_folder`` (set by the ``__main__`` block) as
    the working directory. Two bookkeeping files are kept there:

    * ``file_urls.txt``  - cached list of file URLs to download;
    * ``Processed.csv``  - table with a ``Name`` column listing the base names
      (text before the first dot) of files already handled, rewritten after
      every downloaded chunk so an interrupted run can be resumed.

    Parameters
    ----------
    gpkgDF : DataFrame-like
        Table providing at least the ``ProductId`` and ``FilesURL`` columns.
    """
    # cpu_count(logical=False) returns None when the count is undetermined.
    jobs = psutil.cpu_count(logical=False) or 1

    file_urls = _read_or_build_url_list(gpkgDF, dst_folder, jobs)

    proc_csv = os.path.join(dst_folder, 'Processed.csv')
    try:
        proc_df = pd.read_csv(proc_csv)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError) as e:
        print(e)
        print('Processed csv created')
        proc_df = pd.DataFrame(columns=['Name'])

    total = len(file_urls)
    with tqdm(total=total,
              desc='Downloading Images',
              unit='File') as pbar:
        # With nothing to download there is nothing to chunk; skipping also
        # avoids handing a zero job count to chunk_creator.
        if total > 0:
            # Same rule as before: when the rounded files-per-job figure
            # drops to zero, shrink the job count to the number of files.
            if round(total / jobs) < 1:
                jobs = total

            for files in chunk_creator(file_urls, jobs):
                # Rebuilt per chunk because proc_df grows after each download.
                done = set(proc_df['Name'])
                pending = [
                    url for url in files
                    if os.path.basename(url).split('.')[0] not in done
                ]
                if pending:
                    # Only fetch what is still missing (previously the
                    # unfiltered chunk was passed, re-downloading files).
                    results = parallel_funcs(pending, jobs, getFile, dst_folder)
                    proc_df = _append_results(proc_df, results)
                    proc_df.to_csv(proc_csv, index=False)
                # Every file of the chunk is now either downloaded or skipped.
                pbar.update(len(files))

    print('\nAll operations completed')

In [None]:
if __name__ == "__main__":
    # Script entry point: collect the two paths interactively, load the
    # table, then run the download workflow.

    # main() reads dst_folder as a module-level name, so it must be bound
    # here before main() is called.
    dst_folder = input('Path of the output folder ')

    # Path handed to readGPKG to load the table of products.
    gpkg_file = input('Path to data files folder:')
    gpkgDF = readGPKG(gpkg_file)

    main(gpkgDF=gpkgDF)