In [None]:
import os
import sys

In [None]:
# Resolve the directory three levels above the current working directory and
# make sure it is on the import path before importing from `seismic`
# (presumably the repository root that contains the package - confirm).
package_root = os.path.abspath(os.path.join(os.pardir, os.pardir, os.pardir))
if sys.path.count(package_root) == 0:
    sys.path.append(package_root)
from seismic.ASDFdatabase import FederatedASDFDataSet

In [None]:
# Dataset handle that is passed to the batch processing calls further down.
ASDF_INDEX_FILE = "/g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt"
ds = FederatedASDFDataSet.FederatedASDFDataSet(ASDF_INDEX_FILE)

In [None]:
def read_correlator_config(nc_file):
    '''Read the correlator settings used for given nc file.

    The settings file is looked up in the same folder as ``nc_file`` under the
    name ``correlator.<timestamp>.cfg``. ``<timestamp>`` is built from the
    dot-separated parts of the nc file's base name (extension removed),
    starting at the fifth part and excluding the last part.

    :param nc_file: Path of the .nc file whose correlator settings are wanted.
    :return: Tuple ``(settings, title_tag)``. ``settings`` is a pandas Series of
        setting values indexed by setting name, or None if the file name has
        four or fewer dot-separated parts or the settings file does not exist.
        ``title_tag`` is ``'(<fmin>-<fmax> Hz)'`` built from the ``--fmin`` and
        ``--fmax`` settings, or ``''`` when those are not available.
    '''
    import pandas as pd

    folder, fname = os.path.split(nc_file)
    base, _ = os.path.splitext(fname)
    base_parts = base.split('.')
    if len(base_parts) <= 4:
        # File name carries no timestamp component, so there is nothing to look up.
        return None, ''

    timestamp = '.'.join(base_parts[4:-1])
    config_filename = '.'.join(['correlator', timestamp, 'cfg'])
    config_file = os.path.join(folder, config_filename)
    if not os.path.exists(config_file):
        print('WARNING: Settings file {} not found!'.format(config_file))
        return None, ''

    # The first line of the file is skipped; every following line is parsed as
    # "<setting>: <value>".
    # NOTE: read_csv(squeeze=True) was removed in pandas 2.0, so the single
    # data column is selected explicitly. This works on old and new pandas.
    settings_table = pd.read_csv(config_file, sep=':', names=['setting', 'value'],
                                 index_col=0, skiprows=1, skipinitialspace=True,
                                 converters={'setting': str.strip})
    settings_df = settings_table['value']

    if '--fmin' in settings_df.index and '--fmax' in settings_df.index:
        title_tag = '({}-{} Hz)'.format(settings_df['--fmin'], settings_df['--fmax'])
    else:
        # Keep the settings that were read, but do not fail on an incomplete file.
        print('WARNING: Settings file {} has no --fmin/--fmax entries!'.format(config_file))
        title_tag = ''

    return settings_df, title_tag

In [None]:
def batch_process_xcorr(src_files, dataset, time_window, snr_threshold, save_plots=True, underlay_rcf_xcorr=False):
    '''Run the cross-correlation clock analysis plot for each file in a list.

    For every source file the matching correlator settings are looked up with
    read_correlator_config() and the file is handed to
    plot_xcorr_file_clock_analysis(). A failure on one file is recorded and
    processing continues with the next file.

    :param src_files: Sized collection of paths of the files to process.
    :param dataset: Passed through to plot_xcorr_file_clock_analysis().
    :param time_window: Passed through to plot_xcorr_file_clock_analysis().
    :param snr_threshold: Passed through to plot_xcorr_file_clock_analysis().
    :param save_plots: If True, a PNG path with the same base name as the source
        file is passed to the plotting function. Files whose PNG already
        exists, is non-empty and is newer than the source file are skipped.
    :param underlay_rcf_xcorr: Passed through to plot_xcorr_file_clock_analysis().
    :return: List of ``(src_file, error_message)`` tuples, one per file that
        could not be processed.
    '''
    import sys
    import time
    import gc
    from tqdm.auto import tqdm
    from seismic.xcorqc.xcorr_station_clock_analysis import plot_xcorr_file_clock_analysis

    PY2 = (sys.version_info[0] == 2)

    pbar = tqdm(total=len(src_files), dynamic_ncols=True)
    failed_files = []
    skipped_count = 0
    success_count = 0
    for src_file in src_files:
        _, base_file = os.path.split(src_file)
        pbar.set_description(base_file)
        # Sleep to ensure progress bar is refreshed
        time.sleep(0.2)

        try:
            # Checked explicitly (not with assert) so the check survives `python -O`
            # and a missing file is reported as a failure instead of aborting the batch.
            if not os.path.exists(src_file):
                raise IOError("File {} not found!".format(src_file))

            # Extract timestamp from nc filename if available
            settings, title_tag = read_correlator_config(src_file)

            if save_plots:
                basename, _ = os.path.splitext(src_file)
                png_file = basename + ".png"
                # If png file already exists and has later timestamp than src_file, then skip it.
                if os.path.exists(png_file):
                    src_file_time = os.path.getmtime(src_file)
                    png_file_time = os.path.getmtime(png_file)
                    png_file_size = os.stat(png_file).st_size
                    if png_file_time > src_file_time and png_file_size > 0:
                        tqdm.write("PNG file {} is more recent than source file {}, skipping!".format(
                                   os.path.split(png_file)[1], os.path.split(src_file)[1]))
                        skipped_count += 1
                        continue
                plot_xcorr_file_clock_analysis(src_file, dataset, time_window, snr_threshold, png_file=png_file,
                                               show=False, underlay_rcf_xcorr=underlay_rcf_xcorr,
                                               title_tag=title_tag, settings=settings)
            else:
                plot_xcorr_file_clock_analysis(src_file, dataset, time_window, snr_threshold,
                                               underlay_rcf_xcorr=underlay_rcf_xcorr, title_tag=title_tag,
                                               settings=settings)
            success_count += 1

        except Exception as e:
            tqdm.write("ERROR processing file {}".format(src_file))
            # Exceptions have no `.message` attribute on Python 3; str(e) works on 2 and 3.
            failed_files.append((src_file, str(e)))

        finally:
            # Runs for processed, skipped and failed files alike, so the bar
            # always reaches its total.
            pbar.update()
            # Explicit collection is only requested when running under Python 2.
            if PY2:
                gc.collect()

    pbar.close()

    print("Processed {} file(s): {} plotted, {} skipped, {} failed.".format(
          len(src_files), success_count, skipped_count, len(failed_files)))

    if skipped_count:
        print("Some files were skipped because pre-existing matching png files were up-to-date.\n"
              "Remove png files to force regeneration.")

    return failed_files

In [None]:
def batch_process_folder(folder_name, dataset, time_window, snr_threshold, save_plots=True,
                         underlay_rcf_xcorr=False):
    '''Run batch_process_xcorr() on every .nc file directly inside a folder.

    Prints how many files were found and, after processing, the files that
    failed together with their error messages (when a message is available).

    :param folder_name: Folder that is searched (non-recursively) for ``*.nc`` files.
    :param dataset: Passed through to batch_process_xcorr().
    :param time_window: Passed through to batch_process_xcorr().
    :param snr_threshold: Passed through to batch_process_xcorr().
    :param save_plots: Passed through to batch_process_xcorr().
    :param underlay_rcf_xcorr: Passed through to batch_process_xcorr().
    '''
    import glob

    # glob returns matches in arbitrary order; sort so that the processing
    # order (and therefore the progress/log output) is reproducible.
    src_files = sorted(glob.glob(os.path.join(folder_name, '*.nc')))
    print("Found {} .nc files in {}".format(len(src_files), folder_name))

    failed_files = batch_process_xcorr(src_files, dataset, time_window=time_window,
                                       snr_threshold=snr_threshold, save_plots=save_plots,
                                       underlay_rcf_xcorr=underlay_rcf_xcorr)
    if failed_files:
        print("The following files experienced errors:")
        for fname, err_msg in failed_files:
            print(" File: " + fname)
            if err_msg:
                print("Error: " + err_msg)

In [None]:
# Batch-process every .nc file in the QIS_Warramunga folder, saving the plots as PNG files.
NCFILE_PATH = '/g/data/ha3/am7399/shared/xcorr/AU/QIS_Warramunga'
batch_process_folder(
    folder_name=NCFILE_PATH,
    dataset=ds,
    time_window=600,
    snr_threshold=6,
    save_plots=True,
)

In [None]:
# Batch-process every .nc file in the MAx_MAy folder, saving the plots as PNG files.
NCFILE_PATH = '/home/547/am7399/am7399/shared/xcorr/7X/MAx_MAy'
batch_process_folder(
    folder_name=NCFILE_PATH,
    dataset=ds,
    time_window=120,
    snr_threshold=6,
    save_plots=True,
)