# Analyze a cross-correlation to produce station clock correction file

Prior to using this notebook, the quality of the correction should be visualized and confirmed using the notebook `plotStationPairXcorr.ipynb`.

This notebook generates a CSV file with dates and estimated clock corrections for a given station. Applying the corrections to the original ASDF database is done separately, as a safety measure, so that any change to the ASDF data is deliberate and intentional.

In [None]:
import os
import sys
import datetime

In [None]:
import numpy as np
import scipy
import matplotlib.dates
import matplotlib.pyplot as plt
from dateutil import rrule
import pandas as pd
from scipy.interpolate import UnivariateSpline

In [None]:
import obspy

In [None]:
# Make the repository root (three directory levels up) importable so that the
# project-local `seismic` package resolves from this notebook's location.
package_root = os.path.abspath(os.path.join(os.pardir, os.pardir, os.pardir))
if package_root not in sys.path:
    sys.path.append(package_root)

from seismic.xcorqc.xcorr_station_clock_analysis import XcorrClockAnalyzer
from seismic.xcorqc.xcorr_station_clock_analysis import plot_estimated_timeshift
from seismic.xcorqc.analytic_plot_utils import timestamps_to_plottable_datetimes

In [None]:
# Cross-correlation result file (.nc) to analyze. Alternative inputs are kept below,
# commented out; exactly one SRC_FILE assignment should be active at a time.
SRC_FILE = "/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/xcorr/AU/HTT_STKA/test/AU.HTT.AU.STKA.nc"
# SRC_FILE = "/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/xcorr/7X/MA43_QIS/7X.MA43.AU.QIS.1.0-10.0.nc"
# SRC_FILE = "/g/data/ha3/Passive/SHARED_DATA/GPS_Clock/xcorr/7X/MA52_QIS/7X.MA52.AU.QIS.1.0-10.0.nc"
# Fail fast, before any analysis starts, if the selected file is missing.
assert os.path.exists(SRC_FILE), "File not found!"

In [None]:
# Derive the target station's network and station codes from the first two
# dot-separated fields of the source file name.
basename = os.path.basename(SRC_FILE)
file_type = os.path.splitext(SRC_FILE)[1]
name_parts = basename.split('.')
NETCODE, STATCODE = name_parts[0], name_parts[1]
# Combined "NET.STA" identifier, used for coefficient lookup and output file names.
FULL_CODE = '.'.join([NETCODE, STATCODE])
print("Inferred target station code: {}.{}".format(NETCODE, STATCODE))

## Load file

In [None]:
# Settings handed positionally to XcorrClockAnalyzer; their exact meaning and units
# are defined by that class.
TIME_WINDOW = 300 # +/- window; presumably seconds — TODO confirm against XcorrClockAnalyzer
SNR_THRESHOLD = 6  # NOTE(review): presumably a minimum signal-to-noise ratio — confirm
PCF_CUTOFF_THRESHOLD = 0.5  # NOTE(review): meaning of "PCF" is not visible here — confirm

# Load the cross-correlation file into the analyzer used by all following cells.
xcorr_preproc = XcorrClockAnalyzer(SRC_FILE, TIME_WINDOW, SNR_THRESHOLD, PCF_CUTOFF_THRESHOLD)

## Segment the corrections time series into coherent groups

### Perform clustering

In [None]:
# Clustering coefficients per station, keyed by "NET.STA". The entry for the target
# station is what gets passed to xcorr_preproc.do_clustering(). Values are tuned by
# hand for each station, so a new station needs its own entry added here first.
tuned_coeffs = {
    '7X.MA43': (1, 1, 20),
    '7X.MA52': (1, 5, 15),
    '7D.DA41A': (0.4, 0.3, 0.0),
    'AU.HTT': (1, 1, 0),
}

# Stop early if the target station has no coefficients yet. The message must be built
# with str.format(); the previous attribute access on the string raised AttributeError
# instead of reporting the missing station.
assert FULL_CODE in tuned_coeffs, \
    "Add new coefficients for {}, then manually tune for fit".format(FULL_CODE)

In [None]:
# Cluster the samples using the coefficients tuned for this station.
cluster_coeffs = tuned_coeffs[FULL_CODE]
ind, ids = xcorr_preproc.do_clustering(cluster_coeffs)

# Count the distinct cluster IDs, leaving out samples labelled -1.
assigned_ids = ids[ids != -1]
num_segments = len(set(assigned_ids))
print("{} clusters identified".format(num_segments))

## Plot clusters based on sample positions in time

In [None]:
# Plot the clustering result on a fresh 16x9 figure so the segmentation can be
# checked visually, then save the figure to a PNG named after the station.
ax = plt.figure(figsize=(16, 9)).gca()

xcorr_preproc.plot_clusters(ax, ids, cluster_coeffs, FULL_CODE)

fig = plt.gcf()
fig.tight_layout()
fig.autofmt_xdate()
plt.savefig("{}_clustering_profile.png".format(FULL_CODE), dpi=300)
plt.show()

## With successful segmentation, we perform regression for each cluster

In [None]:
# Fit a univariate spline per cluster. Every key 0..num_segments-1 is mapped to
# spline degree 1.
degree = {segment_id: 1 for segment_id in range(num_segments)}
regressions = xcorr_preproc.do_spline_regression(ids, degree)

### Replot with fitted line

In [None]:
# Replot on a fresh 16x9 figure, this time with the fitted regressions, then save
# the figure to a PNG named after the station.
ax = plt.figure(figsize=(16, 9)).gca()

xcorr_preproc.plot_regressors(ax, ids, regressions, FULL_CODE)

fig = plt.gcf()
fig.tight_layout()
fig.autofmt_xdate()
plt.savefig("{}_regression_profile.png".format(FULL_CODE), dpi=300)
plt.show()

## Resample regression lines at regular daily intervals

In [None]:
# Corrections are output on a regular one-day grid rather than at the source sample
# times, which might not be uniformly distributed. The result is a dict keyed by
# group ID, holding the daily spaced time values and the correction computed at each.
sec_per_day = 24 * 60 * 60
regular_corrections = xcorr_preproc.do_spline_resampling(ids, regressions, sec_per_day)

In [None]:
# Sanity check of the final daily correction values: replot them on a fresh 16x9
# figure and save the figure to a PNG named after the station.
ax = plt.figure(figsize=(16, 9)).gca()

xcorr_preproc.plot_resampled_clusters(ax, ids, regular_corrections, FULL_CODE)

fig = plt.gcf()
fig.tight_layout()
fig.autofmt_xdate()
plt.savefig("{}_clock_correction_profile.png".format(FULL_CODE), dpi=300)
plt.show()

## Output regression results to csv file

Use tabular format for ease of use and interoperability, even though there will be some redundancy of information.

In [None]:
# Build one two-column table (timestamp, clock_correction) per cluster, then stack
# them into a single DataFrame.
#
# BEWARE: The 'corrections' array sign is negated here, since the value computed up
# to this point is actually the clock *error*. Subtracting an error is the same as
# adding a correction of opposite sign.
data_blocks = [
    pd.DataFrame(
        np.column_stack([group['times'], -group['corrections']]),
        columns=['timestamp', 'clock_correction'],
    )
    for group in regular_corrections.values()
]
df = pd.concat(data_blocks)

In [None]:
# Convert each timestamp to its date via obspy.UTCDateTime, tag every row with the
# station's network and station codes, and keep only the columns written to file.
df['date'] = df['timestamp'].apply(lambda ts: obspy.UTCDateTime(ts).date)
df['net'] = NETCODE
df['sta'] = STATCODE
output_columns = ['net', 'sta', 'date', 'clock_correction']
df = df[output_columns]

In [None]:
# Output CSV is named after the station and written to the current working directory.
output_file = "{}_clock_correction.csv".format(FULL_CODE)

In [None]:
# Report the destination, then write the table without the DataFrame index column.
export_message = "Exporting corrections to file {}".format(output_file)
print(export_message)
df.to_csv(output_file, index=False)