In [None]:
import os
import sys
import numpy as np
import pandas as pd
# Notebook-wide pandas display settings: show up to 200 rows, every column,
# and a 240-character wide text layout.
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_columns', None)
# None means "never truncate cell contents". The legacy value -1 was deprecated
# in pandas 1.0 and is rejected with a ValueError by pandas 2.x.
pd.set_option('display.max_colwidth', None)
pd.set_option('display.width', 240)

In [None]:
import matplotlib
import matplotlib.pyplot as plt
# Default size applied to every figure created in this notebook.
matplotlib.rcParams.update({'figure.figsize': (16.0, 9.0)})

In [None]:
import seaborn as sns

In [None]:
# Progress bar helper to indicate that slow tasks have not stalled
from tqdm.auto import tqdm

In [None]:
# Location of the picks table read by this notebook.
PICKS_PATH = r"C:\data_cache\Picks\20190219\ensemble.p.txt"
# Alternative input file (kept for reference, currently disabled):
#PICKS_PATH = r"C:\data_cache\Picks\20190219\ensemble_small.p.txt"

# Explicit column -> dtype mapping handed to the CSV reader.
_text, _real, _whole = object, np.float64, np.int64
dtype = {
    # Event/origin columns
    '#eventID': _text,
    'originTimestamp': _real,
    'mag': _real,
    'originLon': _real,
    'originLat': _real,
    'originDepthKm': _real,
    # Network / station / channel codes
    'net': _text,
    'sta': _text,
    'cha': _text,
    # Pick columns
    'pickTimestamp': _real,
    'phase': _text,
    'stationLon': _real,
    'stationLat': _real,
    'az': _real,
    'baz': _real,
    'distance': _real,
    'ttResidual': _real,
    # Quality-related columns
    'snr': _real,
    'qualityMeasureCWT': _real,
    'domFreq': _real,
    'qualityMeasureSlope': _real,
    'bandIndex': _whole,
    'nSigma': _whole,
}

In [None]:
# Load the space-delimited picks table; the first line of the file supplies the
# column names and `dtype` fixes the type of every column.
# The separator is passed by keyword: pandas 2.0+ accepts only the file path
# positionally, so the former positional ' ' raises a TypeError there.
df_picks = pd.read_csv(PICKS_PATH, sep=' ', header=0, dtype=dtype)

In [None]:
# Per-column maximum of the two quality measures.
# DataFrame.max() is used instead of np.max(frame): NumPy forwards axis=None,
# which pandas 2.0+ treats as a reduction over both axes (one scalar) rather
# than one value per column.
df_picks[['qualityMeasureCWT', 'qualityMeasureSlope']].max()

In [None]:
# Per-column minimum of the two quality measures.
# DataFrame.min() is used instead of np.min(frame): NumPy forwards axis=None,
# which pandas 2.0+ treats as a reduction over both axes (one scalar) rather
# than one value per column.
df_picks[['qualityMeasureCWT', 'qualityMeasureSlope']].min()

In [None]:
# Per-column minimum of the absolute values of the two quality measures.
# The explicit DataFrame methods keep the result per column; np.min(frame)
# forwards axis=None, which pandas 2.0+ collapses to a single scalar.
df_picks[['qualityMeasureCWT', 'qualityMeasureSlope']].abs().min()

In [None]:
# Histogram of qualityMeasureCWT over the interval [0, 25] using 50 bins.
# The trailing semicolon keeps the cell from echoing the return value.
plt.hist(df_picks['qualityMeasureCWT'], range=(0, 25), bins=50);

In [None]:
# Histogram of qualityMeasureSlope over the interval [0, 50] using 50 bins.
# The trailing semicolon keeps the cell from echoing the return value.
plt.hist(df_picks['qualityMeasureSlope'], range=(0, 50), bins=50);

In [None]:
# Generate a dataset with the zero (and negative) quality metrics removed:
# a row is kept only when both quality measures are strictly positive.
mask_nz = (df_picks[['qualityMeasureCWT', 'qualityMeasureSlope']] > 0).all(axis=1)
df_nz = df_picks.loc[mask_nz]
# Number of rows that survive the filter
df_nz.shape[0]

In [None]:
# Compute the correlation-coefficient matrix (np.corrcoef returns normalised
# correlations, not raw covariances) of the absolute values of the residual
# and quality columns, using a random subsample of the non-zero dataset.
# The sample size is capped at the number of available rows because
# DataFrame.sample raises ValueError when asked for more rows than exist
# (without replacement). The fixed seed makes the output reproducible.
df_cov = (
    df_nz[['ttResidual', 'qualityMeasureCWT', 'qualityMeasureSlope', 'nSigma']]
    .sample(min(100000, len(df_nz)), random_state=0)
)
# Column order matches the row/column order of the printed matrix
print(df_cov.columns)
# Transpose so that each row passed to corrcoef is one variable
print(np.corrcoef(np.abs(df_cov.values.T)))

In [None]:
# Keep only the picks whose quality values reach both thresholds:
# qualityMeasureCWT of at least 10 and qualityMeasureSlope of at least 2.
mask = df_picks['qualityMeasureCWT'].ge(10) & df_picks['qualityMeasureSlope'].ge(2)
df_filt = df_picks.loc[mask]
# Number of rows that pass the filter
df_filt.shape[0]

In [None]:
# Random subsample of the filtered picks, restricted to the residual and
# quality columns. The sample size is capped at the number of available rows
# because DataFrame.sample raises ValueError when asked for more rows than
# exist (without replacement). The fixed seed makes re-runs reproducible.
df = (
    df_filt[['ttResidual', 'qualityMeasureCWT', 'qualityMeasureSlope', 'nSigma']]
    .sample(min(10000, len(df_filt)), random_state=0)
)

In [None]:
# Scatter of log2(qualityMeasureCWT) against log2(qualityMeasureSlope),
# coloured by the absolute ttResidual.
# x and y are passed by keyword: seaborn 0.12+ made them keyword-only, so the
# former positional form raises a TypeError there.
sns.scatterplot(x=np.log2(df['qualityMeasureCWT']),
                y=np.log2(df['qualityMeasureSlope']),
                hue=df['ttResidual'].abs(), palette='jet', alpha=0.1)

In [None]:
# ttResidual (x) against qualityMeasureCWT (y); the low alpha lets dense
# regions show up darker. The view is clipped to x in [-50, 50], y in [0, 100].
plt.scatter(x=df['ttResidual'], y=df['qualityMeasureCWT'], alpha=0.1)
plt.xlabel('ttResidual')
plt.xlim(-50, 50)
plt.ylim(0, 100)
plt.ylabel('CWT')

In [None]:
# ttResidual (x) against qualityMeasureSlope (y); the low alpha lets dense
# regions show up darker. The view is clipped to x in [-50, 50], y in [0, 50].
plt.scatter(x=df['ttResidual'], y=df['qualityMeasureSlope'], alpha=0.1)
plt.xlabel('ttResidual')
plt.xlim(-50, 50)
plt.ylim(0, 50)
plt.ylabel('Slope')

In [None]:
# ttResidual (x) against nSigma (y), drawn with a very low alpha (0.02).
# The view is clipped to x in [-50, 50], y in [0, 10].
plt.scatter(x=df['ttResidual'], y=df['nSigma'], alpha=0.02)
plt.xlabel('ttResidual')
plt.xlim(-50, 50)
plt.ylim(0, 10)
plt.ylabel('nSigma')