# Plot network location map based on ASDF database

In [None]:
import os
import sys
import datetime
from collections import defaultdict
import json

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Put the directory three levels up on the import path; presumably this is the
# repository root that makes the `seismic` package importable.
package_root = os.path.abspath('../../..')
if package_root not in sys.path:
    sys.path.append(package_root)
from seismic.ASDFdatabase import FederatedASDFDataSet
from seismic.xcorqc.analytic_plot_utils import drawBBox
import obspy
from obspy.geodetics import locations2degrees

In [None]:
from mpl_toolkits.basemap import Basemap

In [None]:
# Open the federated ASDF database through its index file.
# NOTE: hard-coded absolute path; adjust it when running in another environment.
ds = FederatedASDFDataSet.FederatedASDFDataSet("/g/data/ha3/Passive/SHARED_DATA/Index/asdf_files.txt")

In [None]:
# Latitude/longitude bounding box used to restrict plots to the AU mainland
au_min_lat = -40
au_max_lat = -10
au_min_lon = 110
au_max_lon = 155

In [None]:
# Station coordinates from the database. The code consuming this treats it as a
# mapping from 'net.sta' code to a (lon, lat) pair.
coords = ds.unique_coordinates

In [None]:
# Re-key the flat 'net.sta' index as nested [net][sta] dictionaries.
# db_coords keeps the time range objects as returned by the database, whereas
# db_coords_str holds their string form so that it can be serialized to JSON.
db_coords = defaultdict(dict)
db_coords_str = defaultdict(dict)
for fullcode, lonlat in coords.items():
    net, sta = fullcode.split('.')
    lon, lat = lonlat
    time_range = ds.get_global_time_range(net, sta)
    start_time, end_time = time_range
    db_coords[net][sta] = (lat, lon, start_time, end_time)
    db_coords_str[net][sta] = (lat, lon, str(start_time), str(end_time))
# end for

In [None]:
# Save the string-valued station dictionary as pretty-printed JSON with sorted keys.
with open('station_dict.json', 'w') as json_file:
    json_file.write(json.dumps(db_coords_str, indent=4, sort_keys=True))

In [None]:
def get_network_mean_location(df, netcode):
    """
    Compute the centroid (mean latitude and mean longitude) of the stations of one network.

    Note that the columns read here are named 'latitude' and 'longitude', together with
    'net' for the network code.

    :param df: Station table with columns 'net', 'latitude' and 'longitude'
    :type df: pandas.DataFrame
    :param netcode: Code of the network whose stations are averaged
    :type netcode: str
    :return: Mean (latitude, longitude) of the network's stations; NaN values if the
        network has no rows in df
    :rtype: tuple(float, float)
    """
    net_rows = df.loc[df['net'] == netcode]
    return (net_rows['latitude'].mean(), net_rows['longitude'].mean())

In [None]:
# Some select networks require a custom date filter to remove station records
# whose start time precedes the known valid date range of the network. This
# filters out network codes from earlier periods which are not of interest.
def applyCustomerDateFilter(df):
    """
    Drop records of selected networks whose start time precedes a per-network cutoff date.

    The cutoff is compared against 'start_time' as a POSIX timestamp. The number of removed
    records is printed.

    :param df: Station table with columns 'net' and 'start_time'
    :type df: pandas.DataFrame
    :return: Table without the records that start before their network's cutoff
    :rtype: pandas.DataFrame
    """
    earliest_valid = {
        '7D': pd.Timestamp(datetime.datetime(2010, 1, 1)),
        '7G': pd.Timestamp(datetime.datetime(2010, 1, 1)),
    }
    filtered = df
    for net_code, cutoff in earliest_valid.items():
        too_early = filtered['start_time'] < cutoff.timestamp()
        # Keep a record unless it is both in this network and too early.
        keep = (filtered['net'] != net_code) | ~too_early
        filtered = filtered[keep]
    num_removed = len(df) - len(filtered)
    print('Removed {} stations due to known invalid timestamps'.format(num_removed))
    return filtered

In [None]:
# Flatten the nested [net][sta] dictionary into one record per station:
# [net, sta, lat, lon, start_time, end_time]
all_codes = [
    [net_code, sta_code, *station_record]
    for net_code, net_stations in db_coords.items()
    for sta_code, station_record in net_stations.items()
]

In [None]:
# Build the table straight from the list of records. Routing the mixed-type records
# through np.array() forces a single dtype onto every column (so lat/lon lose their
# numeric dtype) and fails for an empty record list.
df_asdf = pd.DataFrame(all_codes, columns=['net', 'sta', 'lat', 'lon', 'start_time', 'end_time'])
df_asdf.head()

In [None]:
# Drop records of selected networks that start before the network's cutoff date.
df_asdf = applyCustomerDateFilter(df_asdf)

In [None]:
# Inspect the AQ records with the latest end times (context for the hack that
# fixes the date error in AQ metadata).
df_asdf.loc[df_asdf['net'] == 'AQ'].sort_values('end_time', ascending=False).iloc[0:5]

In [None]:
# Earlier draft of the AQ end time patch, kept for reference:
# df_asdf.loc[(df_asdf['net'] == 'AQ') & (df_asdf['sta'] == 'F7'), 'end_time'] = \
#     df_asdf.loc[(df_asdf['net'] == 'AQ') & (df_asdf['sta'] == 'AQT3'), 'end_time']
# Show the end time of AQ.AQT3, which serves as the replacement value for station F7.
df_asdf.loc[(df_asdf['net'] == 'AQ') & (df_asdf['sta'] == 'AQT3'), 'end_time'].values[0]

In [None]:
# Apply hack to fix date error in AQ metadata: overwrite the end time of AQ.F7 with
# that of AQ.AQT3. The mask is restricted to network AQ so that a station named 'F7'
# in any other network is left untouched.
aq_f7_mask = (df_asdf['net'] == 'AQ') & (df_asdf['sta'] == 'F7')
aqt3_end_times = df_asdf.loc[(df_asdf['net'] == 'AQ') & (df_asdf['sta'] == 'AQT3'), 'end_time']
if aqt3_end_times.empty:
    print("WARNING: No record for AQ.AQT3, end time of AQ.F7 left unchanged!")
else:
    df_asdf.loc[aq_f7_mask, 'end_time'] = aqt3_end_times.values[0]
# Show the patched value(s) for checking.
df_asdf.loc[aq_f7_mask, 'end_time']

In [None]:
# Check which of these network codes are present in the table.
# NOTE: `x in series` tests the Series *index* rather than its values, so membership
# is tested against the set of unique network codes instead.
known_nets = set(df_asdf['net'].unique())
tuple(code in known_nets for code in ('GE', 'IR', 'IU', 'S'))

In [None]:
def plot_basemap_stations(dest_map, labels, lat, lon, size=50, color='g', label_rotation=0, label_font_size=9):
    """
    Draw stations as triangle markers on a map and annotate them with text labels.

    Each label is placed at a small fixed offset (+0.02 in lon, -0.1 in lat) from
    the station with the same position in lat/lon.

    :param dest_map: Map to draw on; must provide scatter() and be callable to convert
        (lon, lat) to plot coordinates
    :param labels: Label text per station; may be shorter than lat/lon or empty
    :param lat: Station latitudes, indexable by position
    :param lon: Station longitudes, indexable by position
    :param size: Marker size passed to scatter
    :param color: Marker color passed to scatter
    :param label_rotation: Rotation of the label text
    :param label_font_size: Font size of the label text
    :return: Handle of the scatter plot and list of the annotation handles
    :rtype: tuple
    """
    scatter_handle = dest_map.scatter(lon, lat, size, latlon=True, marker='v', c=color,
                                      edgecolor='none', alpha=0.8)
    annotation_handles = [
        plt.annotate(text, xy=dest_map(lon[idx] + 0.02, lat[idx] - 0.1),
                     fontsize=label_font_size, rotation=label_rotation)
        for idx, text in enumerate(labels)
    ]
    return scatter_handle, annotation_handles

In [None]:
def _collapse_array_labels(labels, array_codes, array_name):
    """
    Replace the labels of all stations of a dense array by a single shared label.

    The first member station (in order of appearance) is labelled with array_name and all
    other members get an empty label, so that the map is not cluttered by overlapping text.

    :param labels: Station codes used as labels. Modified in place.
    :type labels: pandas.Series
    :param array_codes: Station codes of the array members
    :type array_codes: list(str)
    :param array_name: Label shown once for the whole array
    :type array_name: str
    :return: The modified labels
    :rtype: pandas.Series
    """
    is_member = labels.isin(array_codes).values
    if is_member.any():
        first = np.flatnonzero(is_member)[0]
        labels.iloc[is_member] = ''
        labels.iloc[first] = array_name
    return labels


def plot_basemap_networks(df, label_stations=True, title=None, show_inset=True, show_legend=True,
                          label_rotation=0, label_font_size=9, marker_size=50,
                          axis_labeling=(True, True, True, True)):
    """
    Plot the stations of df on a map of their surroundings, one color per network.

    Only stations within the AU mainland bounding box (module level au_min_lat, au_max_lat,
    au_min_lon, au_max_lon) are plotted. The map extent is the bounding box of the remaining
    stations plus a margin. A new 16x16 figure is created on every call.

    :param df: Station table with columns 'net', 'sta', 'lat' and 'lon'
    :type df: pandas.DataFrame
    :param label_stations: Whether to annotate stations with their station code. Members of
        the Warramunga, Alice Springs and Pilbara arrays share one label per array.
    :type label_stations: bool
    :param title: Plot title, or None for no title
    :type title: str or None
    :param show_inset: Whether to draw an inset of the continent with the map extent marked
    :type show_inset: bool
    :param show_legend: Whether to draw a legend of network codes
    :type show_legend: bool
    :param label_rotation: Rotation of station labels
    :param label_font_size: Font size of station labels
    :param marker_size: Size of station markers
    :param axis_labeling: Flags for labeling the [left, right, top, bottom] axes
    :type axis_labeling: sequence of 4 bool
    :return: The map that the stations were plotted on
    :rtype: mpl_toolkits.basemap.Basemap
    :raises AssertionError: If no station with valid coordinates lies within the AU mainland
    """
    # Large array station IDs, paired with the label shown once per array
    warramunga_array = ['WB0', 'WB1', 'WB10', 'WB2', 'WB3', 'WB4', 'WB5', 'WB6', 'WB7', 'WB8', 'WB9',
                        'WC1', 'WC2', 'WC3', 'WC4', 'WR0', 'WR1', 'WR10', 'WR2', 'WR3', 'WR4', 'WR5',
                        'WR6', 'WR7', 'WR8', 'WR9']
    alice_array = ['AS01', 'AS02', 'AS03', 'AS04', 'AS05', 'AS06', 'AS07', 'AS08', 'AS09', 'AS10',
                   'AS11', 'AS12', 'AS13', 'AS14', 'AS15', 'AS16', 'AS17', 'AS18', 'AS19', 'AS31']
    pilbara_array = ['PSA00', 'PSAA1', 'PSAA2', 'PSAA3', 'PSAB1', 'PSAB2', 'PSAB3',
                     'PSAC1', 'PSAC2', 'PSAC3', 'PSAD1', 'PSAD2', 'PSAD3']
    dense_arrays = (
        (warramunga_array, 'Warramunga\nArray'),
        (alice_array, 'Alice Springs\nArray'),
        (pilbara_array, 'Pilbara\nArray'),
    )

    # Limit to mainland
    mainland_mask = (df['lat'] >= au_min_lat) & (df['lat'] <= au_max_lat) & \
                    (df['lon'] >= au_min_lon) & (df['lon'] <= au_max_lon)
    df = df.loc[mainland_mask]

    min_lat, max_lat = df['lat'].min(), df['lat'].max()
    min_lon, max_lon = df['lon'].min(), df['lon'].max()
    # The extrema are NaN if no station is left after the mainland filter.
    for bound in (min_lat, max_lat, min_lon, max_lon):
        assert not np.isnan(bound), 'No stations with valid coordinates within AU mainland'

    # Plot stations of network
    fig = plt.figure(figsize=(16, 16))

    # Margin around the stations: 10% of the larger extent, but at least 0.8 degrees
    latlon_margin = max(0.8, max((max_lat - min_lat)/10, (max_lon - min_lon)/10))
    m = Basemap(llcrnrlon=min_lon - latlon_margin, llcrnrlat=min_lat - latlon_margin,
                urcrnrlon=max_lon + latlon_margin, urcrnrlat=max_lat + latlon_margin,
                projection='lcc', resolution='i',
                lat_1=min_lat, lat_2=max_lat,
                lat_0=(min_lat + max_lat) * 0.5, lon_0=(min_lon + max_lon) * 0.5)

    m.drawcoastlines(color='#a0a0a0', linewidth=2, zorder=0)
    state_border_color = "#9090ff"
    m.drawstates(color=state_border_color, linewidth=1.5, zorder=0)

    # Draw grid with one line per degree, extending 5 degrees beyond the station extent
    parallels = np.linspace(np.floor(min_lat) - 5, np.ceil(max_lat) + 5,
                            int((np.ceil(max_lat) + 5) - (np.floor(min_lat) - 5)) + 1)
    m.drawparallels(parallels, labels=[axis_labeling[0], axis_labeling[1], False, False], color="#a0a0a0")
    meridians = np.linspace(np.floor(min_lon) - 5, np.ceil(max_lon) + 5,
                            int((np.ceil(max_lon) + 5) - (np.floor(min_lon) - 5)) + 1)
    m.drawmeridians(meridians, labels=[False, False, axis_labeling[2], axis_labeling[3]], rotation=45,
                    color="#a0a0a0")
    main_ax = fig.gca()

    # Plot stations, cycling through the 10 colors 'C0'..'C9' by network
    color_index = 0
    legend_labels = []
    handles = []
    text_handles = []
    for net, df_net in df.groupby('net'):
        colcode = "C{}".format(color_index)
        legend_labels.append(net)
        if label_stations:
            # Work on a copy so that the station codes of the table are not overwritten.
            labels = df_net['sta'].copy()
            for array_codes, array_name in dense_arrays:
                labels = _collapse_array_labels(labels, array_codes, array_name)
            labels = labels.values
        else:
            labels = []
        sc_h, text_h = plot_basemap_stations(m, labels, df_net['lat'].values,
                                             df_net['lon'].values, color=colcode,
                                             size=marker_size,
                                             label_rotation=label_rotation,
                                             label_font_size=label_font_size)
        handles.append(sc_h)
        text_handles.extend(text_h)
        color_index = (color_index + 1) % 10
    if show_legend:
        plt.legend(handles, legend_labels, title='Network code')

    if title is not None:
        plt.title(title, fontsize=18, y=1.05)

    # Draw inset of entire continent
    if show_inset:
        inset_ax = fig.add_axes([0.75, 0.77, 0.100, 0.100])
        inset = Basemap(resolution='c', ax=inset_ax, projection='merc',
                        lat_0=-20, lon_0=132,
                        llcrnrlon=au_min_lon, llcrnrlat=au_min_lat,
                        urcrnrlon=au_max_lon, urcrnrlat=au_max_lat)
        inset.fillcontinents(color='lightgray')
        inset.drawstates(color=state_border_color)
        # NOTE(review): fill is passed as the string 'True' rather than a bool; kept as is,
        # verify against the signature of drawBBox.
        drawBBox(min_lon, min_lat, max_lon, max_lat, inset, fill='True', facecolor='#40404080',
                 linestyle=':', edgecolor='#404040')

    plt.sca(main_ax)
    # Can be slow, so comment out for prototyping. Nothing to adjust without labels.
    if text_handles:
        adjust_text(text_handles, arrowprops=dict(arrowstyle='-', color='red'))
    return m

# Plot Australian permanent network

In [None]:
# Include records from GE, IU, etc...
from seismic.gps_corrections.relative_tt_residuals_plotter import determine_alternate_matching_codes
IRIS_AU_STATIONS_FILE = "../AU_irisws-fedcatalog_20190305T012747Z.txt"
# The lookup is given a renamed copy of the table; df_asdf itself keeps 'lat' and 'lon'.
name_remap = {'lat': 'stationLat', 'lon': 'stationLon'}
df_copy = df_asdf.rename(columns=name_remap)
other_nets, other_stats = determine_alternate_matching_codes(df_copy, IRIS_AU_STATIONS_FILE, 'AU')
other_dict = {'net': list(other_nets), 'sta': list(other_stats)}
# NOTE(review): network and station codes are matched independently of each other here,
# not as (net, sta) pairs.
extra_AU_stations_mask = df_asdf[['net', 'sta']].isin(other_dict).all(axis=1)
df_extra_stations = df_asdf.loc[extra_AU_stations_mask]
display(df_extra_stations)

In [None]:
from adjustText import adjust_text

# Select the AU network itself
mask_au = (df_asdf['net'] == 'AU')

# Include schools: stations of network 'S' whose station code starts with 'AU'
SIS_NET = 'S'
mask_sis = (df_asdf['net'] == SIS_NET)
SIS_CODES = sorted(code for code in df_asdf.loc[mask_sis, 'sta'].unique() if code.startswith('AU'))
mask_sis = mask_sis & df_asdf['sta'].isin(SIS_CODES)

# Combine with the matching stations recorded under other network codes
permanent_parts = [df_asdf.loc[mask_au | mask_sis], df_extra_stations]
df_AU = pd.concat(permanent_parts, sort=False)

# Plot map with a scale bar and save it
m = plot_basemap_networks(df_AU, title="Australian permanent network", show_inset=False)
m.drawmapscale(129, -37, 135, -25, 1000.0, barstyle='fancy', fillcolor2='#808080',
               linecolor='#808080', yoffset=40 * 1000)

plt.savefig("PERMANENT_deployments.png", dpi=300)
plt.show()

In [None]:
# Write the date ranges of the permanent stations, sorted by network and station
# code, as an HTML table.
df_code_sorted = df_AU.sort_values(['net', 'sta'])
date_range_columns = ['net', 'sta', 'start_time', 'end_time']
with open('PERMANENT_date_ranges.html', 'w') as html_file:
    df_code_sorted.to_html(html_file, columns=date_range_columns, index=False)

# Australian temporary network deployments

In [None]:
# Network codes of the temporary deployments
temp_deploys = [
    '7B', '7D', '7E', '7F', '7G', '7J', '7K', '7W', '7X',
    'AQ',
    'OA',
]

In [None]:
# Column name -> accepted values, in the dictionary form that DataFrame.isin takes.
TARGET_NETS = {'net': temp_deploys}

In [None]:
from adjustText import adjust_text

# One map per temporary deployment, captioned with the overall date range of its stations
for target_net in TARGET_NETS['net']:
    df_net = df_asdf.loc[df_asdf['net'] == target_net]
    if df_net.empty:
        print("WARNING: No data for network {}, skipping!".format(target_net))
    else:
        plot_basemap_networks(df_net, title="Deployment Name: {}".format(target_net),
                              show_legend=False, label_font_size=12, marker_size=80)
        start_time = df_net['start_time'].min()
        end_time = df_net['end_time'].max()
        date_caption = "Deployment date range: {} -- {}".format(start_time, end_time)
        plt.text(0.01, 0.01, date_caption, transform=plt.gca().transAxes, fontsize=12)
        plt.savefig('{}_map.png'.format(target_net), dpi=300)
        plt.show()

In [None]:
# Single unlabelled map of all temporary deployments together
target_mask = df_asdf['net'].isin(TARGET_NETS['net'])
df_target = df_asdf.loc[target_mask]

plot_basemap_networks(df_target, title="Temporary Deployments", label_stations=False,
                      show_inset=False)
plt.savefig("TEMP_deployments.png", dpi=300)
plt.show()

In [None]:
def find_nearest_time_overlapping(df, netcode, statcode, num=1, full_overlap=False):
    """
    Find the stations nearest to station netcode.statcode which belong to a different
    network and whose date range overlaps that of the target station.

    :param df: Station table with columns 'net', 'sta', 'lat', 'lon', 'start_time' and
        'end_time'. It is not modified.
    :type df: pandas.DataFrame
    :param netcode: Network code of the target station
    :type netcode: str
    :param statcode: Station code of the target station
    :type statcode: str
    :param num: Maximum number of stations to return
    :type num: int
    :param full_overlap: If True, the date range of a candidate must fully contain that of
        the target station. Otherwise any overlap is sufficient.
    :type full_overlap: bool
    :return: Up to num rows of df with an added 'distance' column (in degrees), sorted by
        ascending distance. An empty table without columns if there is no candidate.
    :rtype: pandas.DataFrame
    :raises IndexError: If netcode.statcode is not present in df
    """
    not_target_mask = (df['net'] != netcode)
    target_mask = (df['net'] == netcode) & (df['sta'] == statcode)
    # First matching record of the target station
    target = df.loc[target_mask].iloc[0]
    target_lat, target_lon = target['lat'], target['lon']
    start_time, end_time = target['start_time'], target['end_time']
    if full_overlap:
        time_mask = (df['start_time'] <= start_time) & (df['end_time'] >= end_time)
    else:
        time_mask = (df['start_time'] <= end_time) & (df['end_time'] >= start_time)
    # Copy, as a column is added below and must not be written to a slice of df.
    df_filt = df.loc[not_target_mask & time_mask].copy()
    if df_filt.empty:
        return pd.DataFrame()
    # Distance is in degrees
    df_filt['distance'] = df_filt.apply(lambda r: locations2degrees(
        target_lat, target_lon, r['lat'], r['lon']), axis=1)
    df_sorted = df_filt.sort_values('distance')
    return df_sorted.iloc[0:num]

In [None]:
# For each temporary deployment, find candidate reference stations: nearby stations of
# non-temporary networks which overlap in time with the deployment.
for target_net in TARGET_NETS['net']:
    df_net = df_asdf.loc[df_asdf['net'] == target_net]
    per_station = (find_nearest_time_overlapping(df_asdf, target_net, sta_code, num=20)
                   for sta_code in df_net['sta'])
    nearest = [found for found in per_station if not found.empty]
    if not nearest:
        print("WARNING: No results for network {}!".format(target_net))
        continue
    nearest = pd.concat(nearest, sort=False).sort_values('distance')
    # NOTE(review): stations of other temporary deployments are removed only after the
    # 20 nearest per station have been selected, so fewer than 20 may remain per station.
    nearest = nearest.loc[~nearest['net'].isin(temp_deploys)]
    # The records are sorted by distance and the first duplicate is kept, so 'distance'
    # is the shortest distance to some station in the network.
    nearest = nearest.drop_duplicates(['net', 'sta'])
    print("Nearest overlapping neighbours for network {}:".format(target_net))
    display(nearest.iloc[0:10])
    nearest.to_csv('{}_REF_neighbours.csv'.format(target_net), index=False, encoding='utf-8')

    # Combine temp network records with candidate reference stations for plotting to map.
    df_combined = pd.concat([df_net, nearest], sort=False)
    plot_basemap_networks(df_combined, title="Deployment Name: {}".format(target_net))
    start_time = df_net['start_time'].min()
    end_time = df_net['end_time'].max()
    date_caption = "Deployment date range: {} -- {}".format(start_time, end_time)
    plt.text(0.01, 0.01, date_caption, transform=plt.gca().transAxes, fontsize=12)
    plt.savefig('{}_REF_neighbours.png'.format(target_net), dpi=300)
    plt.show()

## Export station metadata for AU and temporary deployments to CSV and JSON

In [None]:
# Records of the AU network, ordered by station code
is_au = (df_asdf['net'] == 'AU')
df_au = df_asdf.loc[is_au].sort_values('sta')

In [None]:
# Export the AU station records to CSV, without the index column.
df_au.to_csv('AU_station_metadata.csv', index=False)

In [None]:
# Export the station records of each temporary deployment to its own CSV file,
# ordered by station code.
for net in temp_deploys:
    df_net = df_asdf.loc[df_asdf['net'] == net].sort_values('sta')
    csv_file = '{}_station_metadata.csv'.format(net)
    df_net.to_csv(csv_file, index=False)