# Cell-Path Distance Matrix
This notebook computes the cell-path distance matrix used for the cell-specific anelastic attenuation.

This script was developed as part of the Non-ergodic Methodology and Modeling Tools research project summarized in the report by Lavrentiadis G., Kuehn N., Bozorgnia Y., Seylabi E., Meng X., Goulet C., and Kottke A. (2022), "Non‐ergodic Methodology and Modeling Tools (Report GIRS-2022-04)." Natural Hazards Risk and Resiliency Research Center, University of California, Los Angeles.

Support provided by the California Department of Transportation (Caltrans) and Pacific Gas and Electric Company (PG&E) is gratefully acknowledged. 

## Load required libraries

In [None]:
# load libraries
import os
import sys
import pathlib
import numpy as np
import pandas as pd
from scipy import sparse

# geographic libraries
import pyproj

# jupyter
from IPython.display import clear_output

## User Input
Define the flatfile filename, the output directory, and the extent of the domain ($x$, $y$, and $z$ coordinates).

In [None]:
# --- input / output locations ---
# ground-motion flatfile: read later as <dir_flatfile><name_flatfile>.csv
dir_flatfile = "../../Data/"
name_flatfile = "Atkinson_resid_trim_inside"
# directory receiving the distance matrices and figures
dir_out = "../../Data/Data_Preperation/nz_cell_distances/"

# --- coordinate system ---
# flag for UTM coordinates (forwarded to the cell-distance routine)
flagUTM = True
utm_zone = "60S"

# --- cell grid ---
# grid limits along x, y and z (UTM coordinates in km)
grid_lims_x, grid_lims_y, grid_lims_z = [-400, 700], [4500, 6400], [-50, 0]
# cell size along x, y and z (presumably km, like the grid limits -- TODO confirm)
cell_size = [25, 25, 50]

## Load Files

In [None]:
# Read the ground-motion flatfile from <dir_flatfile><name_flatfile>.csv.
df_flatfile = pd.read_csv(f"{dir_flatfile}{name_flatfile}.csv")

# Disabled spatial trim, kept for reference. When enabled it restricted both
# events (eqLon/eqLat) and stations (staLon/staLat) to
#     165 < lon < 179.2   and   -49 < lat < -32
# and wrote the result to "../../Data/Atkinson_resid_trim_inside.csv".
# NOTE(review): re-enabling such a filter leaves a non-contiguous row index on
# df_flatfile; downstream cells that align on the index should be checked.

# number of ground-motion records
n_rec = df_flatfile.shape[0]
print("Number of records:", n_rec)

## Preprocessing 

### Cell Vertices Coordinates

In [None]:
from Analyses.Data_Preparation import Cells, get_source_station_matrix

# Grid definition taken from the user-input cell.
grid_spec = dict(
    utm_zone=utm_zone,
    grid_lims_x=grid_lims_x,
    grid_lims_y=grid_lims_y,
    grid_lims_z=grid_lims_z,
    cell_size=cell_size,
)

# Build the cell grid and export its description as a data frame.
cells = Cells.from_bounds(**grid_spec)
df_cellinfo = cells.to_dataframe()

# One row per record; columns 0-2 and 3-5 are used below as the two path
# end points (presumably source and station coordinates -- TODO confirm).
data4celldist = get_source_station_matrix(cells, df_flatfile)

## Compute Cell-Path Distance Matrix

In [None]:
from Analyses.Python_lib.ground_motions import pylib_cell_dist


# Cell columns handed to the distance routine: columns 0-2 and 21-23 of the
# cell array (presumably the x, y, z coordinates of two opposite vertices --
# TODO confirm against the Cells class).
cells4dist = cells.cells[:, [0, 1, 2, 21, 22, 23]]

# Dense (records x cells) matrix; row i holds the path length of record i
# inside every cell, as returned by ComputeDistGridCells.
n_paths = len(data4celldist)
distancematrix = np.zeros([n_paths, len(cells4dist)])
for i in range(n_paths):
    clear_output(wait=True)
    print("Computing cell distances, record:", i)
    # the two end points of the path (columns 0-2 and 3-5 of the record row)
    pt1 = data4celldist[i, (0, 1, 2)]
    pt2 = data4celldist[i, (3, 4, 5)]

    distancematrix[i] = pylib_cell_dist.ComputeDistGridCells(
        pt1, pt2, cells4dist, flagUTM
    )

# print Rrup misfits: the per-cell path lengths of a record should add up to
# its rupture distance
dist_diff = df_flatfile.Rrup - distancematrix.sum(axis=1)
clear_output(wait=True)
print("Cell distance calculation completed")
# Series.max() skips NaN; the built-in max() over a Series is order-dependent
# when NaN values are present.
print("max R_rup misfit", dist_diff.abs().max())

# convert cell distances to sparse matrix
distmatrix_sparce = sparse.coo_matrix(distancematrix)

# cell distances data-frame
# gm record info; the index is reset so that it lines up positionally with the
# rows of distancematrix even if df_flatfile was filtered beforehand
df_recinfo = df_flatfile[["rsn", "eqid", "ssn"]].reset_index(drop=True)

# cell distances
df_celldist = pd.DataFrame(distancematrix, columns=cells.cell_names)
df_celldist = pd.merge(df_recinfo, df_celldist, left_index=True, right_index=True)

# sparse cell distances dataframe: (row, col, value) triplets with the indices
# shifted to start at 1
df_celldist_sp = pd.DataFrame(
    {
        "row": distmatrix_sparce.row + 1,
        "col": distmatrix_sparce.col + 1,
        "data": distmatrix_sparce.data,
    }
)

## Save Data

In [None]:
# Make sure the output directory exists; with exist_ok=True this is a no-op
# when the directory is already there.
pathlib.Path(dir_out).mkdir(parents=True, exist_ok=True)

# cell info file name (writing the file is currently disabled)
name_cellinfo = f"{name_flatfile}_cellinfo"
# df_cellinfo.to_csv(dir_out + name_cellinfo + ".csv", index=False)

# dense distance matrix, one row per record
name_celldist = f"{name_flatfile}_distancematrix"
df_celldist.to_csv(f"{dir_out}{name_celldist}.csv", index=False)

# sparse distance matrix as (row, col, data) triplets
name_celldist = f"{name_flatfile}_distancematrix_sparce"
df_celldist_sp.to_csv(f"{dir_out}{name_celldist}.csv", index=False)

## Summary Figures

### Figures Options

In [None]:
# plot limits, stored as [[lat_min, lon_min], [lat_max, lon_max]]
# (the earlier California windows were dead assignments, overwritten before use;
#  rows are ordered min -> max so that set_xlim/set_ylim do not flip an axis)
fig_latlon_win = np.array([[-60, 160], [-30, 180]])

# color limits for number of paths
cmin = 0
cmax = 2000

# flag log scale for number of paths
flag_logscl = True

### Figure Info Summary

In [None]:
# Unique earthquake and station ids.
# Note: despite the "_inv" suffix, return_index=True yields the row of the
# FIRST occurrence of each id, which is what is used below to pick one
# coordinate pair per event / station.
eq_id, eq_idx_inv = np.unique(
    df_flatfile["eqid"].to_numpy().astype(int), return_index=True
)
sta_id, sta_idx_inv = np.unique(
    df_flatfile["ssn"].to_numpy().astype(int), return_index=True
)

# one (lat, lon) pair per unique earthquake and per unique station
eq_latlon = df_flatfile[["eqLat", "eqLon"]].to_numpy()[eq_idx_inv, :]
stat_latlon = df_flatfile[["staLat", "staLon"]].to_numpy()[sta_idx_inv, :]

# cell mid-point coordinates
cell_latlon_mpt = df_cellinfo[["mptLat", "mptLon"]].to_numpy()

# cell outline: vertices q5 -> q6 -> q8 -> q7 -> q5 as interleaved (lat, lon)
# columns; the first vertex is repeated so the plotted outline is closed
edge_cols = [f"q{vtx}{crd}" for vtx in (5, 6, 8, 7, 5) for crd in ("Lat", "Lon")]
cell_latlon_edge = df_cellinfo[edge_cols].to_numpy()

# number of paths per cell: records with a positive path length in the cell
cell_n_paths = (distancematrix > 0).sum(axis=0)

In [None]:
# Display the cell table (last expression of the cell) for a quick visual check.
cells.to_dataframe()
# Scratch expressions left over from interactive inspection (all disabled):
# dir_out
# eq_latlon[:, 0]
# stat_latlon
# cell_latlon_edge.min()

### Plotting

In [None]:
# Plot cell paths: events, stations, event-station paths and cell outlines
from Analyses.Python_lib.plotting import pylib_contour_plots as pylib_cplt
# ---   ---   ---   ---
fname_fig = "cA_paths"
fig, ax, data_crs, gl = pylib_cplt.PlotMap()
# plot earthquake and station locations (x = longitude, y = latitude)
ax.plot(
    eq_latlon[:, 1],
    eq_latlon[:, 0],
    "*",
    transform=data_crs,
    markersize=10,
    zorder=13,
    label="Events",
)
ax.plot(
    stat_latlon[:, 1],
    stat_latlon[:, 0],
    "o",
    transform=data_crs,
    markersize=6,
    zorder=12,
    label="Stations",
)
# plot earthquake-station paths
# The coordinates are pulled out once as an array and unpacked per row; this
# avoids building a Series and doing label lookups for every record, as
# DataFrame.iterrows() would.
path_coords = df_flatfile[["eqLat", "eqLon", "staLat", "staLon"]].to_numpy()
for eq_lat, eq_lon, sta_lat, sta_lon in path_coords:
    ax.plot(
        [eq_lon, sta_lon],
        [eq_lat, sta_lat],
        transform=data_crs,
        color="gray",
        linewidth=0.05,
        zorder=10,
        alpha=0.2,
    )
# plot cells: odd columns hold longitudes, even columns latitudes
for ce_xy in cell_latlon_edge:
    ax.plot(
        ce_xy[[1, 3, 5, 7, 9]], ce_xy[[0, 2, 4, 6, 8]], color="gray", transform=data_crs
    )
# figure limits
# ax.set_xlim( fig_latlon_win[:,1] )
# ax.set_ylim( fig_latlon_win[:,0] )
# grid lines (replaces the gridliner handle returned by PlotMap)
gl = ax.gridlines(draw_labels=True)
gl.top_labels = False
gl.right_labels = False
gl.xlabel_style = {"size": 25}
gl.ylabel_style = {"size": 25}
# add legend
ax.legend(fontsize=25, loc="lower left")
# apply tight layout
# fig.show()
fig.tight_layout()
fig.savefig(dir_out + fname_fig + ".png")

# Plot number of paths per cell
# ---   ---   ---   ---
fname_fig = "cA_num_paths"
cbar_label = "Number of paths"
# one row per cell: mid-point latitude, mid-point longitude, number of paths
data2plot = np.vstack([cell_latlon_mpt.T, cell_n_paths]).T
# log scale options
# The color limits are kept in local names so that the user-defined cmin/cmax
# are not overwritten; otherwise re-running this cell would apply the log twice.
if flag_logscl:
    clim_min = np.log(1)
    clim_max = np.log(cmax)
else:
    clim_min = cmin
    clim_max = cmax
# create figure
fig, ax, cbar, data_crs, gl = pylib_cplt.PlotCellsCAMap(
    data2plot,
    cmin=clim_min,
    cmax=clim_max,
    log_cbar=flag_logscl,
    frmt_clb="%.0f",
    cmap="OrRd",
)
# plot cells: odd columns hold longitudes, even columns latitudes
for ce_xy in cell_latlon_edge:
    ax.plot(
        ce_xy[[1, 3, 5, 7, 9]], ce_xy[[0, 2, 4, 6, 8]], color="gray", transform=data_crs
    )
# figure limits
# ax.set_xlim( fig_latlon_win[:,1] )
# ax.set_ylim( fig_latlon_win[:,0] )
# grid lines (replaces the gridliner handle returned by PlotCellsCAMap)
gl = ax.gridlines(draw_labels=True)
gl.top_labels = False
gl.right_labels = False
gl.xlabel_style = {"size": 25}
gl.ylabel_style = {"size": 25}
# update colorbar
cbar.set_label(cbar_label, size=30)
cbar.ax.tick_params(labelsize=25)
# apply tight layout
# fig.show()
fig.tight_layout()
fig.savefig(dir_out + fname_fig + ".png")