# Data Exploration

Each numpy array corresponds to all earthquakes on a particular day, of the form
(stations, events, 3 (axes), 6000 (time))

In [25]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tqdm import tqdm

In [26]:
data_path = '../arrays/2019-08-01.npy'
data = np.load(data_path)
data.shape

(27, 196, 3, 6000)

In [27]:
#Helper function to plot an earthquake
def plot_earthquake(quake):
    num_stations, num_dims, time = quake.shape
    fig, axs = plt.subplots(num_stations, figsize = (20, 10*num_stations))
    for station_num in range(num_stations):
        axs[station_num].plot(np.arange(time), quake[station_num].T)

In [28]:
# plot_earthquake(data[:, 0])

In [29]:
# freq = 100
# window = 2000
# vp = 6.5 
# vs = 4.5
# t = np.arange(window)/freq
# channel = 2
# degree2km = np.pi*6371/180
# for event in data['events']:
#   tmp_dist = []
#   plt.figure(figsize=(15, 10))
#   for sta in data['stations']:
#     istart = int(freq * (event['time'] - data['stations'][sta]['starttime']))
#     dist = np.sqrt((event['x'] - data['stations'][sta]['x'])**2 + (event['y'] - data['stations'][sta]['y'])**2 + event['depth']**2)
#     tmp_dist.append(dist)
#     tmp_data = data['stations'][sta]['data'][istart:istart+window,channel]
#     mean = np.mean(tmp_data)
#     std = np.std(tmp_data)
#     plt.plot(t, (tmp_data - mean)/std/2 + dist, 'grey', linewidth=1)
# #     plt.plot(t, tmp_data/std/2 + dist)
#   plt.plot(t, t*vp, '--b', label="vp = 6.5 km/s")
#   plt.plot(t, t*vs, '--r', label="vs = 4.5 km/s")
#   plt.legend(loc="lower right")
#   plt.xlim([0, window/freq])
#   plt.ylim([min(tmp_dist)-10, max(tmp_dist)+10])
#   plt.title('Magnitude: {}, Time: {}'.format(event['mag'], str(event['time'])))
#   plt.ylabel('Distance from hypocenter (km)')
#   plt.xlabel('Time after earthquake (s)')
#   plt.savefig(figure_dir.joinpath("{}.png".format(event['time'])))
#   plt.show()

In [30]:
# import pickle
# with open('../data/Ridgecrest/2019-06-07.pkl', 'rb') as f:
#     data = pickle.load(f)

In [31]:
# data['events'], len(data['events'])

In [32]:
# data["stations"], len(data['stations'])

In [33]:
# data['stations']['CI.WVP2.']['data'].shape

In [34]:
# from mpl_toolkits.basemap import Basemap
# import matplotlib.pyplot as plt
# # setup Lambert Conformal basemap.
# # set resolution=None to skip processing of boundary datasets.
# m = Basemap(width=1200000,height=900000,projection='lcc',
#             resolution='f',lat_1=45.,lat_2=55,lat_0=35.705,lon_0=-117.504)
# #m.shadedrelief()
# # plt.show()

In [35]:
# m.shadedrelief()
# plt.show()

In [36]:
%matplotlib inline

In [37]:
# plt.figure(figsize=(30, 30))
# img = plt.imread("ridgecrest.png")
# plt.imshow(img)

In [38]:
def circles(x, y, s, c='b', vmin=None, vmax=None, **kwargs):
    """
    Make a scatter plot of circles. 
    Similar to plt.scatter, but the size of circles are in data scale.
    Parameters
    ----------
    x, y : scalar or array_like, shape (n, )
        Input data
    s : scalar or array_like, shape (n, ) 
        Radius of circles.
    c : color or sequence of color, optional, default : 'b'
        `c` can be a single color format string, or a sequence of color
        specifications of length `N`, or a sequence of `N` numbers to be
        mapped to colors using the `cmap` and `norm` specified via kwargs.
        Note that `c` should not be a single numeric RGB or RGBA sequence 
        because that is indistinguishable from an array of values
        to be colormapped. (If you insist, use `color` instead.)  
        `c` can be a 2-D array in which the rows are RGB or RGBA, however. 
    vmin, vmax : scalar, optional, default: None
        `vmin` and `vmax` are used in conjunction with `norm` to normalize
        luminance data.  If either are `None`, the min and max of the
        color array is used.
    kwargs : `~matplotlib.collections.Collection` properties
        Eg. alpha, edgecolor(ec), facecolor(fc), linewidth(lw), linestyle(ls), 
        norm, cmap, transform, etc.
    Returns
    -------
    paths : `~matplotlib.collections.PathCollection`
    Examples
    --------
    a = np.arange(11)
    circles(a, a, s=a*0.2, c=a, alpha=0.5, ec='none')
    plt.colorbar()
    License
    --------
    This code is under [The BSD 3-Clause License]
    (http://opensource.org/licenses/BSD-3-Clause)
    """
    from matplotlib.patches import Circle
    from matplotlib.collections import PatchCollection


    if np.isscalar(c):
        kwargs.setdefault('color', c)
        c = None

    if 'fc' in kwargs:
        kwargs.setdefault('facecolor', kwargs.pop('fc'))
    if 'ec' in kwargs:
        kwargs.setdefault('edgecolor', kwargs.pop('ec'))
    if 'ls' in kwargs:
        kwargs.setdefault('linestyle', kwargs.pop('ls'))
    if 'lw' in kwargs:
        kwargs.setdefault('linewidth', kwargs.pop('lw'))
    # You can set `facecolor` with an array for each patch,
    # while you can only set `facecolors` with a value for all.

    zipped = np.broadcast(x, y, s)
    patches = [Circle((x_, y_), s_)
               for x_, y_, s_ in zipped]
    collection = PatchCollection(patches, **kwargs)
    if c is not None:
        c = np.broadcast_to(c, zipped.shape).ravel()
        collection.set_array(c)
        collection.set_clim(vmin, vmax)

    ax = plt.gca()
    ax.add_collection(collection)
    ax.autoscale_view()
    plt.draw_if_interactive()
    if c is not None:
        plt.sci(collection)
    return collection

In [39]:
def plot_coords(lats, lngs, intensity):
    plt.figure(figsize=(30, 30))
    plt.gca().axis('off')
    img = plt.imread("ridgecrest.png")
    width = img.shape[0]
    height = img.shape[1]
    latmin = 36.235
    lngmin = -118.046
    
    #Change the x and y to adjust to coords
    y = [int((latmin - lat)*height) for lat in lats]
    x = [int((lng - lngmin)*width) for lng in lngs]

    plt.imshow(img)
    
    out = circles(x, y, intensity, c='r', alpha=0.5, edgecolor='none')
    
    plt.show()

# Plot an earthquake

In [40]:
#Get the order of stations
stations = np.load("../extracted/stations.npy")
stations

array(['CI.WVP2.', 'CI.WRV2.', 'CI.WRC2.', 'CI.WNM.', 'CI.WMF.',
       'CI.WCS2.', 'CI.WBM.', 'CI.TOW2.', 'CI.SRT.', 'CI.SLA.', 'CI.MPM.',
       'CI.LRL.', 'CI.DTP.', 'CI.CCC.', 'CI.JRC2.'], dtype='<U8')

In [41]:
#Define the station coordinates
station_coords = {
    "CI.CCC.": (35.52495, -117.36453),
    "CI.DTP.": (35.26742, -117.84581),
    "CI.JRC2.": (35.98249, -117.80885),
    "CI.LRL.": (35.47954, -117.68212),
    "CI.MPM.": (36.05799 ,-117.48901),
    "CI.Q0072.": (35.609617, -117.666721),
    "CI.SLA.": (35.89095, -117.28332),
    "CI.SRT.": (35.69235, -117.75051),
    "CI.TOW2.": (35.80856, -117.76488),
    "CI.WBM.": (35.60839, -117.89049),
    "CI.WCS2.": (36.02521, -117.76526),
    "CI.WMF.": (36.11758, -117.85486),
    "CI.WNM.": (35.8422, -117.90616),
    "CI.WRC2.": (35.9479, -117.65038),
    "CI.WRV2.": (36.00774, -117.8904),
    "CI.WVP2.": (35.94939, -117.81769)
}

lats = [station_coords[s][0] for s in stations]
lngs = [station_coords[s][1] for s in stations]
intensity = [10]*len(lngs)

In [42]:
# Import a single dat of earthquakes
quakes = np.load("../extracted/2019-06-04.npy")
quakes.shape

(4, 15, 6000)

In [43]:
# plot_coords(lats, lngs, intensity)

In [58]:
def plot_earthquake_timeseries(lats, lngs, actual_intensities, save_path, predicted_intensities = None):
    actual_intensities = actual_intensities / 10
    if predicted_intensities is not None:
        predicted_intensities = predicted_intensities / 10
        assert(actual_intensities.shape[0] == predicted_intensities.shape[0])
        
    for i in tqdm(range(actual_intensities.shape[0])):
        plt.figure(figsize=(15, 15))
        plt.gca().axis('off')
        img = plt.imread("ridgecrest.png")
        width = img.shape[0]
        height = img.shape[1]
        latmin = 36.235
        lngmin = -118.046

        #Change the x and y to adjust to coords
        y = [int((latmin - lat)*height) for lat in lats]
        x = [int((lng - lngmin)*width) for lng in lngs]

        plt.imshow(img)

        actual_outs = circles(x, y, actual_intensities[i], c='r', alpha=0.5, edgecolor='none')
        if predicted_intensities is not None:
            predicted_outs = circles(x, y, predicted_intensities[i], c='b', alpha=0.5, edgecolor='none')

        plt.savefig(f"{save_path}/{i}.png")
        plt.close()

In [59]:
with open('../compressed/2019-08-01.npy', 'rb') as fd:
    arr = np.load(fd)
    arr = np.transpose(arr, [0, 2, 1])
    plot_earthquake_timeseries(lats, lngs, arr[5], "./plot_test/tmp", predicted_intensities = np.ones(arr[5].shape) * 10)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:29<00:00,  2.02it/s]


In [55]:
from pathlib import Path
import os
import glob

filenames = sorted(Path("./plot_test/tmp/").glob("*.png"), key=lambda i: int(os.path.splitext(os.path.basename(i))[0]))
print(filenames)

[PosixPath('plot_test/tmp/0.png'), PosixPath('plot_test/tmp/1.png'), PosixPath('plot_test/tmp/2.png'), PosixPath('plot_test/tmp/3.png'), PosixPath('plot_test/tmp/4.png'), PosixPath('plot_test/tmp/5.png'), PosixPath('plot_test/tmp/6.png'), PosixPath('plot_test/tmp/7.png'), PosixPath('plot_test/tmp/8.png'), PosixPath('plot_test/tmp/9.png'), PosixPath('plot_test/tmp/10.png'), PosixPath('plot_test/tmp/11.png'), PosixPath('plot_test/tmp/12.png'), PosixPath('plot_test/tmp/13.png'), PosixPath('plot_test/tmp/14.png'), PosixPath('plot_test/tmp/15.png'), PosixPath('plot_test/tmp/16.png'), PosixPath('plot_test/tmp/17.png'), PosixPath('plot_test/tmp/18.png'), PosixPath('plot_test/tmp/19.png'), PosixPath('plot_test/tmp/20.png'), PosixPath('plot_test/tmp/21.png'), PosixPath('plot_test/tmp/22.png'), PosixPath('plot_test/tmp/23.png'), PosixPath('plot_test/tmp/24.png'), PosixPath('plot_test/tmp/25.png'), PosixPath('plot_test/tmp/26.png'), PosixPath('plot_test/tmp/27.png'), PosixPath('plot_test/tmp/28.p

In [64]:
import imageio
from pygifsicle import optimize

images = []
for filename in tqdm(filenames):
    images.append(imageio.imread(filename))
    
gif_path = './plot_test/sample.gif'
with imageio.get_writer(gif_path, mode='I') as writer:
    for image in images:
        writer.append_data(image)

optimize(gif_path)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [00:02<00:00, 22.36it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'gifsicle': 'gifsicle'