# Setup distance matrix calculation

In [24]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
import sys
# Put ../src at the front of the module search path.
# NOTE(review): presumably this is where the local `geostats` package lives — confirm.
sys.path.insert(0, "../src")

In [25]:
import numpy as np
import pandas as pd
import xarray
from scipy.spatial.distance import cdist
from geopy.distance import geodesic
from sklearn.metrics.pairwise import haversine_distances

import geostats as gs

In [3]:
# Load the 5-degree monthly dataset, then restrict it to a lon/lat bounding box
# over the US so that the remaining cells run quickly while testing.
ds = xarray.open_dataset("../data/exp_pro/OCO2_5deg_monthly.nc")

# Strict inequalities: grid cells lying exactly on the box edge are excluded.
in_lon_range = (ds.lon > -125) & (ds.lon < -70)
in_lat_range = (ds.lat > 25) & (ds.lat < 50)
ds = ds.where(in_lon_range & in_lat_range, drop=True)

ds

In [4]:
# Wrap the xco2 and sif variables in gs.Field objects, passing the same date
# string to both.
# NOTE(review): presumably the second argument selects the monthly time slice — confirm.
field_date = "2019-07-01"
Z1 = gs.Field(ds["xco2"], field_date)
Z2 = gs.Field(ds["sif"], field_date)

In [42]:
# Small sanity check: pairwise distances among the first three entries of
# Z1.coords, computed with the fast_dist=True option.
first_three = Z1.coords[:3,]
gs.distance_matrix(first_three, first_three, fast_dist=True)

array([[   0.        ,  555.97463322, 1111.94926645],
       [ 555.97463322,    0.        ,  555.97463322],
       [1111.94926645,  555.97463322,    0.        ]])

In [74]:
# Reference value from geopy: geodesic distance in miles between the
# (lat[0], lon[0]) and (lat[1], lon[1]) grid points.
point_a = (ds.lat.values[0], ds.lon.values[0])
point_b = (ds.lat.values[1], ds.lon.values[1])
geodesic(point_a, point_b).miles

347.9753814124189

In [6]:
# Build a coordinate grid from the first two latitudes and longitudes and
# compute its pairwise distance matrix with units="miles".
# NOTE(review): `expand_grid` and `distance_matrix` are called unqualified, but the
# visible imports only bind the package as `gs`; on a fresh kernel this likely raises
# NameError. Presumably these should be `gs.expand_grid` / `gs.distance_matrix` —
# confirm the names and that `units=` is still a supported keyword.
coords = expand_grid(ds.lat.values[:2], ds.lon.values[:2])
distance_matrix(coords, units="miles")

array([[  0.        ,  15.13178313, 346.98826934, 347.97538141],
       [ 15.13178313,   0.        , 347.97538141, 346.98826934],
       [346.98826934, 347.97538141,   0.        ,  45.27809565],
       [347.97538141, 346.98826934,  45.27809565,   0.        ]])

In [81]:
# Geodesic distance in km between two hard-coded (lat, lon) points that share a
# latitude and differ by 5 degrees of longitude.
# NOTE(review): the recorded output for this cell is a %timeit timing line rather
# than a distance, so the cell was presumably last run under %timeit and edited
# afterwards — re-run to refresh the output.
geodesic((-87.5, -177.5), (-87.5, -172.5)).km

396 µs ± 5.87 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [73]:
# Build the coordinate grid from every latitude/longitude in `ds` and compute
# the full pairwise distance matrix with the default settings.
# NOTE(review): `expand_grid` and `distance_matrix` are not imported in any visible
# cell (only `geostats as gs` is) — presumably `gs.expand_grid` / `gs.distance_matrix`;
# confirm. The recorded output reaches ~18,900, which looks too large for the
# US-only subset taken when loading `ds`, so it was presumably produced from the
# unsubsetted grid in an earlier session — re-run to refresh.
coords = expand_grid(ds.lat.values, ds.lon.values)
distance_matrix(coords)

array([[    0.        ,    24.3522444 ,    48.65822076, ...,
        18894.21642581, 18890.234938  , 18887.84064812],
       [   24.3522444 ,     0.        ,    24.3522444 , ...,
        18899.77158275, 18894.21642581, 18890.234938  ],
       [   48.65822076,    24.3522444 ,     0.        , ...,
        18906.88139558, 18899.77158275, 18894.21642581],
       ...,
       [18894.21642581, 18899.77158275, 18906.88139558, ...,
            0.        ,    72.86803157,   145.59970868],
       [18890.234938  , 18894.21642581, 18899.77158275, ...,
           72.86803157,     0.        ,    72.86803157],
       [18887.84064812, 18890.234938  , 18894.21642581, ...,
          145.59970868,    72.86803157,     0.        ]])

In [107]:
# Pairwise distances of `coords` against itself via the fast variant, for
# comparison with the distance_matrix(coords) output above.
# NOTE(review): `distance_matrix_fast` is not imported in any visible cell; an
# earlier cell uses `gs.distance_matrix(..., fast_dist=True)`, which presumably
# supersedes this helper — confirm and update the call.
distance_matrix_fast(coords, coords)

array([[    0.        ,    24.24359308,    48.44112457, ...,
        18910.27792814, 18906.31551111, 18903.93269494],
       [   24.24359308,     0.        ,    24.24359308, ...,
        18915.80649549, 18910.27792814, 18906.31551111],
       [   48.44112457,    24.24359308,     0.        , ...,
        18922.88230969, 18915.80649549, 18910.27792814],
       ...,
       [18910.27792814, 18915.80649549, 18922.88230969, ...,
            0.        ,    72.54661905,   144.95748508],
       [18906.31551111, 18910.27792814, 18915.80649549, ...,
           72.54661905,     0.        ,    72.54661905],
       [18903.93269494, 18906.31551111, 18910.27792814, ...,
          144.95748508,    72.54661905,     0.        ]])