In [87]:
import geopandas as gpd
import pandas as pd

In [88]:
# load the receiver and donor catchment layers (GeoJSON files) as GeoDataFrames;
# the first path feeds shps_rec, the second feeds shps_don
shps_rec, shps_don = (
    gpd.read_file(path)
    for path in (
        "../../datasets/gpkg_v0/catchment_data.geojson",
        "../../datasets/gpkg_v1.2/catchment_data.geojson",
    )
)

In [89]:
# pull the 'id' column of each attribute table into a plain Python list;
# the list order is reused later to order the rows/columns of the distance matrix
donors = pd.read_csv('../data/all_attrs_donors.csv').loc[:, 'id'].tolist()
receivers = pd.read_csv('../data/all_attrs_receivers.csv').loc[:, 'id'].tolist()

In [90]:
# keep only the catchments whose 'id' appears in the receiver / donor ID lists
shps_rec = shps_rec.loc[shps_rec['id'].isin(receivers)]
shps_don = shps_don.loc[shps_don['id'].isin(donors)]

In [91]:
# index each GeoDataFrame by 'id', then order its rows to follow the ID lists
# NOTE(review): reindex adds an all-missing row for any ID not present in the
# frame — confirm every listed ID exists in the corresponding layer
shps_rec = shps_rec.set_index('id').reindex(receivers)
shps_don = shps_don.set_index('id').reindex(donors)

In [92]:
# reproject both layers to EPSG:3857 so that coordinates are planar, in metres
# NOTE(review): EPSG:3857 is Web Mercator; planar distances in it are
# scale-distorted away from the equator — confirm this is acceptable here
shps_don = shps_don.to_crs(epsg=3857)
shps_rec = shps_rec.to_crs(epsg=3857)

In [93]:
# take the centroid of every catchment geometry (donors first, then receivers);
# each result is a GeoSeries that keeps the 'id' index of its source frame
cent_don, cent_rec = (
    layer['geometry'].centroid for layer in (shps_don, shps_rec)
)


In [94]:
# wrap each centroid GeoSeries in a GeoDataFrame whose geometry is the centroid
cent_don, cent_rec = (
    gpd.GeoDataFrame(geometry=points) for points in (cent_don, cent_rec)
)

In [95]:
# calculate the distance between every receiver centroid and every donor centroid
def calculate_distances(row):
    """Return the distance from one receiver centroid to each donor centroid.

    Parameters
    ----------
    row : one row of ``cent_rec``; only its ``geometry`` entry is used.

    Returns
    -------
    The result of ``cent_don.distance(...)`` for that geometry, i.e. one value
    per row of the module-level ``cent_don`` frame.
    """
    receiver_point = row['geometry']
    return cent_don.distance(receiver_point)

# one output row per receiver; each row holds that receiver's distances to all donors
distances = cent_rec.apply(calculate_distances, axis='columns')

In [96]:
# label the distance matrix: columns are donor IDs, rows are receiver IDs
distances = (
    distances
    .set_axis(donors, axis='columns')
    .set_axis(receivers, axis='index')
)

In [97]:
# convert the distances from metres to kilometres (divide by 1000) and store
# them as whole numbers.
# Round to the nearest kilometre before casting: a bare astype(int) truncates
# toward zero, which biases every distance low by up to 1 km.
distances = distances.div(1000).round().astype(int)

## Comparing the distances calculated in R and Python

In [104]:
# paths of the two distance matrices to compare
f1 = '../data/dist_spatial_donor_receiver_R.csv'  # distance computed in R
# NOTE(review): no visible cell writes this file from `distances` — confirm how it is produced
f2 = '../data/dist_spatial_donor_receiver.csv'    # distance computed in Python

# read both files, using the first CSV column as the row index
ds1, ds2 = (pd.read_csv(path, index_col=0) for path in (f1, f2))

In [None]:
# align the Python matrix to the R matrix (same row order, same column order),
# then subtract element-wise and summarise the differences over all cells
ds2 = ds2.reindex(ds1.index).loc[:, ds1.columns.to_list()]
df1 = ds1.sub(ds2)
all_diffs = pd.DataFrame(df1.to_numpy().flatten())
print(all_diffs.describe())

In [106]:
# spot-check one entry (row 'cat-100', column 'cat-12790') in each matrix:
# first the R result, then the Python result
print(ds1.at['cat-100', 'cat-12790'])
print(ds2.at['cat-100', 'cat-12790'])

736
735
