In [1]:
import pandas as pd
import geopandas # used to load and analyze gdb files
from itertools import combinations # used for forming pairwise combinations


from joblib import Parallel, delayed
import multiprocessing

In [2]:
# scientific names of the concepts whose ranges are compared pairwise
concept_list = ['Peromyscus arcticus',
                'Peromyscus gambelii',
                'Peromyscus labecula',
                'Peromyscus maniculatus',
                'Peromyscus sonoriensis']

# load in MDD ranges, project to cea for area calculations
gdf = geopandas.read_file("MDD_Rodentia_NAm_393species.gpkg").to_crs({'proj':'cea'})

# restrict the gdf to those ranges of interest
gdf = gdf[gdf['sciname'].isin(concept_list)].copy()

# prepare the spatial index up front (accessing the attribute is enough)
gdf.sindex
# precalculate each range's area across the entire gdf
gdf["area"] = gdf['geometry'].area

# generate a list of pairwise matches (every unordered pair of concepts)
concept_pairs = list(combinations(concept_list, 2))


In [16]:
def check_pct_overlap(concepts):
    """Compute the fraction of each concept's range shared with the other.

    Both concepts are looked up by ``sciname`` in the module-level ``gdf``,
    which must already carry a precomputed ``area`` column. Their geometries
    are intersected with ``geopandas.overlay``.

    Parameters
    ----------
    concepts : tuple of str
        ``(concept_a, concept_b)`` concept name strings.

    Returns
    -------
    list
        ``[concept_a, concept_b, a_overlap_with_b, b_overlap_with_a]`` where
        each overlap is the intersection area divided by that concept's own
        range area.

    Raises
    ------
    ValueError
        If either concept does not match exactly one row of ``gdf``
        (raised by ``Series.item()``).
    """
    concept_a, concept_b = concepts
    dfa = gdf.loc[gdf['sciname'] == concept_a].copy()
    dfb = gdf.loc[gdf['sciname'] == concept_b].copy()

    # get area of each; .item() insists on exactly one row per concept
    dfa_area = dfa['area'].item()
    dfb_area = dfb['area'].item()

    intersection = geopandas.overlay(dfa, dfb, how='intersection')
    # Sum the intersection pieces instead of catching ValueError from .item():
    # an empty overlay (no overlap) sums to 0.0, a single row gives its own
    # area, and a multi-row result is added up rather than silently
    # reported as zero overlap.
    inter_area = intersection['geometry'].area.sum()

    a_overlap_with_b = inter_area / dfa_area
    b_overlap_with_a = inter_area / dfb_area

    row_data = [concept_a, concept_b, a_overlap_with_b, b_overlap_with_a]
    return row_data


# allocate the work: leave one core free, but never request fewer than one
# worker (cpu_count() - 1 is 0 on a single-core machine, and joblib rejects
# n_jobs=0)
ncores = max(1, multiprocessing.cpu_count() - 1)
rows = Parallel(n_jobs=ncores, prefer="threads", verbose=10)(
    delayed(check_pct_overlap)(concept) for concept in concept_pairs
)
# container to organize the results (one row per concept pair)
results = pd.DataFrame(rows, columns = ["a_concept",
                                        "b_concept",
                                        "a_overlap_with_b",
                                        "b_overlap_with_a"])
# write out results
results.to_csv("Peromyscus_range_overlaps.csv", index=False)

[Parallel(n_jobs=15)]: Using backend ThreadingBackend with 15 concurrent workers.
[Parallel(n_jobs=15)]: Done   3 out of  10 | elapsed:    4.0s remaining:    9.3s
[Parallel(n_jobs=15)]: Done   5 out of  10 | elapsed:   10.7s remaining:   10.7s
[Parallel(n_jobs=15)]: Done   7 out of  10 | elapsed:   21.2s remaining:    9.1s
[Parallel(n_jobs=15)]: Done  10 out of  10 | elapsed:  9.9min finished


In [17]:
# display the pairwise overlap table (one row per concept pair)
results

Unnamed: 0,a_concept,b_concept,a_overlap_with_b,b_overlap_with_a
0,Peromyscus arcticus,Peromyscus gambelii,0.0,0.0
1,Peromyscus arcticus,Peromyscus labecula,0.0,0.0
2,Peromyscus arcticus,Peromyscus maniculatus,0.0,0.0
3,Peromyscus arcticus,Peromyscus sonoriensis,0.0,0.0
4,Peromyscus gambelii,Peromyscus labecula,0.0,0.0
5,Peromyscus gambelii,Peromyscus maniculatus,0.0,0.0
6,Peromyscus gambelii,Peromyscus sonoriensis,0.009441,0.000735
7,Peromyscus labecula,Peromyscus maniculatus,0.0,0.0
8,Peromyscus labecula,Peromyscus sonoriensis,0.150458,0.027444
9,Peromyscus maniculatus,Peromyscus sonoriensis,0.016088,0.01247
