In [1]:
import geopandas as gpd
import pandas as pd
import numpy as np
import shapely
from shapely import Polygon, MultiPolygon, LineString, Point
import time

# Read in Data

In [2]:
# Load the two inputs. The author notes that this cell takes about 45 seconds.

# Range polygons; later cells rely on its `sci_name`, `presence` and `geometry` columns.
gdf = gpd.read_file('data/shapefiles/MAMMALS_TERRESTRIAL_ONLY')

# Observation table; later cells rely on its `species` and `body mass` columns.
df = pd.read_csv('data/observations.csv')

In [3]:
# Number of distinct scientific names in the range data (a missing name, if any, counts as one value).
gdf['sci_name'].nunique(dropna=False)

5633

In [4]:
# Count the rows of gdf for each `presence` code (code 5 is dropped later as extinct territories).
gdf['presence'].value_counts()

presence
1    11678
3      257
6      162
5      160
4      140
2       93
Name: count, dtype: int64

# Make Mass Table

In [5]:
# For every species keep the single observation with the largest recorded body mass
# (observations without a body mass are ignored).
df_highest_mass = (
    df.dropna(subset=['body mass'])
      .sort_values(by='body mass', ascending=False)
      .drop_duplicates(subset=['species'], keep='first')
)

# One row per scientific name found in the range data, with that species' mass attached.
# The left join keeps names that have no matching observation (their mass stays NaN).
masses = (
    gdf[['sci_name']]
    .drop_duplicates()
    .merge(
        df_highest_mass[['species', 'body mass']],
        left_on='sci_name',
        right_on='species',
        how='left',
    )
    .drop(columns=['species'])
)

In [6]:
#masses.to_csv('data/masses_from_ipynb.csv', index = False)

# Read in Masses

In [7]:
# Load the body-mass table from disk. NOTE: this rebinds `masses`, discarding the table
# assembled from `df` and `gdf` in the "Make Mass Table" section. The file must provide
# the `sci_name` and `body mass` columns used by the merge that follows.
masses = pd.read_csv('data/masses.csv')

# Create new Geometries

In [8]:
# Attach each species' body mass to its range rows, discard rows without a mass,
# and order the result from heaviest to lightest.
gdf = (
    gdf.merge(masses, on='sci_name', how='left')
       .dropna(subset='body mass')
       .sort_values(by='body mass', ascending=False)
)
# drop extinct territories (presence code 5)
gdf = gdf.loc[gdf['presence'].ne(5)]

In [9]:
# Collapse each species into one shape (the author notes this takes about 90 sec).
# Only the 500 heaviest range rows are dissolved to keep the step tractable.
gdf = (
    gdf.sort_values(by='body mass', ascending=False)
       .head(500)
       .copy()
       .dissolve(by='sci_name')
       .reset_index()
)

# Species excluded from the result; the notebook does not record why.
avoid = ['Gulo gulo', 'Hyaena hyaena']

# Keep the 50 heaviest remaining species, heaviest first.
gdf = (
    gdf.loc[~gdf['sci_name'].isin(avoid)]
       .sort_values(by='body mass', ascending=False)
       .head(50)
)

In [10]:
def has_line(multipoly):
    """Return True if a geometry is a line or contains one anywhere inside it.

    The previous implementation only looked inside MultiPolygon objects, which
    by construction hold nothing but Polygons, so it could never find a line
    there. Mixed polygon/line results come back from shapely as a
    GeometryCollection, which was not inspected at all. This version walks any
    geometry that exposes ``.geoms`` (collections and multi-part geometries,
    nested to any depth).

    Parameters
    ----------
    multipoly : shapely geometry (any type)
        The geometry to inspect. Objects that are not geometries yield False.

    Returns
    -------
    bool
        True when the geometry itself, or any of its parts, is a LineString,
        LinearRing or MultiLineString.
    """
    line_types = ('LineString', 'LinearRing', 'MultiLineString')
    pending = [multipoly]
    while pending:
        geom = pending.pop()
        if getattr(geom, 'geom_type', None) in line_types:
            return True
        # Single-part geometries have no `.geoms`; they contribute nothing further.
        pending.extend(getattr(geom, 'geoms', ()))
    return False

def _polygon_parts(geometry):
    """Yield every non-empty Polygon in `geometry`, descending into multi-part geometries and collections."""
    if getattr(geometry, 'geom_type', None) == 'Polygon':
        if not geometry.is_empty:
            yield geometry
    else:
        # Lines and points have no `.geoms` and are simply skipped.
        for part in getattr(geometry, 'geoms', ()):
            yield from _polygon_parts(part)


def remove_line(geometry):
    """Return the polygonal content of a geometry as a MultiPolygon.

    The previous implementation filtered out only LineStrings and required the
    input to have ``.geoms``. It therefore raised on a bare LineString, and
    handed Points or nested multi-part geometries to ``MultiPolygon(...)``,
    which rejects them. This version keeps only Polygon parts, flattening
    nested collections, so any geometry type is accepted.

    Parameters
    ----------
    geometry : shapely geometry
        Typically a GeometryCollection mixing polygons with stray lines.

    Returns
    -------
    MultiPolygon
        All non-empty polygons found in `geometry`, in their original order;
        an empty MultiPolygon when there are none.
    """
    return MultiPolygon(list(_polygon_parts(geometry)))

In [11]:
# Give every species the part of its range that is not already claimed by a
# heavier species. `gdf` is ordered heaviest-first, so `union` always holds the
# combined range of all heavier species processed so far.
#
# BUG FIX: the original initialised the placeholder column on `df` (the
# observations table) instead of `gdf`, and then relied on per-row `.loc`
# enlargement to create the column on `gdf`. The geometries are now collected
# in a list and written to `gdf` in a single assignment after the loop.
no_overlap_geoms = []
union = Polygon([])

start = time.time()
abs_times = [0]   # cumulative seconds elapsed after each species
rel_times = []    # seconds spent on each individual species
n_species = len(gdf)
j = 0
for j, (i, row) in enumerate(gdf.iterrows(), start=1):
    print(row['sci_name'])
    this_geo = row['geometry']
    new_geo = this_geo - union
    no_overlap_geoms.append(new_geo)
    # A range fully covered by heavier species adds nothing to the union; skip the costly call.
    if new_geo.area > 0:
        union = this_geo.union(union)
    # Unions can leave stray lines behind; strip them so `union` stays polygonal.
    hl = has_line(union)
    if hl:
        union = remove_line(union)
    elapsed = time.time() - start
    rel_times.append(elapsed - abs_times[-1])
    abs_times.append(elapsed)
    print(str(j) + f"/{n_species}", rel_times[-1], abs_times[-1], hl)

# Stored as a plain object column; it is promoted to the active geometry when exported.
gdf['no_overlap_geometry'] = pd.Series(no_overlap_geoms, index=gdf.index, dtype=object)

Loxodonta africana
1/50 0.002480030059814453 0.0024819374084472656 False
Elephas maximus
2/50 0.14952802658081055 0.15201115608215332 False
Loxodonta cyclotis
3/50 0.2582128047943115 0.41022491455078125 False
Ceratotherium simum
4/50 0.38393425941467285 0.7941598892211914 False
Rhinoceros sondaicus
5/50 0.2771761417388916 1.0713369846343994 False
Diceros bicornis
6/50 0.6181612014770508 1.6894989013671875 False
Giraffa camelopardalis
7/50 0.27898621559143066 1.9684858322143555 False
Bos gaurus
8/50 0.3378770351409912 2.306364059448242 False
Bos mutus
9/50 0.19123387336730957 2.4975979328155518 False
Bison bison
10/50 0.2026970386505127 2.700295925140381 False
Bison bonasus
11/50 0.39383506774902344 3.0941319465637207 False
Syncerus caffer
12/50 0.4734160900115967 3.567548990249634 False
Bos javanicus
13/50 0.44307708740234375 4.010627031326294 False
Alces alces
14/50 0.5865449905395508 4.597172021865845 False
Tragelaphus eurycerus
15/50 0.7733051776885986 5.370477199554443 False
Ursus 

In [12]:
# Build the exported frame: same attributes as gdf, but with the de-overlapped
# shapes as the active geometry.
geos = gdf['no_overlap_geometry']
# Drop both geometry columns from the attribute table; the geometry is re-attached below.
a_gdf = gdf.drop(columns = ['geometry','no_overlap_geometry'])
# NOTE(review): the CRS is hard-coded to EPSG:4326 — confirm it matches the source shapefile.
new_gdf = gpd.GeoDataFrame(a_gdf, crs="EPSG:4326", geometry=geos)
# Only the first 44 rows are written: the note after this cell reports that including
# Canis lupus (entry 45) makes the export fail.
new_gdf.head(44).to_file('data/shapefiles/largest-mammals')

Note: including *Canis lupus* (row 45) causes writing the GeoDataFrame to file to fail, which is why only the first 44 rows are exported above.