In [None]:
!pip install -q geopandas
!apt install -q proj-bin libproj-dev libgeos-dev -y
!pip install -q https://github.com/matplotlib/basemap/archive/master.zip
!pip install -q rasterio

# Pandas is a package containing additional functions to use data frames in Python
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.basemap import Basemap
import warnings
import rasterio
import rasterio.features
import numpy as np
import seaborn as sns
import shapely
import os
import copy
# Silence every warning category for the rest of the session (this also hides
# deprecation and pandas/geopandas warnings raised by later cells).
warnings.simplefilter('ignore')
# Mount Google Drive into the Colab filesystem so the notebook can read and
# write project files; force_remount=True re-mounts even if already mounted.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Root of the project folder within the mounted Google Drive. The value ends
# with a trailing "/", so relative paths are appended by plain concatenation
# without a leading slash.
file_path = "/content/drive/My Drive/"

%load_ext rpy2.ipython
from rpy2.robjects import pandas2ri  # activate pandas R  interface
pandas2ri.activate()

Reading package lists...
Building dependency tree...
Reading state information...
libgeos-dev is already the newest version (3.6.2-1build2).
libproj-dev is already the newest version (4.9.3-2).
proj-bin is already the newest version (4.9.3-2).
The following package was automatically installed and is no longer required:
  libnvidia-common-440
Use 'apt autoremove' to remove it.
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.
  Building wheel for basemap (setup.py) ... [?25l[?25hdone
Mounted at /content/drive
The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [None]:
def convert_xy_to_longlat(grid_x, grid_y):
  """Convert grid indices to (longitude, latitude) in degrees.

  The grid has 6 cells per degree. Column 0 corresponds to longitude -180
  and row 0 to latitude 90; rows increase as latitude decreases.

  Args:
    grid_x: column index (or fractional column position) on the grid.
    grid_y: row index (or fractional row position) on the grid.

  Returns:
    A ``(lon, lat)`` tuple in degrees.
  """
  cells_per_degree = 6
  degrees_from_west_edge = grid_x / cells_per_degree
  degrees_from_north_edge = grid_y / cells_per_degree
  return (degrees_from_west_edge - 180, -(degrees_from_north_edge - 90))

def convert_longlat_to_xy(lon, lat):
  """Convert a longitude/latitude in degrees to integer grid indices.

  The grid has 6 cells per degree. Longitude -180 maps to column 0 and
  latitude 90 maps to row 0; rows increase as latitude decreases.
  Fractional positions are truncated toward zero by ``int()``.

  Args:
    lon: longitude in degrees (scalar).
    lat: latitude in degrees (scalar).

  Returns:
    A ``(grid_x, grid_y)`` tuple of ints.
  """
  cells_per_degree = 6
  column_position = (lon + 180) * cells_per_degree
  row_position = (-lat + 90) * cells_per_degree
  return (int(column_position), int(row_position))

---
## Notebook 12
# Adding Continent Information

This notebook is just to add a "continent" column to the observation data and predicted data for the worldclim variables.

First, we read in a shapefile showing the location of each continent, so that we can automatically assign observation points to the correct continent - source https://www.arcgis.com/home/item.html?id=5cf4f223c4a642eb9aa7ae1216a04372

In [None]:
# Load the continent polygons. `file_path` already ends with "/", so the
# relative path is appended without a leading slash (consistent with the
# other paths built from `file_path` in this notebook).
worldmap = gpd.read_file(file_path + "continent_shapefile/continent.shp")
# Reproject to EPSG:4088 so the polygons share a CRS with the observation
# points, which are reprojected to the same CRS before the containment test.
worldmap = worldmap.to_crs('epsg:4088')
# Drop Antarctica; .copy() gives an independent frame so the column
# assignment below does not operate on a slice of the original.
worldmap = worldmap[worldmap['CONTINENT'] != 'Antarctica'].copy()
worldmap.index = worldmap['CONTINENT']
# Name -> polygon lookup. This is built BEFORE simplification, so it holds
# the full-resolution geometries.
continents = dict(zip(worldmap['CONTINENT'], worldmap['geometry']))
# Simplify only the copy kept in `worldmap`.
# NOTE(review): the tolerance is expressed in CRS units; EPSG:4088 is a
# projected CRS (presumably metres), so 0.15 barely simplifies anything and
# looks like a value chosen for degrees - confirm the intended tolerance.
worldmap['geometry'] = worldmap['geometry'].simplify(tolerance=0.15)

We also read the list of species which we have observations for.

In [None]:
# Read one species name per line. The context manager closes the file
# deterministically, and blank lines are skipped so they cannot become empty
# species names (which would later resolve to "_observations.tsv").
with open(file_path + "species_names.tsv") as species_file:
    species_list = [line.strip() for line in species_file if line.strip()]

We want to know which continent each observation falls in: read the observation table for each species, then test each observation point against the continent polygons and record the first continent that contains it.

In [None]:
species_type_obs = dict()
gdfD = dict()

# Continents are tested in this fixed order; the first polygon that contains
# a point determines its label.
_CONTINENT_ORDER = ["North America", "Europe", "Asia", "South America", "Africa", "Australia", "Oceania"]


def _continent_of(point, default=0):
    """Return the name of the first continent whose polygon contains `point`.

    Args:
        point: a geometry exposing ``within(polygon)``, in the same CRS as
            the polygons stored in the module-level ``continents`` dict.
        default: value returned when the point lies inside none of the
            continent polygons (e.g. an observation at sea). Defaults to 0.

    Returns:
        The continent name, or `default` if no polygon contains the point.
    """
    for name in _CONTINENT_ORDER:
        if point.within(continents[name]):
            return name
    return default


for species in species_list:
    out_path = file_path + "species_plus_climate_observed_geo/" + species + ".tsv"
    # Skip species whose output file already exists, so the cell can be re-run
    # without redoing finished work.
    if os.path.exists(out_path):
        continue

    obs_table = pd.read_csv(file_path + "species_plus_climate_observed/" + species + "_observations.tsv", sep="\t")
    # Nothing is written for species with no observations.
    if len(obs_table) == 0:
        continue

    # Build points from lon/lat (EPSG:4326) and reproject them to EPSG:4088,
    # the CRS the observation points are tested in.
    gdf = gpd.GeoDataFrame(obs_table,
                           geometry=gpd.points_from_xy(obs_table['decimalLongitude'], obs_table['decimalLatitude']),
                           crs="epsg:4326")
    gdf = gdf.to_crs('epsg:4088')

    # Exactly one label per observation: points that fall inside no continent
    # polygon get the fallback 0, so the new column always matches the
    # frame's length instead of raising a length-mismatch error.
    gdf['Continent'] = [_continent_of(point) for point in gdf['geometry']]
    gdf.to_csv(out_path, sep="\t", index=False)