In [None]:
!pip install -q geopandas
!apt install -q proj-bin libproj-dev libgeos-dev -y
!pip install -q https://github.com/matplotlib/basemap/archive/master.zip
!pip install -q rasterio

# Pandas is a package containing additional functions to use data frames in Python
import pandas as pd
import geopandas as gpd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
from mpl_toolkits.basemap import Basemap
import warnings
import rasterio
import numpy as np
import seaborn as sns
warnings.simplefilter('ignore')
# Mount the user's Google Drive inside the Colab runtime so that later cells can
# read the input data from it and write results back to it.
# NOTE(review): force_remount=True presumably re-mounts even when the drive is
# already mounted; confirm against the google.colab documentation.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Root of the project folder within the Google Drive. Keep the trailing slash:
# later cells build paths by plain string concatenation (file_path + "name").
file_path = "/content/drive/My Drive/"


[K     |████████████████████████████████| 972kB 2.6MB/s 
[K     |████████████████████████████████| 10.9MB 7.4MB/s 
[K     |████████████████████████████████| 14.7MB 257kB/s 
[?25hReading package lists...
Building dependency tree...
Reading state information...
The following package was automatically installed and is no longer required:
  libnvidia-common-440
Use 'apt autoremove' to remove it.
Suggested packages:
  libgdal-doc
The following NEW packages will be installed:
  libgeos-dev libproj-dev proj-bin
0 upgraded, 3 newly installed, 0 to remove and 35 not upgraded.
Need to get 305 kB of archives.
After this operation, 1,706 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libgeos-dev amd64 3.6.2-1build2 [73.1 kB]
Get:2 http://archive.ubuntu.com/ubuntu bionic/universe amd64 libproj-dev amd64 4.9.3-2 [199 kB]
Get:3 http://archive.ubuntu.com/ubuntu bionic/universe amd64 proj-bin amd64 4.9.3-2 [32.3 kB]
Fetched 305 kB in 2s (153 k

  import pandas.util.testing as tm


Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [19]:
import os

In [15]:
def convert_xy_to_longlat(grid_x, grid_y):
  """Convert raster grid indices to geographic coordinates.

  The grid is treated as having 6 cells per degree, with column 0 at
  longitude -180 and row 0 at latitude +90 (row numbers grow southwards).

  Args:
    grid_x: column index in the raster grid.
    grid_y: row index in the raster grid.

  Returns:
    A (lon, lat) tuple in decimal degrees.
  """
  cells_per_degree = 6
  degrees_from_west_edge = grid_x / cells_per_degree
  degrees_from_north_edge = grid_y / cells_per_degree
  return (degrees_from_west_edge - 180, -(degrees_from_north_edge - 90))

def convert_longlat_to_xy(lon, lat):
  """Convert geographic coordinates to raster grid indices.

  Inverse of the 6-cells-per-degree mapping: longitude -180 maps to column 0
  and latitude +90 maps to row 0. Fractional positions are truncated with
  int(), so each coordinate is assigned to the cell that contains it.

  Args:
    lon: longitude in decimal degrees.
    lat: latitude in decimal degrees.

  Returns:
    A (grid_x, grid_y) tuple of integer column and row indices.
  """
  column_position = (lon + 180) * 6
  row_position = (-lat + 90) * 6
  return (int(column_position), int(row_position))

---
## Notebook 11
# WORLDCLIM Data - Extracting climate predictions for presence points

In this notebook we extract the values of the 19 WorldClim bioclimatic variables at every point where a species has been observed, for each model-SSP-timepoint combination.

For each species, we generate a table of the 19 measurements at each point for each of the 48 (3 models * 4 scenarios * 4 timepoints) predicted futures.

The code is otherwise identical to the ```batch_data_presence_observed_all_species``` table.

In [16]:
# Load the table describing the bioclim variables, then build a lookup from
# each variable's number to its name.
bioclim = pd.read_csv(file_path + "bioclim.tsv", sep="\t")
bioclim_name = {
    number: name
    for number, name in zip(bioclim['variable_number'], bioclim['name'])
}

In [17]:
# Climate models whose predicted rasters are processed.
models = [
    'BCC-CSM2-MR',
    'CanESM5',
    'MIROC6',
]

# SSP scenarios available for each model.
scenarios = [
    'ssp126',
    'ssp245',
    'ssp370',
    'ssp585',
]

# Future time periods available for each model/scenario pair.
time_periods = [
    '2021-2040',
    '2041-2060',
    '2061-2080',
    '2081-2100',
]

In [None]:
# Extract the 19 bioclim values at every observation point of every species,
# for each model / scenario / time-period raster, and write one TSV per
# (model, scenario, time period, species) combination.

# Read the species names (one per line). The `with` block closes the file
# handle, and blank lines are skipped so that a trailing newline does not
# produce an empty species name (and a lookup of ".csv").
with open(file_path + "species_names.tsv") as species_file:
    species_list = [line.strip() for line in species_file if line.strip()]

# Number of bioclim layers taken from each raster (bands 0-18).
n_bioclim = 19

output_root = file_path + "species_plus_climate_predicted/"
result_columns = (['obs_ID', 'x', 'y', 'decimalLongitude', 'decimalLatitude']
                  + [bioclim_name[number] for number in range(1, n_bioclim + 1)])

# The observation coordinates do not depend on the climate raster, so each
# species table is read and converted to grid positions once here, rather than
# once per raster inside the loops below. Only the five values needed per
# observation are kept in memory.
observations_by_species = dict()
for species_name in species_list:
    distribution_table = pd.read_csv(file_path + "geo_filtered_main_tables/" + species_name + ".csv", sep="\t")
    distribution_table['x_pos_raster'] = [convert_longlat_to_xy(lon, 0)[0] for lon in distribution_table['decimalLongitude']]
    distribution_table['y_pos_raster'] = [convert_longlat_to_xy(0, lat)[1] for lat in distribution_table['decimalLatitude']]
    distribution_table['ID'] = [species_name + "_obs_" + str(n) for n in range(1, len(distribution_table) + 1)]

    # (obs_ID, x, y, longitude, latitude) for every observation, in table order
    observations_by_species[species_name] = list(zip(distribution_table['ID'],
                                                     distribution_table['x_pos_raster'],
                                                     distribution_table['y_pos_raster'],
                                                     distribution_table['decimalLongitude'],
                                                     distribution_table['decimalLatitude']))

for model in models:
    for scenario in scenarios:
        for time_period in time_periods:
            # Create the whole output folder chain in one call; this also works
            # when the root output folder does not exist yet.
            output_dir = output_root + model + "/" + scenario + "/" + time_period
            os.makedirs(output_dir, exist_ok=True)

            # Path to the predicted-climate raster for this combination. The
            # "_current" names are kept in case later cells refer to them.
            raster_path_current = file_path + "climate_data/" + model + "/" + scenario + "/" + time_period + ".tiff"

            # Read all bands into memory and close the dataset straight away,
            # so that file handles do not accumulate across the rasters.
            with rasterio.open(raster_path_current) as raster_current:
                grid_current = raster_current.read()

            # round to 6dp and replace -inf with nan
            grid_current = np.round(grid_current, 6)
            grid_current[grid_current == float('-inf')] = float('nan')

            # Keep rows 0-929 only. With the 6-cells-per-degree indexing of
            # convert_longlat_to_xy this drops everything south of 65°S; an
            # observation beyond that row raises IndexError below.
            grid_current = grid_current[:, 0:930, :]

            for species_name in species_list:
                # One row per observation: its ID and coordinates, followed by
                # the value of each bioclim layer at its grid cell.
                raster_results = [
                    [obs_ID, x, y, lon, lat] + [grid_current[i, y, x] for i in range(n_bioclim)]
                    for obs_ID, x, y, lon, lat in observations_by_species[species_name]
                ]

                results = pd.DataFrame(raster_results, columns=result_columns)
                results.to_csv(output_dir + "/" + species_name + "_observations.tsv", sep="\t", index=False)
