In [None]:
# Install the third-party packages imported later in this notebook (wget, rasterio).
!pip install wget
!pip install rasterio

Collecting rasterio
[?25l  Downloading https://files.pythonhosted.org/packages/02/7e/eed7dfd109fc89ed3cf8b5ed3f26f841b03b92f6ca1c31c4745f938a081b/rasterio-1.1.5-cp36-cp36m-manylinux1_x86_64.whl (18.2MB)
[K     |████████████████████████████████| 18.2MB 1.3MB/s 
[?25hCollecting snuggs>=1.4.1
  Downloading https://files.pythonhosted.org/packages/cc/0e/d27d6e806d6c0d1a2cfdc5d1f088e42339a0a54a09c3343f7f81ec8947ea/snuggs-1.4.7-py3-none-any.whl
Collecting click-plugins
  Downloading https://files.pythonhosted.org/packages/e9/da/824b92d9942f4e472702488857914bdd50f73021efea15b4cad9aca8ecef/click_plugins-1.1.1-py2.py3-none-any.whl
Collecting cligj>=0.5
  Downloading https://files.pythonhosted.org/packages/e4/be/30a58b4b0733850280d01f8bd132591b4668ed5c7046761098d665ac2174/cligj-0.5.0-py3-none-any.whl
Collecting affine
  Downloading https://files.pythonhosted.org/packages/ac/a6/1a39a1ede71210e3ddaf623982b06ecfc5c5c03741ae659073159184cd3e/affine-2.3.0-py2.py3-none-any.whl
Installing collected pa

In [None]:
# Pandas is a package containing additional functions to use data frames in Python
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.simplefilter('ignore')
import numpy as np
import wget
import zipfile
import glob
import os
import shutil
import rasterio
# Mount Google Drive at /content/drive so the notebook can read and write files there.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Path to the project folder within the mounted Google Drive. It ends with a
# slash because later cells build paths by plain string concatenation.
file_path = "/content/drive/My Drive/"

Mounted at /content/drive


---
## Notebook 6
# Downloading WORLDCLIM Data

This code downloads all the data from WORLDCLIM under six climate models and four SSPs, for four time periods plus the present.

Three models are skipped: GFDL-ESM4, because it doesn't have data for every SSP, and IPSL-CM6A-LR and MRI-ESM2, because some of their download links are broken.

In [None]:
# Climate models to process; each name appears verbatim in the download
# file names and in the folder names built from this list.
models = [
    'BCC-CSM2-MR',
    'CNRM-CM6-1',
    'CNRM-ESM2-1',
    'CanESM5',
    'MIROC-ES2L',
    'MIROC6',
]

In [None]:
# SSP scenario codes, spelled the way they appear in the download file names.
ssps = ['ssp' + code for code in ('126', '245', '370', '585')]

In [None]:
# Four consecutive 20-year projection periods, formatted "<first year>-<last year>".
intervals = ['%d-%d' % (first_year, first_year + 19) for first_year in range(2021, 2100, 20)]

In [None]:
# Common URL prefix of every future-climate archive; the model, SSP and
# interval are appended (underscore-separated) to form the full file name.
base = (
    "http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/"
    "wc2.1_10m_bioc"
)

Download the files for every possible combination of model, ssp and interval.

In [None]:
# Download one zip archive per (model, ssp, interval) combination into
# <file_path>/climate_data/zipped/.

# Make sure the target folder exists so the first download does not fail.
zipped_dir = os.path.join(file_path, "climate_data", "zipped")
os.makedirs(zipped_dir, exist_ok=True)

for model in models:
  for ssp in ssps:
    for interval in intervals:
      name = model + "_" + ssp + "_" + interval + ".zip"
      url = base + "_" + name
      target = os.path.join(zipped_dir, name)
      # Skip archives already fetched by an earlier run, so re-running the
      # cell neither repeats the download nor leaves duplicate copies behind.
      if os.path.exists(target):
        continue
      print (url)
      wget.download(url, out=target)

http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp126_2021-2040.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp126_2041-2060.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp126_2061-2080.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp126_2081-2100.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp245_2021-2040.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp245_2041-2060.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp245_2061-2080.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp245_2081-2100.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_ssp370_2021-2040.zip
http://biogeo.ucdavis.edu/data/worldclim/v2.1/fut/10m/wc2.1_10m_bioc_BCC-CSM2-MR_s

Extract all the zipped data

In [None]:
# Unpack every downloaded archive into its own folder:
# <file_path>/climate_data/unzipped/<model>_<ssp>_<interval>/
for model in models:
  for ssp in ssps:
    for interval in intervals:
      name = model + "_" + ssp + "_" + interval
      dir_path = os.path.join(file_path, "climate_data", "unzipped", name)
      # makedirs also creates the parent "unzipped" folder if it is missing,
      # and exist_ok makes the cell safe to re-run.
      os.makedirs(dir_path, exist_ok=True)
      archive = os.path.join(file_path, "climate_data", "zipped", name + ".zip")
      with zipfile.ZipFile(archive) as zip_ref:
        zip_ref.extractall(dir_path)

Tidy up the directory structure

In [None]:
# NOTE(review): this uses whatever values `model`, `ssp` and `interval` were
# left holding by the most recent loop, and `newpath` is not referenced by any
# other cell visible in this notebook - confirm whether this cell is still needed.
newpath = (
    f"{file_path}unzipped/{model}_{ssp}_{interval}"
    f"/share/spatial03/worldclim/cmip6/7_fut/10m/{model}/{ssp}/"
    f"{model}_{ssp}_{interval}.tif"
)

In [None]:
# Create the tidy folder tree <file_path>/climate_data/<model>/<ssp>/.
# exist_ok=True replaces the explicit "exists?" check and makes the cell
# safe to re-run.
for model in models:
  model_dir = os.path.join(file_path, "climate_data", model)
  os.makedirs(model_dir, exist_ok=True)
  for ssp in ssps:
    os.makedirs(os.path.join(model_dir, ssp), exist_ok=True)


In [None]:
# Move each extracted raster out of the deep folder structure inside its
# archive to <file_path>/climate_data/<model>/<ssp>/<interval>.tiff
for model in models:
  for ssp in ssps:
    for interval in intervals:
      name = model + "_" + ssp + "_" + interval
      full_path = os.path.join(
          file_path, "climate_data", "unzipped", name,
          "share", "spatial03", "worldclim", "cmip6", "7_fut", "10m",
          model, ssp, "wc2.1_10m_bioc_" + name + ".tif")
      destination = os.path.join(file_path, "climate_data", model, ssp, interval + ".tiff")
      # Already moved by an earlier run: nothing left to do for this file.
      if os.path.exists(destination) and not os.path.exists(full_path):
        continue
      # Make sure the destination folder exists before moving into it.
      os.makedirs(os.path.dirname(destination), exist_ok=True)
      shutil.move(full_path, destination)

Combine all the present day raster data into a single stack so it resembles the future data and can be processed in the same way.

In [None]:
# Paths of the 19 single-band present-day rasters (wc2.1_10m_bio_1.tif ... _19.tif).
file_list = [file_path + "climate_data/near_present/" + 'wc2.1_10m_bio_%i.tif' % i for i in range(1, 20)]

# Read metadata of first file; it is reused as the template for the stacked output.
with rasterio.open(file_list[0]) as src0:
    meta = src0.meta

# Update meta to reflect the number of layers
meta.update(count=len(file_list))

# Read each layer and write it to the stack as band 1, 2, ... in file_list order.
# The loop variable is called band_index rather than `id`, so the builtin id()
# is not overwritten in the notebook's global namespace.
with rasterio.open(file_path + "climate_data/" + 'near_present.tif', 'w', **meta) as dst:
    for band_index, layer in enumerate(file_list, start=1):
        with rasterio.open(layer) as src1:
            dst.write_band(band_index, src1.read(1))

Find the maximum and minimum of each climate variable (to use for plotting later)

In [206]:
# D_min[i] / D_max[i] collect, for band index i (0-18), one minimum / maximum
# per (model, ssp, interval) raster.
D_min = dict()
D_max = dict()
for model in models:
  for ssp in ssps:
    for interval in intervals:
      # The context manager closes the dataset as soon as it has been read,
      # instead of leaving one open file handle per raster.
      with rasterio.open(file_path + "climate_data/" + "/"  +model + "/" + ssp + "/" + interval + ".tiff") as raster:
        # convert the data into a matrix
        grid = raster.read()

      # round it to six decimal places
      grid = np.round(grid, 6)

      # replace "-inf" with "nan" so nanmin/nanmax ignore those cells
      grid[grid == -np.inf] = np.nan

      for i in range(0, 19):
        # NOTE(review): only rows 0-929 of each band are scanned - presumably
        # to leave out the southernmost rows of the grid; confirm the intent.
        D_min.setdefault(i, []).append(np.nanmin(grid[i][0:930, :]))
        D_max.setdefault(i, []).append(np.nanmax(grid[i][0:930, :]))

Clean the data and write it to file

In [182]:
# Load the variable lookup table: a header-less, tab-separated file whose two
# columns are named 'ID' and 'name' here.
clim = pd.read_csv(
    file_path + "bioclim_variables.tsv",
    sep="\t",
    header=None,
    names=['ID', 'name'],
)

In [183]:
# Number the variables from 1, based on each row's index label.
clim['variable_number'] = clim.index.to_numpy() + 1

In [184]:
# `results` is not defined in any visible cell, so on a fresh kernel this cell
# raised NameError. When it is missing, rebuild it from D_min / D_max with the
# columns the following cells need ('variable_number', 'minimum', 'maximum').
# NOTE(review): assumes the intended values are the overall minimum / maximum of
# each variable across every model, SSP and interval - confirm.
if 'results' not in globals():
  results = pd.DataFrame({
      'variable_number': [i + 1 for i in sorted(D_min)],
      'minimum': [np.nanmin(D_min[i]) for i in sorted(D_min)],
      'maximum': [np.nanmax(D_max[i]) for i in sorted(D_min)],
  })

# Attach the minimum / maximum columns to the variable lookup table
# (merge joins on the columns the two frames share).
clim = clim.merge(results)

In [185]:
# Keep only the columns that are written out, in their final order.
clim = clim.loc[:, ['variable_number', 'ID', 'name', 'minimum', 'maximum']]

In [186]:
# Write the table as a tab-separated file, without the row index.
clim.to_csv(file_path + "bioclim.tsv", sep="\t", index=False)