In [1]:
import numpy as np 
import pandas as pd 
from pathlib import Path

# Problems

## Why protected area coverage can exceed 100%

Some countries have some protected areas recorded as points with a reported area. This point data is more uncertain than protected areas reported as polygons, because overlaps cannot be identified or resolved. For this reason, the total for each country is provided both with and without the total reported area of protected areas recorded as points. The reported area of protected areas recorded as points can (individually or cumulatively) exceed the reference area used to calculate these indicators, so coverage values greater than 100% can sometimes be reported.

## Last update 

June 2020

## Historical data

Not all protected areas have a designation date recorded. When there is no designation date, the protected area is deemed to have always existed; therefore, historical data may be overestimated.


# Load Data

## Protected Areas

Total for category is the total area protected under that IUCN management designation. Note that because overlaps among protected areas are relatively common, the total protected area for a country is typically less than the sum of the disaggregated areas.

In [2]:
# Root folder for all input files, relative to the notebook's working directory.
DATA_PATH = Path('./data')
# OECD extract with total protected-area coverage per country and year.
PROTECTED_AREAS = DATA_PATH.joinpath('OECD', 'PROTECTED_AREAS.csv')

# Read the raw table exactly as exported; it is narrowed down in later cells.
protected_areas = pd.read_csv(PROTECTED_AREAS)

In [3]:
# (rows, columns) of the raw table as loaded
protected_areas.shape

(2108, 23)

In [4]:
# preview the first rows to see which columns the export contains
protected_areas.head()

Unnamed: 0,COU,Country,DESIG,Designation,DOMAIN,Domain,MEASURE,Measure,CALCULATION,Calculation method,...,Year,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
0,AUS,Australia,ALL_INC_POINTS,"All, including data recorded as points",TERRESTRIAL,Terrestrial,PCNT,Percent of total land/EEZ area,NAIVE,Total for category,...,1970,PC,Percentage,0,Units,,,1.08,,
1,AUS,Australia,ALL_INC_POINTS,"All, including data recorded as points",TERRESTRIAL,Terrestrial,PCNT,Percent of total land/EEZ area,NAIVE,Total for category,...,1980,PC,Percentage,0,Units,,,2.81,,
2,AUS,Australia,ALL_INC_POINTS,"All, including data recorded as points",TERRESTRIAL,Terrestrial,PCNT,Percent of total land/EEZ area,NAIVE,Total for category,...,1990,PC,Percentage,0,Units,,,5.06,,
3,AUS,Australia,ALL_INC_POINTS,"All, including data recorded as points",TERRESTRIAL,Terrestrial,PCNT,Percent of total land/EEZ area,NAIVE,Total for category,...,1995,PC,Percentage,0,Units,,,6.11,,
4,AUS,Australia,ALL_INC_POINTS,"All, including data recorded as points",TERRESTRIAL,Terrestrial,PCNT,Percent of total land/EEZ area,NAIVE,Total for category,...,2000,PC,Percentage,0,Units,,,7.06,,


In [5]:
# count distinct non-null values per column to see which columns actually vary;
# a count of 0 means the column holds only missing values
protected_areas.nunique()

COU                      124
Country                  124
DESIG                      1
Designation                1
DOMAIN                     1
Domain                     1
MEASURE                    1
Measure                    1
CALCULATION                1
Calculation method         1
SCOPE                      1
Scope                      1
YEA                       17
Year                      17
Unit Code                  1
Unit                       1
PowerCode Code             1
PowerCode                  1
Reference Period Code      0
Reference Period           0
Value                    883
Flag Codes                 0
Flags                      0
dtype: int64

In [6]:
# Keep only the three columns used downstream and switch them to snake_case
# names in a single chained expression.
protected_areas = (
    protected_areas
    .loc[:, ['Country', 'Year', 'Value']]
    .rename(columns={
        'Country': 'country',
        'Year': 'year',
        'Value': 'protected_area_perc',
    })
)

In [7]:
# Keep observations from 2015 onwards. In this extract that is 2015-2020,
# i.e. six yearly values per country (not five).
from_2015 = protected_areas['year'].ge(2015)
protected_areas = protected_areas.loc[from_2015]
protected_areas.shape

(744, 3)

In [8]:
# count missing (NaN) entries per column; all zeros means no gaps to handle
protected_areas.isna().sum()

country                0
year                   0
protected_area_perc    0
dtype: int64

In [9]:
# summary statistics of the numeric columns; the maximum of
# protected_area_perc lies far above 100 because this series includes areas
# recorded as points (explained in the Problems section at the top)
protected_areas.describe()

Unnamed: 0,year,protected_area_perc
count,744.0,744.0
mean,2017.5,37.122688
std,1.708974,138.387658
min,2015.0,0.0
25%,2016.0,8.33
50%,2017.5,16.82
75%,2019.0,26.48
max,2020.0,1526.88


## Protected Area by management objective

In [10]:
# OECD extract with protected-area coverage broken down by designation.
PROTECTED_AREAS_OBJECTIVE = DATA_PATH.joinpath('OECD', 'PROTECTED_AREAS_OBJECTIVE.csv')

# Read the raw table as exported; filtering happens in the cells below.
protected_area_obj = pd.read_csv(PROTECTED_AREAS_OBJECTIVE)

In [11]:
# (rows, columns) of the raw per-designation table
protected_area_obj.shape

(71910, 23)

In [12]:
# preview the first rows of the raw per-designation table
protected_area_obj.head()

Unnamed: 0,COU,Country,DESIG,Designation,DOMAIN,Domain,MEASURE,Measure,CALCULATION,Calculation method,...,Year,Unit Code,Unit,PowerCode Code,PowerCode,Reference Period Code,Reference Period,Value,Flag Codes,Flags
0,AUS,Australia,IA,Ia: Strict Nature Reserve,TERRESTRIAL,Terrestrial,SQKM,Square kilometers,ERASED,Erased against precedent categories,...,1970,KM2,Square kilometres,0,Units,,,34770.33,,
1,AUS,Australia,IA,Ia: Strict Nature Reserve,TERRESTRIAL,Terrestrial,SQKM,Square kilometers,ERASED,Erased against precedent categories,...,1980,KM2,Square kilometres,0,Units,,,103556.25,,
2,AUS,Australia,IA,Ia: Strict Nature Reserve,TERRESTRIAL,Terrestrial,SQKM,Square kilometers,ERASED,Erased against precedent categories,...,1990,KM2,Square kilometres,0,Units,,,127115.19,,
3,AUS,Australia,IA,Ia: Strict Nature Reserve,TERRESTRIAL,Terrestrial,SQKM,Square kilometers,ERASED,Erased against precedent categories,...,1995,KM2,Square kilometres,0,Units,,,133699.95,,
4,AUS,Australia,IA,Ia: Strict Nature Reserve,TERRESTRIAL,Terrestrial,SQKM,Square kilometers,ERASED,Erased against precedent categories,...,2000,KM2,Square kilometres,0,Units,,,136012.95,,


In [13]:
# count distinct non-null values per column; unlike the totals table, this one
# varies in designation, domain and measure/unit, which the next cells filter on
protected_area_obj.nunique()

COU                        127
Country                    127
DESIG                        9
Designation                  9
DOMAIN                       2
Domain                       2
MEASURE                      2
Measure                      2
CALCULATION                  1
Calculation method           1
SCOPE                        1
Scope                        1
YEA                         17
Year                        17
Unit Code                    2
Unit                         2
PowerCode Code               1
PowerCode                    1
Reference Period Code        0
Reference Period             0
Value                    10090
Flag Codes                   0
Flags                        0
dtype: int64

In [14]:
# Restrict the table to the terrestrial domain.
is_terrestrial = protected_area_obj['Domain'].eq('Terrestrial')
protected_area_obj = protected_area_obj.loc[is_terrestrial]
protected_area_obj.shape

(37944, 23)

In [15]:
# Keep only percentage values: absolute square kilometres are not comparable
# between countries of different sizes.
is_percentage = protected_area_obj['Unit'].eq('Percentage')
protected_area_obj = protected_area_obj.loc[is_percentage]
protected_area_obj.shape

(18972, 23)

In [16]:
# Keep observations from 2015 onwards. In this extract that is 2015-2020,
# i.e. six yearly values per country and designation (not five).
from_2015 = protected_area_obj['Year'].ge(2015)
protected_area_obj = protected_area_obj.loc[from_2015]
protected_area_obj.shape

(6696, 23)

In [17]:
# Keep only the columns used downstream and switch them to snake_case names
# in a single chained expression.
protected_area_obj = (
    protected_area_obj
    .loc[:, ['Country', 'Year', 'Designation', 'Value']]
    .rename(columns={
        'Country': 'country',
        'Year': 'year',
        'Designation': 'designation',
        'Value': 'protected_area_perc',
    })
)

In [18]:
# preview the filtered, renamed table; the index still carries the row labels
# of the raw file because it has not been reset
protected_area_obj.head()

Unnamed: 0,country,year,designation,protected_area_perc
28,Australia,2015,Ia: Strict Nature Reserve,1.96
29,Australia,2016,Ia: Strict Nature Reserve,1.97
30,Australia,2017,Ia: Strict Nature Reserve,2.0
31,Australia,2018,Ia: Strict Nature Reserve,2.01
32,Australia,2019,Ia: Strict Nature Reserve,2.01


In [19]:
# summary statistics of the numeric columns; as in the totals table, the
# maximum of protected_area_perc is well above 100
protected_area_obj.describe()

Unnamed: 0,year,protected_area_perc
count,6696.0,6696.0
mean,2017.5,6.513519
std,1.707953,48.089838
min,2015.0,0.0
25%,2016.0,0.0
50%,2017.5,0.16
75%,2019.0,3.14
max,2020.0,1526.88


In [20]:
# list the designation categories present after filtering
# NOTE(review): 'All, including data recorded as points' looks like an
# aggregate sitting alongside the individual IUCN categories - presumably it
# should be excluded before summing or comparing across designations; confirm
protected_area_obj['designation'].unique()

array(['Ia: Strict Nature Reserve', 'Ib: Wilderness Area',
       'II: National Park', 'III: Natural Monument or Feature',
       'IV: Habitat or Species Management Area',
       'V: Protected Landscape or Seascape',
       'VI: Protected area with sustainable use of natural resources',
       'No IUCN category provided',
       'All, including data recorded as points'], dtype=object)

In [21]:
# count missing (NaN) entries per column; all zeros means no gaps to handle
protected_area_obj.isna().sum()

country                0
year                   0
designation            0
protected_area_perc    0
dtype: int64