# Biodiversity in National Parks

In [1]:
# Import required libraries
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt

This `species_info.csv` is a dataset that show the list of species that located on the National Parks. This dataset contain columns :
1. category : The biological taxonomy of the species
2. scientific_name : The Scientific Name of the species
3. common_names : The Common Name of the species
4. conservation_status : The status of Conservation of the species

In [2]:
species = pd.read_csv('./species_info.csv')
species.head()

Unnamed: 0,category,scientific_name,common_names,conservation_status
0,Mammal,Clethrionomys gapperi gapperi,Gapper's Red-Backed Vole,
1,Mammal,Bos bison,"American Bison, Bison",
2,Mammal,Bos taurus,"Aurochs, Aurochs, Domestic Cattle (Feral), Dom...",
3,Mammal,Ovis aries,"Domestic Sheep, Mouflon, Red Sheep, Sheep (Feral)",
4,Mammal,Cervus elaphus,Wapiti Or Elk,


In [None]:
species[species['conservation_status'].notna()].head()

Unnamed: 0,category,scientific_name,common_names,conservation_status
7,Mammal,Canis latrans,Coyote,Species of Concern
8,Mammal,Canis lupus,Gray Wolf,Endangered
9,Mammal,Canis rufus,Red Wolf,Endangered
29,Mammal,Eptesicus fuscus,Big Brown Bat,Species of Concern
30,Mammal,Lasionycteris noctivagans,Silver-Haired Bat,Species of Concern


In [None]:
species.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5824 entries, 0 to 5823
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   category             5824 non-null   object
 1   scientific_name      5824 non-null   object
 2   common_names         5824 non-null   object
 3   conservation_status  191 non-null    object
dtypes: object(4)
memory usage: 182.1+ KB


In [18]:
species.describe(include='all')

Unnamed: 0,category,scientific_name,common_names,conservation_status
count,5824,5824,5824,191
unique,7,5541,5504,4
top,Vascular Plant,Procyon lotor,Brachythecium Moss,Species of Concern
freq,4470,3,7,161


This `observations.csv` contains information from recorded sightings of different species throughout the national parks in the past 7 days. The columns included are:

- **scientific_name** - The scientific name of each species
- **park_name** - The name of the national park
- **observations** - The number of observations in the past 7 days

In [13]:
observation = pd.read_csv('./observations.csv')
observation.head()

Unnamed: 0,scientific_name,park_name,observations
0,Vicia benghalensis,Great Smoky Mountains National Park,68
1,Neovison vison,Great Smoky Mountains National Park,77
2,Prunus subcordata,Yosemite National Park,138
3,Abutilon theophrasti,Bryce National Park,84
4,Githopsis specularioides,Great Smoky Mountains National Park,85


In [14]:
observation.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23296 entries, 0 to 23295
Data columns (total 3 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   scientific_name  23296 non-null  object
 1   park_name        23296 non-null  object
 2   observations     23296 non-null  int64 
dtypes: int64(1), object(2)
memory usage: 546.1+ KB


In [17]:
observation.describe(include='all')

Unnamed: 0,scientific_name,park_name,observations
count,23296,23296,23296.0
unique,5541,4,
top,Puma concolor,Great Smoky Mountains National Park,
freq,12,5824,
mean,,,142.287904
std,,,69.890532
min,,,9.0
25%,,,86.0
50%,,,124.0
75%,,,195.0


## The Shape of each dataset (Species and Observation)

In [21]:
print("Species Info Shape :",  species.shape)
print("Observation Shape :",  observation.shape)

Species Info Shape : (5824, 4)
Observation Shape : (23296, 3)


In [27]:
print("Total distinct species :", species['scientific_name'].nunique())

Total distinct species : 5541


In [23]:
print("Number of unique Categories : ", species['category'].nunique())
print("List unique Categories : ", species['category'].unique())

Number of unique Categories :  7
List unique Categories :  ['Mammal' 'Bird' 'Reptile' 'Amphibian' 'Fish' 'Vascular Plant'
 'Nonvascular Plant']


In [34]:
species['category'].value_counts()

category
Vascular Plant       4470
Bird                  521
Nonvascular Plant     333
Mammal                214
Fish                  127
Amphibian              80
Reptile                79
Name: count, dtype: int64

In [24]:
species.head()

Unnamed: 0,category,scientific_name,common_names,conservation_status
0,Mammal,Clethrionomys gapperi gapperi,Gapper's Red-Backed Vole,
1,Mammal,Bos bison,"American Bison, Bison",
2,Mammal,Bos taurus,"Aurochs, Aurochs, Domestic Cattle (Feral), Dom...",
3,Mammal,Ovis aries,"Domestic Sheep, Mouflon, Red Sheep, Sheep (Feral)",
4,Mammal,Cervus elaphus,Wapiti Or Elk,


In [25]:
# Conservation Status
print("Number of unique Conservation Status : ", species['conservation_status'].nunique())
print("List unique Conservation Status : ", species['conservation_status'].unique())

Number of unique Conservation Status :  4
List unique Conservation Status :  [nan 'Species of Concern' 'Endangered' 'Threatened' 'In Recovery']


In [45]:
print("Total NaN value for Conservation Status :", species['conservation_status'].isna().count())
species['conservation_status'].value_counts()

Total NaN value for Conservation Status : 5824


conservation_status
Species of Concern    161
Endangered             16
Threatened             10
In Recovery             4
Name: count, dtype: int64

## Observation

In [46]:
observation.head()

Unnamed: 0,scientific_name,park_name,observations
0,Vicia benghalensis,Great Smoky Mountains National Park,68
1,Neovison vison,Great Smoky Mountains National Park,77
2,Prunus subcordata,Yosemite National Park,138
3,Abutilon theophrasti,Bryce National Park,84
4,Githopsis specularioides,Great Smoky Mountains National Park,85


In [49]:
print("Total Unique Species :", observation['scientific_name'].nunique())

Total Unique Species : 5541


In [51]:
print("Total Park Location of the observation :", observation['park_name'].nunique())
print("List of park name :", observation['park_name'].unique())

Total Park Location of the observation : 4
List of park name : ['Great Smoky Mountains National Park' 'Yosemite National Park'
 'Bryce National Park' 'Yellowstone National Park']


In [57]:
print(f"number of observations: {observation['observations'].sum()}")

number of observations: 3314739


Analysis

In [60]:
species['conservation_status'].fillna('No Intervention', inplace=True)
species['conservation_status'].value_counts()

conservation_status
No Intervention       5633
Species of Concern     161
Endangered              16
Threatened              10
In Recovery              4
Name: count, dtype: int64