In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(style="darkgrid")
import warnings
warnings.simplefilter("ignore")

In [25]:
df = pd.read_csv('parks.csv')
df

Unnamed: 0,Park Code,Park Name,State,Acres,Latitude,Longitude
0,ACAD,Acadia National Park,ME,47390,44.35,-68.21
1,ARCH,Arches National Park,UT,76519,38.68,-109.57
2,BADL,Badlands National Park,SD,242756,43.75,-102.5
3,BIBE,Big Bend National Park,TX,801163,29.25,-103.25
4,BISC,Biscayne National Park,FL,172924,25.65,-80.08
5,BLCA,Black Canyon of the Gunnison National Park,CO,32950,38.57,-107.72
6,BRCA,Bryce Canyon National Park,UT,35835,37.57,-112.18
7,CANY,Canyonlands National Park,UT,337598,38.2,-109.93
8,CARE,Capitol Reef National Park,UT,241904,38.2,-111.17
9,CAVE,Carlsbad Caverns National Park,NM,46766,32.17,-104.44


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 6 columns):
Park Code    56 non-null object
Park Name    56 non-null object
State        56 non-null object
Acres        56 non-null int64
Latitude     56 non-null float64
Longitude    56 non-null float64
dtypes: float64(2), int64(1), object(3)
memory usage: 2.8+ KB


In [4]:
df.describe()

Unnamed: 0,Acres,Latitude,Longitude
count,56.0,56.0,56.0
mean,927929.1,41.233929,-113.234821
std,1709258.0,10.908831,22.440287
min,5550.0,19.38,-159.28
25%,69010.5,35.5275,-121.57
50%,238764.5,38.55,-110.985
75%,817360.2,46.88,-103.4
max,8323148.0,67.78,-68.21


In [4]:
df.isnull().sum()

Park Code    0
Park Name    0
State        0
Acres        0
Latitude     0
Longitude    0
dtype: int64

In [5]:
df['Park Name'].value_counts()

Lassen Volcanic National Park                     1
Guadalupe Mountains National Park                 1
Crater Lake National Park                         1
Mount Rainier National Park                       1
Kobuk Valley National Park                        1
Biscayne National Park                            1
Rocky Mountain National Park                      1
Carlsbad Caverns National Park                    1
Shenandoah National Park                          1
Big Bend National Park                            1
Olympic National Park                             1
Bryce Canyon National Park                        1
Canyonlands National Park                         1
Badlands National Park                            1
Black Canyon of the Gunnison National Park        1
Hawaii Volcanoes National Park                    1
Death Valley National Park                        1
North Cascades National Park                      1
Yosemite National Park                            1
Sequoia and 

In [27]:
df.groupby(['State','Park Name'])

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x1a2a02fc50>

##### Create US National Parks Maps Using Choropleth    
    . Using built-in State Geometries to graph US National Parks with area each ocupied. 

In [36]:
import plotly.graph_objects as go
for col in df.columns:
    df[col] = df[col].astype(str)
df['text'] = df['Park Name']
fig = go.Figure(data=go.Choropleth(
    locations=df['State'],
    z=df['Acres'].astype(float),
    locationmode='USA-states',
    colorscale='Reds',
    autocolorscale=False,
    text=df['text'],
    marker_line_color='white', # line markers between states
    colorbar_title="Acres"
))
fig.update_layout(
    title_text='US National Parks',
    geo = dict(
        scope='usa',
        projection=go.layout.geo.Projection(type = 'albers usa'),
        showlakes=True, # lakes
        lakecolor='rgb(255, 255, 255)'),
)


fig.show()

In [30]:
df2 = pd.read_csv('species.csv')
df2.head()

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Record Status,Occurrence,Nativeness,Abundance,Seasonality,Conservation Status,Unnamed: 13
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,Approved,Present,Native,Rare,Resident,,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",Approved,Present,Native,Abundant,,,
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Approved,Present,Not Native,Common,,Species of Concern,
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Approved,Not Confirmed,Native,,,Endangered,
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",Approved,Present,Unknown,Common,Breeder,,


In [31]:
df2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119248 entries, 0 to 119247
Data columns (total 14 columns):
Species ID             119248 non-null object
Park Name              119248 non-null object
Category               119248 non-null object
Order                  117776 non-null object
Family                 117736 non-null object
Scientific Name        119248 non-null object
Common Names           119248 non-null object
Record Status          119248 non-null object
Occurrence             99106 non-null object
Nativeness             94203 non-null object
Abundance              76306 non-null object
Seasonality            20157 non-null object
Conservation Status    4718 non-null object
Unnamed: 13            5 non-null object
dtypes: object(14)
memory usage: 12.7+ MB


In [32]:
df2.isnull().sum()

Species ID                  0
Park Name                   0
Category                    0
Order                    1472
Family                   1512
Scientific Name             0
Common Names                0
Record Status               0
Occurrence              20142
Nativeness              25045
Abundance               42942
Seasonality             99091
Conservation Status    114530
Unnamed: 13            119243
dtype: int64

In [33]:
df2 = df2.iloc[:,:-1]
df2.head()

Unnamed: 0,Species ID,Park Name,Category,Order,Family,Scientific Name,Common Names,Record Status,Occurrence,Nativeness,Abundance,Seasonality,Conservation Status
0,ACAD-1000,Acadia National Park,Mammal,Artiodactyla,Cervidae,Alces alces,Moose,Approved,Present,Native,Rare,Resident,
1,ACAD-1001,Acadia National Park,Mammal,Artiodactyla,Cervidae,Odocoileus virginianus,"Northern White-Tailed Deer, Virginia Deer, Whi...",Approved,Present,Native,Abundant,,
2,ACAD-1002,Acadia National Park,Mammal,Carnivora,Canidae,Canis latrans,"Coyote, Eastern Coyote",Approved,Present,Not Native,Common,,Species of Concern
3,ACAD-1003,Acadia National Park,Mammal,Carnivora,Canidae,Canis lupus,"Eastern Timber Wolf, Gray Wolf, Timber Wolf",Approved,Not Confirmed,Native,,,Endangered
4,ACAD-1004,Acadia National Park,Mammal,Carnivora,Canidae,Vulpes vulpes,"Black Fox, Cross Fox, Eastern Red Fox, Fox, Re...",Approved,Present,Unknown,Common,Breeder,


In [35]:
df_cons_s = pd.crosstab(df2['Category'], df2['Conservation Status'], margins=True)
df_cons_spe

Conservation Status,Breeder,Endangered,Extinct,In Recovery,Migratory,Proposed Endangered,Proposed Threatened,Resident,Species of Concern,Threatened,Under Review,All
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Amphibian,0,5,0,0,0,0,0,0,61,8,26,100
Bird,4,64,0,65,0,2,0,4,2371,51,70,2631
Crab/Lobster/Shrimp,0,1,0,0,0,0,0,0,0,0,1,2
Fish,1,33,1,0,0,0,0,0,66,40,10,151
Fungi,0,1,0,0,0,0,0,0,2,0,1,4
Insect,0,17,0,0,0,1,0,0,107,4,12,141
Invertebrate,0,9,0,0,0,0,0,0,1,7,4,21
Mammal,1,93,0,9,2,0,3,0,552,19,1,680
Nonvascular Plant,0,0,0,0,0,0,0,0,27,0,4,31
Reptile,1,17,0,1,0,0,2,1,98,17,16,153
