In [171]:
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

## 1. Data Acquisition

In [172]:
# Data source: Landcare Research (Manaaki Whenua), "Ecological requirements of NZ birds" dataset.
# URL: https://datastore.landcareresearch.co.nz/dataset/ecological-requirements-of-nz-birds

## 2. Data Import & Data Wrangling

In [173]:
# Both CSVs live in the same local project folder. Keeping the folder in a single
# constant means the notebook can be pointed at another location by editing one line.
DATA_DIR = '~/Google Drive/Data Analytics Files/Personal Projects/NZ Birds/'

resource_use_file = DATA_DIR + 'nzbirdresourceuse.csv'
resource_avail_file = DATA_DIR + 'nzbirdresourceavail.csv'

# use_data: one column per bird species code; avail_data: one column per land-cover
# class (see the head() previews in the following cells).
use_data = pd.read_csv(resource_use_file)
avail_data = pd.read_csv(resource_avail_file)

In [174]:
# Get a feel for the data with the head() and info() methods (this cell and the next three).
# Preview of the resource-use table: each row is a resource (e.g. 'Nest_ground'),
# each remaining column is a bird species code.

use_data.head()

Unnamed: 0,species_code_atlas,rifman,myna,skylark,chukor,wrybil,teagre,mallar,shonz,ducgre,...,dovspo,dovbar,starli,shepar,kinfis,blabir,thrson,plospu,wreroc,sileye
0,Nest_floating,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Nest_burrow,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Nest_ground,0,0,1,1,1,1,1,1,1,...,0,0,0,1,0,0,0,1,1,0
3,Nest_shrub,0,1,0,0,0,0,1,0,1,...,1,1,1,0,0,1,1,0,0,1
4,Nest_tree,1,1,0,0,0,0,1,0,1,...,1,1,1,1,1,1,1,0,0,1


In [175]:
# Preview of the resource-availability table: same resource rows as use_data,
# but the columns are land-cover classes (key column 'LCDB3_class').
avail_data.head()

Unnamed: 0,LCDB3_class,Built-up_Area,Urban_Parkland/Open_Space,Surface_Mine_and_Dumps,Transport_Infrastructure,Sand_and_Gravel,Gravel_and_Rock,Landslide,Permanent_Snow_and_Ice,Alpine_Grass-/_Herbfield,...,Manuka_and/or_Kanuka,Matagouri_or_Grey_Scrub,Broadleaved_Indigenous_Hardwoods,Sub_Alpine_Shrubland,Mixed_Exotic_Shrubland,Exotic_Forest,Forest_-_Harvested,Deciduous_Hardwoods,Indigenous_Forest,Mangrove
0,Nest_floating,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Nest_burrow,0,0,0,0,1,1,1,0,2,...,2,2,3,2,2,3,1,2,3,0
2,Nest_ground,0,2,0,0,2,2,2,0,2,...,2,1,2,2,2,2,1,2,2,2
3,Nest_shrub,2,3,0,0,0,0,0,0,0,...,3,3,3,3,3,3,1,3,3,3
4,Nest_tree,2,3,0,0,0,0,0,0,0,...,1,1,3,0,0,3,0,3,3,3


In [176]:
# Check row count, column dtypes and non-null counts of the resource-use table.
use_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163 entries, 0 to 162
Data columns (total 95 columns):
species_code_atlas    163 non-null object
rifman                163 non-null int64
myna                  163 non-null int64
skylark               163 non-null int64
chukor                163 non-null int64
wrybil                163 non-null int64
teagre                163 non-null int64
mallar                163 non-null int64
shonz                 163 non-null int64
ducgre                163 non-null int64
belbir                163 non-null int64
pipit                 163 non-null int64
kiwbro                163 non-null int64
kiwgre                163 non-null int64
kiwlit                163 non-null int64
herwhi                163 non-null int64
owllit                163 non-null int64
scanz                 163 non-null int64
bitaus                163 non-null int64
ferbir                163 non-null int64
goocan                163 non-null int64
kokako                163 non-nul

In [177]:
# Check row count, column dtypes and non-null counts of the resource-availability table.
avail_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 163 entries, 0 to 162
Data columns (total 34 columns):
LCDB3_class                                 163 non-null object
Built-up_Area                               163 non-null int64
Urban_Parkland/Open_Space                   163 non-null int64
Surface_Mine_and_Dumps                      163 non-null int64
Transport_Infrastructure                    163 non-null int64
Sand_and_Gravel                             163 non-null int64
Gravel_and_Rock                             163 non-null int64
Landslide                                   163 non-null int64
Permanent_Snow_and_Ice                      163 non-null int64
Alpine_Grass-/_Herbfield                    163 non-null int64
Lake_or_Pond                                163 non-null int64
River                                       163 non-null int64
Estuarine_Open_Water                        163 non-null int64
Short-rotation_Cropland                     163 non-null int64
Orchard_Vine

In [178]:
# Transpose the datasets so that resources become the columns and each row is a
# land-cover class (avail_data) or a bird species (use_data).
#
# The names are rebound in place, so each step is guarded: once a frame has been
# transposed its key column no longer exists, and re-running this cell would
# otherwise raise a KeyError from set_index().

if 'LCDB3_class' in avail_data.columns:
    avail_data = avail_data.set_index('LCDB3_class').T

if 'species_code_atlas' in use_data.columns:
    use_data = use_data.set_index('species_code_atlas').T

In [179]:
# Have a quick look at one of the transposed datasets: rows are now land-cover
# classes and columns are the resources.

avail_data.head()

LCDB3_class,Nest_floating,Nest_burrow,Nest_ground,Nest_shrub,Nest_tree,Nest_cliff_or_crevice,Nest_man_made_structure,aquatic_marine fish,aquatic_cephalopod,aquatic_marine crustacean,...,aerial_buds,aerial_vegetables,aerial_fruits,aerial_fungi,aerial_bark,aerial_nectar,aerial_crayfish,aerial_aquatic inverts,aerial_fish,aerial_aquatic veg
Built-up_Area,0,0,0,2,2,0,3,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Urban_Parkland/Open_Space,0,0,2,3,3,0,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Surface_Mine_and_Dumps,0,0,0,0,0,0,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Transport_Infrastructure,0,0,0,0,0,0,2,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Sand_and_Gravel,0,1,2,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [180]:
# Display the distinct column labels (resource names) of the transposed dataset.
# Each label has the form '<group>_<resource>', e.g. 'Nest_tree' or 'aquatic_seeds'.

avail_data.columns.unique()

array(['Nest_floating', 'Nest_burrow', 'Nest_ground', 'Nest_shrub',
       'Nest_tree', 'Nest_cliff_or_crevice', 'Nest_man_made_structure',
       'aquatic_marine fish', 'aquatic_cephalopod',
       'aquatic_marine crustacean', 'aquatic_marine mollusca',
       'aquatic_carrion', 'aquatic_frogs', 'aquatic_eggs/mammals',
       'aquatic_soil inverts', 'aquatic_surface inverts',
       'aquatic_flying inverts', 'aquatic_roots', 'aquatic_seeds',
       'aquatic_pollen', 'aquatic_turf/monocot', 'aquatic_leaves',
       'aquatic_shoots', 'aquatic_buds', 'aquatic_vegetables',
       'aquatic_fruits', 'aquatic_fungi', 'aquatic_bark', 'aquatic_nectar',
       'aquatic_crayfish', 'aquatic_aquatic inverts', 'aquatic_fish',
       'aquatic_aquatic veg', 'ground_marine fish', 'ground_cephalopod',
       'ground_marine crustacean', 'ground_marine mollusca',
       'ground_carrion', 'ground_frogs', 'ground_eggs/mammals',
       'ground_soil inverts', 'ground_surface inverts',
       'ground_flying i

In [200]:
# Stack the two transposed datasets on top of each other with pd.concat().
# The keys become the outer level of the row index, so every row is tagged
# as belonging to either 'Birds' or 'Land_Type'.

df = pd.concat(
    objs=[use_data, avail_data],
    axis=0,
    keys=['Birds', 'Land_Type'],
)

In [201]:
# Create a sorted list of unique natural resource headings (i.e., 'topos').
# A heading is the part of a column name before its first underscore,
# e.g. 'Nest_burrow' -> 'Nest'. str.index() raises ValueError for a column
# name without an underscore, so malformed names fail loudly here.

topos = sorted({x[:x.index("_")] for x in df.columns})
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(topos)

['Nest', 'aerial', 'aquatic', 'canopy', 'ground', 'shrub', 'subcanopy']


In [202]:
# Create a dictionary mapping each topo to the list of DataFrame columns that belong to it.
# Match on the topo plus its trailing underscore (the same delimiter used to derive
# the topos), so a topo that happens to be a prefix of another topo cannot pick up
# the other topo's columns.

topo_dict = {}
for topo in topos:
    prefix = topo + "_"
    topo_dict[topo] = [x for x in df.columns if x.startswith(prefix)]

In [203]:
# Create one DataFrame per topo, holding all rows but only that topo's columns.
# Selection is label-based via .loc; the older .ix indexer is deprecated and has
# been removed from current pandas releases.

dfs = {}
for topo in topos:
    dfs[topo] = df.loc[:, topo_dict[topo]]

In [204]:
# Rebuild the combined DataFrame with a two-level column index: the per-topo frames
# are placed side by side (in topos order) and each topo name becomes the outer
# column level above its original column names.

frames_in_topo_order = [dfs[topo] for topo in topos]
df = pd.concat(frames_in_topo_order, axis=1, keys=topos)

In [205]:
# Have a look at the new DataFrame using head(); its index and columns are
# inspected in the next two cells. Rows are keyed by (dataset, species/land type)
# and columns by (topo, resource).

df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Nest,Nest,Nest,Nest,Nest,Nest,Nest,aerial,aerial,aerial,...,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy
Unnamed: 0_level_1,species_code_atlas,Nest_floating,Nest_burrow,Nest_ground,Nest_shrub,Nest_tree,Nest_cliff_or_crevice,Nest_man_made_structure,aerial_marine fish,aerial_cephalopod,aerial_marine crustacean,...,subcanopy_buds,subcanopy_vegetables,subcanopy_fruits,subcanopy_fungi,subcanopy_bark,subcanopy_nectar,subcanopy_crayfish,subcanopy_aquatic inverts,subcanopy_fish,subcanopy_aquatic veg
Birds,rifman,0,0,0,0,1,0,0,0,0,0,...,0,0,3,0,0,0,0,0,0,0
Birds,myna,0,0,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Birds,skylark,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Birds,chukor,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Birds,wrybil,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [208]:
# Have a look at the row index of the new DataFrame: a two-level MultiIndex whose
# outer level is 'Birds' / 'Land_Type' and whose inner level holds the species
# codes and land-cover class names.

df.index

MultiIndex(levels=[[u'Birds', u'Land_Type'], [u'Alpine_Grass-/_Herbfield', u'Broadleaved_Indigenous_Hardwoods', u'Built-up_Area', u'Deciduous_Hardwoods', u'Depleted_Grassland', u'Estuarine_Open_Water', u'Exotic_Forest', u'Fernland', u'Flaxland', u'Forest_-_Harvested', u'Gorse_and/or_Broom', u'Gravel_and_Rock', u'Herbaceous_Freshwater_Vegetation', u'Herbaceous_Saline_Vegetation', u'High_Producing_Exotic_Grassland', u'Indigenous_Forest', u'Lake_or_Pond', u'Landslide', u'Low_Producing_Grassland', u'Mangrove', u'Manuka_and/or_Kanuka', u'Matagouri_or_Grey_Scrub', u'Mixed_Exotic_Shrubland', u'Orchard_Vineyard_&_Other_Perennial_Crops', u'Permanent_Snow_and_Ice', u'River', u'Sand_and_Gravel', u'Short-rotation_Cropland', u'Sub_Alpine_Shrubland', u'Surface_Mine_and_Dumps', u'Tall_Tussock_Grassland', u'Transport_Infrastructure', u'Urban_Parkland/Open_Space', u'belbir', u'bitaus', u'blabir', u'brocre', u'buncir', u'chafin', u'chukor', u'cooaus', u'cramar', u'craspo', u'cuclon', u'cucshi', u'dabchi

In [209]:
# Column index of the new DataFrame: a two-level MultiIndex whose outer level is
# the topo ('Nest', 'aerial', ...) and whose inner level is the full resource name.
df.columns

MultiIndex(levels=[[u'Nest', u'aerial', u'aquatic', u'canopy', u'ground', u'shrub', u'subcanopy'], [u'Nest_burrow', u'Nest_cliff_or_crevice', u'Nest_floating', u'Nest_ground', u'Nest_man_made_structure', u'Nest_shrub', u'Nest_tree', u'aerial_aquatic inverts', u'aerial_aquatic veg', u'aerial_bark', u'aerial_buds', u'aerial_carrion', u'aerial_cephalopod', u'aerial_crayfish', u'aerial_eggs/mammals', u'aerial_fish', u'aerial_flying inverts', u'aerial_frogs', u'aerial_fruits', u'aerial_fungi', u'aerial_leaves', u'aerial_marine crustacean', u'aerial_marine fish', u'aerial_marine mollusca', u'aerial_nectar', u'aerial_pollen', u'aerial_roots', u'aerial_seeds', u'aerial_shoots', u'aerial_soil inverts', u'aerial_surface inverts', u'aerial_turf/monocot', u'aerial_vegetables', u'aquatic_aquatic inverts', u'aquatic_aquatic veg', u'aquatic_bark', u'aquatic_buds', u'aquatic_carrion', u'aquatic_cephalopod', u'aquatic_crayfish', u'aquatic_eggs/mammals', u'aquatic_fish', u'aquatic_flying inverts', u'aqu

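## Ways to index along a MultiIndex

The cells below demonstrate several ways to select data from a DataFrame with a MultiIndex. For an excellent breakdown of the topic, see:
URL: http://stackoverflow.com/questions/13226029/benefits-of-pandas-multiindex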

In [210]:
# Show a little bit of everything: the first rows across all topo column groups,
# as a baseline for the selections in the following cells.

df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Nest,Nest,Nest,Nest,Nest,Nest,Nest,aerial,aerial,aerial,...,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy,subcanopy
Unnamed: 0_level_1,species_code_atlas,Nest_floating,Nest_burrow,Nest_ground,Nest_shrub,Nest_tree,Nest_cliff_or_crevice,Nest_man_made_structure,aerial_marine fish,aerial_cephalopod,aerial_marine crustacean,...,subcanopy_buds,subcanopy_vegetables,subcanopy_fruits,subcanopy_fungi,subcanopy_bark,subcanopy_nectar,subcanopy_crayfish,subcanopy_aquatic inverts,subcanopy_fish,subcanopy_aquatic veg
Birds,rifman,0,0,0,0,1,0,0,0,0,0,...,0,0,3,0,0,0,0,0,0,0
Birds,myna,0,0,0,1,1,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Birds,skylark,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Birds,chukor,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Birds,wrybil,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [211]:
# Show data within only one MultiIndex column category.
# Selecting an outer-level column key returns that group's columns for every row
# (both 'Birds' and 'Land_Type'), with the outer column level dropped.
# Note: df.xs('Nest',axis=1) also works.

df['Nest']

Unnamed: 0,species_code_atlas,Nest_floating,Nest_burrow,Nest_ground,Nest_shrub,Nest_tree,Nest_cliff_or_crevice,Nest_man_made_structure
Birds,rifman,0,0,0,0,1,0,0
Birds,myna,0,0,0,1,1,1,1
Birds,skylark,0,0,1,0,0,0,0
Birds,chukor,0,0,1,0,0,0,0
Birds,wrybil,0,0,1,0,0,0,0
Birds,teagre,0,0,1,0,0,0,0
Birds,mallar,0,0,1,1,1,0,0
Birds,shonz,0,0,1,0,0,0,0
Birds,ducgre,0,0,1,1,1,0,0
Birds,belbir,0,0,0,1,1,0,0


In [213]:
# Show data for only the 'Nest' column category and only the 'Birds' row category.
# .loc['Birds'] first narrows the rows to the bird species; ['Nest'] then picks the
# column group from that result.
# Note: df.xs('Birds').xs('Nest', axis=1) also works.

df.loc['Birds']['Nest'].head()

species_code_atlas,Nest_floating,Nest_burrow,Nest_ground,Nest_shrub,Nest_tree,Nest_cliff_or_crevice,Nest_man_made_structure
rifman,0,0,0,0,1,0,0
myna,0,0,0,1,1,1,1
skylark,0,0,1,0,0,0,0
chukor,0,0,1,0,0,0,0
wrybil,0,0,1,0,0,0,0


In [212]:
# Show data within a specific sub-category of a column MultiIndex category.
# The (outer, inner) tuple identifies a single column, so the result is a Series
# covering every row of the DataFrame.

df.xs(('Nest', 'Nest_floating'),axis=1)

Birds      rifman                                      0
           myna                                        0
           skylark                                     0
           chukor                                      0
           wrybil                                      0
           teagre                                      0
           mallar                                      0
           shonz                                       0
           ducgre                                      0
           belbir                                      0
           pipit                                       0
           kiwbro                                      0
           kiwgre                                      0
           kiwlit                                      0
           herwhi                                      0
           owllit                                      0
           scanz                                       0
           bitaus              

In [214]:
# Show data for one sub-category under the 'Nest' column category and the 'Birds' row category.
# The result is a Series indexed by species code and named (Nest, Nest_floating).

df.loc['Birds']['Nest', 'Nest_floating']

rifman     0
myna       0
skylark    0
chukor     0
wrybil     0
teagre     0
mallar     0
shonz      0
ducgre     0
belbir     0
pipit      0
kiwbro     0
kiwgre     0
kiwlit     0
herwhi     0
owllit     0
scanz      0
bitaus     0
ferbir     0
goocan     0
kokako     0
quacal     0
golfin     0
grefin     0
redpol     0
dotban     0
dotnz      0
terbla     0
cucshi     0
harrie     0
          ..
morpor     0
spahou     0
robnz      0
tomtit     0
shabl      0
shalit     0
shalbl     0
shapie     0
pheasa     0
sporoy     0
roseas     0
dabchi     1
pukeko     0
cramar     0
craspo     0
tui        0
spahed     0
fantai     0
terwhi     0
shaspo     0
dovspo     0
dovbar     0
starli     0
shepar     0
kinfis     0
blabir     0
thrson     0
plospu     0
wreroc     0
sileye     0
Name: (Nest, Nest_floating), dtype: int64

## 3. Exploratory Data Analysis

In [None]:
# Rank the resources by their mean use score across all bird species and show
# the highest-ranked ones (head() keeps the first five after sorting).

(
    use_data
    .mean()
    .sort_values(ascending=False)
    .head()
)