# Installs

In [22]:
!pip install xlrd

Collecting xlrd
  Downloading xlrd-2.0.1-py2.py3-none-any.whl (96 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m96.5/96.5 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: xlrd
Successfully installed xlrd-2.0.1


In [106]:
!pip install us

Collecting us
  Downloading us-3.1.1.tar.gz (14 kB)
  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting jellyfish==0.11.2
  Downloading jellyfish-0.11.2-cp39-cp39-macosx_10_7_x86_64.whl (328 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m329.0/329.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hBuilding wheels for collected packages: us
  Building wheel for us (setup.py) ... [?25ldone
[?25h  Created wheel for us: filename=us-3.1.1-py3-none-any.whl size=12550 sha256=c78672257e7a09dbbe1235f219294dd2b549f0e0969a1bfd5bc50720322c4230
  Stored in directory: /Users/joehack/Library/Caches/pip/wheels/29/9e/92/3672525fc19ac574d668402d739c8e8ff4650012304d2f3f06
Successfully built us
Installing collected packages: jellyfish, us
  Attempting uninstall: jellyfish
    Found existing installation: jellyfish 0.9.0
    Uninstalling jellyfish-0.9.0:
      Successfully uninstalled jellyfish-0.9.0
[31mERROR: pip's dependency resolver does not curre

In [55]:
!pip install pycountry

Collecting pycountry
  Downloading pycountry-23.12.11-py3-none-any.whl (6.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.2/6.2 MB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: pycountry
Successfully installed pycountry-23.12.11


# Imports

In [1]:
import pandas as pd
import numpy as np
import us
import pycountry

# Functions

In [25]:
# Module-level configuration shared by the checklist helpers below.

# Sample workbook that is laid out in the eBird checklist format.
checklist_sample = pd.read_excel("ebird_checklist_format_sample.xls")

# First 13 entries of the sample's 'Unnamed: 0' column (presumably the
# metadata row labels; not referenced again in the visible cells).
row_names = checklist_sample['Unnamed: 0'][:13]

# Default value for every metadata row of a checklist. The insertion order
# below is the order in which create_xlist emits the metadata rows.
default_metadata = dict([
    ('Latitude', 'NaN'),
    ('Longitude', 'NaN'),
    ('Date', '05/09/1995'),
    ('Start Time', '00:00:00'),
    ('State', 'NaN'),
    ('Country', None),
    ('Protocol', 'Historical'),
    ('Num Observers', 1),
    ('Duration (min)', 'NaN'),
    ('All Obs Reported (Y/N)', 'Y'),
    ('Dist Traveled (Miles)', 'NaN'),
    ('Area Covered (Acres)', 'NaN'),
    ('Notes', 'Pre-eBird Life List.'),
])


# Main entry point: life list in, eBird-style checklist table out.
def make_ebird_xlist(df, species_col_name='CommonName', spatial_scale_col_name='Country'):
    """Convert a life-list DataFrame into a checklist table with one column per region.

    Parameters
    ----------
    df : pandas.DataFrame
        Life list holding at least the species and region columns named below.
    species_col_name : str
        Column of ``df`` that holds the species names.
    spatial_scale_col_name : str
        Column of ``df`` that holds the region (e.g. country or state) of each
        sighting.

    Returns
    -------
    pandas.DataFrame
        The regional checklists placed side by side. The metadata rows come
        first, followed by one row per species. The column named ``''`` is
        entirely blank, and species a region did not record are ``''``.
    """
    # Group the species sightings by region.
    sitings_by_region = split_observations_by_region(df, species_col_name, spatial_scale_col_name)

    # One checklist (pd.Series) per region, preceded by the blank spacer checklist.
    xlists_by_region = make_regional_xlists(df, sitings_by_region, spatial_scale_col_name, species_col_name)

    # Align the checklists on their row labels and place them side by side.
    ebird_checklist = pd.concat(xlists_by_region, axis=1)

    # The spacer column must be blank in every row, including the metadata rows.
    ebird_checklist[''] = ''

    # Species a region never recorded come out of the concat as NaN; blank them.
    # Only the species rows are touched, so metadata defaults stay as they are.
    # np.nan is the canonical spelling; older upper-case aliases such as np.NAN
    # are not guaranteed to exist on newer NumPy releases.
    n_metadata_rows = len(default_metadata)
    species_rows = ebird_checklist.iloc[n_metadata_rows:, :]
    ebird_checklist.iloc[n_metadata_rows:, :] = species_rows.replace(np.nan, '')

    return ebird_checklist


# Function to create a single eBird checklist column from a list of sightings:
def create_xlist(sitings, count=None):
    """Build one checklist as a Series: metadata defaults followed by species.

    Parameters
    ----------
    sitings : iterable
        Species names; duplicates are collapsed to a single entry.
    count : object, optional
        Value stored against every species (callers in this notebook pass
        ``'x'`` or ``''``).

    Returns
    -------
    pandas.Series
        The entries of ``default_metadata`` first, then one entry per distinct
        species in the order each species first appears in ``sitings``.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order. A set would
    # also de-duplicate, but its iteration order for strings can change from
    # one interpreter run to the next, making the exported row order
    # irreproducible.
    unique_species = dict.fromkeys(sitings, count)

    return pd.concat((pd.Series(default_metadata), pd.Series(unique_species)))
    

# Function to split a life list into a dictionary of regions and species seen:
def split_observations_by_region(df, species_col_name, spatial_scale_col_name):
    """Map each region to the array of species recorded there.

    Parameters
    ----------
    df : pandas.DataFrame
        Life list containing both columns named below.
    species_col_name : str
        Column holding the species names.
    spatial_scale_col_name : str
        Column holding the region of each sighting.

    Returns
    -------
    dict
        ``{region: species_values}`` with regions in order of first
        appearance in ``df``. Rows whose region is missing (NaN/None) are
        left out rather than producing an empty entry keyed by NaN.
    """
    # A single grouped pass replaces one boolean scan of the frame per region.
    # sort=False keeps first-appearance order, and groupby's default handling
    # of missing keys drops rows with no region.
    species_by_region = df.groupby(spatial_scale_col_name, sort=False)[species_col_name]

    return {region: species.values for region, species in species_by_region}


# Function to turn per-region sightings into formatted eBird checklists
def make_regional_xlists(df, sitings_by_region, spatial_scale_col_name, species_col_name):
    """Return a list of checklists: a blank spacer first, then one per region.

    Parameters
    ----------
    df : pandas.DataFrame
        Full life list; its species column seeds the spacer checklist.
    sitings_by_region : dict
        Mapping of region name to the species recorded in that region.
    spatial_scale_col_name : str
        Name of the region column. If it contains "country"
        (case-insensitive) the 'Country' row is filled from
        ``get_country_code``. Otherwise, if it contains "state", 'Country' is
        set to 'US' and 'State' is filled from ``get_state_code``.
    species_col_name : str
        Name of the species column in ``df``.

    Returns
    -------
    list of pandas.Series
        Each Series is named after its region; the spacer is named ``''``.
    """
    # Spacer checklist covering every species with a blank count (the original
    # author notes the eBird format requires an empty second column).
    spacer = create_xlist(df[species_col_name].values, count='')
    spacer.name = ''
    checklists = [spacer]

    for region_name, species_seen in sitings_by_region.items():
        checklist = create_xlist(species_seen, count='x')
        scale = spatial_scale_col_name.lower()

        if 'country' in scale:
            checklist['Country'] = get_country_code(region_name)
        elif 'state' in scale:
            state_code = get_state_code(region_name)
            checklist['Country'] = 'US'
            checklist['State'] = state_code

        checklist.name = region_name
        checklists.append(checklist)

    return checklists


# returns the state's abbreviation as reported by the `us` package
def get_state_code(state_name):
    """Look up a US state and return its ``abbr`` attribute, or None if no match.

    The lookup is delegated to ``us.states.lookup``; a falsy result yields
    ``None``.

    NOTE(review): ``abbr`` is presumably the two-letter postal abbreviation,
    i.e. only the subdivision part of an ISO 3166-2 code ('CA', not 'US-CA').
    Confirm against the `us` documentation.
    """
    match = us.states.lookup(state_name)
    return match.abbr if match else None


# returns the two-letter ISO 3166-1 alpha-2 country code
def get_country_code(country_name):
    """Return the ``alpha_2`` code pycountry reports for ``country_name``.

    Returns ``None`` when ``pycountry.countries.lookup`` raises
    ``LookupError``, i.e. the country was not found.
    """
    try:
        return pycountry.countries.lookup(country_name).alpha_2
    except LookupError:
        # Unknown country: callers get None and may patch the code manually.
        return None

# Import a BB Life List and Calculate Basic Statistics

In [3]:
# Load dad's exported life list. The file has no header row (header=None),
# so the column labels are supplied here.
dads_ll = pd.read_csv(
    "Exported BB7 Sightings.csv",
    encoding='latin1',
    header=None,
    names=[
        'SpeciesCode', 'CommonName', 'SciName', 'Count', 'Date',
        'Country', 'State', 'County', 'City', 'Site',
        'Notes1', 'Notes2', 'Notes3', 'Notes4', 'Notes5', 'Notes6',
    ],
)

In [4]:
# Number of distinct species codes in the list. nunique counts a missing code
# (if there is one) exactly once, whereas len(set(...)) can count every NaN
# separately when the column is numeric.
dads_ll['SpeciesCode'].nunique(dropna=False)

2541

In [5]:
# Distinct country names in the list, followed by how many there are.
country_names = set(dads_ll['Country'])
print(country_names)
len(country_names)

{'Hungary', 'USA', 'Switzerland', 'Canada', 'Czech Republic', 'Puerto Rico', 'Guatemala', 'Virgin Islands (U.S.)', 'UK', 'France', 'Austria', 'Colombia', 'Kenya', 'Italy', 'Slovakia', 'Tanzania', 'Costa Rica', 'Israel', 'Peru', 'Mexico', 'Latvia', 'Bahamas', 'Ecuador', 'Venezuela'}


24

# Convert a Typical Life List into an eBird Checklist

In [63]:
# Build the world checklist: one column per country in dad's life list.
ebird_life_list = make_ebird_xlist(dads_ll)

# Hand-set codes, keyed by (metadata row, region column). Presumably these are
# regions for which the automatic country lookup does not give the wanted code.
# NOTE(review): the ISO 3166-1 alpha-2 code for the United Kingdom is 'GB';
# confirm that 'UK' is what the eBird import expects.
manual_region_codes = {
    ('State', 'Virgin Islands (U.S.)'): 'VI',
    ('Country', 'Virgin Islands (U.S.)'): 'US',
    ('Country', 'UK'): 'UK',
}
for (metadata_row, region_column), code in manual_region_codes.items():
    ebird_life_list.loc[metadata_row, region_column] = code

ebird_life_list.to_csv("test_world_life_list.csv")

In [64]:
# Repeat the conversion for the list in 'life list - ABA (4).csv', this time
# grouping by the 'State' column.
us_life_list = pd.read_csv('life list - ABA (4).csv')

# Three state names carry a trailing space in the export; map each one onto
# its clean spelling so both spellings end up in the same region.
us_life_list = us_life_list.replace({
    'California ': 'California',
    'New Jersey ': 'New Jersey',
    'Nebraska ': 'Nebraska',
})

us_ebird_life_list = make_ebird_xlist(
    us_life_list,
    species_col_name='Common Name',
    spatial_scale_col_name='State',
)

us_ebird_life_list.to_csv("test_us_life_list.csv")