# 🐦 eBird Data Processing and Exploration

This notebook processes personal bird observation data exported from the [Merlin Bird ID](https://merlin.allaboutbirds.org/) app for data analysis with Tableau. The dataset includes location, time, species, and observational metadata from multiple birdwatching sessions. Some additional information is merged in the dataframe from the eBird taxonomy dataframe

In [2]:
#import libraries

import pandas as pd
import numpy as np
import json


In [None]:
#load and check data

df = pd.read_csv("data/MyEBirdData.csv") #the data is not public 
pd.set_option('display.max_columns', 500)
df.head(20)

Unnamed: 0,Submission ID,Common Name,Scientific Name,Taxonomic Order,Count,State/Province,County,Location ID,Location,Latitude,Longitude,Date,Time,Protocol,Duration (Min),All Obs Reported,Distance Traveled (km),Area Covered (ha),Number of Observers,Breeding Code,Observation Details,Checklist Comments,ML Catalog Numbers
0,S253493577,Canada Goose,Branta canadensis,330,X,US-MN,Hennepin,L48323731,Loring Lake,44.968562,-93.283645,2025-06-20,12:33 PM,eBird - Casual Observation,,0,,,1,,,,
1,S250906203,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47746401,Joe Sayer’s Tree,30.325708,-97.736916,2025-05-10,07:15 PM,eBird - Casual Observation,,0,,,1,,,,
2,S250916505,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47747193,Paddock Tree,30.325626,-97.737256,2024-03-07,03:22 PM,eBird - Casual Observation,,0,,,1,,,,
3,S250916389,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47747193,Paddock Tree,30.325626,-97.737256,2024-03-07,03:49 PM,eBird - Casual Observation,,0,,,1,,,,
4,S250916181,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47747193,Paddock Tree,30.325626,-97.737256,2024-03-14,05:00 PM,eBird - Casual Observation,,0,,,1,,,,
5,S250912169,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47747193,Paddock Tree,30.325626,-97.737256,2024-03-22,09:12 AM,eBird - Casual Observation,,0,,,1,,,,
6,S250912070,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47747193,Paddock Tree,30.325626,-97.737256,2024-03-22,09:21 AM,eBird - Casual Observation,,0,,,1,,,,
7,S250910817,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47747193,Paddock Tree,30.325626,-97.737256,2024-04-04,09:16 AM,eBird - Casual Observation,,0,,,1,,,,
8,S250903038,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47746775,Phone Line,30.332433,-97.734297,2025-06-15,07:43 PM,eBird - Casual Observation,,0,,,1,,,,
9,S250902915,White-winged Dove,Zenaida asiatica,2350,X,US-TX,Travis,L47746739,Phone Line,30.327368,-97.735602,2025-06-15,08:08 PM,eBird - Casual Observation,,0,,,1,,,,


In [24]:
df.describe(include='all')

Unnamed: 0,Submission ID,Common Name,Scientific Name,Taxonomic Order,Count,State/Province,County,Location ID,Location,Latitude,Longitude,Date,Time,Protocol,Duration (Min),All Obs Reported,Distance Traveled (km),Area Covered (ha),Number of Observers,Breeding Code,Observation Details,Checklist Comments,ML Catalog Numbers
count,196,196,196,196.0,196,196,196,196,196,196.0,196.0,196,196,196,0.0,196.0,0.0,0.0,196.0,0.0,0.0,0.0,0.0
unique,196,46,46,,1,4,5,20,19,,,43,74,1,,,,,,,,,
top,S253493577,Northern Cardinal,Cardinalis cardinalis,,X,US-OH,Hamilton,L47747769,Norwood,,,2024-03-16,01:43 PM,eBird - Casual Observation,,,,,,,,,
freq,1,26,26,,196,96,96,96,96,,,63,10,196,,,,,,,,,
mean,,,,24807.903061,,,,,,35.390214,-90.908057,,,,,0.0,,,1.0,,,,
std,,,,10213.611899,,,,,,4.776588,6.546675,,,,,0.0,,,0.0,,,,
min,,,,330.0,,,,,,29.943833,-98.024564,,,,,0.0,,,1.0,,,,
25%,,,,21066.0,,,,,,30.330086,-97.737256,,,,,0.0,,,1.0,,,,
50%,,,,27870.0,,,,,,39.154179,-88.862183,,,,,0.0,,,1.0,,,,
75%,,,,32110.0,,,,,,39.154179,-84.454607,,,,,0.0,,,1.0,,,,


In [4]:
'''
Remove columns:
Submission ID
Count
Location ID 
Protocol
Duration (Min)
All Obs Reported
Distance Traveled (km)
Area Covered (ha)
Number of Observers
Breeding Code
Observation Details
Checklist Comments
ML Catalog Numbers
''' 

df = df.drop(['Submission ID', 
            'Count',
            'Location ID',
            'Protocol',
            'Duration (Min)',
            'All Obs Reported',
            'Distance Traveled (km)',
            'Area Covered (ha)',
            'Number of Observers',
            'Breeding Code',
            'Observation Details',
            'Checklist Comments',
            'ML Catalog Numbers'], axis=1)

In [5]:
#Anonymize Location Data

# Add random noise (adjust scale as needed)
noise_level = 0.005 # adjust this value for the desired level of masking
df['Masked_Latitude'] = df['Latitude'] + np.random.uniform(-noise_level, noise_level, size=len(df))
df['Masked_Longitude'] = df['Longitude'] + np.random.uniform(-noise_level, noise_level, size=len(df))

df = df.drop(['Latitude', 
            'Longitude'], axis=1)

df.head(20)


Unnamed: 0,Common Name,Scientific Name,Taxonomic Order,State/Province,County,Location,Date,Time,Masked_Latitude,Masked_Longitude
0,Canada Goose,Branta canadensis,330,US-MN,Hennepin,Loring Lake,2025-06-20,12:33 PM,44.963917,-93.283301
1,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Joe Sayer’s Tree,2025-05-10,07:15 PM,30.327089,-97.733747
2,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Paddock Tree,2024-03-07,03:22 PM,30.320823,-97.735276
3,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Paddock Tree,2024-03-07,03:49 PM,30.32795,-97.740439
4,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Paddock Tree,2024-03-14,05:00 PM,30.321879,-97.73997
5,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Paddock Tree,2024-03-22,09:12 AM,30.327382,-97.734392
6,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Paddock Tree,2024-03-22,09:21 AM,30.329232,-97.732596
7,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Paddock Tree,2024-04-04,09:16 AM,30.326285,-97.740226
8,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Phone Line,2025-06-15,07:43 PM,30.332551,-97.733949
9,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Phone Line,2025-06-15,08:08 PM,30.32599,-97.739576


In [6]:
df['Location'].unique()

array(['Loring Lake', 'Joe Sayer’s Tree', 'Paddock Tree', 'Phone Line',
       'Zilker Park', 'Norwood', 'In flight', 'Parmer Parking Lot',
       'Lone Star Gun Range', 'Arroyo Seco Tree', 'Pace Bend Park',
       'Arroyo Seco', 'Sunshine Community Gardens', 'Arroyo seco Tree',
       'Willow', 'Loring Park', 'University Of Tennessee',
       'Nicollet Mall light rail Station', 'Burnet HEB Parking Lot'],
      dtype=object)

In [7]:
#rename locations (optional)
loc_dict = {'Joe Sayer’s Tree':'Neighborhood Walk',
            'Paddock Tree':'Backyard',
            'Phone Line':'Neighborhood Walk',
            'In flight':'Neighborhood Walk',
            'Parmer Parking Lot':'Work',
            'Arroyo Seco Tree':'Neighborhood Walk',
            'Arroyo Seco':'Neighborhood Walk',
            'Arroyo seco Tree':'Neighborhood Walk',
            'Willow':'Neighborhood Walk',
            'Nicollet Mall light rail Station':'Downtown Minneapolis'
}
df = df.replace({'Location': loc_dict})
df['Location'].unique()

array(['Loring Lake', 'Neighborhood Walk', 'Backyard', 'Zilker Park',
       'Norwood', 'Work', 'Lone Star Gun Range', 'Pace Bend Park',
       'Sunshine Community Gardens', 'Loring Park',
       'University Of Tennessee', 'Downtown Minneapolis',
       'Burnet HEB Parking Lot'], dtype=object)

In [None]:
#load eBird taxonomy data to include eBird species code into the main dataframe

eBird_taxonomy = pd.read_csv("data/eBird_taxonomy_v2024.csv")
eBird_taxonomy_species = eBird_taxonomy[eBird_taxonomy['CATEGORY'] == 'species'] #filter for bird species in the dataset
eBird_taxonomy_species.head(20)


Unnamed: 0,TAXON_ORDER,CATEGORY,SPECIES_CODE,TAXON_CONCEPT_ID,PRIMARY_COM_NAME,SCI_NAME,ORDER,FAMILY,SPECIES_GROUP,REPORT_AS
0,2,species,ostric2,,Common Ostrich,Struthio camelus,Struthioniformes,Struthionidae (Ostriches),Ostriches,
1,7,species,ostric3,,Somali Ostrich,Struthio molybdophanes,Struthioniformes,Struthionidae (Ostriches),Ostriches,
3,10,species,soucas1,,Southern Cassowary,Casuarius casuarius,Casuariiformes,Casuariidae (Cassowaries and Emu),Cassowaries and Emu,
4,11,species,dwacas1,,Dwarf Cassowary,Casuarius bennetti,Casuariiformes,Casuariidae (Cassowaries and Emu),Cassowaries and Emu,
5,12,species,norcas1,,Northern Cassowary,Casuarius unappendiculatus,Casuariiformes,Casuariidae (Cassowaries and Emu),Cassowaries and Emu,
6,13,species,emu1,,Emu,Dromaius novaehollandiae,Casuariiformes,Casuariidae (Cassowaries and Emu),Cassowaries and Emu,
7,19,species,sobkiw1,,Southern Brown Kiwi,Apteryx australis,Apterygiformes,Apterygidae (Kiwis),Kiwis,
10,22,species,okbkiw1,,Okarito Brown Kiwi,Apteryx rowi,Apterygiformes,Apterygidae (Kiwis),Kiwis,
11,23,species,nibkiw1,,North Island Brown Kiwi,Apteryx mantelli,Apterygiformes,Apterygidae (Kiwis),Kiwis,
12,24,species,liskiw1,,Little Spotted Kiwi,Apteryx owenii,Apterygiformes,Apterygidae (Kiwis),Kiwis,


In [9]:
#remove unnecessary columns

eBird_taxonomy_species = eBird_taxonomy_species.drop([
    'TAXON_ORDER',
    'CATEGORY',
    'TAXON_CONCEPT_ID',
    'PRIMARY_COM_NAME',
    'REPORT_AS'], axis=1)

eBird_taxonomy_species.head(20)

Unnamed: 0,SPECIES_CODE,SCI_NAME,ORDER,FAMILY,SPECIES_GROUP
0,ostric2,Struthio camelus,Struthioniformes,Struthionidae (Ostriches),Ostriches
1,ostric3,Struthio molybdophanes,Struthioniformes,Struthionidae (Ostriches),Ostriches
3,soucas1,Casuarius casuarius,Casuariiformes,Casuariidae (Cassowaries and Emu),Cassowaries and Emu
4,dwacas1,Casuarius bennetti,Casuariiformes,Casuariidae (Cassowaries and Emu),Cassowaries and Emu
5,norcas1,Casuarius unappendiculatus,Casuariiformes,Casuariidae (Cassowaries and Emu),Cassowaries and Emu
6,emu1,Dromaius novaehollandiae,Casuariiformes,Casuariidae (Cassowaries and Emu),Cassowaries and Emu
7,sobkiw1,Apteryx australis,Apterygiformes,Apterygidae (Kiwis),Kiwis
10,okbkiw1,Apteryx rowi,Apterygiformes,Apterygidae (Kiwis),Kiwis
11,nibkiw1,Apteryx mantelli,Apterygiformes,Apterygidae (Kiwis),Kiwis
12,liskiw1,Apteryx owenii,Apterygiformes,Apterygidae (Kiwis),Kiwis


In [10]:
#merge my eBird data and the eBird taxnomy data left on the Scientific Name 

merged_df = pd.merge(df, eBird_taxonomy_species, how='left', left_on='Scientific Name', right_on='SCI_NAME')
merged_df

Unnamed: 0,Common Name,Scientific Name,Taxonomic Order,State/Province,County,Location,Date,Time,Masked_Latitude,Masked_Longitude,SPECIES_CODE,SCI_NAME,ORDER,FAMILY,SPECIES_GROUP
0,Canada Goose,Branta canadensis,330,US-MN,Hennepin,Loring Lake,2025-06-20,12:33 PM,44.963917,-93.283301,cangoo,Branta canadensis,Anseriformes,"Anatidae (Ducks, Geese, and Waterfowl)",Waterfowl
1,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Neighborhood Walk,2025-05-10,07:15 PM,30.327089,-97.733747,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves
2,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-03-07,03:22 PM,30.320823,-97.735276,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves
3,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-03-07,03:49 PM,30.327950,-97.740439,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves
4,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-03-14,05:00 PM,30.321879,-97.739970,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
191,Northern Cardinal,Cardinalis cardinalis,34224,US-TX,Travis,Backyard,2024-04-04,09:16 AM,30.328026,-97.739410,norcar,Cardinalis cardinalis,Passeriformes,Cardinalidae (Cardinals and Allies),"Cardinals, Grosbeaks, and Allies"
192,Northern Cardinal,Cardinalis cardinalis,34224,US-TX,Travis,Backyard,2025-05-11,11:42 AM,30.324691,-97.734410,norcar,Cardinalis cardinalis,Passeriformes,Cardinalidae (Cardinals and Allies),"Cardinals, Grosbeaks, and Allies"
193,Northern Cardinal,Cardinalis cardinalis,34224,US-TX,Travis,Work,2025-05-07,08:44 AM,30.425695,-97.746354,norcar,Cardinalis cardinalis,Passeriformes,Cardinalidae (Cardinals and Allies),"Cardinals, Grosbeaks, and Allies"
194,Northern Cardinal,Cardinalis cardinalis,34224,US-TX,Travis,Sunshine Community Gardens,2024-03-15,02:41 PM,30.314589,-97.739736,norcar,Cardinalis cardinalis,Passeriformes,Cardinalidae (Cardinals and Allies),"Cardinals, Grosbeaks, and Allies"


In [11]:
#map the common name for the scientific order names in the df 

order_dict = { "Struthioniformes": "Ostriches", 
              "Rheiformes": "Rheas", 
              "Apterygiformes": "Kiwis",  
              "Casuariiformes": "Emu, cassowaries",
              "Tinamiformes": "Tinamous",
              "Galliformes": "Landfowls",
              "Anseriformes": "Waterfowl",
              "Caprimulgiformes": "Nightjars, frogmouths, potoos, oilbirds",
              "Apodiformes": "Owlet-nightjars, treeswifts, swifts, hummingbirds", 
              "Musophagiformes": "Turacos",
              "Otidiformes": "Bustards",
              "Cuculiformes": "Cuckoos" ,
              "Mesitornithiformes": "Mesites",
              "Pterocliformes": "Sandgrouse",
              "Columbiformes": "Pigeons",
              "Gruiformes": "Diverse terrestrial and marshbirds",
              "Podicipediformes": "Grebes",
              "Phoenicopteriformes": "Flamingo",
              "Charadriiformes": "Shorebirds and relatives",
              "Eurypygiformes": "Sunbittern, Kagu",
              "Phaethontiformes": "Tropicbirds",
              "Gaviiformes": "Loons",
              "Sphenisciformes": "Penguins",
              "Procellariiformes": "Tube-nosed seabirds",
              "Ciconiiformes": "Storks",
              "Suliformes": "Totipalmate water and diving birds",
              "Pelecaniformes": "Ibis, herons, pelicans,  Hammerkop, Shoebill",
              "Opisthocomiformes": "Hoatzin",
              "Accipitriformes": "Raptors including New World Vultures",
              "Strigiformes": "Owls",
              "Coliiformes": "Mousebirds",
              "Leptosomiformes": "Cuckoo Roller",
              "Trogoniformes": "Trogons, quetzals",
              "Bucerotiformes": "Hornbills, hoopoes, wood hoopoes",
              "Coraciiformes": "Kingfishers and allies",
              "Piciformes": "Woodpeckers and allies",
              "Cariamiformes": "Seriemas",
              "Falconiformes": "Falcons", 
              "Psittaciformes": "Parrots", 
              "Passeriformes": "Perching birds"    
}

merged_df['ORDER_GROUP'] = merged_df['ORDER'].map(order_dict)

In [13]:
#sanity check

merged_df.head(20)

Unnamed: 0,Common Name,Scientific Name,Taxonomic Order,State/Province,County,Location,Date,Time,Masked_Latitude,Masked_Longitude,SPECIES_CODE,SCI_NAME,ORDER,FAMILY,SPECIES_GROUP,ORDER_GROUP
0,Canada Goose,Branta canadensis,330,US-MN,Hennepin,Loring Lake,2025-06-20,12:33 PM,44.963917,-93.283301,cangoo,Branta canadensis,Anseriformes,"Anatidae (Ducks, Geese, and Waterfowl)",Waterfowl,Waterfowl
1,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Neighborhood Walk,2025-05-10,07:15 PM,30.327089,-97.733747,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons
2,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-03-07,03:22 PM,30.320823,-97.735276,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons
3,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-03-07,03:49 PM,30.32795,-97.740439,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons
4,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-03-14,05:00 PM,30.321879,-97.73997,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons
5,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-03-22,09:12 AM,30.327382,-97.734392,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons
6,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-03-22,09:21 AM,30.329232,-97.732596,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons
7,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Backyard,2024-04-04,09:16 AM,30.326285,-97.740226,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons
8,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Neighborhood Walk,2025-06-15,07:43 PM,30.332551,-97.733949,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons
9,White-winged Dove,Zenaida asiatica,2350,US-TX,Travis,Neighborhood Walk,2025-06-15,08:08 PM,30.32599,-97.739576,whwdov,Zenaida asiatica,Columbiformes,Columbidae (Pigeons and Doves),Pigeons and Doves,Pigeons


In [None]:
#save the transformed data

merged_df.to_csv('data/bird_data_revised.csv', index=False)