In [98]:
import pandas as pd
pd.set_option('display.max_columns', None)

In [112]:
GRID_SIZE = 0.0015

df = pd.read_csv("eBird Data 2019-2024/ebd_IN-MH-MS_201901_202401_unv_smp_relSep-2024.txt", delimiter='\t',
                 low_memory=False,
                 usecols=['COMMON NAME',
                   'OBSERVATION COUNT',
                   'LATITUDE', 'LONGITUDE', 'OBSERVATION DATE',
                   'CATEGORY']
                )

df['OBSERVATION DATE'] = pd.to_datetime(df['OBSERVATION DATE'])
df['month'] = df['OBSERVATION DATE'].dt.month
df = df[df['CATEGORY'] == 'species']
df['OBSERVATION COUNT'] = pd.to_numeric(df['OBSERVATION COUNT'], errors='coerce')
df['OBSERVATION COUNT'].clip(upper=500, inplace=True)
df = df.rename(columns={'LATITUDE': 'lat', 'LONGITUDE': 'lng'})
unique_species = list(df['COMMON NAME'].unique())
# unique_species = list(df['COMMON NAME'].value_counts().keys())
print(unique_species[:5])

['Alexandrine Parakeet', 'Alpine Swift', 'Asian Brown Flycatcher', 'Ashy Drongo', 'Ashy Prinia']


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['OBSERVATION COUNT'].clip(upper=500, inplace=True)


In [113]:
df.head()

Unnamed: 0,CATEGORY,COMMON NAME,OBSERVATION COUNT,lat,lng,OBSERVATION DATE,month
11,species,Alexandrine Parakeet,5.0,19.152,72.859,2019-01-02,1
12,species,Alexandrine Parakeet,8.0,19.173939,72.83274,2019-01-12,1
13,species,Alexandrine Parakeet,2.0,19.216553,72.814391,2019-01-13,1
14,species,Alexandrine Parakeet,,19.133559,72.913911,2019-01-22,1
15,species,Alexandrine Parakeet,,19.218255,72.814448,2019-01-20,1


In [114]:
df.shape

(430814, 7)

In [115]:
df['OBSERVATION DATE'].max()

Timestamp('2024-01-31 00:00:00')

In [116]:
df['OBSERVATION DATE'].min()

Timestamp('2019-01-01 00:00:00')

In [117]:
df.to_csv("birdMapper/backend/data.csv", index=False)

In [96]:
df = pd.read_csv("eBird Data 2019-2024/ebd_IN-MH-MS_201901_202401_unv_smp_relSep-2024.txt", delimiter='\t',
                 low_memory=False,
                 usecols=['CATEGORY', 'COMMON NAME',
                   'OBSERVATION COUNT',
                   'LATITUDE', 'LONGITUDE']
                 ,
                 nrows=100000
                )

In [97]:
df['CATEGORY'].value_counts()

CATEGORY
species     93908
domestic     3780
issf         1208
spuh         1027
slash          58
form           19
Name: count, dtype: int64

In [95]:
df.head(2)

Unnamed: 0,CATEGORY,COMMON NAME,OBSERVATION COUNT,LATITUDE,LONGITUDE
0,spuh,Accipiter sp.,2,19.143764,72.818327
1,spuh,Accipiter sp.,1,19.217495,72.816238


In [None]:
df = df[df['CT']]

In [56]:
df = pd.read_csv("eBird Data 2019-2024/ebd_IN-MH-MS_201901_202401_unv_smp_relSep-2024.txt", delimiter='\t',
                 low_memory=False, 
                 usecols=['CATEGORY', 'COMMON NAME', 'SCIENTIFIC NAME',
                   'OBSERVATION COUNT', 'COUNTY', 'LOCALITY', 
                   'LATITUDE', 'LONGITUDE', 'OBSERVATION DATE',
                   'SAMPLING EVENT IDENTIFIER',
                   'NUMBER OBSERVERS',
                   'ALL SPECIES REPORTED',
                   'REVIEWED']
                )

In [57]:
df.sample(10)

Unnamed: 0,CATEGORY,COMMON NAME,SCIENTIFIC NAME,OBSERVATION COUNT,COUNTY,LOCALITY,LATITUDE,LONGITUDE,OBSERVATION DATE,SAMPLING EVENT IDENTIFIER,NUMBER OBSERVERS,ALL SPECIES REPORTED,REVIEWED
272672,species,White-throated Kingfisher,Halcyon smyrnensis,1,Mumbai Suburban,"Malad - Marve Road, Mumbai, Maharashtra, IN (1...",19.19614,72.821323,2022-02-02,S101876848,1.0,1,0
119886,species,Asian Palm Swift,Cypsiurus balasiensis,3,Mumbai Suburban,Home,19.140829,72.863328,2020-12-17,S77578719,1.0,1,0
278836,species,Eurasian Spoonbill,Platalea leucorodia,6,Mumbai Suburban,Bhandup Pumping Station (BPS),19.141094,72.960988,2022-03-13,S104792301,5.0,1,0
424066,species,Coppersmith Barbet,Psilopogon haemacephalus,7,Mumbai Suburban,"IIT Bombay, Powai, Mumbai",19.133497,72.913276,2023-10-15,S152295865,5.0,1,0
159807,species,Black-winged Stilt,Himantopus himantopus,2,Mumbai Suburban,Sanjay Gandhi NP--Safari Trail,19.232165,72.877065,2021-03-14,S83358231,1.0,1,0
288701,species,Black Kite,Milvus migrans,50,Mumbai Suburban,IIT Bombay,19.127856,72.913599,2022-04-09,S106607414,2.0,1,0
245312,domestic,Rock Pigeon,Columba livia,4,Mumbai Suburban,My Terrace,19.227067,72.821473,2021-12-02,S98352331,1.0,1,0
147423,species,Common Myna,Acridotheres tristis,3,Mumbai Suburban,Lokhandwala Lake--Mangroves,19.143764,72.818327,2021-02-14,S81352682,1.0,0,0
207266,species,House Crow,Corvus splendens,5,Mumbai Suburban,Home,19.140829,72.863328,2021-09-11,S94492658,1.0,1,0
158373,species,Ashy Drongo,Dicrurus leucophaeus,3,Mumbai Suburban,"CEC-Goregaon Mumbai, Maharashtra, IN (19.163, ...",19.163212,72.89245,2021-03-31,S84456332,1.0,1,0


In [58]:
df.shape

(459306, 13)

In [59]:
df['CATEGORY'].value_counts()

CATEGORY
species     430814
domestic     14581
issf          6741
spuh          6061
slash         1074
form            24
hybrid          11
Name: count, dtype: int64

In [18]:
df.columns

Index(['GLOBAL UNIQUE IDENTIFIER', 'LAST EDITED DATE', 'TAXONOMIC ORDER',
       'CATEGORY', 'TAXON CONCEPT ID', 'COMMON NAME', 'SCIENTIFIC NAME',
       'SUBSPECIES COMMON NAME', 'SUBSPECIES SCIENTIFIC NAME', 'EXOTIC CODE',
       'OBSERVATION COUNT', 'BREEDING CODE', 'BREEDING CATEGORY',
       'BEHAVIOR CODE', 'AGE/SEX', 'COUNTRY', 'COUNTRY CODE', 'STATE',
       'STATE CODE', 'COUNTY', 'COUNTY CODE', 'IBA CODE', 'BCR CODE',
       'USFWS CODE', 'ATLAS BLOCK', 'LOCALITY', 'LOCALITY ID', 'LOCALITY TYPE',
       'LATITUDE', 'LONGITUDE', 'OBSERVATION DATE',
       'TIME OBSERVATIONS STARTED', 'OBSERVER ID', 'SAMPLING EVENT IDENTIFIER',
       'PROTOCOL TYPE', 'PROTOCOL CODE', 'PROJECT CODE', 'DURATION MINUTES',
       'EFFORT DISTANCE KM', 'EFFORT AREA HA', 'NUMBER OBSERVERS',
       'ALL SPECIES REPORTED', 'GROUP IDENTIFIER', 'HAS MEDIA', 'APPROVED',
       'REVIEWED', 'REASON', 'TRIP COMMENTS', 'SPECIES COMMENTS',
       'Unnamed: 49'],
      dtype='object')

In [46]:
df['GLOBAL UNIQUE IDENTIFIER'].nunique()

100000

In [40]:
df['ALL SPECIES REPORTED'].value_counts()

ALL SPECIES REPORTED
1    94268
0     5732
Name: count, dtype: int64

In [41]:
df['REVIEWED'].value_counts()

REVIEWED
0    98942
1     1058
Name: count, dtype: int64

In [34]:
df['LATITUDE'].describe()

count    100000.000000
mean         19.153129
std           0.042285
min          19.027666
25%          19.127856
50%          19.141094
75%          19.173116
max          19.265614
Name: LATITUDE, dtype: float64

In [23]:
df['APPROVED'].value_counts()

APPROVED
1    100000
Name: count, dtype: int64

In [24]:
df['OBSERVATION COUNT'].describe()

count     100000
unique       171
top            1
freq       30207
Name: OBSERVATION COUNT, dtype: object

In [33]:
cnt = pd.to_numeric(df['OBSERVATION COUNT'], errors='coerce')
cnt.describe()

count    91781.000000
mean         7.179830
std         56.539974
min          1.000000
25%          1.000000
50%          2.000000
75%          5.000000
max      10000.000000
Name: OBSERVATION COUNT, dtype: float64

In [48]:
df.sample(3)

Unnamed: 0,GLOBAL UNIQUE IDENTIFIER,LAST EDITED DATE,TAXONOMIC ORDER,CATEGORY,TAXON CONCEPT ID,COMMON NAME,SCIENTIFIC NAME,SUBSPECIES COMMON NAME,SUBSPECIES SCIENTIFIC NAME,EXOTIC CODE,OBSERVATION COUNT,BREEDING CODE,BREEDING CATEGORY,BEHAVIOR CODE,AGE/SEX,COUNTRY,COUNTRY CODE,STATE,STATE CODE,COUNTY,COUNTY CODE,IBA CODE,BCR CODE,USFWS CODE,ATLAS BLOCK,LOCALITY,LOCALITY ID,LOCALITY TYPE,LATITUDE,LONGITUDE,OBSERVATION DATE,TIME OBSERVATIONS STARTED,OBSERVER ID,SAMPLING EVENT IDENTIFIER,PROTOCOL TYPE,PROTOCOL CODE,PROJECT CODE,DURATION MINUTES,EFFORT DISTANCE KM,EFFORT AREA HA,NUMBER OBSERVERS,ALL SPECIES REPORTED,GROUP IDENTIFIER,HAS MEDIA,APPROVED,REVIEWED,REASON,TRIP COMMENTS,SPECIES COMMENTS,Unnamed: 49
28943,URN:CornellLabOfOrnithology:EBIRD:OBS811327238,2023-10-19 05:09:05.847284,22862,species,avibase-0E6F19BB,Ashy Prinia,Prinia socialis,,,,2,,,,,India,IN,Maharashtra,IN-MH,Mumbai Suburban,IN-MH-MS,,,,,IIT Bombay,L1190942,H,19.127856,72.913599,2019-09-30,11:32:00,obsr672706,S60235788,Traveling,P22,EBIRD,37.0,0.4,,1.0,1,,0,1,0,,,,
71121,URN:CornellLabOfOrnithology:EBIRD:OBS929857282,2023-10-19 03:30:32.125678,10478,species,avibase-2B7AC50E,Coppersmith Barbet,Psilopogon haemacephalus,,,,2,,,,,India,IN,Maharashtra,IN-MH,Mumbai Suburban,IN-MH-MS,,,,,"Opposite Malcolm Baug, Jogeshwari (West)",L10997674,P,19.132553,72.847763,2020-05-22,18:15:00,obsr811385,S69522061,Stationary,P21,EBIRD,15.0,,,1.0,1,G5372601,0,1,0,,,"Call heard, bird not seen.",
15532,URN:CornellLabOfOrnithology:EBIRD:OBS723667829,2024-04-28 22:51:40.489403,23257,species,avibase-B22EAFC8,Blyth's Reed Warbler,Acrocephalus dumetorum,,,,7,,,,,India,IN,Maharashtra,IN-MH,Mumbai Suburban,IN-MH-MS,BIRDLIFE_18281,,,,"Unnamed Road, Mumbai, Maharashtra, IN (19.148,...",L8829387,P,19.148366,72.963909,2019-03-11,07:06:00,obsr1155971,S53757150,Traveling,P22,EBIRD,202.0,5.0,,3.0,1,G3936094,0,1,0,,,,


In [51]:
df['APPROVED'].value_counts()

APPROVED
1    100000
Name: count, dtype: int64

In [55]:
df[['CATEGORY', 'COMMON NAME', 'SCIENTIFIC NAME',
       'OBSERVATION COUNT', 'COUNTY', 'LOCALITY', 
       'LATITUDE', 'LONGITUDE', 'OBSERVATION DATE',
       'SAMPLING EVENT IDENTIFIER',
       'NUMBER OBSERVERS',
       'ALL SPECIES REPORTED',
       'REVIEWED']]

Unnamed: 0,CATEGORY,COMMON NAME,SCIENTIFIC NAME,OBSERVATION COUNT,COUNTY,LOCALITY,LATITUDE,LONGITUDE,OBSERVATION DATE,SAMPLING EVENT IDENTIFIER,NUMBER OBSERVERS,ALL SPECIES REPORTED,REVIEWED
0,spuh,Accipiter sp.,Accipiter sp.,2,Mumbai Suburban,Lokhandwala Lake--Mangroves,19.143764,72.818327,2019-01-11,S51547747,1.0,1,0
1,spuh,Accipiter sp.,Accipiter sp.,1,Mumbai Suburban,"Tarzon Lake, Charkop",19.217495,72.816238,2019-01-03,S62659169,1.0,1,0
2,spuh,Accipiter sp.,Accipiter sp.,1,Mumbai Suburban,"Turzon Hill Lake, Charkop, Mumbai",19.218297,72.814174,2019-01-11,S51555671,1.0,1,0
3,spuh,Accipiter sp.,Accipiter sp.,2,Mumbai Suburban,Lokhandwala Lake--Mangroves,19.143764,72.818327,2019-01-11,S51575674,1.0,1,0
4,spuh,Acrocephalus sp.,Acrocephalus sp.,2,Mumbai Suburban,Sanjay Gandhi (Borivali) NP,19.213394,72.908192,2019-01-08,S51546959,6.0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
99995,species,Black Drongo,Dicrurus macrocercus,2,Mumbai Suburban,IIT Bombay,19.127856,72.913599,2020-10-20,S75247641,2.0,1,0
99996,species,Black Drongo,Dicrurus macrocercus,2,Mumbai Suburban,IIT Bombay,19.127856,72.913599,2020-10-03,S74390913,1.0,1,0
99997,species,Black Drongo,Dicrurus macrocercus,1,Mumbai Suburban,IIT Bombay,19.127856,72.913599,2020-10-30,S81719530,,1,0
99998,species,Black Drongo,Dicrurus macrocercus,2,Mumbai Suburban,Bhandup Pumping Station (BPS),19.141094,72.960988,2020-10-11,S74674876,1.0,1,0


## Plotting

In [63]:
df['COMMON NAME'].value_counts().sample(20)

COMMON NAME
Red-headed Bunting                      21
Pied Bushchat                          125
Malabar Lark                             7
Tawny Pipit                             12
Black-headed Cuckooshrike              160
Greater/Mongolian Short-toed Lark        6
passerine sp.                           27
Fork-tailed Drongo-Cuckoo               66
Variable Wheatear                        3
Rose-ringed Parakeet                 15921
Egyptian Vulture                         4
roller sp.                               3
Red-whiskered/Red-vented Bulbul        156
cuckoo sp. (Cuculidae sp.)              45
lapwing sp.                             15
Greater Coucal                       10305
Pallas's Gull                           99
woodpecker sp.                         220
prinia sp.                              68
Tickell's Thrush                         8
Name: count, dtype: int64

In [64]:
tdf = df[df['COMMON NAME'] == 'Pied Bushchat']

In [71]:
import folium
from folium.plugins import HeatMap, MarkerCluster
import pandas as pd


In [65]:
tdf.head()

Unnamed: 0,CATEGORY,COMMON NAME,SCIENTIFIC NAME,OBSERVATION COUNT,COUNTY,LOCALITY,LATITUDE,LONGITUDE,OBSERVATION DATE,SAMPLING EVENT IDENTIFIER,NUMBER OBSERVERS,ALL SPECIES REPORTED,REVIEWED
12753,species,Pied Bushchat,Saxicola caprata,2,Mumbai Suburban,Pench,19.110417,72.862099,2019-02-06,S52544140,1.0,1,0
12754,species,Pied Bushchat,Saxicola caprata,1,Mumbai Suburban,Mamachi Wadi,19.14087,72.855857,2019-02-10,S52548616,7.0,1,0
12755,species,Pied Bushchat,Saxicola caprata,2,Mumbai Suburban,Mamachivadi,19.118585,72.911369,2019-02-10,S52643889,8.0,1,0
12756,species,Pied Bushchat,Saxicola caprata,1,Mumbai Suburban,"Mumbai, Maharashtra, IN (19.093, 72.933)",19.092877,72.932708,2019-02-10,S52548408,30.0,1,0
12757,species,Pied Bushchat,Saxicola caprata,2,Mumbai Suburban,Mamachivadi,19.118585,72.911369,2019-02-10,S52551236,8.0,1,0


In [91]:
tdf[['LATITUDE', 'LONGITUDE']].to_csv("bushchat.csv", index=False)

In [None]:
m = folium.Map(location=[tdf['LATITUDE'].mean(), tdf['LONGITUDE'].mean()], zoom_start=13)
marker_cluster = MarkerCluster().add_to(m)

# Add points to the cluster
for index, row in tdf.iterrows():
    folium.Marker(location=[row['LATITUDE'], row['LONGITUDE']]).add_to(marker_cluster)


# Prepare the data for the heatmap
heat_data = [[row['LATITUDE'], row['LONGITUDE']] for index, row in tdf.iterrows()]

# Create the heatmap
HeatMap(heat_data, radius=10, blur=10, max_zoom=1).add_to(m)

# Save the map to an HTML file
m.save('heatmap.html')