## Read updated data with correct directions

In [1]:
import pandas as pd
pd.set_option('display.max_columns', None)  # or 1000
pd.set_option('display.max_rows', None)  # or 1000
pd.set_option('display.max_colwidth', None)  # or 199

data = pd.read_csv('Data/oregon2020-zerofilled_correctdir.csv', encoding="cp1252")
data = data.drop(columns=['Unnamed: 0'])
print(data.columns)
print(len(data))
display(data.head(5))

  data = pd.read_csv('Data/oregon2020-zerofilled_correctdir.csv', encoding="cp1252")


Index(['Unique_Checklist_ID', 'Common_Name', 'Occur', 'correct_direction',
       'Direction', 'Distance', 'Latitude', 'Longitude', 'Year'],
      dtype='object')
2751623


Unnamed: 0,Unique_Checklist_ID,Common_Name,Occur,correct_direction,Direction,Distance,Latitude,Longitude,Year
0,42.00128_-118.83407_2017-06-10_WDRobinson,Marsh Wren,0,NAN,,,42.00128,-118.83407,2017
1,42.00175_-118.84133_2017-06-10_WDRobinson,Marsh Wren,0,NAN,,,42.00175,-118.84133,2017
2,42.0022_-118.85239_2017-06-10_WDRobinson,Marsh Wren,0,NAN,,,42.0022,-118.85239,2017
3,42.00246_-118.6301_2017-06-08_WDRobinson,Marsh Wren,0,NAN,,,42.00246,-118.6301,2017
4,42.00321_-118.85844_2017-06-10_WDRobinson,Marsh Wren,0,NAN,,,42.00321,-118.85844,2017


In [2]:
print(data['correct_direction'].unique())

['NAN' 'N' 'NNE' 'E' 'NE' 'WNW' 'ESE' 'SSW' 'S' 'W' 'NW' 'ENE' 'SW' 'SE'
 'WSW' 'NNW' 'SSE']


In [3]:
data['correct_direction'].value_counts(dropna=False)

correct_direction
NAN    2604522
S        22708
N        22631
W        22179
E        19850
SW       11107
NW       11062
NE       10239
SE        9915
ESE       2451
WNW       2328
SSW       2220
ENE       2185
WSW       2139
NNW       2098
NNE       2006
SSE       1983
Name: count, dtype: int64

## Map Degree (Bearing Value) to Directions

In [4]:
#from http://ajltct.weebly.com/direction-and-bearing.html

direction_dict = {"N": 360, "NNE": 22.5, "NE": 45, "ENE": 67.5,
                  "E": 90, "ESE": 112.5, "SE": 135, "SSE": 157.5, 
                  "S": 180, "SSW": 202.5, "SW": 225, "WSW": 247.5,
                  "W": 270, "WNW": 292.5, "NW": 315, "NNW": 337.5}

In [5]:
# Create new column named "bearing" which includes the degree value based on the direction (refer to above dictionary)

data['bearing'] = data['correct_direction'].map(direction_dict)
#data['bearing'] = data['bearing'].fillna('NAN')
data.insert(4, 'bearing', data.pop('bearing'))

In [6]:
display(data.head(10))

Unnamed: 0,Unique_Checklist_ID,Common_Name,Occur,correct_direction,bearing,Direction,Distance,Latitude,Longitude,Year
0,42.00128_-118.83407_2017-06-10_WDRobinson,Marsh Wren,0,NAN,,,,42.00128,-118.83407,2017
1,42.00175_-118.84133_2017-06-10_WDRobinson,Marsh Wren,0,NAN,,,,42.00175,-118.84133,2017
2,42.0022_-118.85239_2017-06-10_WDRobinson,Marsh Wren,0,NAN,,,,42.0022,-118.85239,2017
3,42.00246_-118.6301_2017-06-08_WDRobinson,Marsh Wren,0,NAN,,,,42.00246,-118.6301,2017
4,42.00321_-118.85844_2017-06-10_WDRobinson,Marsh Wren,0,NAN,,,,42.00321,-118.85844,2017
5,42.00654_-119.41909_2018-06-03_JRCurtis,Marsh Wren,0,NAN,,,,42.00654,-119.41909,2018
6,42.00808_-119.42516_2018-06-03_JRCurtis,Marsh Wren,0,NAN,,,,42.00808,-119.42516,2018
7,42.00967_-120.84208_2018-06-03_WDRobinson,Marsh Wren,0,NAN,,,,42.00967,-120.84208,2018
8,42.01001_-120.97085_2017-06-23_WDRobinson,Marsh Wren,0,NAN,,,,42.01001,-120.97085,2017
9,42.01034_-119.43141_2018-06-03_JRCurtis,Marsh Wren,0,NAN,,,,42.01034,-119.43141,2018


In [7]:
print(data['bearing'].unique())

[  nan 360.   22.5  90.   45.  292.5 112.5 202.5 180.  270.  315.   67.5
 225.  135.  247.5 337.5 157.5]


In [8]:
df_absence = data[data["Occur"]==0]
df_absence.reset_index(drop=True, inplace=True) 
print(len(df_absence))

df_presence = data[data["Occur"]>0]
print(len(df_presence))
df_presence.dropna(inplace=True)
df_presence.reset_index(drop=True, inplace=True) 
print(len(df_presence))

2603813
147810
146783


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_presence.dropna(inplace=True)


In [9]:
print(len(df_presence) + len(df_absence))

2750596


In [10]:
print(df_presence['bearing'].value_counts(dropna=False))
print(df_presence['bearing'].unique())
print(df_presence['correct_direction'].value_counts(dropna=False))
print(df_presence['correct_direction'].unique())

bearing
180.0    22653
360.0    22574
270.0    22126
90.0     19824
225.0    11083
315.0    11041
45.0     10222
135.0     9893
112.5     2449
292.5     2309
202.5     2219
67.5      2183
247.5     2136
337.5     2088
22.5      2002
157.5     1981
Name: count, dtype: int64
[360.   22.5  90.   45.  292.5 112.5 202.5 180.  270.  315.   67.5 225.
 135.  247.5 337.5 157.5]
correct_direction
S      22653
N      22574
W      22126
E      19824
SW     11083
NW     11041
NE     10222
SE      9893
ESE     2449
WNW     2309
SSW     2219
ENE     2183
WSW     2136
NNW     2088
NNE     2002
SSE     1981
Name: count, dtype: int64
['N' 'NNE' 'E' 'NE' 'WNW' 'ESE' 'SSW' 'S' 'W' 'NW' 'ENE' 'SW' 'SE' 'WSW'
 'NNW' 'SSE']


## Calculate bird lat and long using Vincenty's distance formulae

In [11]:
import folium
from folium import plugins
from geopy.point import Point
from geopy.distance import geodesic
from IPython.display import display, HTML

# Function to calculate the new coordinates
def calculate_new_coordinates(row):
    start = Point(row['Latitude'], row['Longitude'])
    # Calculate the destination point using Vincenty's formulae
    destination = geodesic(meters=row['Distance']).destination(start, row['bearing'])     
    return pd.Series({'Latitude': destination.latitude, 'Longitude': destination.longitude})

# Apply the function to the entire dataframe
df_presence[['Latitude', 'Longitude']] = df_presence.apply(calculate_new_coordinates, axis=1)

# Display the result
display(df_presence.head(10))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_presence[['Latitude', 'Longitude']] = df_presence.apply(calculate_new_coordinates, axis=1)


Unnamed: 0,Unique_Checklist_ID,Common_Name,Occur,correct_direction,bearing,Direction,Distance,Latitude,Longitude,Year
0,44.60923_-123.23987_2012-05-09_WDRobinson,Marsh Wren,1,N,360.0,n,18.0,44.609392,-123.23987,2012
1,44.60923_-123.23987_2012-05-09_WDRobinson,Marsh Wren,1,N,360.0,n,20.0,44.60941,-123.23987,2012
2,44.60923_-123.23987_2012-05-09_WDRobinson,American Bittern,1,NNE,22.5,nne,80.0,44.609895,-123.239484,2012
3,44.60923_-123.23987_2012-05-09_WDRobinson,Song Sparrow,1,N,360.0,n,30.0,44.6095,-123.23987,2012
4,44.60923_-123.23987_2012-05-09_WDRobinson,Common Yellowthroat,1,N,360.0,n,8.0,44.609302,-123.23987,2012
5,44.60923_-123.23987_2012-05-09_WDRobinson,American Robin,1,E,90.0,e,110.0,44.60923,-123.238484,2012
6,44.60923_-123.23987_2012-05-09_WDRobinson,Red-winged Blackbird,1,NE,45.0,ne,90.0,44.609803,-123.239068,2012
7,44.60923_-123.23987_2012-05-09_WDRobinson,Red-winged Blackbird,1,N,360.0,n,80.0,44.60995,-123.23987,2012
8,44.60923_-123.23987_2012-05-09_WDRobinson,Common Yellowthroat,1,WNW,292.5,wnw,40.0,44.609368,-123.240336,2012
9,44.60923_-123.23987_2012-05-09_WDRobinson,Spotted Towhee,1,ESE,112.5,ese,38.0,44.609099,-123.239428,2012


In [12]:
combined_df = pd.concat([df_presence, df_absence])
combined_df.reset_index(drop=True, inplace=True) 
print(len(combined_df))

2750596


In [13]:
display(combined_df.head(10))

Unnamed: 0,Unique_Checklist_ID,Common_Name,Occur,correct_direction,bearing,Direction,Distance,Latitude,Longitude,Year
0,44.60923_-123.23987_2012-05-09_WDRobinson,Marsh Wren,1,N,360.0,n,18.0,44.609392,-123.23987,2012
1,44.60923_-123.23987_2012-05-09_WDRobinson,Marsh Wren,1,N,360.0,n,20.0,44.60941,-123.23987,2012
2,44.60923_-123.23987_2012-05-09_WDRobinson,American Bittern,1,NNE,22.5,nne,80.0,44.609895,-123.239484,2012
3,44.60923_-123.23987_2012-05-09_WDRobinson,Song Sparrow,1,N,360.0,n,30.0,44.6095,-123.23987,2012
4,44.60923_-123.23987_2012-05-09_WDRobinson,Common Yellowthroat,1,N,360.0,n,8.0,44.609302,-123.23987,2012
5,44.60923_-123.23987_2012-05-09_WDRobinson,American Robin,1,E,90.0,e,110.0,44.60923,-123.238484,2012
6,44.60923_-123.23987_2012-05-09_WDRobinson,Red-winged Blackbird,1,NE,45.0,ne,90.0,44.609803,-123.239068,2012
7,44.60923_-123.23987_2012-05-09_WDRobinson,Red-winged Blackbird,1,N,360.0,n,80.0,44.60995,-123.23987,2012
8,44.60923_-123.23987_2012-05-09_WDRobinson,Common Yellowthroat,1,WNW,292.5,wnw,40.0,44.609368,-123.240336,2012
9,44.60923_-123.23987_2012-05-09_WDRobinson,Spotted Towhee,1,ESE,112.5,ese,38.0,44.609099,-123.239428,2012


In [14]:
display(combined_df.tail(10))

Unnamed: 0,Unique_Checklist_ID,Common_Name,Occur,correct_direction,bearing,Direction,Distance,Latitude,Longitude,Year
2750586,46.16658_-123.69349_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.16658,-123.69349,2015
2750587,46.16878_-123.6872_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.16878,-123.6872,2015
2750588,46.16881_-123.67372_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.16881,-123.67372,2015
2750589,46.17006_-123.67671_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.17006,-123.67671,2015
2750590,46.17025_-123.67956_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.17025,-123.67956,2015
2750591,46.17115_-123.94958_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.17115,-123.94958,2015
2750592,46.17461_-123.95184_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.17461,-123.95184,2015
2750593,46.17752_-123.95338_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.17752,-123.95338,2015
2750594,46.18056_-123.95496_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.18056,-123.95496,2015
2750595,46.18355_-123.96072_2015-05-30_JRCurtis,Eared Grebe,0,NAN,,,,46.18355,-123.96072,2015


In [15]:
combined_df.to_csv("Data/oregon2020-zerofilled_birdloc.csv", index=False)

In [16]:
americanrobin = combined_df[combined_df["Common_Name"]=="American Robin"]
print(americanrobin.head(10))
americanrobin.to_csv("Data/americanrobin_oregon2020-zerofilled_birdloc.csv", index=False)

                           Unique_Checklist_ID     Common_Name  Occur  \
5    44.60923_-123.23987_2012-05-09_WDRobinson  American Robin      1   
11   44.60923_-123.23987_2012-05-09_WDRobinson  American Robin      1   
21    44.5979_-123.24033_2012-05-09_WDRobinson  American Robin      1   
32    44.5975_-123.22921_2012-05-09_WDRobinson  American Robin      1   
35    44.5975_-123.22921_2012-05-09_WDRobinson  American Robin      1   
42   44.59149_-123.22935_2012-05-09_WDRobinson  American Robin      1   
46   44.59149_-123.22935_2012-05-09_WDRobinson  American Robin      1   
216  44.59148_-123.23836_2012-05-09_WDRobinson  American Robin      1   
229  44.59055_-123.24892_2012-05-09_WDRobinson  American Robin      1   
256  44.59779_-123.25011_2012-05-09_WDRobinson  American Robin      1   

    correct_direction  bearing Direction  Distance   Latitude   Longitude  \
5                   E     90.0         e     110.0  44.609230 -123.238484   
11                SSW    202.5       ssw  

## Plot observer and bird coordinates for a particular bird

In [None]:
# For a particular bird

# bird_df = filtered_df[(filtered_df['Common_Name'] == 'American Robin')]
# print(len(bird_df))

In [None]:
import folium
from IPython.display import display, HTML
from folium import plugins

# center = [44.60923, -123.23987]

# # Creating map
# map_obj = folium.Map(location=center, zoom_start=15, control_scale=True)

# for index, row in bird_df.iterrows():
#     observer = [row['Latitude'], row['Longitude']]
#     bird = [row['BirdLatitude'], row['BirdLongitude']]
#     # Add markers
#     folium.Marker(observer, popup=f'Observer', icon=folium.Icon("black")).add_to(map_obj)
#     folium.Marker(bird, popup=f'Bird', icon=folium.Icon("blue")).add_to(map_obj)

# # Display map
# map_obj

In [None]:
# print(filtered_df.dtypes)