In [1]:
import pandas as pd
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

In [2]:
from bokeh.io import push_notebook, output_notebook
from bokeh.plotting import figure, show, output_file
from bokeh.models.markers import Hex
from bokeh.models import Legend, HoverTool, ColumnDataSource
output_notebook()

In [3]:
# Base directory prepended to every input file name in this notebook
# (the two CSV tables and the map bitmap).
DIR = "./"

# Import Data

In [4]:
# Load both recording tables from DIR: the full set of recordings and the
# test recordings with their locations.
df_allbirds = pd.read_csv(f"{DIR}AllBirdsv4.csv")
df_testbirds = pd.read_csv(f"{DIR}Test Birds Location.csv")

# Preview and preprocess data sets

## All birds recordings

In [5]:
# Preview the first rows to check the column names and value formats.
df_allbirds.head()

Unnamed: 0,File ID,English_name,Vocalization_type,Quality,Time,Date,X,Y
0,402254,Rose-crested Blue Pipit,call,no score,13:30,2/8/2018,49,63
1,406171,Rose-crested Blue Pipit,call,A,7:48,6/7/2017,125,133
2,405901,Rose-crested Blue Pipit,call,A,12:00,2/8/2018,58,76
3,405548,Rose-crested Blue Pipit,song,A,11:00,3/10/2018,55,125
4,401782,Rose-crested Blue Pipit,song,A,6:00,6/29/2008,129,123


### Data cleaning

In [6]:
# Remove every non-digit character from the Y column, then convert the
# cleaned values to a numeric dtype.
# The cleaned series is assigned back explicitly: calling
# replace(inplace=True) on the df['Y'] selection acts on an intermediate
# object and is not guaranteed to update the frame (it never does under
# pandas Copy-on-Write).
y_digits_only = df_allbirds['Y'].replace(regex=True, to_replace=r'\D', value=r'')
df_allbirds['Y'] = pd.to_numeric(y_digits_only)

###  Describe data

In [7]:
# Summarise the full recordings table: counts, coordinate extents, dtypes.
file_ids = df_allbirds["File ID"]
print("Number of bird recordings:", file_ids.unique().size)

name_counts = df_allbirds["English_name"].value_counts()
print("Number of distinct bird families:", name_counts.shape[0])

pipit_rows = df_allbirds[df_allbirds['English_name'] == "Rose-crested Blue Pipit"]
print("Number of Rose-crested Blue Pipit:", len(pipit_rows))

x_positions = df_allbirds["X"]
y_positions = df_allbirds["Y"]
print("Recording position X range: %d - %d" % (min(x_positions), max(x_positions)) )
print("Recording position Y range: %d - %d" % (min(y_positions), max(y_positions)) )

print("Column names and value types:")
print(df_allbirds.dtypes)

Number of bird recordings: 2081
Number of distinct bird families: 19
Number of Rose-crested Blue Pipit: 186
Recording position X range: 10 - 171
Recording position Y range: 13 - 186
Column names and value types:
File ID               int64
English_name         object
Vocalization_type    object
Quality              object
Time                 object
Date                 object
X                     int64
Y                     int64
dtype: object


## Kasios bird recordings

In [8]:
# Render the whole test table as plain text and print it.
testbirds_table = df_testbirds.to_string()
print(testbirds_table)

    ID    X    Y
0    1  140  119
1    2   63  153
2    3   70  136
3    4   78  150
4    5   60   90
5    6  126  103
6    7   71  121
7    8   78   62
8    9   61  145
9   10   45   39
10  11  132  106
11  12   61   20
12  13   35  160
13  14   40  125
14  15  110  121


### Data cleaning

In [9]:
# Strip stray whitespace from the column names (the CSV header yields " X"
# and " Y"). Only the column labels are renamed; the row index is left as
# read, so it is not converted to strings. The result is assigned back
# instead of renaming in place, so the cell can be re-run safely.
df_testbirds = df_testbirds.rename(columns=str.strip)

### Data description

In [10]:
# Summarise the test recordings: count, coordinate extents, dtypes.
test_ids = df_testbirds["ID"]
print("Number of bird recordings:", test_ids.unique().size)

test_x = df_testbirds["X"]
test_y = df_testbirds["Y"]
print("Recording position X range: %d - %d" % (min(test_x), max(test_x)) )
print("Recording position Y range: %d - %d" % (min(test_y), max(test_y)) )

print("Column names and value types:")
print(df_testbirds.dtypes)

Number of bird recordings: 15
Recording position X range: 35 - 140
Recording position Y range: 20 - 160
Column names and value types:
ID    int64
X     int64
Y     int64
dtype: object


# Processing map image

In [11]:
from PIL import Image
# Reduce the map bitmap to a two-level grayscale array for use as a backdrop:
# after conversion to 8-bit grayscale, every pixel with a value above 0
# becomes 255 and every pixel equal to 0 becomes 100.
map_path = DIR + "Lekagul Roadways 2018.bmp"
im = plt.imread(map_path)
img = Image.open(map_path).convert('L')
gray_levels = np.array(img)
np_img = np.where(gray_levels > 0, 255, 100).astype(gray_levels.dtype)
binarized_img = Image.fromarray(np_img)

# Plot Birds Distribution on Map

In [12]:
# Split the recordings into Rose-crested Blue Pipits and all other birds,
# using a single boolean mask for both selections.
is_pipit = df_allbirds['English_name'] == "Rose-crested Blue Pipit"
df_pipits = df_allbirds[is_pipit]
df_others = df_allbirds[~is_pipit]

In [18]:
# Interactive map of all recording positions drawn over the roadways image.
# NOTE(review): plot_width/plot_height and the legend= keyword are the older
# Bokeh spellings; newer releases use width/height and legend_label — confirm
# against the installed Bokeh version before upgrading.
p = figure(x_range=(0, 199), y_range=(0, 199), title = "Recordings positions in the Preserve", plot_width=900, plot_height=700)
p.xaxis.axis_label = 'Coordinate X'
p.yaxis.axis_label = 'Coordinate Y'
# Map backdrop, stretched over a 200 x 200 area anchored at the origin.
# NOTE(review): Bokeh is understood to draw the first array row at the bottom
# of the image; confirm the map is not vertically mirrored relative to the
# recordings (flip with np_img[::-1] if it is).
p.image(image=[np_img], x=0, y=0, dw=200, dh=200)
# Recordings: one renderer per group so each can be toggled from the legend.
p.circle(df_pipits["X"], df_pipits["Y"], color='blue', fill_alpha=0.2, size=6, legend="Blue Pipits recordings from University")
p.circle(df_others["X"], df_others["Y"], fill_alpha=0.2, size=3, legend="Non Blue Pipits recordings from University")
# Drawn once only: a second identical call would stack a duplicate renderer
# on the same points.
p.circle(df_testbirds["X"], df_testbirds["Y"], color='red', size=6, legend="Blue Pipits recordings from Kasios")
# Waste dumping site marker.
p.square(148, 149, size=15, fill_alpha=0.4, color='green', legend="Kasio waste dumpings")
# Legend: bottom-right corner; clicking an entry hides its renderer.
p.legend.location ="bottom_right"
p.legend.click_policy="hide"
# Hover tooltip showing the data-space coordinates under the cursor.
p.add_tools(HoverTool(
    tooltips=[
    ("(X,Y)", "($x, $y)"),
]))
# Title properties
p.title.text_font_size = '15pt'
p.title.align = 'center'
# Show the plot inside the notebook
show(p, notebook_handle = True)