In [17]:
# Dependencies
import pandas as pd
import numpy as np
import scipy.stats as st
import matplotlib.pyplot as plt
import requests
import json
import gmaps
from config import g_key
gmaps.configure(api_key=g_key)

In [2]:
# import filtered shark csv file
filename = "Resource/Shark Data WA 2016-2020 (filtered).xlsx"
shark_data = pd.read_excel(filename)
shark_data.head()

Unnamed: 0,Case Number,Date,Year,Type,Country,Area,Location,Activity,Name,Age,Injury,Fatal (Y/N),Time,Species,Investigator or Source
0,2020.01.17,17-Jan-2020,2020,Unprovoked,AUSTRALIA,New South Wales,Windang Beach,Surfing,Will Schroeter,59,Laceration ot left ankle and foot,N,08h00,"""A small shark""","B. Myatt & M. Michaelson, GSAF; K. McMurray, T..."
1,2020.01.07,07-Jan-2020,2020,Unprovoked,AUSTRALIA,Queensland,North West Island,Swimming,Miah Holmes,7,Lacerations to leg,N,17h30,Lemon shark,"B. Myatt & M. Michaelson, GSAF; K. McMurray, T..."
2,2020.01.05,05-Jan-2020,2020,Unprovoked,AUSTRALIA,Western Australia,Cull Island / Esperance,Scuba diving,Gary Johnson,57,FATAL,Y,13h00,White shark,"B. Myatt, GSAF"
3,2020.01.02,02-Jan-2020,2020,Questionable,AUSTRALIA,Western Australia,Esperance,Swimming after being swept off rocks,Eric Birighitti,21,Probable drowning and scavenging,Y,16h10,,"B. Myatt & M. Michaelson, GSAF; K. McMurray, T..."
4,2019.12.30,30-Dec-2019,2019,Unprovoked,AUSTRALIA,Queensland,North West Island,Swimming,male,30s,Minor injury to hand,N,12h58,"Shovelnose ""shark"" which is a ray, not a shark)","B. Myatt, GSAF"


In [3]:
shark_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114 entries, 0 to 113
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype 
---  ------                  --------------  ----- 
 0   Case Number             114 non-null    object
 1   Date                    114 non-null    object
 2   Year                    114 non-null    int64 
 3   Type                    114 non-null    object
 4   Country                 114 non-null    object
 5   Area                    114 non-null    object
 6   Location                113 non-null    object
 7   Activity                112 non-null    object
 8   Name                    111 non-null    object
 9   Age                     80 non-null     object
 10  Injury                  114 non-null    object
 11  Fatal (Y/N)             109 non-null    object
 12  Time                    92 non-null     object
 13  Species                 74 non-null     object
 14  Investigator or Source  113 non-null    object
dtypes: int

In [4]:
shark_data['Area'].unique()

array(['New South Wales', 'Queensland', 'Western Australia',
       'Northern Territory', 'Tasmania', 'Westerm Australia', 'Victoria',
       'South Australia'], dtype=object)

In [5]:
# clean up misspelled State
shark_data['Area'] = shark_data['Area'].replace({'Westerm Australia':'Western Australia'})

In [6]:
# remove unwanted columns
shark_df = shark_data.drop(['Name','Investigator or Source'], axis = 1)


In [7]:
shark_df['Location'].nunique()

106

In [8]:
# match location to lat and long coordinates

# create columns to hold data
shark_df['lat']=""
shark_df['lng']=""

# Build URL using the Google Maps API
base_url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json"

params = {"key": g_key, "inputtype": "textquery", "fields":"geometry"}

for index, row in shark_df.iterrows():
    # get extra parameters
    params['input'] = (f'{row["Location"]}, {row["Area"]}')
    
    # Run request
    print(f"Retrieving Results for Index {index}: {row['Location']}.")
    response = requests.get(base_url, params=params)
    results = response.json()

    # Extract lat/lng
    try:
        shark_df.loc[index, 'lat'] = results['candidates'][0]['geometry']['location']['lat']
        shark_df.loc[index, 'lng'] = results['candidates'][0]['geometry']['location']['lng'] 
        
    except (KeyError, IndexError):
        print("Missing field/result... skipping.")

    


Retrieving Results for Index 0: Windang Beach.
Retrieving Results for Index 1: North West Island.
Retrieving Results for Index 2: Cull Island / Esperance.
Retrieving Results for Index 3: Esperance.
Retrieving Results for Index 4: North West Island.
Retrieving Results for Index 5: Shellharbour.
Retrieving Results for Index 6: Exmouth.
Retrieving Results for Index 7: Bargara.
Retrieving Results for Index 8: Elliot Head, near Bundaberg..
Retrieving Results for Index 9: Off Airlie Beach, Whitsundays.
Retrieving Results for Index 10: Woorim Beach, Bribie Island,.
Retrieving Results for Index 11: Lighthouse Beach.
Retrieving Results for Index 12: Nightcliff Jetty, Darwin.
Retrieving Results for Index 13: Burns Beach, Perth.
Retrieving Results for Index 14: Magra Islet.
Retrieving Results for Index 15: Pelican Waters, Caloundra.
Retrieving Results for Index 16: Maatsuyker Island.
Retrieving Results for Index 17: Lennox Head.
Retrieving Results for Index 18: Yandaran Beach.
Retrieving Results 

In [9]:
shark_df.head()

Unnamed: 0,Case Number,Date,Year,Type,Country,Area,Location,Activity,Age,Injury,Fatal (Y/N),Time,Species,lat,lng
0,2020.01.17,17-Jan-2020,2020,Unprovoked,AUSTRALIA,New South Wales,Windang Beach,Surfing,59,Laceration ot left ankle and foot,N,08h00,"""A small shark""",-34.5343,150.875
1,2020.01.07,07-Jan-2020,2020,Unprovoked,AUSTRALIA,Queensland,North West Island,Swimming,7,Lacerations to leg,N,17h30,Lemon shark,-23.2951,151.708
2,2020.01.05,05-Jan-2020,2020,Unprovoked,AUSTRALIA,Western Australia,Cull Island / Esperance,Scuba diving,57,FATAL,Y,13h00,White shark,-33.9222,121.903
3,2020.01.02,02-Jan-2020,2020,Questionable,AUSTRALIA,Western Australia,Esperance,Swimming after being swept off rocks,21,Probable drowning and scavenging,Y,16h10,,-33.8613,121.891
4,2019.12.30,30-Dec-2019,2019,Unprovoked,AUSTRALIA,Queensland,North West Island,Swimming,30s,Minor injury to hand,N,12h58,"Shovelnose ""shark"" which is a ray, not a shark)",-23.2951,151.708


In [10]:
# find record where location is not given
no_loc = shark_df.loc[shark_df['Location'].isna()]
print(no_loc)

# this record is not in WA, so will not affect study too much, so will be dropped from df
shark_df = shark_df.loc[shark_df['Location'].notna()]





     Case Number         Date  Year      Type    Country             Area  \
25  2018.12.15.b  15-Dec-2018  2018  Provoked  AUSTRALIA  New South Wales   

   Location Activity Age             Injury Fatal (Y/N)   Time  \
25      NaN      NaN  48  PROVOKED INCIDENT           N  11h00   

           Species       lat      lng  
25  Wobbegong shark -34.4663  150.848  


In [11]:
# convert lat and lng results to numbers
shark_df['lat'] = pd.to_numeric(shark_df['lat'])
shark_df['lng'] = pd.to_numeric(shark_df['lng'])

In [12]:
empty = shark_df.isna()
empty.sum()

Case Number     0
Date            0
Year            0
Type            0
Country         0
Area            0
Location        0
Activity        1
Age            34
Injury          0
Fatal (Y/N)     5
Time           22
Species        40
lat             2
lng             2
dtype: int64

In [13]:
# find rows with empty locations
shark_df_empty_loc = shark_df.loc[shark_df['lat'].isna()]
shark_df_empty_loc

Unnamed: 0,Case Number,Date,Year,Type,Country,Area,Location,Activity,Age,Injury,Fatal (Y/N),Time,Species,lat,lng
82,2016.12.24,24-Dec-2016,2016,Unprovoked,AUSTRALIA,Western Australia,"Bundegi Sanctuary Zone, Ningaloo",Snorkeling,,Buttock bitten,N,Morning,a small shark,,
98,2016.07.20,20-Jul-2016,2016,Provoked,AUSTRALIA,Queensland,"20 k off The Spit, off the Gold Coast",Fishing,31.0,Laceration to left calf from hooked shark PROV...,N,After noon,"reef shark, 1m",,


In [15]:
# these rows are important to the findings, so will need to find location data for them

# Build URL using the Google Maps API

base_url = "https://maps.googleapis.com/maps/api/place/findplacefromtext/json"

params = {"key": g_key, "inputtype": "textquery", "fields":"geometry"}

# Sam's Creek Area, Point Samson WA, case number 2017.09.10.b
params['input'] = "Point Samson WA"
response = requests.get(base_url, params=params)
results = response.json()
shark_df.loc[shark_df['Case Number'] == "2017.09.10.b",'lat'] = results['candidates'][0]['geometry']['location']['lat']
shark_df.loc[shark_df['Case Number'] == "2017.09.10.b",'lng'] = results['candidates'][0]['geometry']['location']['lng']

# bundegi sanctuary zone, case number 2016.12.24
params['input'] = "bundegi sanctuary zone"
response = requests.get(base_url, params=params)
results = response.json()
shark_df.loc[shark_df['Case Number'] == "2016.12.24",'lat'] = results['candidates'][0]['geometry']['location']['lat']
shark_df.loc[shark_df['Case Number'] == "2016.12.24",'lng'] = results['candidates'][0]['geometry']['location']['lng']

# The Spit Gold Coast, Seaworld Drive, Main Beach QLD, case number 2016.07.20
params['input'] = "The Spit Gold Coast, Seaworld Drive, Main Beach QLD"
response = requests.get(base_url, params=params)
results = response.json()
shark_df.loc[shark_df['Case Number'] == "2016.07.20",'lat'] = results['candidates'][0]['geometry']['location']['lat']
shark_df.loc[shark_df['Case Number'] == "2016.07.20",'lng'] = results['candidates'][0]['geometry']['location']['lng']





In [18]:
# Create a map with markers
marker_locations = shark_df[['lat', 'lng']]

# Create a marker_layer 
fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations)
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(height='420px'))