In [38]:
import gmaps
import pandas as pd

# Google developer API key
from config import g_key

# Access maps with unique API key
gmaps.configure(api_key=g_key)

In [39]:
# import file with data
filename = "shark_geo.csv"
shark_df = pd.read_csv(filename)

In [40]:
shark_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 161 entries, 0 to 160
Data columns (total 23 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Unnamed: 0      161 non-null    int64  
 1   Case Number     161 non-null    object 
 2   Date            161 non-null    object 
 3   Year            161 non-null    float64
 4   Type            161 non-null    object 
 5   Country         161 non-null    object 
 6   Area            161 non-null    object 
 7   Location        161 non-null    object 
 8   Activity        155 non-null    object 
 9   Sex             156 non-null    object 
 10  Age             116 non-null    object 
 11  Injury          160 non-null    object 
 12  Fatal (Y/N)     154 non-null    object 
 13  Time            136 non-null    object 
 14  Species         104 non-null    object 
 15  pdf             161 non-null    object 
 16  href formula    161 non-null    object 
 17  href            161 non-null    obj

In [41]:
# remove 2021 data
shark_df = shark_df.loc[shark_df['Year']<=2020]
# remove unwanted columns
shark_df = shark_df.drop([ 'pdf', 'href formula','href', 'Case Number.1', 'Case Number.2', 'original order'], axis = 1)

In [42]:
# clean up data for fatal
fatal_type = shark_df['Fatal (Y/N)'].unique()
print(fatal_type)
fatal_count = shark_df['Fatal (Y/N)'].value_counts(dropna = False)
print(fatal_count)
# examine anomalies
shark_df.loc[shark_df['Fatal (Y/N)']=="F"]
# this one should be Y
shark_df['Fatal (Y/N)'] = shark_df['Fatal (Y/N)'].replace({'F':'Y'})
# look at nan
unknown = shark_df.loc[shark_df['Fatal (Y/N)'].isna()]
unknown
# these all look like no injury or minor injury, so change fatal to N
shark_df['Fatal (Y/N)'] = shark_df['Fatal (Y/N)'].fillna('N')
# check values
fatal_count = shark_df['Fatal (Y/N)'].value_counts(dropna = False)
print(fatal_count)
fatal_type = shark_df['Fatal (Y/N)'].unique()
print(fatal_type)



['N' 'Y' nan 'F']
N      124
Y       13
NaN      7
F        1
Name: Fatal (Y/N), dtype: int64
N    131
Y     14
Name: Fatal (Y/N), dtype: int64
['N' 'Y']


In [45]:
# put a weighting on fatality, 6 for fatal, 1 for not fatal
shark_df['fatal_weight']=shark_df['Fatal (Y/N)'].apply(lambda x: 6 if x == 'Y' else 1)
shark_df['fatal_weight']

16     1
17     1
18     6
19     1
20     1
      ..
156    1
157    1
158    1
159    1
160    1
Name: fatal_weight, Length: 145, dtype: int64

In [44]:
# Create a map with markers
marker_locations = shark_df[['lat', 'lng']]

# Create a marker_layer 
fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations)
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(height='420px'))