In [13]:
# Dependencies
import pandas as pd
from pathlib import Path


In [14]:
# set path
original_data = "Australian_Shark_Incident_Database_Public_Version.csv"

# read the file
# read the csv file into pandas and remove unnamed columns
original_df = pd.read_csv(original_data, encoding = "CP1252")
original_df

Unnamed: 0,UIN,Incident.month,Incident.year,Victim.injury,State,Location,Latitude,Longitude,Site.category,Site.category.comment,...,Spring.or.neap.tide,Tidal.cycle,Wind.condition,Weather.condition,Air.temperature.°C,Personal.protective.device,Deterrent.brand.and.type,Data.source,Reference,Unnamed: 59
0,1,1,1791,fatal,NSW,near sydney,-33.86666667,151.2,coastal,,...,,,,,,,,book,"shark&survl, whitley 1958, book ref 1793",
1,2,3,1803,injured,WA,"hamelin bay, faure island",-25.83333333,113.8833333,coastal,,...,,,,,,,,book,"balgridge,green,taylor,whitley 1940",
2,3,1,1807,injured,NSW,"cockle bay, sydney harbour",-33.86666667,151.2,estuary/harbour,,...,,,,,,,,media outlet,sydney gazette 18.1.1807,
3,4,1,1820,fatal,TAS,"sweetwater point, pitt water",-42.8,147.5333333,coastal,,...,,,,,,,,witness account,"shark&survl, c. black researcher",
4,5,1,1825,injured,NSW,"kirribili point, sydney harbour",-33.85,151.2166667,estuary/harbour,,...,,,,,,,,media outlet,maitland daily mercury 13.11.1899,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1223,1224,11,2023,injured,QLD,clack Island,-14.06549,144.26335,coastal,island,...,,,,,,,,,,
1224,1225,12,2023,injured,WA,wedge island,-30.81556,115.19234,island open ocean,,...,,,,,,,,,,
1225,1226,12,2023,injured,NSW,"old bar, taree",-31.9694,152.5863946,coastal,bay to open ocean,...,,,,,,,,,,
1226,1227,12,2023,fatal,SA,"ethel beach, yorke penninsula",-34.679356,137.684906.,coastal,bay to open ocean,...,,,,,,,,,,


In [15]:
original_df.dtypes

UIN                              int64
Incident.month                   int64
Incident.year                    int64
Victim.injury                   object
State                           object
Location                        object
Latitude                        object
Longitude                       object
Site.category                   object
Site.category.comment           object
Shark.common.name               object
Shark.scientific.name           object
Shark.identification.method     object
Shark.identification.source     object
Shark.length.m                 float64
Basis.for.length                object
Provoked/unprovoked             object
Provocative.act                 object
No.sharks                      float64
Victim.activity                 object
Fish.speared?                   object
Commercial.dive.activity        object
Object.of.bite                  object
Present.at.time.of.bite         object
Direction.first.strike          object
Shark.behaviour          

In [16]:
# keep relevant columns
original_df = original_df[['Incident.year',
                           'Victim.injury',
                           'State',
                           'Latitude',
                           'Longitude',
                           'Shark.common.name',
                           'Shark.scientific.name',
                          'Provoked/unprovoked',
                          'Site.category',
                          'Incident.month']]
original_df

Unnamed: 0,Incident.year,Victim.injury,State,Latitude,Longitude,Shark.common.name,Shark.scientific.name,Provoked/unprovoked,Site.category,Incident.month
0,1791,fatal,NSW,-33.86666667,151.2,white shark,Carcharodon carcharias,unprovoked,coastal,1
1,1803,injured,WA,-25.83333333,113.8833333,tiger shark,Galeocerdo cuvier,unprovoked,coastal,3
2,1807,injured,NSW,-33.86666667,151.2,bull shark,Carcharhinus leucas,unprovoked,estuary/harbour,1
3,1820,fatal,TAS,-42.8,147.5333333,,,provoked,coastal,1
4,1825,injured,NSW,-33.85,151.2166667,bull shark,Carcharhinus leucas,unprovoked,estuary/harbour,1
...,...,...,...,...,...,...,...,...,...,...
1223,2023,injured,QLD,-14.06549,144.26335,unknown,,provoked,coastal,11
1224,2023,injured,WA,-30.81556,115.19234,bronze whaler shark,Carcharhinus brachyurus,unprovoked,island open ocean,12
1225,2023,injured,NSW,-31.9694,152.5863946,unknown,,unprovoked,coastal,12
1226,2023,fatal,SA,-34.679356,137.684906.,white shark,Carcharodon carcharias,unprovoked,coastal,12


In [17]:
# replace the nulls with unidentified
clean_df = original_df.fillna('unknown')
# clean_df

In [18]:
clean_df.dtypes

Incident.year             int64
Victim.injury            object
State                    object
Latitude                 object
Longitude                object
Shark.common.name        object
Shark.scientific.name    object
Provoked/unprovoked      object
Site.category            object
Incident.month            int64
dtype: object

In [19]:
import numpy as np
# clean_df['Latitude'] = clean_df['Latitude'].astype(float)
# clean_df['Longitude'] = clean_df['Longitude'].astype(float)
# Remove trailing period and convert to float
clean_df['Longitude'] = clean_df['Longitude'].str.rstrip('.').astype(float)


In [20]:
clean_df.dtypes

Incident.year              int64
Victim.injury             object
State                     object
Latitude                  object
Longitude                float64
Shark.common.name         object
Shark.scientific.name     object
Provoked/unprovoked       object
Site.category             object
Incident.month             int64
dtype: object

In [21]:
print(clean_df.head())

   Incident.year Victim.injury State      Latitude   Longitude  \
0           1791         fatal   NSW  -33.86666667  151.200000   
1           1803       injured    WA  -25.83333333  113.883333   
2           1807       injured   NSW  -33.86666667  151.200000   
3           1820         fatal   TAS         -42.8  147.533333   
4           1825       injured   NSW        -33.85  151.216667   

  Shark.common.name    Shark.scientific.name Provoked/unprovoked  \
0       white shark  Carcharodon carcharias           unprovoked   
1       tiger shark       Galeocerdo cuvier           unprovoked   
2        bull shark      Carcharhinus leucas          unprovoked   
3           unknown                  unknown            provoked   
4        bull shark      Carcharhinus leucas          unprovoked   

     Site.category  Incident.month  
0          coastal               1  
1          coastal               3  
2  estuary/harbour               1  
3          coastal               1  
4  estuar

In [22]:
import folium

# Create a map centered on Australia
australia_map = folium.Map(location=[-25.2744, 133.7751], zoom_start=4)

# Add markers for each incident
for index, row in clean_df.iterrows():
    popup_text = f"Incident Year: {row['Incident.year']}<br>State: {row['State']}"
    folium.Marker(location=[row['Latitude'], row['Longitude']], popup=popup_text).add_to(australia_map)

# Display the map
australia_map


In [23]:
import folium

# Create a map centered on Australia
australia_map = folium.Map(location=[-25.2744, 133.7751], zoom_start=4)

# Define a function to assign color based on incident year
def get_color(year):
    if year <= 1800:
        return 'red'
    elif 1800 < year <= 1850:
        return 'blue'
    else:
        return 'green'

# Add circle markers for each incident
for index, row in clean_df.iterrows():
    popup_text = f"Incident Year: {row['Incident.year']}<br>State: {row['State']}"
    folium.CircleMarker(location=[row['Latitude'], row['Longitude']], radius=5, 
                        color=get_color(row['Incident.year']), fill=True, fill_color=get_color(row['Incident.year']),
                        popup=popup_text).add_to(australia_map)

# Display the map
australia_map


In [24]:
import folium
from folium.plugins import MarkerCluster

# Create a map centered on Australia
australia_map = folium.Map(location=[-25.2744, 133.7751], zoom_start=4)

# Create marker cluster
marker_cluster = MarkerCluster().add_to(australia_map)

# Add markers for each incident
for index, row in clean_df.iterrows():
    # Define popup text with detailed information
    popup_text = f"Incident Year: {row['Incident.year']}<br>State: {row['State']}<br>Shark Species: {row['Shark.common.name']}<br>Victim Injury: {row['Victim.injury']}"
    # Add marker with custom icon based on shark species
    icon = folium.Icon(color='blue' if row['Shark.common.name'] == 'white shark' else 'green')
    folium.Marker(location=[row['Latitude'], row['Longitude']], popup=popup_text, icon=icon).add_to(marker_cluster)

# Add layer control for marker clusters
folium.LayerControl().add_to(australia_map)

# Display the map
australia_map
