In [79]:
# Dependencies
import pandas as pd
import numpy as np
from pathlib import Path
from dash import Dash, html, dash_table, dcc, Input, Output, State
import plotly.express as px
import folium
from folium.plugins import MarkerCluster
import dash_leaflet as dl


ModuleNotFoundError: No module named 'dash_leaflet'

In [80]:
# set path
original_data = "Australian_Shark_Incident_Database_Public_Version.csv"

# read the file
original_df = pd.read_csv(original_data, encoding = "CP1252")
original_df

Unnamed: 0,UIN,Incident.month,Incident.year,Victim.injury,State,Location,Latitude,Longitude,Site.category,Site.category.comment,...,Spring.or.neap.tide,Tidal.cycle,Wind.condition,Weather.condition,Air.temperature.°C,Personal.protective.device,Deterrent.brand.and.type,Data.source,Reference,Unnamed: 59
0,1,1,1791,fatal,NSW,near sydney,-33.86666667,151.2,coastal,,...,,,,,,,,book,"shark&survl, whitley 1958, book ref 1793",
1,2,3,1803,injured,WA,"hamelin bay, faure island",-25.83333333,113.8833333,coastal,,...,,,,,,,,book,"balgridge,green,taylor,whitley 1940",
2,3,1,1807,injured,NSW,"cockle bay, sydney harbour",-33.86666667,151.2,estuary/harbour,,...,,,,,,,,media outlet,sydney gazette 18.1.1807,
3,4,1,1820,fatal,TAS,"sweetwater point, pitt water",-42.8,147.5333333,coastal,,...,,,,,,,,witness account,"shark&survl, c. black researcher",
4,5,1,1825,injured,NSW,"kirribili point, sydney harbour",-33.85,151.2166667,estuary/harbour,,...,,,,,,,,media outlet,maitland daily mercury 13.11.1899,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1223,1224,11,2023,injured,QLD,clack Island,-14.06549,144.26335,coastal,island,...,,,,,,,,,,
1224,1225,12,2023,injured,WA,wedge island,-30.81556,115.19234,island open ocean,,...,,,,,,,,,,
1225,1226,12,2023,injured,NSW,"old bar, taree",-31.9694,152.5863946,coastal,bay to open ocean,...,,,,,,,,,,
1226,1227,12,2023,fatal,SA,"ethel beach, yorke penninsula",-34.679356,137.684906.,coastal,bay to open ocean,...,,,,,,,,,,


In [81]:
# keep relevant columns
original_df = original_df[['Incident.year',
                           'Victim.injury',
                           'State',
                           'Latitude',
                           'Longitude',
                           'Shark.common.name',
                           'Shark.scientific.name',
                          'Provoked/unprovoked',
                          'Site.category',
                          'Incident.month']]
original_df

Unnamed: 0,Incident.year,Victim.injury,State,Latitude,Longitude,Shark.common.name,Shark.scientific.name,Provoked/unprovoked,Site.category,Incident.month
0,1791,fatal,NSW,-33.86666667,151.2,white shark,Carcharodon carcharias,unprovoked,coastal,1
1,1803,injured,WA,-25.83333333,113.8833333,tiger shark,Galeocerdo cuvier,unprovoked,coastal,3
2,1807,injured,NSW,-33.86666667,151.2,bull shark,Carcharhinus leucas,unprovoked,estuary/harbour,1
3,1820,fatal,TAS,-42.8,147.5333333,,,provoked,coastal,1
4,1825,injured,NSW,-33.85,151.2166667,bull shark,Carcharhinus leucas,unprovoked,estuary/harbour,1
...,...,...,...,...,...,...,...,...,...,...
1223,2023,injured,QLD,-14.06549,144.26335,unknown,,provoked,coastal,11
1224,2023,injured,WA,-30.81556,115.19234,bronze whaler shark,Carcharhinus brachyurus,unprovoked,island open ocean,12
1225,2023,injured,NSW,-31.9694,152.5863946,unknown,,unprovoked,coastal,12
1226,2023,fatal,SA,-34.679356,137.684906.,white shark,Carcharodon carcharias,unprovoked,coastal,12


In [82]:
# replace the nulls with unidentified
clean_df = original_df.fillna('unknown')

In [83]:
clean_df.dtypes

Incident.year             int64
Victim.injury            object
State                    object
Latitude                 object
Longitude                object
Shark.common.name        object
Shark.scientific.name    object
Provoked/unprovoked      object
Site.category            object
Incident.month            int64
dtype: object

In [84]:
clean_df['Longitude'] = clean_df['Longitude'].astype(str).str.rstrip('.').astype(float)
clean_df.dtypes

Incident.year              int64
Victim.injury             object
State                     object
Latitude                  object
Longitude                float64
Shark.common.name         object
Shark.scientific.name     object
Provoked/unprovoked       object
Site.category             object
Incident.month             int64
dtype: object

In [85]:
yearly_df = clean_df.loc[:,["Incident.year", "State"]]
yearly_df

Unnamed: 0,Incident.year,State
0,1791,NSW
1,1803,WA
2,1807,NSW
3,1820,TAS
4,1825,NSW
...,...,...
1223,2023,QLD
1224,2023,WA
1225,2023,NSW
1226,2023,SA


In [86]:
states = yearly_df['State'].unique()
states

array(['NSW', 'WA', 'TAS', 'SA', 'QLD', 'VIC', 'NT'], dtype=object)

In [87]:
yearly_count = yearly_df['Incident.year'].value_counts()
yearly_count

Incident.year
2020    37
2015    33
2016    31
2018    30
2009    28
        ..
1904     1
1909     1
1803     1
1943     1
2024     1
Name: count, Length: 178, dtype: int64

In [88]:
yearly_df = yearly_df[['Incident.year', 'State']].value_counts().reset_index(name='Count')
yearly_df

Unnamed: 0,Incident.year,State,Count
0,2015,NSW,21
1,2009,NSW,19
2,2020,NSW,18
3,2016,NSW,16
4,1929,QLD,13
...,...,...,...
496,1956,WA,1
497,1955,QLD,1
498,1954,SA,1
499,1953,QLD,1


In [89]:
yearly_df.loc[yearly_df['State'] == 'NSW'].sort_values(by=['Incident.year'])

Unnamed: 0,Incident.year,State,Count
321,1791,NSW,1
387,1807,NSW,1
385,1825,NSW,1
233,1832,NSW,2
382,1837,NSW,1
...,...,...,...
13,2019,NSW,9
2,2020,NSW,18
21,2021,NSW,8
10,2022,NSW,11


In [90]:
yearly_grouped = yearly_df.groupby(['Incident.year']).sum().reset_index()
yearly_grouped

Unnamed: 0,Incident.year,State,Count
0,1791,NSW,1
1,1803,WA,1
2,1807,NSW,1
3,1820,TAS,1
4,1825,NSW,1
...,...,...,...
173,2020,NSWWAQLDVICTASSA,37
174,2021,WANSWVICQLDSA,25
175,2022,NSWWAQLDSATASVIC,22
176,2023,WASANSWQLD,17


In [91]:
# select my chosen columns focusing on shark types based on state
reduced_df = clean_df.loc[:, ["Shark.common.name", "State"]]
reduced_df

Unnamed: 0,Shark.common.name,State
0,white shark,NSW
1,tiger shark,WA
2,bull shark,NSW
3,unknown,TAS
4,bull shark,NSW
...,...,...
1223,unknown,QLD
1224,bronze whaler shark,WA
1225,unknown,NSW
1226,white shark,SA


In [92]:
# an overall total count by shark common name
total_name = reduced_df['Shark.common.name'].value_counts()
total_name

Shark.common.name
white shark                  372
tiger shark                  229
wobbegong                    203
bull shark                   201
whaler shark                  75
unknown                       61
bronze whaler shark           29
grey nurse shark               9
grey reef shark                8
whitetip reef shark            7
lemon shark                    6
dusky shark                    5
hammerhead shark               5
blacktip reef shark            4
galapagos shark                3
shortfin mako shark            2
broadnose sevengill shark      2
dogfish                        1
school shark                   1
sevengill shark                1
silvertip shark                1
blind shark                    1
port jackson shark             1
seven gill shark               1
Name: count, dtype: int64

In [93]:
# count by shark name for each state
totals_df = reduced_df[['State', 'Shark.common.name']].value_counts().reset_index(name='count')
totals_df.head(20)

Unnamed: 0,State,Shark.common.name,count
0,QLD,tiger shark,179
1,NSW,white shark,155
2,NSW,wobbegong,107
3,NSW,bull shark,101
4,QLD,bull shark,79
5,WA,white shark,75
6,SA,white shark,63
7,WA,wobbegong,52
8,VIC,white shark,46
9,QLD,whaler shark,28


In [94]:
totals_df.groupby(['State']).value_counts().head(20)

State  Shark.common.name    count
NSW    blind shark          1        1
       bronze whaler shark  11       1
       bull shark           101      1
       dogfish              1        1
       dusky shark          4        1
       galapagos shark      3        1
       grey nurse shark     7        1
       tiger shark          15       1
       unknown              15       1
       whaler shark         26       1
       white shark          155      1
       wobbegong            107      1
NT     whaler shark         4        1
       bull shark           3        1
       tiger shark          9        1
       unknown              1        1
       whitetip reef shark  1        1
       wobbegong            1        1
QLD    wobbegong            22       1
       blacktip reef shark  2        1
Name: count, dtype: int64

In [95]:
dff = totals_df
dff

Unnamed: 0,State,Shark.common.name,count
0,QLD,tiger shark,179
1,NSW,white shark,155
2,NSW,wobbegong,107
3,NSW,bull shark,101
4,QLD,bull shark,79
...,...,...,...
58,NT,wobbegong,1
59,NT,whitetip reef shark,1
60,NT,unknown,1
61,NSW,dogfish,1


In [96]:
# Get unique states and shark common names for dropdown options
states = dff['State'].unique()
names = dff['Shark.common.name'].unique()

In [97]:
# Initialize the app
app = Dash(__name__)

In [98]:
# Define app layout
dropdown = dcc.Dropdown(id='dropdown', options=[{'label': state, 'value': state} for state in yearly_df['State'].unique()])
line_graph = dcc.Graph(id='line_graph')

# Define the yearly dropdown options for the map
map_dropdown_options = [{'label': str(year), 'value': year} for year in sorted(clean_df['Incident.year'].unique())]

app.layout = html.Div([
    html.H1("Shark Attack Data"),
    html.H4("Select State"),
    html.Div([
        html.H1(""),
        # Dropdown menu for selecting states
        dcc.Dropdown(
            id='state-dropdown',
            options=[{'label': state, 'value': state} for state in states],
            value='NSW',  # Default value
            multi=False,  # Allow single selection
            style={'width': '50%'}
        ),
        # Graph component for displaying the bar chart
        dcc.Graph(id='bar-chart')
    ]),
    html.Hr(),
    html.H1("Yearly Shark Attacks"),
    html.H4("Select State"),
    dropdown,
    line_graph,
    # Yearly dropdown menu for the map
    html.H1("Interactive Cluster Map"),
    html.H4("Select year"),
    dcc.Dropdown(
        id='dropdown-year-map',
        options=map_dropdown_options,
        value=map_dropdown_options[0]['value'],  # Default value is the first year
        style={'width': '50%'}
    ),
    html.Div(id='map-container')
])

# Define callback to update the bar chart based on dropdown selection
@app.callback(
    Output('bar-chart', 'figure'),
    [Input('state-dropdown', 'value')]
)
def update_bar_chart(selected_state):
    filtered_dff = dff[dff['State'] == selected_state]
    fig = px.bar(filtered_dff, x='Shark.common.name', y='count', color='Shark.common.name',
                 labels={'count': 'Number of Attacks', 'Shark.common.name': 'Shark Common Name'})
    fig.update_layout(title=f'Shark Attack Counts by Name in {selected_state}')
    return fig

# Define callback to update the line graph based on dropdown selection
@app.callback(
    Output('line_graph', 'figure'),
    [Input('dropdown', 'value')]
)
def update_line_chart(selected_state):
    if selected_state is None:
        filtered_df = yearly_grouped
        x_axis = filtered_df['Incident.year']
    else:
        filtered_df = yearly_df.loc[yearly_df['State'] == selected_state].sort_values(by=['Incident.year'])
        x_axis = 'Incident.year'

    new_graph = px.line(data_frame=filtered_df, x=x_axis, y='Count')
    new_graph.update_layout(title=f'Yearly Shark Attacks in {selected_state}' if selected_state else 'Yearly Shark Attacks')
    return new_graph

# Define callback to update the map based on selected year
@app.callback(
    Output('map-container', 'children'),
    [Input('dropdown-year-map', 'value')]
)
def update_map(selected_year):
    # Filter data based on selected year
    filtered_df = clean_df[clean_df['Incident.year'] == selected_year]
    
    # Create a map centered on Australia
    australia_map = folium.Map(location=[-25.2744, 133.7751], zoom_start=4)

    # Create marker cluster
    marker_cluster = MarkerCluster().add_to(australia_map)

    # Add markers for each incident
    for index, row in filtered_df.iterrows():
        # Define popup text with detailed information
        popup_text = f"Incident Year: {row['Incident.year']}<br>State: {row['State']}<br>Shark Species: {row['Shark.common.name']}<br>Victim Injury: {row['Victim.injury']}"
        # Add marker with custom icon based on shark species
        icon = folium.Icon(color='blue' if row['Shark.common.name'] == 'white shark' else 'green')
        folium.Marker(location=[row['Latitude'], row['Longitude']], popup=popup_text, icon=icon).add_to(marker_cluster)

    # Convert folium map to HTML
    map_html = australia_map._repr_html_()
    
    return html.Iframe(srcDoc=map_html, style={"width": "100%", "height": "600px"})
    
# run the app
if __name__ == "__main__":
    app.run_server(debug=True, port=229)
