In [1]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt

import plotly.graph_objects as go

import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import plotly.express as px
%matplotlib inline

# Enable Plotly rendering inside the Jupyter notebook.
# NOTE(review): download_plotlyjs, plot and iplot are imported here but are not
# used by any cell visible in this notebook -- confirm before pruning.
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# NOTE(review): connected=True presumably makes the notebook load plotly.js from
# a CDN instead of embedding it -- confirm against the plotly.offline docs.
init_notebook_mode(connected=True)
# Switch cufflinks (imported as cf) to offline mode.
# NOTE(review): presumably so figures render locally without a Chart Studio
# account -- confirm against the cufflinks docs.
cf.go_offline()

import datapane as dp


ModuleNotFoundError: No module named 'numpy'

In [2]:
!pip3 install numpy pandas matplotlib seaborn plotly cufflinks statsmodels datapane

Collecting matplotlib
  Downloading matplotlib-3.9.3-cp310-cp310-macosx_11_0_arm64.whl (7.8 MB)
     7.8/7.8 MB 10.5 MB/s eta 0:00:00
Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
     294.9/294.9 kB 11.1 MB/s eta 0:00:00
Collecting plotly
  Downloading plotly-5.24.1-py3-none-any.whl (19.1 MB)
     19.1/19.1 MB 11.0 MB/s eta 0:00:00
Collecting cufflinks
  Downloading cufflinks-0.17.3.tar.gz (81 kB)
     81.7/81.7 kB 4.9 MB/s eta 0:00:00
  Preparing metadata (setup.py) ... done
Collecting statsmodels
  Downloading statsmodels-0.14.4-cp310-cp310-macosx_11_0_arm64.whl (9.9 M

In [None]:
import streamlit as st

In [None]:
# Location of the Australian Shark-Incident Database workbook.
# The default is the original author-local path; set the ASID_XLSX_PATH
# environment variable to point at the file on any other machine.
import os

ASID_XLSX_PATH = os.environ.get(
    "ASID_XLSX_PATH",
    "/Users/paniket/TU_Eindhoven/2_Study/Q2_JBI100_Visualisation_4/4_Code/JBI100_Visualisation/JBI100_Data_2024_2025/Australian_Shark_Incidents/Australian Shark-Incident Database Public Version.xlsx",
)

# Load the "ASID" sheet; every later cell works from this frame.
df = pd.read_excel(ASID_XLSX_PATH, sheet_name="ASID")

In [None]:
# Preview the first three rows of the loaded sheet.
df.iloc[:3]

In [None]:
# Print pandas' built-in summary of the loaded frame.
df.info()

In [None]:
# Streamlit title and sidebar filters
st.title("Interactive Shark Attack Data Explorer")
st.sidebar.header("Filter Options")

# Filter by year.
# The slider bounds come from the data, so the preferred default selection
# (1900, 2023) is clamped into those bounds: Streamlit rejects a default value
# that lies outside [min_value, max_value], which the hard-coded tuple would do
# whenever the data starts after 1900 or ends before 2023.
incident_years = df["Incident.year"]
min_year = int(incident_years.min())
max_year = int(incident_years.max())
default_years = (
    min(max(1900, min_year), max_year),
    min(max(2023, min_year), max_year),
)
year_range = st.sidebar.slider(
    "Select Year Range",
    min_year,
    max_year,
    default_years,
)
# between() is inclusive on both ends, matching the >= / <= pair it replaces.
filtered_data = df[incident_years.between(year_range[0], year_range[1])]

# Filter by state; every state present in the data is selected by default.
all_states = df["State"].unique()
state_filter = st.sidebar.multiselect(
    "Select State(s)",
    options=all_states,
    default=all_states,
)
# .copy() makes filtered_data an independent frame, so later cells can overwrite
# its columns without pandas warning about writing to a slice of df.
filtered_data = filtered_data[filtered_data["State"].isin(state_filter)].copy()

In [None]:
# Count incidents per year, then plot the yearly totals as a line chart.
incidents_per_year = (
    filtered_data
    .groupby("Incident.year")
    .size()
    .reset_index(name="Count")
)
fig1 = px.line(
    incidents_per_year,
    x="Incident.year",
    y="Count",
    title="Shark Attacks Over Time",
    labels={"Incident.year": "Year", "Count": "Number of Incidents"},
)
st.plotly_chart(fig1)


In [None]:
# Histogram of incidents per site category, coloured by injury severity.
severity_by_site_labels = {
    "Site.category": "Site Category",
    "Injury.severity": "Severity",
}
fig2 = px.histogram(
    filtered_data,
    x="Site.category",
    color="Injury.severity",
    title="Attack Severity by Site Category",
    labels=severity_by_site_labels,
)
st.plotly_chart(fig2)


In [None]:
# Inspect the shark-length column before cleaning it:
# first the number of missing entries, then the distinct raw values.
shark_length_raw = filtered_data["Shark.length.m"]
print(shark_length_raw.isna().sum())
print(shark_length_raw.unique())


In [None]:
# Coerce shark length to numbers: values that cannot be parsed become NaN, and
# NaN is then replaced with 1 so every row has a usable marker size.
shark_length_m = pd.to_numeric(
    filtered_data["Shark.length.m"], errors="coerce"
).fillna(1)

# assign() returns a new frame instead of writing into filtered_data, which was
# produced by boolean indexing on df; assigning a column in place on such a
# slice triggers pandas' SettingWithCopyWarning.
filtered_data = filtered_data.assign(**{"Shark.length.m": shark_length_m})

# Alternative to the fill value: drop the rows whose length is unknown, i.e.
# call dropna(subset=["Shark.length.m"]) on the coerced column instead of fillna(1).


In [None]:
# Show the last of the distinct shark-length values as a quick spot check.
unique_lengths = filtered_data["Shark.length.m"].unique()
unique_lengths[-1]

In [None]:
# Map of incidents: one marker per row, coloured by injury severity and sized
# by the shark-length column.
geo_axis_labels = {"Latitude": "Latitude", "Longitude": "Longitude"}
fig3 = px.scatter_geo(
    filtered_data,
    lat="Latitude",
    lon="Longitude",
    color="Injury.severity",
    size="Shark.length.m",
    title="Geographic Distribution of Shark Attacks",
    labels=geo_axis_labels,
    projection="natural earth",
)

# Rendered with Plotly's own show() rather than st.plotly_chart.
fig3.show()

In [None]:
# Bar chart of victim activity, coloured by whether the incident was provoked.
activity_labels = {
    "Victim.activity": "Activity",
    "Provoked/unprovoked": "Provocation Type",
}
fig4 = px.bar(
    filtered_data,
    x="Victim.activity",
    color="Provoked/unprovoked",
    title="Victim Activities and Provocation",
    labels=activity_labels,
)
st.plotly_chart(fig4)


In [None]:
# Bar chart of the shark species involved, coloured by injury severity.
species_labels = {
    "Shark.common.name": "Shark Species",
    "Injury.severity": "Severity",
}
fig5 = px.bar(
    filtered_data,
    x="Shark.common.name",
    color="Injury.severity",
    title="Shark Species Involved in Incidents",
    labels=species_labels,
)
st.plotly_chart(fig5)


In [None]:
# Graph Selection
graph_choice = st.sidebar.selectbox(
    "Select a Graph to Display",
    options=["Shark Attacks Over Time", "Attack Severity by Site Category", 
             "Geographic Distribution", "Victim Activity and Provocation", "Shark Species Involved"]
)

# Display the chosen graph
if graph_choice == "Shark Attacks Over Time":
    st.plotly_chart(fig1)
elif graph_choice == "Attack Severity by Site Category":
    st.plotly_chart(fig2)
elif graph_choice == "Geographic Distribution":
    st.plotly_chart(fig3)
elif graph_choice == "Victim Activity and Provocation":
    st.plotly_chart(fig4)
elif graph_choice == "Shark Species Involved":
    st.plotly_chart(fig5)


In [None]:
# `streamlit run` is a shell command, not Python: left bare in a code cell it is
# a SyntaxError, and launched from the notebook it would block the kernel until
# the server is stopped. Start the app from a terminal instead, once the code
# above has been saved as shark_attacks_app.py:
#
#     streamlit run shark_attacks_app.py
