In [31]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import matplotlib.font_manager
import seaborn as sns
%matplotlib inline
from matplotlib.pyplot import figure
import plotly.express as px
from scipy import stats
import plotly.graph_objects as go

In [None]:
# Mount Google Drive at /content/drive (this only works inside Google Colab).
# The dataset read later in this notebook lives under that mount point.
from google.colab import drive

drive.mount('/content/drive')

In [None]:
# Load the google.colab.data_table IPython extension
# (presumably so DataFrames displayed below render as Colab's interactive tables).
%load_ext google.colab.data_table

In [None]:
# Read the raw attack records from the mounted Drive folder.
# The file is decoded as latin-1.
shark_attack = pd.read_csv(
    "/content/drive/MyDrive/RawData/attacks.csv",
    encoding='latin-1',
)

# Preview all rows, limited to the columns from 'Date' through 'Species '
# (the raw column name ends with a space).
shark_attack.loc[:, 'Date':'Species ']

In [None]:
# Which columns does my dataset have?
shark_attack.columns

In [None]:
# Question 1: What are the most dangerous types of sharks to humans?
# Assumption: the species with the most recorded attacks are the most dangerous to people.
# Some labels in the result do not directly name a type of shark, but the chart
# still shows which labels account for the largest number of attacks.

# The raw header is 'Species ' (with a trailing space); give it a clean name.
shark_attack = shark_attack.rename(columns={'Species ': 'Species'})

# Count the rows per species label, order them from most to least frequent,
# and keep positions 1 through 24 of that ranking.
# NOTE(review): position 0 (the single most frequent label) is skipped on purpose
# by the slice — confirm that this entry is not a real species.
species_counts = shark_attack.groupby('Species')['Species'].count()
species_attack = species_counts.sort_values(ascending=False).iloc[1:25]

# Bar chart: one pink bar per species label, annotated with its count.
data = go.Bar(
    x=species_attack.index,
    y=species_attack.values,
    text=species_attack.values,
    marker_color='pink',
)

# The y axis is hidden; the counts are readable from the bar annotations instead.
layout = go.Layout(
    title='Shark Attack by Species',
    xaxis={'title': 'Species'},
    yaxis={'title': 'Attack Count', 'visible': False},
)

fig = go.Figure(data=data, layout=layout)
fig.show()

In [None]:
# Question 2: Are children more likely to be attacked by sharks?
# A boolean 'Child' column separates attacks on children (under 18 years old)
# from all other attacks, and a bar chart compares the two counts.
# The original conclusion was that attacks on children are far fewer than attacks
# on people aged 18 and over.

# Compare ages as numbers, not as text. Comparing against the string '18' orders
# values character by character, so for example '5' < '18' is False and young
# children would be counted as adults.
# Values that cannot be parsed as a number become NaN, and NaN < 18 is False,
# so rows without a usable age end up in the "not a child" group.
age_in_years = pd.to_numeric(shark_attack['Age'], errors='coerce')
shark_attack['Child'] = age_in_years < 18

# For visualization purposes, move the 'Child' column to position 11
shark_attack.insert(11, 'Child', shark_attack.pop('Child'))
# NOTE: this preview is not the last expression of the cell, so it is not rendered.
shark_attack.loc[0:100,'Date':'Child']

# Count attacks per group in a fixed order (False first, then True) so the bars
# always line up with the tick labels below, even if one group is empty or the
# frequency order changes.
attack_on_children = (
    shark_attack['Child']
    .value_counts()
    .reindex([False, True], fill_value=0)
)
attack_on_children.plot(kind='bar')

# Axis labels ('Aantal' is Dutch for 'count')
plt.xlabel('Number of attacks on Child')
plt.ylabel('Aantal')
plt.xticks([0, 1], ['False', 'True'])

plt.show()

In [None]:
# Question 3: Are shark attacks where sharks were provoked more or less dangerous?
# The line graph shows the total number of attacks per year together with the
# number of provoked and unprovoked attacks, so the difference is easy to see.
# The graph shows a big difference between unprovoked attacks and provoked attacks.
# Given that difference, I would assume that even if you don't provoke a shark,
# you still have a high chance of being attacked.
# Not to mention the fact that I wouldn't provoke a shark to attack anyway.

# To make the graph easier to read, only data from 1800 onwards is shown.
# Few attacks took place before this.
# NOTE(review): this rebinds `shark_attack`, so every cell executed after this one
# only sees rows with Year >= 1800 (rows without a Year are dropped as well).
shark_attack = shark_attack[shark_attack['Year'] >= 1800]

# Number of attacks per year (only years that have at least one attack)
year = shark_attack['Year'].value_counts().sort_index()

# Every year between the first and last plotted year, including years without
# any attack. Assumes 'Year' holds whole numbers — TODO confirm.
if year.empty:
    plotted_years = year.index
else:
    plotted_years = np.arange(year.index.min(), year.index.max() + 1)


def count_per_year(rows):
    """Count `rows` per 'Year', with 0 for plotted years that have no rows.

    Without the zero fill, years with no matching attack are simply absent and
    the line is drawn straight across the gap, which overstates those years.
    """
    return rows['Year'].value_counts().reindex(plotted_years, fill_value=0)


# Total, provoked and unprovoked attacks on the same year grid
year = year.reindex(plotted_years, fill_value=0)
provoked_attacks = count_per_year(shark_attack[shark_attack['Type'] == 'Provoked'])
unprovoked_attacks = count_per_year(shark_attack[shark_attack['Type'] == 'Unprovoked'])

# Make a line graph
plt.figure(figsize=(20, 6))

plt.plot(year.index, year.values, 'r-', label='Total Attacks')
plt.plot(provoked_attacks.index, provoked_attacks.values, 'y-', label='Provoked attacks')
plt.plot(unprovoked_attacks.index, unprovoked_attacks.values, 'g-', label='Unprovoked attacks')

plt.xlabel('Year')
plt.ylabel('Total Attacks')
plt.title('Attacks since 1800')

plt.legend()

plt.show()

In [None]:
# Question 4: Are certain activities more likely to result in a shark attack?
# To gain a clear insight, the activities recorded in the data are compared with
# each other by their number of attacks.
# Given the outcome, I wouldn't go fishing.

# NOTE(review): only rows whose Type is 'Provoked' are counted here — confirm
# that restricting the comparison to provoked attacks is intended.
provoked_only = shark_attack[shark_attack['Type'] == 'Provoked']

# Rows per activity, most frequent first; keep the ten most frequent activities.
activity_counts = provoked_only.groupby('Activity')['Activity'].count()
attack_activity = activity_counts.sort_values(ascending=False).head(10)

# Horizontal bar chart: one bar per activity, bar length = number of attacks.
fig = px.bar(
    attack_activity,
    x=attack_activity.values,
    y=attack_activity.index,
    orientation='h',
    labels={'index': '', 'x': 'Attack Count'},
    title='Which activity creates the greatest risk of shark attack',
)
fig.update_layout(height=500, width=800)
fig.show()