In [None]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
file_path = '/mnt/data/combined_attacks.csv'
data = pd.read_csv(file_path)

# Data Cleaning
# Drop rows that are missing any of the required fields. The explicit .copy()
# makes data_cleaned an independent frame, so the column assignments below
# modify it directly instead of triggering pandas' SettingWithCopyWarning.
required_columns = ['year', 'animal_type', 'country', 'location', 'activity']
data_cleaned = data.dropna(subset=required_columns).copy()

# Normalise the case of the categorical text columns.
data_cleaned['attack_type'] = data_cleaned['attack_type'].str.lower()
# Strip surrounding whitespace before upper-casing so that values such as
# ' n' and 'N' end up in the same group when counted later.
data_cleaned['fatal_y_n'] = data_cleaned['fatal_y_n'].str.strip().str.upper()
# Unparseable dates become NaT rather than raising.
data_cleaned['date'] = pd.to_datetime(data_cleaned['date'], errors='coerce')

# Analysis and Visualizations

# Comparison of attacks over the years
# One bar per (year, animal_type) pair, counting rows of data_cleaned.
fig, ax = plt.subplots(figsize=(14, 7))
sns.countplot(
    data=data_cleaned,
    x='year',
    hue='animal_type',
    palette='viridis',
    ax=ax,
)
ax.set_title('Number of Shark and Bear Attacks Over the Years')
# Rotate the year labels so they do not overlap along the x-axis.
plt.xticks(rotation=90)
plt.show()

# Comparison of fatality rates
# Count rows per (animal_type, fatal_y_n) and pivot the outcomes into columns;
# combinations that never occur are filled with 0.
fatality_rates = (
    data_cleaned.groupby(['animal_type', 'fatal_y_n'])
    .size()
    .unstack(fill_value=0)
)
# Make sure both outcome columns exist: if the data contains no 'Y' (or no
# 'N') rows at all, indexing that column below would raise KeyError.
for outcome in ('Y', 'N'):
    if outcome not in fatality_rates.columns:
        fatality_rates[outcome] = 0

# Fatality rate as a percentage of rows whose outcome is 'Y' or 'N'; any other
# fatal_y_n values are left out of the denominator. An animal type with no
# 'Y'/'N' rows yields NaN here and is simply not drawn.
known_outcomes = fatality_rates['Y'] + fatality_rates['N']
fatality_rates['Fatal'] = fatality_rates['Y'] / known_outcomes * 100

fatality_rates[['Fatal']].plot(kind='bar', figsize=(10, 6), colormap='viridis')
plt.title('Fatality Rates of Shark and Bear Attacks')
plt.ylabel('Fatality Rate (%)')
plt.show()

# Geographic distribution of attacks
# Each row of data_cleaned is drawn at its (longitude, latitude); animal_type
# selects both the colour and the marker shape.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=data_cleaned,
    x='longitude',
    y='latitude',
    hue='animal_type',
    style='animal_type',
    palette='viridis',
    s=100,
    alpha=0.7,
    ax=ax,
)
ax.set_title('Geographic Distribution of Shark and Bear Attacks')
plt.show()

# Activities during attacks
# Rank activities from most to least frequent so the bars are drawn in that order.
activity_order = data_cleaned['activity'].value_counts().index
fig, ax = plt.subplots(figsize=(14, 7))
sns.countplot(
    data=data_cleaned,
    y='activity',
    hue='animal_type',
    order=activity_order,
    palette='viridis',
    ax=ax,
)
ax.set_title('Activities During Shark and Bear Attacks')
plt.show()

# Distribution of attacks by country
# value_counts() sorts descending, so top_countries holds the ten most
# frequent countries ranked from most to fewest rows.
top_countries = data_cleaned['country'].value_counts().head(10).index
top_country_rows = data_cleaned[data_cleaned['country'].isin(top_countries)]

plt.figure(figsize=(14, 7))
# Pass the ranking as `order` so the bars are drawn most-to-least frequent,
# matching how the activities chart is ordered; without it seaborn picks the
# category order itself.
sns.countplot(
    data=top_country_rows,
    y='country',
    hue='animal_type',
    order=top_countries,
    palette='viridis',
)
plt.title('Top 10 Countries by Number of Shark and Bear Attacks')
plt.show()
