<h1><center>🌊 Tsunami Data Interactive EDA </center></h1>

<img src="https://images.unsplash.com/photo-1590309282186-fdcaa7b907f9?ixlib=rb-1.2.1&ixid=eyJhcHBfaWQiOjEyMDd9&auto=format&fit=crop&w=1500&q=80">

## 1. Introduction

#### This notebook is my attempt to visualize the [Tsunami Dataset](https://www.kaggle.com/andrewmvd/tsunami-dataset) compiled by [Larxel](https://www.kaggle.com/andrewmvd). This dataset contains some details of all the tsunamis in recorded history.

### #1.1 Libraries 📚⬇

In [None]:
import os, math
import numpy as np
import pandas as pd
import geopandas as gpd

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")

import folium
from folium import Choropleth, Circle, Marker
from folium.plugins import HeatMap, MarkerCluster

### #1.2  Read CSV Data 📝

In [None]:
# Load the tsunami records from the Kaggle input directory and preview them.
df = pd.read_csv(filepath_or_buffer="../input/tsunami-dataset/tsunami_dataset.csv")
df

### #1.3 Data Pre-processing ⚙️

In [None]:
# Keep only events that can be placed on a map (both coordinates present).
df = df.dropna(subset=['LATITUDE', 'LONGITUDE'])

# Filter dataframe from the 19th century onwards. The explicit .copy() makes
# `df` an independent frame, so the column assignments below never write into
# a slice of the previous frame (avoids SettingWithCopyWarning).
df = df[df.YEAR >= 1800].copy()

# Impute missing tsunami intensities with the median. The result is assigned
# back instead of calling fillna(inplace=True) on the selected column: the
# chained in-place form is deprecated and has no effect on `df` when pandas
# Copy-on-Write is active.
df['TS_INTENSITY'] = df['TS_INTENSITY'].fillna(df['TS_INTENSITY'].median())

# Shift intensities so the smallest value maps to 0.5, giving strictly
# positive values that can be used to vary folium circle sizes.
df['TS_INTENSITY_PLOT'] = df['TS_INTENSITY'] - df['TS_INTENSITY'].min() + 0.5
df

## 2. Visualization 📉

### #2.1 Plotting points

### Plotting using `folium.Marker`

In [None]:
# One droplet marker per tsunami event on an OpenStreetMap base layer.
map_ = folium.Map(tiles='openstreetmap', location=[0.78, 113.92], zoom_start=4.8)

for lat, lon in zip(df['LATITUDE'], df['LONGITUDE']):
    droplet = folium.Icon(color='blue', icon='tint', prefix='fa')
    Marker([lat, lon], icon=droplet).add_to(map_)

map_

### Plotting using `folium.plugins.MarkerCluster` to help declutter the map

In [None]:
# World view; nearby markers are grouped into clusters that expand on zoom.
map_ = folium.Map(tiles='cartodbpositron', location=[0.0, 0.0], zoom_start=1.6)

mc = MarkerCluster()
for lat, lon in zip(df['LATITUDE'], df['LONGITUDE']):
    Marker([lat, lon]).add_to(mc)

mc.add_to(map_)
map_

### Bubble map

In [None]:
# Fixed-size green circle (radius 20000) drawn at every event location.
map_ = folium.Map(tiles='openstreetmap', location=[0.78, 113.92], zoom_start=4.8)

for lat, lon in zip(df['LATITUDE'], df['LONGITUDE']):
    bubble = Circle(location=[lat, lon], radius=20000, color='green')
    bubble.add_to(map_)

map_

### Bubble map (low / high tsunami intensity)

In [None]:
# Circle radius scales with the shifted (strictly positive) tsunami intensity.
map_ = folium.Map(tiles='openstreetmap', location=[0, 0], zoom_start=2.4)

event_columns = zip(df['LATITUDE'], df['LONGITUDE'], df['TS_INTENSITY_PLOT'])
for lat, lon, intensity in event_columns:
    bubble = Circle(
        location=[lat, lon],
        radius=intensity*10000,
        color='yellow',
        fill=True,
    )
    bubble.add_to(map_)

map_

### Heatmap

In [None]:
# Density heatmap built from the event coordinates on a light base layer.
map_ = folium.Map(tiles='cartodbpositron', location=[0.0, 0.0], zoom_start=1.5)

map_.add_child(HeatMap(radius=10, data=df[['LATITUDE', 'LONGITUDE']]))

# Render the finished map as the cell output
map_

### Country vs. # Tsunamis

In [None]:
# Countries ordered by number of recorded tsunamis (most frequent first);
# tick labels are shown in title case.
country_counts = df["COUNTRY"].value_counts()
countries = [name.title() for name in country_counts.index]

fig, ax = plt.subplots(figsize=(12, 24))
sns.countplot(data=df, y="COUNTRY", order=country_counts.index, ax=ax)

# Log scale keeps countries with few events visible next to the largest bars.
ax.set_xscale("log")
ax.set_title("Country vs. # Tsunamis (Log Scale)", fontsize=18)
ax.set_xlabel("Number of Tsunamis (Log Scale)", fontsize=16)
ax.set_ylabel("Country", fontsize=16)
ax.tick_params(labelsize=12)
ax.set_yticklabels(countries, rotation=0, fontsize=12)

plt.tight_layout()
plt.show()

### Cause vs. # Tsunamis

In [None]:
# Causes ordered by frequency; tick labels are shown in title case.
cause_counts = df["CAUSE"].value_counts()
cause = [label.title() for label in cause_counts.index]

fig, ax = plt.subplots(figsize=(6, 8))
sns.countplot(data=df, y="CAUSE", order=cause_counts.index, ax=ax)

# Log scale so that rare causes remain readable.
ax.set_xscale("log")
ax.set_title("Cause vs. # Tsunamis (Log Scale)", fontsize=18)
ax.set_xlabel("Number of Tsunamis (Log Scale)", fontsize=16)
ax.set_ylabel("Cause", fontsize=16)
ax.tick_params(labelsize=12)
ax.set_yticklabels(cause, rotation=0, fontsize=12)

plt.tight_layout()
plt.show()

### Month vs. # Tsunamis

In [None]:
# Months ordered by number of events; numeric month values (1-12) are mapped
# to their three-letter abbreviations for the tick labels.
month_counts = df["MONTH"].value_counts()
month_names = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
month_idxs = np.array([int(m) for m in month_counts.index]) - 1  # 1-based month -> 0-based list index
months = [month_names[i] for i in month_idxs]

fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, y="MONTH", order=month_counts.index, ax=ax)

ax.set_title("Month vs. # Tsunamis", fontsize=18)
ax.set_xlabel("Number of Tsunamis", fontsize=16)
ax.set_ylabel("Month", fontsize=16)
ax.tick_params(labelsize=12)
ax.set_yticklabels(months, rotation=0, fontsize=12)

plt.tight_layout()
plt.show()

### Earthquake Magnitude vs. # Tsunamis

In [None]:
# Histogram (no KDE curve) of the earthquake magnitudes in the dataset.
# NOTE(review): distplot is deprecated in newer seaborn releases in favour of
# histplot -- confirm the installed version before switching.
fig, ax = plt.subplots(figsize=(8, 6))
sns.distplot(df["EQ_MAGNITUDE"], kde=False, ax=ax)

ax.set_title("Earthquake Magnitude vs. # Tsunamis", fontsize=18)
ax.set_xlabel("Earthquake Magnitude", fontsize=16)
ax.set_ylabel("# Tsunamis", fontsize=16)
ax.tick_params(labelsize=12)

plt.tight_layout()
plt.show()

### Tsunami Intensity vs. # Tsunamis

In [None]:
# Histogram (no KDE curve) of the tsunami intensity column.
# NOTE(review): distplot is deprecated in newer seaborn releases in favour of
# histplot -- confirm the installed version before switching.
fig, ax = plt.subplots(figsize=(8, 6))
sns.distplot(df["TS_INTENSITY"], kde=False, ax=ax)

ax.set_title("Tsunami Intensity vs. # Tsunamis", fontsize=18)
ax.set_xlabel("Tsunami Intensity", fontsize=16)
ax.set_ylabel("# Tsunamis", fontsize=16)
ax.tick_params(labelsize=12)

plt.tight_layout()
plt.show()

### Earthquake Depth vs. # Tsunamis

In [None]:
# Histogram (no KDE curve) of the earthquake depth column.
plt.figure(figsize=(8,6))
ax = sns.distplot(df["EQ_DEPTH"], kde=False)
ax.axes.set_title("Earthquake Depth vs. # Tsunamis",fontsize=18)
# The x-axis shows EQ_DEPTH; the label previously read "Earthquake Magnitude"
# (copied from the magnitude plot) and contradicted the title.
ax.set_xlabel("Earthquake Depth",fontsize=16)
ax.set_ylabel("# Tsunamis",fontsize=16)
ax.tick_params(labelsize=12)
plt.tight_layout()
plt.show()

### Event Validity vs. # Tsunamis

In [None]:
# Event-validity categories ordered by how often they occur.
validity_order = df["EVENT_VALIDITY"].value_counts().index

fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(data=df, y="EVENT_VALIDITY", order=validity_order, ax=ax)

# Log scale so that rare categories remain readable.
ax.set_xscale("log")
ax.set_title("Event Validity vs. # Tsunamis (Log Scale)", fontsize=18)
ax.set_xlabel("Number of Tsunamis (Log Scale)", fontsize=16)
ax.set_ylabel("Event Validity", fontsize=16)
ax.tick_params(labelsize=12)

plt.tight_layout()
plt.show()

### Total Damage vs. # Tsunamis

In [None]:
# Total-damage description categories ordered by how often they occur.
damage_order = df["DAMAGE_TOTAL_DESCRIPTION"].value_counts().index

fig, ax = plt.subplots(figsize=(10, 4))
sns.countplot(data=df, y="DAMAGE_TOTAL_DESCRIPTION", order=damage_order, ax=ax)

# Log scale so that rare categories remain readable.
ax.set_xscale("log")
ax.set_title("Total Damage vs. # Tsunamis (Log Scale)", fontsize=18)
ax.set_xlabel("Number of Tsunamis (Log Scale)", fontsize=16)
ax.set_ylabel("Total Damage", fontsize=16)
ax.tick_params(labelsize=12)

plt.tight_layout()
plt.show()

### House Damage vs. # Tsunamis

In [None]:
# House-damage description categories ordered by how often they occur.
houses_order = df["HOUSES_TOTAL_DESCRIPTION"].value_counts().index

fig, ax = plt.subplots(figsize=(10, 4))
sns.countplot(data=df, y="HOUSES_TOTAL_DESCRIPTION", order=houses_order, ax=ax)

# Log scale so that rare categories remain readable.
ax.set_xscale("log")
ax.set_title("House Damage vs. # Tsunamis (Log Scale)", fontsize=18)
ax.set_xlabel("Number of Tsunamis (Log Scale)", fontsize=16)
ax.set_ylabel("House Damage", fontsize=16)
ax.tick_params(labelsize=12)

plt.tight_layout()
plt.show()

### Deaths vs. # Tsunamis

In [None]:
# Death-toll description categories ordered by how often they occur.
deaths_order = df["DEATHS_TOTAL_DESCRIPTION"].value_counts().index

fig, ax = plt.subplots(figsize=(10, 4))
sns.countplot(data=df, y="DEATHS_TOTAL_DESCRIPTION", order=deaths_order, ax=ax)

# Log scale so that rare categories remain readable.
ax.set_xscale("log")
ax.set_title("Deaths vs. # Tsunamis (Log Scale)", fontsize=18)
ax.set_xlabel("Number of Tsunamis (Log Scale)", fontsize=16)
ax.set_ylabel("Deaths", fontsize=16)
ax.tick_params(labelsize=12)

plt.tight_layout()
plt.show()