<a href="https://www.kaggle.com/code/ashishamin/tsunami-prediction-model?scriptVersionId=267541109" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the Kaggle input directory.
for folder, _subfolders, files in os.walk('/kaggle/input'):
    for file_name in files:
        print(os.path.join(folder, file_name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# Data Exploration

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Load the earthquake/tsunami CSV into a DataFrame used by every later cell.
DATA_PATH = '/kaggle/input/global-earthquake-tsunami-risk-assessment-dataset/earthquake_data_tsunami.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Preview the first rows to check that the file parsed as expected.
df.head()

In [None]:
# Summary statistics (count, mean, std, quartiles, min/max) per column.
df.describe()

In [None]:
# (number of rows, number of columns) of the loaded dataset.
df.shape

In [None]:
# Count missing values in each column.
df.isnull().sum()

In [None]:
# Pairwise correlation between all columns (Pearson is the pandas default).
df_corr = df.corr(method='pearson')

In [None]:
# Rank every column by the strength (absolute value) of its correlation with
# the `tsunami` column, strongest first.
tsunami_corr = df_corr['tsunami']
sorted_corr = tsunami_corr.abs().sort_values(ascending=False)

In [None]:
# Display the ranked absolute correlations with `tsunami`.
sorted_corr

In [None]:
# Render the full correlation matrix as an annotated heatmap.
heatmap_options = {
    "annot": True,                 # write the correlation value in each cell
    "fmt": ".2f",                  # two decimal places
    "cmap": "coolwarm",            # diverging palette
    "vmin": -1,                    # fix the colour scale to the full
    "vmax": 1,                     # correlation range
    "linewidths": 0.5,             # thin separators between cells
    "linecolor": 'gray',
    "square": True,                # keep cells square
    "cbar_kws": {"shrink": 0.8},   # slightly smaller colour bar
}

plt.figure(figsize=(12, 8))
sns.heatmap(df_corr, **heatmap_options)
plt.title("Correlation Matrix", fontsize=16)
plt.xticks(rotation=45, ha='right')  # tilt x labels
plt.yticks(rotation=0)
plt.tight_layout()
plt.show()


In [None]:
# Keep only the rows whose `tsunami` flag equals 1.
is_tsunami = df['tsunami'].eq(1)
tsunami_events = df.loc[is_tsunami]

In [None]:
# Number of tsunami-flagged rows for each value of `Year`.
events_grouped_by_year = tsunami_events.groupby('Year')
tsunami_counts_by_year = events_grouped_by_year.size()

tsunami_counts_by_year

In [None]:
# Bar chart of the yearly tsunami counts.
plt.figure(figsize=(12, 6))
ax = tsunami_counts_by_year.plot.bar(color='skyblue')
ax.set_title("Tsunami Events per Year")
ax.set_xlabel("Year")
ax.set_ylabel("Number of Tsunamis")
plt.xticks(rotation=45)  # tilt the year labels
plt.tight_layout()
plt.show()

# Model Training

In [None]:
# List the available column names before choosing model features.
df.columns

In [None]:
# Feature matrix: the columns used as model inputs.
feature_columns = [
    'magnitude',
    'cdi',
    'mmi',
    'sig',
    'depth',
    'latitude',
    'longitude',
]
X = df[feature_columns]

In [None]:
# Target vector: the `tsunami` column.
y=df['tsunami']

In [None]:
import warnings

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeRegressor
# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides library warnings (for example
# scikit-learn warnings about failed or deprecated settings) — consider
# narrowing it to specific categories.
warnings.filterwarnings('ignore')


In [None]:
# Hold out 20% of the rows for testing. A fixed seed keeps the split (and
# therefore every metric reported below) identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Baseline model: ordinary least-squares linear regression.
# NOTE(review): the notebook filters `tsunami == 1`, so the target looks like a
# binary flag; a classifier may be a better fit than a regressor — confirm.
lr=LinearRegression()

In [None]:
# Fit the linear model on the training split.
lr.fit(X_train,y_train)

In [None]:
# Predictions of the linear model on the held-out test split.
lr_pred=lr.predict(X_test)

In [None]:
# Test-set error of the linear model, printed as "<MSE> , <MAE>".
# Both metric functions come from sklearn.metrics.
lr_mse = mean_squared_error(y_test, lr_pred)
lr_mae = mean_absolute_error(y_test, lr_pred)
print(lr_mse, ",", lr_mae)

In [None]:
# Hyper-parameter grid searched for the decision-tree regressor.
# The criterion names follow current scikit-learn: 'squared_error' and
# 'absolute_error' replace the removed aliases 'mse' and 'mae'.
param_grid = {
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error'],
    'splitter': ['best', 'random'],
    'max_depth': [None, 10, 20, 30, 40, 50],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}


In [None]:
# Base estimator for the grid search. The grid includes splitter='random', so
# a fixed seed is needed for the search to give the same result on every run.
tree_model = DecisionTreeRegressor(random_state=42)

In [None]:
# Cross-validated exhaustive search over `param_grid` for the tree regressor.
grid_tree = GridSearchCV(tree_model, param_grid)

In [None]:
grid_tree.best_params_

In [None]:
grid_tree.fit(X_train,y_train)

In [None]:
# Predictions on the held-out test split from the fitted grid search.
tree_pred=grid_tree.predict(X_test)

In [None]:
# Test-set error of the tuned decision tree, printed as "<MSE> , <MAE>".
# Both metric functions come from sklearn.metrics.
tree_mse = mean_squared_error(y_test, tree_pred)
tree_mae = mean_absolute_error(y_test, tree_pred)
print(tree_mse, ',', tree_mae)