# Global Earthquake-Tsunami Risk Assessment

## Importing the required libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

## Loading the Dataset

In [2]:
# Location of the raw CSV; it is fetched over HTTP each time this cell runs.
DATA_URL = 'https://raw.githubusercontent.com/AbhishekBiswas-github/Python_Data_Science/refs/heads/main/Global%20Earthquake-Tsunami%20Risk%20Assessment/earthquake_data_tsunami.csv'

# Read the CSV into a DataFrame, then continue with a plain NumPy matrix.
raw_frame = pd.read_csv(DATA_URL)
initial_data = raw_frame.to_numpy()

# The last column is taken as the target; the `-1:` slice keeps it 2-D, one column wide.
targets_data = initial_data[:, -1:].astype(int)

# Inputs are every column except the last three.
# NOTE(review): besides the target, this also drops the two columns just before it —
# confirm that excluding them is intended.
unscaled_data = initial_data[:, :-3]

## Balancing the dataset

In [3]:
# Undersample the larger class so that classes 0 and 1 end up with the same number of rows.
# Either class may be the majority; when class 0 is the larger one, the rows removed are
# the zero-labelled rows that come after the first `num_one_targets` zeros.
flat_targets = targets_data.flatten()
zero_positions = np.flatnonzero(flat_targets == 0)
one_positions = np.flatnonzero(flat_targets == 1)

num_zero_targets = zero_positions.size
num_one_targets = one_positions.size
samples_per_class = min(num_zero_targets, num_one_targets)

# Keep the first `samples_per_class` rows of each class (in file order); the surplus rows
# of the larger class are marked for deletion. The smaller class contributes an empty slice.
# NOTE(review): because the earliest rows are kept, the retained majority-class sample is
# not random if the file is ordered in some meaningful way — consider random undersampling.
indicies_deletion = np.sort(
    np.concatenate((zero_positions[samples_per_class:], one_positions[samples_per_class:]))
).tolist()

unscaled_balanced_input_data = np.delete(unscaled_data, indicies_deletion, axis=0)
balanced_target_data = np.delete(targets_data, indicies_deletion, axis=0)

# Invariant of this step: both classes are now equally represented.
assert np.sum(balanced_target_data == 0) == np.sum(balanced_target_data == 1)

## Standardizing the inputs

In [4]:
# Standardize every feature column to zero mean and unit variance
# (these keyword values are the function's defaults, written out for the reader).
# NOTE(review): the mean and standard deviation are computed over the whole balanced set,
# i.e. before the train/validation/test split further down, so held-out rows influence
# the scaling. Consider fitting the scaler on the training split only.
scaled_inputs = preprocessing.scale(
    unscaled_balanced_input_data,
    axis=0,
    with_mean=True,
    with_std=True,
)

## Shuffle the data

In [5]:
# Fixed seed so the shuffle, and therefore the train/validation/test split derived from it,
# is identical on every Restart & Run All.
SHUFFLE_SEED = 42
shuffle_rng = np.random.default_rng(SHUFFLE_SEED)

# A single permutation of the row indices is applied to both arrays,
# which keeps every input row paired with its own target.
shuffled_indices = shuffle_rng.permutation(scaled_inputs.shape[0])

shuffled_input = scaled_inputs[shuffled_indices]
shuffled_target = balanced_target_data[shuffled_indices]

## Split the dataset into train, validation and test sets

In [6]:
# 80 % / 10 % split for train / validation; the test set receives whatever rows remain
# after the two integer truncations.
SAMPLE_COUNT = shuffled_input.shape[0]

train_size = int(0.8 * SAMPLE_COUNT)
validation_size = int(0.1 * SAMPLE_COUNT)
test_size = SAMPLE_COUNT - train_size - validation_size

# Cut both arrays at the same two row boundaries so inputs and targets stay aligned.
split_points = [train_size, train_size + validation_size]
train_input_data, validation_input_data, test_input_data = np.split(shuffled_input, split_points)
train_target_data, validation_target_data, test_target_data = np.split(shuffled_target, split_points)

# For each split, report: sum of targets (number of 1-labels), split size, and their ratio.
for split_targets, split_size in (
    (train_target_data, train_size),
    (validation_target_data, validation_size),
    (test_target_data, test_size),
):
    positive_count = np.sum(split_targets)
    print(positive_count, split_size, positive_count / split_size)

248 486 0.5102880658436214
32 60 0.5333333333333333
24 62 0.3870967741935484


## Saving the datasets as NumPy `.npz` files (for use with TensorFlow)

In [7]:
# Write each split to its own archive, storing the arrays under the keys
# "inputs" and "targets". np.savez adds the ".npz" extension to the given file name.
for archive_name, archive_inputs, archive_targets in (
    ("Earthquake_data_train", train_input_data, train_target_data),
    ("Earthquake_data_validate", validation_input_data, validation_target_data),
    ("Earthquake_data_test", test_input_data, test_target_data),
):
    np.savez(archive_name, inputs=archive_inputs, targets=archive_targets)