In [1]:
import os
import requests
from datetime import datetime

import pandas as pd
from pandas.io.json import json_normalize

import folium
from folium import plugins

In [2]:
df = pd.read_csv('../data/tsunamis-2022-04-27_23-16-51_-0400.tsv', sep='\t')

In [3]:
df.tail()

Unnamed: 0,Search Parameters,Year,Mo,Dy,Hr,Mn,Sec,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,...,Total Missing,Total Missing Description,Total Injuries,Total Injuries Description,Total Damage ($Mil),Total Damage Description,Total Houses Destroyed,Total Houses Destroyed Description,Total Houses Damaged,Total Houses Damaged Description
2652,,2022.0,1.0,16.0,,,,4.0,6.0,,...,,,,,,,,,,
2653,,2022.0,1.0,29.0,2.0,46.0,39.0,4.0,1.0,6.5,...,,,,,,,,,,
2654,,2022.0,3.0,16.0,14.0,36.0,33.0,4.0,1.0,7.3,...,,,107.0,3.0,,2.0,,,,
2655,,2022.0,3.0,30.0,20.0,56.0,58.0,4.0,1.0,6.9,...,,,,,,,,,,
2656,,2022.0,3.0,31.0,5.0,44.0,1.0,4.0,1.0,7.0,...,,,,,,,,,,


In [4]:
df.shape

(2657, 46)

In [5]:
for col in df.columns:
    unique_values = df[col].nunique()
    print(f"{col}: {unique_values}")

Search Parameters: 1
Year: 456
Mo: 12
Dy: 31
Hr: 24
Mn: 60
Sec: 361
Tsunami Event Validity: 6
Tsunami Cause Code: 12
Earthquake Magnitude: 52
Vol: 162
More Info: 0
Deposits: 19
Country: 111
Location Name: 968
Latitude: 1407
Longitude: 1631
Maximum Water Height (m): 240
Number of Runups: 113
Tsunami Magnitude (Abe): 1
Tsunami Magnitude (Iida): 64
Tsunami Intensity: 32
Deaths: 119
Death Description: 4
Missing: 4
Missing Description: 3
Injuries: 47
Injuries Description: 4
Damage ($Mil): 41
Damage Description: 4
Houses Destroyed: 67
Houses Destroyed Description: 4
Houses Damaged: 14
Houses Damaged Description: 4
Total Deaths: 231
Total Death Description: 4
Total Missing: 10
Total Missing Description: 4
Total Injuries: 161
Total Injuries Description: 4
Total Damage ($Mil): 109
Total Damage Description: 4
Total Houses Destroyed: 163
Total Houses Destroyed Description: 4
Total Houses Damaged: 76
Total Houses Damaged Description: 4


### That's a lot of columns; certainly more than I need. Also, the ones before there was good earthquake data are nice to know historically but basically useless for my purposes. Long term records start at about 1990, so I'll use that as a cutoff for now. Also, I'm only using `Event Validity` of 4 for now. Beyond that, I can drop a lot of the damage, death and missing people columns.

In [22]:
df.shape

(339, 19)

In [23]:
df = df[df['Year'] >= 1990]


KeyError: 'Year'

In [None]:
df = df[df['Tsunami Event Validity'] == 4]


In [None]:
df.head(3)


In [21]:





cols = df.columns.to_list()
cols

drop_cols = [ 'Deaths',
 'Death Description',
 'Missing',
 'Missing Description',
 'Injuries',
 'Injuries Description',
 'Damage ($Mil)',
 'Damage Description',
 'Houses Destroyed',
 'Houses Destroyed Description',
 'Houses Damaged',
 'Houses Damaged Description',
 'Total Deaths',
 'Total Death Description',
 'Total Missing',
 'Total Missing Description',
 'Total Injuries',
 'Total Injuries Description',
 'Total Damage ($Mil)',
 'Total Damage Description',
 'Total Houses Destroyed',
 'Total Houses Destroyed Description',
 'Total Houses Damaged',
 'Total Houses Damaged Description',
 'More Info',
 'Search Parameters',
 'Vol',
 'Deposits'
]

KeyError: 'Year'

In [12]:
df = df.drop(columns = drop_cols)

In [13]:
df.columns

Index(['Year', 'Mo', 'Dy', 'Hr', 'Mn', 'Sec', 'Tsunami Event Validity',
       'Tsunami Cause Code', 'Earthquake Magnitude', 'Country',
       'Location Name', 'Latitude', 'Longitude', 'Maximum Water Height (m)',
       'Number of Runups', 'Tsunami Magnitude (Abe)',
       'Tsunami Magnitude (Iida)', 'Tsunami Intensity'],
      dtype='object')

In [14]:
df.dtypes

Year                        float64
Mo                          float64
Dy                          float64
Hr                          float64
Mn                          float64
Sec                         float64
Tsunami Event Validity      float64
Tsunami Cause Code          float64
Earthquake Magnitude        float64
Country                      object
Location Name                object
Latitude                    float64
Longitude                   float64
Maximum Water Height (m)    float64
Number of Runups            float64
Tsunami Magnitude (Abe)     float64
Tsunami Magnitude (Iida)    float64
Tsunami Intensity           float64
dtype: object

In [15]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 339 entries, 2232 to 2656
Data columns (total 18 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Year                      339 non-null    float64
 1   Mo                        339 non-null    float64
 2   Dy                        339 non-null    float64
 3   Hr                        319 non-null    float64
 4   Mn                        317 non-null    float64
 5   Sec                       304 non-null    float64
 6   Tsunami Event Validity    339 non-null    float64
 7   Tsunami Cause Code        339 non-null    float64
 8   Earthquake Magnitude      300 non-null    float64
 9   Country                   339 non-null    object 
 10  Location Name             339 non-null    object 
 11  Latitude                  332 non-null    float64
 12  Longitude                 332 non-null    float64
 13  Maximum Water Height (m)  327 non-null    float64
 14  Number

In [16]:
df2 = df[df[['Year', 'Mo', 'Dy', 'Hr', 'Mn', 'Sec']].isna().any(axis=1)]
df2

Unnamed: 0,Year,Mo,Dy,Hr,Mn,Sec,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,Country,Location Name,Latitude,Longitude,Maximum Water Height (m),Number of Runups,Tsunami Magnitude (Abe),Tsunami Magnitude (Iida),Tsunami Intensity
2249,1992.0,1.0,5.0,6.0,,,4.0,1.0,3.7,CHINA,SOUTH CHINA SEA,18.0,108.0,0.8,5.0,,-0.3,
2271,1994.0,4.0,28.0,11.0,30.0,,4.0,8.0,,NORWAY,WESTERN NORWAY,61.267,6.583,4.0,1.0,,,4.0
2279,1994.0,9.0,19.0,,,,4.0,6.0,,PAPUA NEW GUINEA,PAPUA NEW GUINEA,-4.238,152.214,3.0,3.0,,1.2,
2283,1994.0,11.0,4.0,4.0,12.0,,4.0,8.0,,USA,"SKAGWAY, AK",59.5,-135.3,9.0,2.0,,3.5,2.5
2306,1996.0,1.0,1.0,,,,4.0,8.0,,GREECE,W. CORINTH GULF,38.367,22.083,2.0,1.0,,,2.0
2307,1996.0,1.0,2.0,15.0,40.0,,4.0,6.0,,RUSSIA,"KARYMSKOYE, KAMCHATKA, RUSSIA",54.05,159.43,30.0,1.0,,,
2330,1997.0,12.0,26.0,8.0,,,4.0,7.0,,MONTSERRAT,WHITE RIVER VALLEY,16.72,-62.18,3.0,1.0,,,
2331,1998.0,3.0,19.0,18.0,30.0,,4.0,8.0,,NORWAY,WESTERN NORWAY,61.183,5.267,6.0,1.0,,,4.0
2337,1999.0,1.0,20.0,,,,4.0,6.0,,MONTSERRAT,SOUFRIERE HILLS VOLCANO,16.722,-62.18,2.0,3.0,,,
2339,1999.0,9.0,13.0,,,,4.0,8.0,,FRENCH POLYNESIA,FATU HIVA,-10.52,-138.67,8.0,9.0,,,


In [17]:
df2.shape

(35, 18)

In [18]:
df[['Year', 'Mo', 'Dy', 'Hr', 'Mn', 'Sec']] = df[['Year', 'Mo', 'Dy', 'Hr', 'Mn', 'Sec']].fillna(0).astype(int)
df

Unnamed: 0,Year,Mo,Dy,Hr,Mn,Sec,Tsunami Event Validity,Tsunami Cause Code,Earthquake Magnitude,Country,Location Name,Latitude,Longitude,Maximum Water Height (m),Number of Runups,Tsunami Magnitude (Abe),Tsunami Magnitude (Iida),Tsunami Intensity
2232,1990,2,8,7,15,32,4.0,1.0,6.6,PHILIPPINES,PHILIPPINES,9.755,124.694,2.10,0.0,,,
2233,1990,2,20,6,53,39,4.0,1.0,6.4,JAPAN,JAPAN,34.706,139.252,0.16,1.0,,-1.7,
2234,1990,3,25,13,22,55,4.0,1.0,7.0,COSTA RICA,COSTA RICA,9.919,-84.808,1.00,4.0,,,
2235,1990,4,5,21,12,35,4.0,1.0,7.5,USA TERRITORY,"MARIANA TRENCH, N. MARIANA ISLANDS",15.125,147.596,1.80,34.0,,-1.0,
2238,1990,9,23,21,13,7,4.0,1.0,6.5,JAPAN,"W. OF BONIN ISLANDS, IZU IS., JAPAN",33.267,138.643,0.20,4.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2652,2022,1,16,0,0,0,4.0,6.0,,TONGA,TONGA ISLANDS,-20.536,-175.382,0.36,1.0,,,
2653,2022,1,29,2,46,39,4.0,1.0,6.5,NEW ZEALAND,KERMADEC ISLANDS,-29.531,-176.749,0.10,1.0,,,
2654,2022,3,16,14,36,33,4.0,1.0,7.3,JAPAN,OFF FUKUSHIMA PREFECTURE,37.702,141.587,0.30,5.0,,,
2655,2022,3,30,20,56,58,4.0,1.0,6.9,NEW CALEDONIA,LOYALTY ISLANDS,-22.716,170.286,0.07,3.0,,,


In [19]:
df.rename(columns = {'Year': 'year', 'Mo': "month", 'Dy': 'day', 'Hr': 'hour', 'Mn': 'minute', 'Sec': 'second'}, inplace=True)

In [20]:
df['datetime'] = pd.to_datetime(df[['year', 'month', 'day', 'hour', 'minute', 'second']])