## USGS RECTANGULAR DATA Latitude: [-57.136, 61.939] , Longitude: [-257.695, -65]  between 1993 - Now


In [1]:
import pandas as pd

# List to store dataframes
dfs = []

# Loop over the years from 1993 to 2024 and construct file paths dynamically
for year in range(1993, 2025):
    # Handle the special case of 2024 where data ends at 2024-09-09
    if year == 2024:
        file_path = f"../../data/raw/usgs-data/circum-pacific-seismic-belt/{year}-01-01_2024-09-09.csv"
    else:
        file_path = f"../../data/raw/usgs-data/circum-pacific-seismic-belt/{year}-01-01_{year}-12-31.csv"
    
    # Read each year's CSV and append it to the list
    try:
        df = pd.read_csv(file_path)
        dfs.append(df)
        print(f"Loaded: {file_path}")
    except FileNotFoundError:
        print(f"File not found: {file_path}")
        continue

# Concatenate all DataFrames into one
full_earthquake_data = pd.concat(dfs, ignore_index=True)

unique_earthquake_data = full_earthquake_data.drop_duplicates(subset='id')
unique_earthquake_data.rename(columns={
    'id': 'Event ID',
    'time': 'Datetime',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'depth': 'Depth(km)',
    'mag': 'xM'
}, inplace=True)

unique_earthquake_data['Datetime'] = pd.to_datetime(unique_earthquake_data['Datetime']).dt.strftime('%d/%m/%Y %H:%M:%S')
df_converted =  unique_earthquake_data[['Event ID', 'Datetime', 'Latitude', 'Longitude', 'Depth(km)', 'xM']]
df_converted

Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/1993-01-01_1993-12-31.csv
File not found: ../../data/raw/usgs-data/circum-pacific-seismic-belt/1994-01-01_1994-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/1995-01-01_1995-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/1996-01-01_1996-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/1997-01-01_1997-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/1998-01-01_1998-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/1999-01-01_1999-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/2000-01-01_2000-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/2001-01-01_2001-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/2002-01-01_2002-12-31.csv
Loaded: ../../data/raw/usgs-data/circum-pacific-seismic-belt/2003-01-01_2003-12-31.csv
Loaded: ../../data/raw/usgs-data/ci

Unnamed: 0,Event ID,Datetime,Latitude,Longitude,Depth(km),xM
0,usp0005k5r,01/01/1993 02:33:07,3.8340,126.4750,86.100,4.4
1,usp0005k5s,01/01/1993 02:54:42,35.1960,111.0310,10.000,3.9
2,usp0005k5u,01/01/1993 03:46:33,-31.9260,-178.0700,33.000,5.1
3,ak9931pzpqo,01/01/1993 04:12:28,60.2248,-152.7263,10.200,3.1
4,usp0005k5y,01/01/1993 04:30:08,53.2260,157.6020,169.700,4.2
...,...,...,...,...,...,...
416413,ak024bmotrix,09/09/2024 16:55:41,60.0269,-146.2638,7.800,3.3
416414,us6000nqqj,09/09/2024 17:24:09,-31.7067,-72.2578,10.000,4.4
416415,us6000nqqz,09/09/2024 17:37:09,-6.2723,-107.1024,10.000,5.5
416416,us6000nqre,09/09/2024 19:31:28,17.8394,119.8370,10.000,4.5


In [2]:
df_converted.to_csv("../../data/cleaned/usgs_pacific_belt_93_now_rectangular_region.csv")

In [3]:
from src.util import earthquake_filter_utils
df_large = earthquake_filter_utils.magnitude_filter(df_converted, 5.5)
df_large

Unnamed: 0,Event ID,Datetime,Latitude,Longitude,Depth(km),xM
41,usp0005k8d,03/01/1993 04:23:34,-1.5130,119.6790,37.200,5.7
50,usp0005k8x,03/01/1993 17:08:13,-4.0640,129.5120,53.700,5.8
58,usp0005k97,04/01/1993 01:26:17,3.2660,128.0940,72.300,5.9
76,usp0005ka8,04/01/1993 20:41:11,-22.0550,-174.8660,33.000,6.0
112,usp0005kdw,07/01/1993 12:02:10,-32.0310,-178.0730,10.000,5.6
...,...,...,...,...,...,...
416319,us6000npu5,05/09/2024 10:38:26,-19.1251,-67.3038,222.966,5.7
416334,us6000nq0y,06/09/2024 10:12:58,-28.6861,-69.5171,123.000,5.7
416367,us6000nqd8,07/09/2024 22:39:07,-21.2926,-173.7204,10.000,6.0
416377,us6000nqf4,08/09/2024 05:01:41,-7.4177,156.1219,42.328,5.5


In [4]:
df_converted['Datetime'] = pd.to_datetime(df_converted['Datetime'], format="%d/%m/%Y %H:%M:%S")

df_converted =  (df_converted[df_converted['Datetime'] >= pd.Timestamp('1993-01-01')]).sort_values('Datetime', ascending=True)
df_converted

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_converted['Datetime'] = pd.to_datetime(df_converted['Datetime'], format="%d/%m/%Y %H:%M:%S")


Unnamed: 0,Event ID,Datetime,Latitude,Longitude,Depth(km),xM
0,usp0005k5r,1993-01-01 02:33:07,3.8340,126.4750,86.100,4.4
1,usp0005k5s,1993-01-01 02:54:42,35.1960,111.0310,10.000,3.9
2,usp0005k5u,1993-01-01 03:46:33,-31.9260,-178.0700,33.000,5.1
3,ak9931pzpqo,1993-01-01 04:12:28,60.2248,-152.7263,10.200,3.1
4,usp0005k5y,1993-01-01 04:30:08,53.2260,157.6020,169.700,4.2
...,...,...,...,...,...,...
416413,ak024bmotrix,2024-09-09 16:55:41,60.0269,-146.2638,7.800,3.3
416414,us6000nqqj,2024-09-09 17:24:09,-31.7067,-72.2578,10.000,4.4
416415,us6000nqqz,2024-09-09 17:37:09,-6.2723,-107.1024,10.000,5.5
416416,us6000nqre,2024-09-09 19:31:28,17.8394,119.8370,10.000,4.5


### Adding X, Y, Z Cartesian coordinates to df.

In [5]:
from src.util.earthquake_efficient_filter_utils import lat_lon_to_cartesian_vectorized

df_converted = lat_lon_to_cartesian_vectorized(df_converted)
df_converted

Unnamed: 0,Event ID,Datetime,Latitude,Longitude,Depth(km),xM,X,Y,Z
0,usp0005k5r,1993-01-01 02:33:07,3.8340,126.4750,86.100,4.4,-3778.904702,5111.559593,426.003261
1,usp0005k5s,1993-01-01 02:54:42,35.1960,111.0310,10.000,3.9,-1868.395728,4859.477002,3672.086828
2,usp0005k5u,1993-01-01 03:46:33,-31.9260,-178.0700,33.000,5.1,-5404.202872,-182.108671,-3369.134719
3,ak9931pzpqo,1993-01-01 04:12:28,60.2248,-152.7263,10.200,3.1,-2812.097715,-1449.797564,5529.903658
4,usp0005k5y,1993-01-01 04:30:08,53.2260,157.6020,169.700,4.2,-3526.328461,1453.303707,5103.190857
...,...,...,...,...,...,...,...,...,...
416413,ak024bmotrix,2024-09-09 16:55:41,60.0269,-146.2638,7.800,3.3,-2646.918149,-1767.692156,5518.942811
416414,us6000nqqj,2024-09-09 17:24:09,-31.7067,-72.2578,10.000,4.4,1651.700170,-5162.330198,-3348.413725
416415,us6000nqqz,2024-09-09 17:37:09,-6.2723,-107.1024,10.000,5.5,-1862.370491,-6052.827735,-696.055716
416416,us6000nqre,2024-09-09 19:31:28,17.8394,119.8370,10.000,4.5,-3017.383766,5260.768572,1951.755682


In [6]:
from src.data import preprocess
import torch

X_test, y_test = preprocess.create_classification_data(df_converted, df_large)
torch.sum(y_test == 1).item() # Shows total number of earthquake samples with label 1.

Error processing event ID usp0005k8d: PAST EARTHQUAKE ERROR: usp0005k8d with only 0 past earthquakes available.
Error processing event ID usp0005k8x: PAST EARTHQUAKE ERROR: usp0005k8x with only 0 past earthquakes available.
Error processing event ID usp0005k97: PAST EARTHQUAKE ERROR: usp0005k97 with only 0 past earthquakes available.
Error processing event ID usp0005ka8: PAST EARTHQUAKE ERROR: usp0005ka8 with only 0 past earthquakes available.
Error processing event ID usp0005kdw: PAST EARTHQUAKE ERROR: usp0005kdw with only 0 past earthquakes available.
Error processing event ID usp0005kdx: PAST EARTHQUAKE ERROR: usp0005kdx with only 1 past earthquakes available.
Error processing event ID usp0005ke8: PAST EARTHQUAKE ERROR: usp0005ke8 with only 0 past earthquakes available.
Error processing event ID usp0005kh7: PAST EARTHQUAKE ERROR: usp0005kh7 with only 0 past earthquakes available.
Error processing event ID usp0005kk2: PAST EARTHQUAKE ERROR: usp0005kk2 with only 0 past earthquakes ava

1043

### We have 1043 samples of 1s with aftershocks included for default number_of_earthquakes=30

In [7]:
from src.data import preprocess
import torch

X_test2, y_test2 = preprocess.create_classification_data(df_converted, df_large, num_earthquakes=20)
torch.sum(y_test2 == 1).item() # Shows total number of earthquake samples with label 1.

Error processing event ID usp0005k8d: PAST EARTHQUAKE ERROR: usp0005k8d with only 0 past earthquakes available.
Error processing event ID usp0005k8x: PAST EARTHQUAKE ERROR: usp0005k8x with only 0 past earthquakes available.
Error processing event ID usp0005k97: PAST EARTHQUAKE ERROR: usp0005k97 with only 0 past earthquakes available.
Error processing event ID usp0005ka8: PAST EARTHQUAKE ERROR: usp0005ka8 with only 0 past earthquakes available.
Error processing event ID usp0005kdw: PAST EARTHQUAKE ERROR: usp0005kdw with only 0 past earthquakes available.
Error processing event ID usp0005kdx: PAST EARTHQUAKE ERROR: usp0005kdx with only 1 past earthquakes available.
Error processing event ID usp0005ke8: PAST EARTHQUAKE ERROR: usp0005ke8 with only 0 past earthquakes available.
Error processing event ID usp0005kh7: PAST EARTHQUAKE ERROR: usp0005kh7 with only 0 past earthquakes available.
Error processing event ID usp0005kk2: PAST EARTHQUAKE ERROR: usp0005kk2 with only 0 past earthquakes ava

1792

In [None]:
from src.util.earthquake_efficient_filter_utils import remove_all_aftershocks_from_data_numpy 
df_without_aftershocks = remove_all_aftershocks_from_data_numpy(df_converted)
df_without_aftershocks