In [2]:
import pandas as pd
from obspy.geodetics import locations2degrees, degrees2kilometers
from tqdm import tqdm

In [10]:
# Load all event data into a list of DataFrames
event_files = [
    '../data/datasets_nwa_shelf_trench/new_events.csv',
    '../data/datasets_nwa_shore/new_events.csv',
    '../data/datasets_or_shelf_trench/new_events.csv',
    '../data/datasets_or_shore/new_events.csv',
    '../data/datasets_pnsn_jdf/new_events.csv',
    '../data/datasets_pnsn_nor/new_events.csv',
    '../data/datasets_pnsn_sor/new_events.csv',
    '../data/datasets_pnsn_wa/new_events.csv',
    '../data/datasets_swa_shelf_trench/new_events.csv',
    '../data/datasets_swa_shore/new_events.csv'
]

# Read all event files into DataFrames
dfs = [pd.read_csv(file, index_col=0) for file in event_files]

# Concatenate all DataFrames into a single DataFrame
merged_df = pd.concat(dfs)

# Sort the DataFrame by 'datetime'
merged_df['datetime'] = pd.to_datetime(merged_df['datetime'])
merged_df = merged_df.sort_values(by='datetime').reset_index(drop=True)

# Define thresholds
time_threshold = 10
dist_threshold = 25  # in kilometers

# Function to calculate distance between two events
def calculate_distance(lat1, lon1, lat2, lon2):
    degrees = locations2degrees(lat1, lon1, lat2, lon2)
    return degrees2kilometers(degrees)



# Initialize a list to collect indices of rows to be dropped
rows_to_drop = []

for i in tqdm(range(len(merged_df)), total=len(merged_df)):
    if i in rows_to_drop:
        continue

    event = merged_df.loc[i]
    t1 = event['datetime']
    olat = event['latitude']
    olon = event['longitude']
    condition = (merged_df['datetime'] >= t1 - pd.Timedelta(seconds=time_threshold)) & \
                (merged_df['datetime'] <= t1 + pd.Timedelta(seconds=time_threshold)) & \
                (degrees2kilometers(locations2degrees(olat, olon, merged_df['latitude'], merged_df['longitude'])) <= dist_threshold) & \
                (merged_df.index != i)   
    rows_to_drop = rows_to_drop + merged_df.loc[condition].index.tolist()

# Make rows_to_drop unique
rows_to_drop = list(set(rows_to_drop))

# Drop the collected rows
merged_df.drop(rows_to_drop, inplace=True)

# Reset the index to ensure it is sequential
merged_df.reset_index(drop=True, inplace=True)

100%|██████████| 41630/41630 [04:17<00:00, 161.83it/s]


In [11]:
merged_df 

Unnamed: 0.1,Unnamed: 0,idx,time,x,y,z,picks,latitude,longitude,depth,event_idx,pick_idx,residual,station,phase,time_pick,datetime
0,0,0,2011-01-01 04:58:03.504768+00:00,219.223650,447.470689,0.390625,6,48.487194,-122.534153,0.390625,0,1940523,0.030308,D03D,S,1.293858e+09,2011-01-01 04:58:03.504768+00:00
1,13,3,2011-01-01 19:52:46.661153+00:00,140.013069,351.633757,8.203125,6,47.648376,-123.636385,8.203125,3,1049251,1.326894,OFR,P,1.293912e+09,2011-01-01 19:52:46.661153+00:00
2,19,4,2011-01-01 21:15:56.139179+00:00,181.291823,464.635512,19.140625,10,48.653486,-123.039309,19.140625,4,1895188,0.487547,LRIV,S,1.293917e+09,2011-01-01 21:15:56.139179+00:00
3,29,5,2011-01-02 02:10:45.376984+00:00,-0.557821,518.990787,49.609375,17,49.168544,-125.507649,49.609375,5,1121863,0.554185,BTB,P,1.293934e+09,2011-01-02 02:10:45.376984+00:00
4,46,6,2011-01-02 04:23:38.382767+00:00,234.842638,457.483502,0.390625,7,48.571463,-122.317539,0.390625,6,1961027,-0.158275,LZB,S,1.293942e+09,2011-01-02 04:23:38.382767+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40707,68797,8958,2015-12-30 10:12:04.084015+00:00,224.801860,458.913904,9.765625,8,48.587986,-122.452629,9.765625,8958,1475194,-0.269381,CLRS,S,1.451470e+09,2015-12-30 10:12:04.084015+00:00
40708,68805,8959,2015-12-30 14:04:12.055910+00:00,126.625365,540.446816,5.859375,6,49.348330,-123.757253,5.859375,8959,374144,-0.009508,CLRS,P,1.451484e+09,2015-12-30 14:04:12.055910+00:00
40709,13816,2192,2015-12-30 17:21:56.886771+00:00,76.421476,-173.323761,19.140625,7,42.936197,-124.563756,19.140625,2192,182969,-0.059408,DBO,P,1.451496e+09,2015-12-30 17:21:56.886771+00:00
40710,68811,8961,2015-12-31 01:10:15.705291+00:00,96.503032,458.913904,38.671875,18,48.620884,-124.191036,38.671875,8961,595258,-0.121397,CLRS,P,1.451524e+09,2015-12-31 01:10:15.705291+00:00
