## Notebook for formatting data frames for phase association
Source:
1. https://chatgpt.com/


In [1]:
import pandas as pd
import numpy as np
import datetime
import glob
import os 

### 1.1 Load the station data frame

In [2]:
# Region tag that is interpolated into the input and output paths below.
region = 'nwa_shore'


def _read_station_table(year):
    """Read the station CSV for `year` and the current `region`; the first CSV column becomes the index."""
    return pd.read_csv(f'../data/datasets_{year}/stas_{year}_{region}.csv', index_col=0)


# One station table per year, 2011 through 2015 (read in chronological order).
stas_2011, stas_2012, stas_2013, stas_2014, stas_2015 = (
    _read_station_table(year) for year in range(2011, 2016)
)

# Print each yearly table, oldest first.
for yearly_stations in (stas_2011, stas_2012, stas_2013, stas_2014, stas_2015):
    print(yearly_stations)

         id   longitude   latitude  elevation
0  UW.FORK. -124.566200  47.947500       44.9
1  7D.J73A. -126.192497  48.767700     -143.3
2  7D.J65A. -125.139603  47.891300     -165.2
3   UW.OFR. -124.396042  47.933128      152.0
4   CN.OZB. -125.497800  48.961200      626.0
5  CN.BMSB. -125.135500  48.835600       10.0
6  7D.M01A. -126.722099  49.150398     -132.9
         id   longitude   latitude  elevation
0   CN.OZB. -125.497800  48.961200      626.0
1   UW.OFR. -124.396042  47.933128      152.0
2  7D.J73A. -126.192497  48.767700     -143.3
3  UW.FORK. -124.566200  47.947500       44.9
4  7D.J65A. -125.139603  47.891300     -165.2
5  7D.M01A. -126.722099  49.150398     -132.9
         id   longitude   latitude  elevation
0  UW.FORK. -124.566200  47.947500       44.9
1  CN.NTKA. -126.616600  49.592400       12.0
2  7D.J73C. -126.192596  48.767899     -133.0
3  7D.M01C. -126.722198  49.150398     -138.0
4   CN.OZB. -125.497800  48.961200      626.0
5  7D.J65C. -125.139801  47.891300

In [3]:
# Stack the yearly station tables, keep the first row seen for each station id,
# and renumber the rows from zero.
yearly_tables = [stas_2011, stas_2012, stas_2013, stas_2014, stas_2015]
stas = (
    pd.concat(yearly_tables, ignore_index=True)
    .drop_duplicates(subset='id')
    .reset_index(drop=True)
)
stas

Unnamed: 0,id,longitude,latitude,elevation
0,UW.FORK.,-124.5662,47.9475,44.9
1,7D.J73A.,-126.192497,48.7677,-143.3
2,7D.J65A.,-125.139603,47.8913,-165.2
3,UW.OFR.,-124.396042,47.933128,152.0
4,CN.OZB.,-125.4978,48.9612,626.0
5,CN.BMSB.,-125.1355,48.8356,10.0
6,7D.M01A.,-126.722099,49.150398,-132.9
7,CN.NTKA.,-126.6166,49.5924,12.0
8,7D.J73C.,-126.192596,48.767899,-133.0
9,7D.M01C.,-126.722198,49.150398,-138.0


In [4]:
# Write the merged station table into the region's dataset folder.
# DataFrame.to_csv does not create missing directories, so make sure the
# folder exists before this first write into it.
region_dir = f'../data/datasets_{region}'
os.makedirs(region_dir, exist_ok=True)
stas.to_csv(f'{region_dir}/all_stations_{region}.csv')

### 1.2 Concatenate data frames from several stations and format 

In [5]:
# Load the pick table; the first CSV column is used as the index.
all_picks_csv = '../data/datasets_all_regions/all_picks_all_years_for_assoc.csv'
df = pd.read_csv(all_picks_csv, index_col=0)

In [6]:
# Build "<network>.<station>." identifiers so they can be matched against the
# `id` column of the station table.
net_codes = df['station_network_code']
sta_codes = df['station_code']
df['station_id'] = net_codes + '.' + sta_codes + '.'

# Keep only the picks whose station appears in `stas`.
known_station = df['station_id'].isin(stas['id'].tolist())
df = df[known_station]

In [7]:
# Number of distinct station ids left in the pick table (a missing id would count as one value).
df['station_id'].nunique(dropna=False)

13

In [8]:
# Number of pick rows after the station filter.
df.shape[0]

956487

In [9]:
# df.to_csv('../data/datasets_all_years/all_picks_all_years_pnsn_jdf_for_picking.csv')

In [10]:
# # Remove picks from noisy stations ('FN05A', 'YOUB', 'MGB', and the others listed below)
# df = df[~df['station_code'].isin(['FN05A', 'YOUB', 'MGB','FN14A','FN07A','J41A','J49A',
# 'J25B','J33B','FN18A','FN08A','M09B','G17B'])]

In [11]:
# # Remove picks with the following stations due to the noise
# df = df[~df['station_code'].isin(['FN05A', 'YOUB', 'MGB',
#                                   'J41A','J33B','M09B','FN01A','FN08A',
#                                   'FN12A','FN18A','J26A','J34A', 'J41A',
#                                   'M02A','FS03B','FS05B','FS08B','G33B',
#                                   'J17B','M10B','M13B','M18B'
#                                  ])]

In [12]:
# Number of pick rows at this point in the notebook.
df.shape[0]

956487

In [13]:
# Save the station-filtered picks for this region.
region_picks_out = f'../data/datasets_{region}/all_picks_all_years_for_assoc_{region}.csv'
df.to_csv(region_picks_out)

### 1.3 Create a CSV file of picks for the association

In [14]:
# Reload the region's pick table from disk; the first CSV column is the index.
region_picks_in = f'../data/datasets_{region}/all_picks_all_years_for_assoc_{region}.csv'
df = pd.read_csv(region_picks_in, index_col=0)

In [15]:
# Label every row that has a P arrival with phase "P".
# `p_phase` holds the label string itself, not a selection of rows.
p_phase = "P"
has_p_arrival = df['trace_p_arrival'].notna()
df.loc[has_p_arrival, 'phase'] = p_phase

In [16]:
# Keep only the rows with a P arrival, and only the four columns used for the picks file.
p_pick_columns = ['station_id', 'trace_p_arrival', 'phase', 'pick_id']
p_rows = df['trace_p_arrival'].notna()
p_stas_picks_phase = df.loc[p_rows, p_pick_columns]
p_stas_picks_phase

Unnamed: 0,station_id,trace_p_arrival,phase,pick_id
305,CN.OZB.,2011-01-01T09:37:24.350000Z,P,305
306,CN.OZB.,2011-01-01T20:50:40.675000Z,P,306
3240,UW.FORK.,2011-01-01T14:45:23.465000Z,P,3240
3241,UW.FORK.,2011-01-01T14:43:06.940000Z,P,3241
3242,UW.FORK.,2011-01-01T14:46:10.740000Z,P,3242
...,...,...,...,...
23571045,UW.FORK.,2015-12-31T14:46:42.765000Z,P,23571045
23571056,UW.FORK.,2015-12-31T13:57:39.290000Z,P,23571056
23571061,UW.FORK.,2015-12-31T18:29:07.440000Z,P,23571061
23571062,UW.FORK.,2015-12-31T19:37:10.240000Z,P,23571062


In [17]:
# Rename the P-pick columns to the generic names `station` and `time`.
p_column_names = {"station_id": "station", "trace_p_arrival": "time"}
_p_stas_picks_phase = p_stas_picks_phase.rename(columns=p_column_names)
_p_stas_picks_phase

Unnamed: 0,station,time,phase,pick_id
305,CN.OZB.,2011-01-01T09:37:24.350000Z,P,305
306,CN.OZB.,2011-01-01T20:50:40.675000Z,P,306
3240,UW.FORK.,2011-01-01T14:45:23.465000Z,P,3240
3241,UW.FORK.,2011-01-01T14:43:06.940000Z,P,3241
3242,UW.FORK.,2011-01-01T14:46:10.740000Z,P,3242
...,...,...,...,...
23571045,UW.FORK.,2015-12-31T14:46:42.765000Z,P,23571045
23571056,UW.FORK.,2015-12-31T13:57:39.290000Z,P,23571056
23571061,UW.FORK.,2015-12-31T18:29:07.440000Z,P,23571061
23571062,UW.FORK.,2015-12-31T19:37:10.240000Z,P,23571062


In [18]:
# Label every row that has an S arrival with phase "S"
# (this overwrites whatever `phase` value those rows already had).
# `s_phase` holds the label string itself, not a selection of rows.
s_phase = "S"
has_s_arrival = df['trace_s_arrival'].notna()
df.loc[has_s_arrival, 'phase'] = s_phase

In [19]:
# Keep only the rows with an S arrival, and only the four columns used for the picks file.
s_pick_columns = ['station_id', 'trace_s_arrival', 'phase', 'pick_id']
s_rows = df['trace_s_arrival'].notna()
s_stas_picks_phase = df.loc[s_rows, s_pick_columns]
s_stas_picks_phase

Unnamed: 0,station_id,trace_s_arrival,phase,pick_id
307,CN.OZB.,2011-01-01T09:37:35.925000Z,S,307
308,CN.OZB.,2011-01-01T18:06:49.775000Z,S,308
309,CN.OZB.,2011-01-01T18:30:41.075000Z,S,309
310,CN.OZB.,2011-01-01T19:58:07.075000Z,S,310
311,CN.OZB.,2011-01-01T23:27:32.500000Z,S,311
...,...,...,...,...
23571068,UW.FORK.,2015-12-31T15:58:55.590000Z,S,23571068
23571069,UW.FORK.,2015-12-31T18:33:30.515000Z,S,23571069
23571070,UW.FORK.,2015-12-31T18:35:30.465000Z,S,23571070
23571071,UW.FORK.,2015-12-31T21:03:58.340000Z,S,23571071


In [20]:
# Rename the S-pick columns to the generic names `station` and `time`.
s_column_names = {"station_id": "station", "trace_s_arrival": "time"}
_s_stas_picks_phase = s_stas_picks_phase.rename(columns=s_column_names)
_s_stas_picks_phase

Unnamed: 0,station,time,phase,pick_id
307,CN.OZB.,2011-01-01T09:37:35.925000Z,S,307
308,CN.OZB.,2011-01-01T18:06:49.775000Z,S,308
309,CN.OZB.,2011-01-01T18:30:41.075000Z,S,309
310,CN.OZB.,2011-01-01T19:58:07.075000Z,S,310
311,CN.OZB.,2011-01-01T23:27:32.500000Z,S,311
...,...,...,...,...
23571068,UW.FORK.,2015-12-31T15:58:55.590000Z,S,23571068
23571069,UW.FORK.,2015-12-31T18:33:30.515000Z,S,23571069
23571070,UW.FORK.,2015-12-31T18:35:30.465000Z,S,23571070
23571071,UW.FORK.,2015-12-31T21:03:58.340000Z,S,23571071


In [21]:
# Stack the P picks on top of the S picks, row-wise; the original row labels are kept.
p_and_s_tables = [_p_stas_picks_phase, _s_stas_picks_phase]
picks = pd.concat(p_and_s_tables, axis=0)
picks

Unnamed: 0,station,time,phase,pick_id
305,CN.OZB.,2011-01-01T09:37:24.350000Z,P,305
306,CN.OZB.,2011-01-01T20:50:40.675000Z,P,306
3240,UW.FORK.,2011-01-01T14:45:23.465000Z,P,3240
3241,UW.FORK.,2011-01-01T14:43:06.940000Z,P,3241
3242,UW.FORK.,2011-01-01T14:46:10.740000Z,P,3242
...,...,...,...,...
23571068,UW.FORK.,2015-12-31T15:58:55.590000Z,S,23571068
23571069,UW.FORK.,2015-12-31T18:33:30.515000Z,S,23571069
23571070,UW.FORK.,2015-12-31T18:35:30.465000Z,S,23571070
23571071,UW.FORK.,2015-12-31T21:03:58.340000Z,S,23571071


In [22]:
# Put the columns in station / phase / time / pick_id order and renumber the rows from zero.
# Selecting by column name instead of by position keeps this cell idempotent:
# re-running it no longer swaps `time` and `phase` back.
picks = picks[['station', 'phase', 'time', 'pick_id']].reset_index(drop=True)
picks

Unnamed: 0,station,phase,time,pick_id
0,CN.OZB.,P,2011-01-01T09:37:24.350000Z,305
1,CN.OZB.,P,2011-01-01T20:50:40.675000Z,306
2,UW.FORK.,P,2011-01-01T14:45:23.465000Z,3240
3,UW.FORK.,P,2011-01-01T14:43:06.940000Z,3241
4,UW.FORK.,P,2011-01-01T14:46:10.740000Z,3242
...,...,...,...,...
956482,UW.FORK.,S,2015-12-31T15:58:55.590000Z,23571068
956483,UW.FORK.,S,2015-12-31T18:33:30.515000Z,23571069
956484,UW.FORK.,S,2015-12-31T18:35:30.465000Z,23571070
956485,UW.FORK.,S,2015-12-31T21:03:58.340000Z,23571071


In [23]:
# Preview the first 20 picks.
picks.head(20)

Unnamed: 0,station,phase,time,pick_id
0,CN.OZB.,P,2011-01-01T09:37:24.350000Z,305
1,CN.OZB.,P,2011-01-01T20:50:40.675000Z,306
2,UW.FORK.,P,2011-01-01T14:45:23.465000Z,3240
3,UW.FORK.,P,2011-01-01T14:43:06.940000Z,3241
4,UW.FORK.,P,2011-01-01T14:46:10.740000Z,3242
5,UW.FORK.,P,2011-01-01T20:04:40.765000Z,3243
6,UW.FORK.,P,2011-01-01T14:40:03.290000Z,3244
7,UW.FORK.,P,2011-01-01T14:37:59.140000Z,3245
8,UW.FORK.,P,2011-01-01T14:36:28.015000Z,3246
9,UW.FORK.,P,2011-01-01T18:38:24.565000Z,3247


In [24]:
# Save these picks to the region's data folder.
picks_csv = f"../data/datasets_{region}/picks_{region}.csv"
picks.to_csv(picks_csv)