## Notebook for formatting data frames for phase association
Source:
1. https://chatgpt.com/


In [15]:
import pandas as pd
import numpy as np
import datetime
import glob
import os 

### 1.1 Load the station data frame

In [16]:
# Region tag that appears in every input and output file name below.
region = 'nwa_shelf_trench'

# Read one station table per year; the first CSV column is used as the index.
stas_2011, stas_2012, stas_2013, stas_2014, stas_2015 = (
    pd.read_csv(f'../data/datasets_{year}/stas_{year}_{region}.csv', index_col=0)
    for year in (2011, 2012, 2013, 2014, 2015)
)

# Print the yearly tables in chronological order.
for yearly_stas in (stas_2011, stas_2012, stas_2013, stas_2014, stas_2015):
    print(yearly_stas)

          id   longitude   latitude  elevation
0    7A.W04. -126.890800  48.531700    -2319.0
1    7A.W08. -126.401100  48.564499    -1028.0
2   7D.J73A. -126.192497  48.767700     -143.3
3   7D.J65A. -125.139603  47.891300     -165.2
4    7A.W06. -126.064697  48.182701    -1001.0
5    7A.W09. -126.577003  48.688000    -1156.0
6   7D.M03A. -126.103996  47.888302    -1818.0
7   NV.NC89. -126.848767  48.670537    -1258.0
8    7A.W01. -126.342499  48.079700    -1697.0
9   7D.M02A. -125.600403  48.306999     -139.0
10   7A.W10. -126.702202  48.787899    -1070.0
11   7A.W03. -126.728699  48.387501    -2489.0
12   7A.W07. -126.282204  48.374699    -1152.0
13   7A.W02. -126.496803  48.259300    -2133.0
14  7D.M01A. -126.722099  49.150398     -132.9
15  NV.NCBC. -126.175200  48.427500     -398.0
         id   longitude   latitude  elevation
0  7D.M03A. -126.103996  47.888302    -1818.0
1  7D.J73A. -126.192497  48.767700     -143.3
2  7D.J65A. -125.139603  47.891300     -165.2
3  7D.J58A. -125.

In [17]:
# Stack the yearly station tables, keep the first row seen for each station id,
# and renumber the remaining rows from zero.
yearly_tables = [stas_2011, stas_2012, stas_2013, stas_2014, stas_2015]
stas = (
    pd.concat(yearly_tables, ignore_index=True)
    .drop_duplicates(subset='id')
    .reset_index(drop=True)
)
stas

Unnamed: 0,id,longitude,latitude,elevation
0,7A.W04.,-126.8908,48.5317,-2319.0
1,7A.W08.,-126.4011,48.564499,-1028.0
2,7D.J73A.,-126.192497,48.7677,-143.3
3,7D.J65A.,-125.139603,47.8913,-165.2
4,7A.W06.,-126.064697,48.182701,-1001.0
5,7A.W09.,-126.577003,48.688,-1156.0
6,7D.M03A.,-126.103996,47.888302,-1818.0
7,NV.NC89.,-126.848767,48.670537,-1258.0
8,7A.W01.,-126.342499,48.0797,-1697.0
9,7D.M02A.,-125.600403,48.306999,-139.0


In [18]:
# Write the de-duplicated station table into this region's dataset folder.
stations_csv = f'../data/datasets_{region}/all_stations_{region}.csv'
stas.to_csv(stations_csv)

### 1.2 Concatenate data frames from several stations and format 

In [19]:
# Load the all-regions, all-years pick table; the first CSV column is used as the index.
all_picks_csv = '../data/datasets_all_regions/all_picks_all_years_for_assoc.csv'
df = pd.read_csv(all_picks_csv, index_col=0)

In [20]:
# Build a "<network>.<station>." identifier for every pick, matching the
# format of the `id` column in the station table.
network_code = df['station_network_code']
station_code = df['station_code']
df['station_id'] = network_code + '.' + station_code + '.'

# Keep only the picks recorded at one of this region's stations.
region_station_ids = stas['id'].tolist()
in_region = df['station_id'].isin(region_station_ids)
df = df[in_region]

In [21]:
# Number of distinct stations that still have picks after the region filter.
unique_station_ids = df['station_id'].drop_duplicates()
unique_station_ids.size

25

In [22]:
# Number of pick rows left after the region filter.
df.shape[0]

834063

In [23]:
# df.to_csv('../data/datasets_all_years/all_picks_all_years_pnsn_jdf_for_picking.csv')

In [24]:
# # Remove picks from the following noisy stations (e.g. 'FN05A', 'YOUB' and 'MGB')
# df = df[~df['station_code'].isin(['FN05A', 'YOUB', 'MGB','FN14A','FN07A','J41A','J49A',
# 'J25B','J33B','FN18A','FN08A','M09B','G17B'])]

In [25]:
# # Remove picks with the following stations due to the noise
# df = df[~df['station_code'].isin(['FN05A', 'YOUB', 'MGB',
#                                   'J41A','J33B','M09B','FN01A','FN08A',
#                                   'FN12A','FN18A','J26A','J34A', 'J41A',
#                                   'M02A','FS03B','FS05B','FS08B','G33B',
#                                   'J17B','M10B','M13B','M18B'
#                                  ])]

In [26]:
# Row count just before saving (the station-removal cells above are commented out).
df.shape[0]

834063

In [27]:
# Save the region-filtered picks to this region's dataset folder.
region_picks_csv = f'../data/datasets_{region}/all_picks_all_years_for_assoc_{region}.csv'
df.to_csv(region_picks_csv)

### 1.3 Create a CSV file of picks for the association

In [39]:
# Reload the region-filtered picks from disk; the first CSV column is used as the index.
region_picks_path = f'../data/datasets_{region}/all_picks_all_years_for_assoc_{region}.csv'
df = pd.read_csv(region_picks_path, index_col=0)

In [40]:
# Label every row that has a P arrival time with phase "P".
# `p_phase` only holds the label string; the assignment into `df` is what matters.
p_phase = "P"
has_p_arrival = df['trace_p_arrival'].notna()
df.loc[has_p_arrival, 'phase'] = p_phase

In [41]:
# Keep only the rows with a P arrival, and only the station, arrival-time,
# phase and pick-id columns.
p_rows = df['trace_p_arrival'].notna()
p_columns = ['station_id', 'trace_p_arrival', 'phase', 'pick_id']
p_stas_picks_phase = df.loc[p_rows, p_columns]
p_stas_picks_phase

Unnamed: 0,station_id,trace_p_arrival,phase,pick_id
696,7A.W09.,2011-01-01T11:02:50.284000Z,P,696
702,7A.W09.,2011-01-01T07:04:59.964000Z,P,702
703,7A.W09.,2011-01-01T07:08:22.204000Z,P,703
704,7A.W09.,2011-01-01T07:13:19.284000Z,P,704
705,7A.W09.,2011-01-01T07:16:47.204000Z,P,705
...,...,...,...,...
23558915,NV.NC89.,2015-12-26T09:34:12.325000Z,P,23558915
23558916,NV.NC89.,2015-12-26T09:34:49.565000Z,P,23558916
23558917,NV.NC89.,2015-12-26T09:38:37.500000Z,P,23558917
23558918,NV.NC89.,2015-12-26T09:39:15.965000Z,P,23558918


In [42]:
# Rename the P-pick columns to the generic names `station` and `time`.
p_column_names = {"station_id": "station", "trace_p_arrival": "time"}
_p_stas_picks_phase = p_stas_picks_phase.rename(columns=p_column_names)
_p_stas_picks_phase

Unnamed: 0,station,time,phase,pick_id
696,7A.W09.,2011-01-01T11:02:50.284000Z,P,696
702,7A.W09.,2011-01-01T07:04:59.964000Z,P,702
703,7A.W09.,2011-01-01T07:08:22.204000Z,P,703
704,7A.W09.,2011-01-01T07:13:19.284000Z,P,704
705,7A.W09.,2011-01-01T07:16:47.204000Z,P,705
...,...,...,...,...
23558915,NV.NC89.,2015-12-26T09:34:12.325000Z,P,23558915
23558916,NV.NC89.,2015-12-26T09:34:49.565000Z,P,23558916
23558917,NV.NC89.,2015-12-26T09:38:37.500000Z,P,23558917
23558918,NV.NC89.,2015-12-26T09:39:15.965000Z,P,23558918


In [43]:
# Label every row that has an S arrival time with phase "S".
# NOTE: a row that has both a P and an S arrival ends up labelled "S" in `df`
# after this line runs.
s_phase = "S"
has_s_arrival = df['trace_s_arrival'].notna()
df.loc[has_s_arrival, 'phase'] = s_phase

In [44]:
# Keep only the rows with an S arrival, and only the station, arrival-time,
# phase and pick-id columns.
s_rows = df['trace_s_arrival'].notna()
s_columns = ['station_id', 'trace_s_arrival', 'phase', 'pick_id']
s_stas_picks_phase = df.loc[s_rows, s_columns]
s_stas_picks_phase

Unnamed: 0,station_id,trace_s_arrival,phase,pick_id
697,7A.W09.,2011-01-01T19:18:49.764000Z,S,697
698,7A.W09.,2011-01-01T19:18:07.624000Z,S,698
699,7A.W09.,2011-01-01T19:17:21.804000Z,S,699
700,7A.W09.,2011-01-01T19:15:01.864000Z,S,700
701,7A.W09.,2011-01-01T19:12:10.264000Z,S,701
...,...,...,...,...
23559071,NV.NC89.,2015-12-26T16:17:33.725000Z,S,23559071
23559072,NV.NC89.,2015-12-26T16:20:39.305000Z,S,23559072
23559073,NV.NC89.,2015-12-26T16:31:37.875000Z,S,23559073
23559074,NV.NC89.,2015-12-26T16:35:09.400000Z,S,23559074


In [45]:
# Rename the S-pick columns to the generic names `station` and `time`,
# so they line up with the renamed P-pick columns.
s_column_names = {"station_id": "station", "trace_s_arrival": "time"}
_s_stas_picks_phase = s_stas_picks_phase.rename(columns=s_column_names)
_s_stas_picks_phase

Unnamed: 0,station,time,phase,pick_id
697,7A.W09.,2011-01-01T19:18:49.764000Z,S,697
698,7A.W09.,2011-01-01T19:18:07.624000Z,S,698
699,7A.W09.,2011-01-01T19:17:21.804000Z,S,699
700,7A.W09.,2011-01-01T19:15:01.864000Z,S,700
701,7A.W09.,2011-01-01T19:12:10.264000Z,S,701
...,...,...,...,...
23559071,NV.NC89.,2015-12-26T16:17:33.725000Z,S,23559071
23559072,NV.NC89.,2015-12-26T16:20:39.305000Z,S,23559072
23559073,NV.NC89.,2015-12-26T16:31:37.875000Z,S,23559073
23559074,NV.NC89.,2015-12-26T16:35:09.400000Z,S,23559074


In [46]:
# Stack the P picks on top of the S picks row-wise; the original row labels are kept.
p_then_s = [_p_stas_picks_phase, _s_stas_picks_phase]
picks = pd.concat(p_then_s, axis=0)
picks

Unnamed: 0,station,time,phase,pick_id
696,7A.W09.,2011-01-01T11:02:50.284000Z,P,696
702,7A.W09.,2011-01-01T07:04:59.964000Z,P,702
703,7A.W09.,2011-01-01T07:08:22.204000Z,P,703
704,7A.W09.,2011-01-01T07:13:19.284000Z,P,704
705,7A.W09.,2011-01-01T07:16:47.204000Z,P,705
...,...,...,...,...
23559071,NV.NC89.,2015-12-26T16:17:33.725000Z,S,23559071
23559072,NV.NC89.,2015-12-26T16:20:39.305000Z,S,23559072
23559073,NV.NC89.,2015-12-26T16:31:37.875000Z,S,23559073
23559074,NV.NC89.,2015-12-26T16:35:09.400000Z,S,23559074


In [47]:
# Put the columns in station, phase, time, pick_id order and renumber the rows
# from zero.
# Columns are selected by name rather than by position so that this cell is
# idempotent: re-running it does not swap `time` and `phase` back, and a change
# in the upstream column order cannot silently scramble the output.
picks = picks[['station', 'phase', 'time', 'pick_id']].reset_index(drop=True)
picks

Unnamed: 0,station,phase,time,pick_id
0,7A.W09.,P,2011-01-01T11:02:50.284000Z,696
1,7A.W09.,P,2011-01-01T07:04:59.964000Z,702
2,7A.W09.,P,2011-01-01T07:08:22.204000Z,703
3,7A.W09.,P,2011-01-01T07:13:19.284000Z,704
4,7A.W09.,P,2011-01-01T07:16:47.204000Z,705
...,...,...,...,...
834058,NV.NC89.,S,2015-12-26T16:17:33.725000Z,23559071
834059,NV.NC89.,S,2015-12-26T16:20:39.305000Z,23559072
834060,NV.NC89.,S,2015-12-26T16:31:37.875000Z,23559073
834061,NV.NC89.,S,2015-12-26T16:35:09.400000Z,23559074


In [48]:
# Preview the first 20 picks.
picks.head(20)

Unnamed: 0,station,phase,time,pick_id
0,7A.W09.,P,2011-01-01T11:02:50.284000Z,696
1,7A.W09.,P,2011-01-01T07:04:59.964000Z,702
2,7A.W09.,P,2011-01-01T07:08:22.204000Z,703
3,7A.W09.,P,2011-01-01T07:13:19.284000Z,704
4,7A.W09.,P,2011-01-01T07:16:47.204000Z,705
5,7A.W09.,P,2011-01-01T07:18:11.844000Z,706
6,7A.W09.,P,2011-01-01T07:48:40.704000Z,707
7,7A.W09.,P,2011-01-01T07:51:33.464000Z,708
8,7A.W09.,P,2011-01-01T08:03:42.524000Z,709
9,7A.W09.,P,2011-01-01T08:07:20.824000Z,712


In [49]:
# Save the combined P and S picks to this region's dataset folder.
picks_csv = f"../data/datasets_{region}/picks_{region}.csv"
picks.to_csv(picks_csv)