## Notebook for formatting data frames for phase association
Source:
1. https://chatgpt.com/


In [1]:
import pandas as pd
import numpy as np
import datetime
import glob
import os 

### 1.1 Load the station data frame

In [2]:
region = 'pnsn_jdf'

# Load one station table per year (2011-2015) from
# ../data/datasets_<year>/stas_<year>_<region>.csv, using the first CSV column
# as the index. A single comprehension replaces five copy-pasted read_csv calls.
stas_by_year = {
    year: pd.read_csv(f'../data/datasets_{year}/stas_{year}_{region}.csv', index_col=0)
    for year in range(2011, 2016)
}

# Keep the per-year names: the concatenation cell that follows refers to them.
stas_2011 = stas_by_year[2011]
stas_2012 = stas_by_year[2012]
stas_2013 = stas_by_year[2013]
stas_2014 = stas_by_year[2014]
stas_2015 = stas_by_year[2015]

# Print each yearly table in chronological order (dicts preserve insertion order).
for yearly_stas in stas_by_year.values():
    print(yearly_stas)

         id   longitude   latitude  elevation
0  7D.J59A. -126.415298  47.509602    -2371.0
1  7D.J51A. -126.164101  46.797001    -2610.0
2  7D.G03A. -126.162498  40.059101    -4113.0
3  7D.J35A. -126.266800  45.498901    -2662.0
4  7D.J43A. -126.172096  46.137798    -2654.4
           id   longitude   latitude  elevation
0   7D.FS02B. -124.797997  40.351101    -1402.8
1    7D.J51A. -126.164101  46.797001    -2610.0
2   7D.FS06B. -124.785301  40.381199    -2198.0
3    7D.G11B. -126.376404  40.687500    -3123.0
4   7D.FS09B. -124.808502  40.438702    -2161.0
5    7D.J59A. -126.415298  47.509602    -2371.0
6   7D.FS13B. -124.806503  40.493099    -2332.0
7   7D.FS01B. -124.949203  40.326801     -940.0
8    7D.G19B. -125.773598  41.307400    -3071.0
9    7D.J43A. -126.172096  46.137798    -2654.4
10   7D.G26B. -125.177498  41.924900    -2357.0
11  7D.FS20B. -125.031097  40.389500    -2378.0
12   7D.G28B. -126.733902  41.942799    -3327.0
13   7D.G02B. -125.296898  40.048599    -1920.0
14  

In [3]:
# Stack the yearly station tables, keep one row per station id
# (drop_duplicates keeps the first occurrence by default), and renumber the rows.
yearly_station_frames = [stas_2011, stas_2012, stas_2013, stas_2014, stas_2015]
stas = (
    pd.concat(yearly_station_frames, ignore_index=True)
    .drop_duplicates(subset='id')
    .reset_index(drop=True)
)
stas

Unnamed: 0,id,longitude,latitude,elevation
0,7D.J59A.,-126.415298,47.509602,-2371.0
1,7D.J51A.,-126.164101,46.797001,-2610.0
2,7D.G03A.,-126.162498,40.059101,-4113.0
3,7D.J35A.,-126.266800,45.498901,-2662.0
4,7D.J43A.,-126.172096,46.137798,-2654.4
...,...,...,...,...
74,7D.G11D.,-126.468300,40.784500,-3145.0
75,7D.G03D.,-126.161201,40.058701,-4056.5
76,7D.G27D.,-125.981697,42.001301,-2939.0
77,Z5.GB321.,-125.911697,40.392200,-2283.0


In [4]:
# Write the de-duplicated station table for this region to disk.
all_stations_csv = f'../data/datasets_{region}/all_stations_{region}.csv'
stas.to_csv(all_stations_csv)

### 1.2 Concatenate data frames from several stations and format 

In [5]:
# Read the combined picks table; the first CSV column becomes the index.
all_picks_csv = '../data/datasets_all_regions/all_picks_all_years_for_assoc.csv'
df = pd.read_csv(all_picks_csv, index_col=0)

In [6]:
# Build "<network>.<station>." identifiers in the same format as stas['id'],
# then keep only the rows whose station appears in the station table.
network_dot_station = df['station_network_code'] + '.' + df['station_code']
df['station_id'] = network_dot_station + '.'
known_station_ids = list(stas['id'])
is_known_station = df['station_id'].isin(known_station_ids)
df = df[is_known_station]

In [7]:
# Count the distinct station ids left after the station filter.
unique_station_ids = df['station_id'].drop_duplicates()
len(unique_station_ids)

79

In [8]:
# Number of rows in the filtered frame.
df.shape[0]

3762601

In [9]:
# df.to_csv('../data/datasets_all_years/all_picks_all_years_pnsn_jdf_for_picking.csv')

In [10]:
# # Remove picks from the following noisy stations (e.g. 'FN05A', 'YOUB' and 'MGB')
# df = df[~df['station_code'].isin(['FN05A', 'YOUB', 'MGB','FN14A','FN07A','J41A','J49A',
# 'J25B','J33B','FN18A','FN08A','M09B','G17B'])]

In [11]:
# # Remove picks from the following noisy stations
# df = df[~df['station_code'].isin(['FN05A', 'YOUB', 'MGB',
#                                   'J41A','J33B','M09B','FN01A','FN08A',
#                                   'FN12A','FN18A','J26A','J34A', 'J41A',
#                                   'M02A','FS03B','FS05B','FS08B','G33B',
#                                   'J17B','M10B','M13B','M18B'
#                                  ])]

In [12]:
# Row count of the frame just before it is written out.
df.shape[0]

3762601

In [13]:
# Write the region-filtered picks table to disk.
region_picks_csv = f'../data/datasets_{region}/all_picks_all_years_for_assoc_{region}.csv'
df.to_csv(region_picks_csv)

### 1.3 Create a CSV file for picks for the association

In [14]:
# Reload the region-filtered picks from disk; the first CSV column becomes the index.
region_picks_path = f'../data/datasets_{region}/all_picks_all_years_for_assoc_{region}.csv'
df = pd.read_csv(region_picks_path, index_col=0)

In [15]:
# Label every row that has a P arrival time with phase "P".
# Note: p_phase holds only the label string "P", not the selected rows.
p_phase = "P"
has_p_arrival = df['trace_p_arrival'].notna()
df.loc[has_p_arrival, 'phase'] = p_phase

In [16]:
# Build the P-pick table: rows that have a P arrival time, restricted to the
# columns used by the following cells.
# The phase label is set explicitly here rather than trusted from df['phase']:
# a later cell overwrites that shared column with "S" for rows that have an S
# arrival, so re-running this cell afterwards would otherwise mislabel any row
# that carries both arrivals.
has_p_arrival = df['trace_p_arrival'].notna()
p_stas_picks_phase = (
    df.loc[has_p_arrival, ['station_id', 'trace_p_arrival', 'phase', 'pick_id']]
    .assign(phase="P")
)
p_stas_picks_phase

Unnamed: 0,station_id,trace_p_arrival,phase,pick_id
1361617,7D.J43A.,2011-10-19T22:42:41.009000Z,P,1361617
1372478,7D.J43A.,2011-10-20T21:31:11.089500Z,P,1372478
1372479,7D.J43A.,2011-10-20T18:30:20.749500Z,P,1372479
1372485,7D.J43A.,2011-10-20T18:30:07.509500Z,P,1372485
1372486,7D.J43A.,2011-10-20T21:30:58.849500Z,P,1372486
...,...,...,...,...
23562560,OO.HYSB1.,2015-12-28T18:47:37.759676Z,P,23562560
23562561,OO.HYSB1.,2015-12-28T18:10:53.320013Z,P,23562561
23562562,OO.HYSB1.,2015-12-28T17:26:54.490415Z,P,23562562
23562563,OO.HYSB1.,2015-12-28T17:18:58.090488Z,P,23562563


In [17]:
# Rename the P-pick columns to the generic names "station" and "time".
p_column_names = {"station_id": "station", "trace_p_arrival": "time"}
_p_stas_picks_phase = p_stas_picks_phase.rename(columns=p_column_names)
_p_stas_picks_phase

Unnamed: 0,station,time,phase,pick_id
1361617,7D.J43A.,2011-10-19T22:42:41.009000Z,P,1361617
1372478,7D.J43A.,2011-10-20T21:31:11.089500Z,P,1372478
1372479,7D.J43A.,2011-10-20T18:30:20.749500Z,P,1372479
1372485,7D.J43A.,2011-10-20T18:30:07.509500Z,P,1372485
1372486,7D.J43A.,2011-10-20T21:30:58.849500Z,P,1372486
...,...,...,...,...
23562560,OO.HYSB1.,2015-12-28T18:47:37.759676Z,P,23562560
23562561,OO.HYSB1.,2015-12-28T18:10:53.320013Z,P,23562561
23562562,OO.HYSB1.,2015-12-28T17:26:54.490415Z,P,23562562
23562563,OO.HYSB1.,2015-12-28T17:18:58.090488Z,P,23562563


In [18]:
# Label every row that has an S arrival time with phase "S".
# Note: s_phase holds only the label string "S", not the selected rows.
s_phase = "S"
has_s_arrival = df['trace_s_arrival'].notna()
df.loc[has_s_arrival, 'phase'] = s_phase

In [19]:
# Build the S-pick table: rows that have an S arrival time, restricted to the
# columns used by the following cells.
# The phase label is set explicitly here rather than trusted from df['phase']:
# the P-labelling cell writes "P" into that same shared column, so if it is
# re-run before this cell, any row that carries both arrivals would otherwise
# be mislabelled.
has_s_arrival = df['trace_s_arrival'].notna()
s_stas_picks_phase = (
    df.loc[has_s_arrival, ['station_id', 'trace_s_arrival', 'phase', 'pick_id']]
    .assign(phase="S")
)
s_stas_picks_phase

Unnamed: 0,station_id,trace_s_arrival,phase,pick_id
1361616,7D.J43A.,2011-10-19T22:42:41.349000Z,S,1361616
1372480,7D.J43A.,2011-10-20T18:48:36.169500Z,S,1372480
1372481,7D.J43A.,2011-10-20T20:15:01.229500Z,S,1372481
1372482,7D.J43A.,2011-10-20T21:25:04.689500Z,S,1372482
1372483,7D.J43A.,2011-10-20T21:30:20.469500Z,S,1372483
...,...,...,...,...
23562550,OO.HYSB1.,2015-12-28T02:27:18.963651Z,S,23562550
23562551,OO.HYSB1.,2015-12-28T02:20:46.603711Z,S,23562551
23562552,OO.HYSB1.,2015-12-28T00:47:05.304569Z,S,23562552
23562553,OO.HYSB1.,2015-12-28T00:11:33.859894Z,S,23562553


In [20]:
# Rename the S-pick columns to the generic names "station" and "time".
s_column_names = {"station_id": "station", "trace_s_arrival": "time"}
_s_stas_picks_phase = s_stas_picks_phase.rename(columns=s_column_names)
_s_stas_picks_phase

Unnamed: 0,station,time,phase,pick_id
1361616,7D.J43A.,2011-10-19T22:42:41.349000Z,S,1361616
1372480,7D.J43A.,2011-10-20T18:48:36.169500Z,S,1372480
1372481,7D.J43A.,2011-10-20T20:15:01.229500Z,S,1372481
1372482,7D.J43A.,2011-10-20T21:25:04.689500Z,S,1372482
1372483,7D.J43A.,2011-10-20T21:30:20.469500Z,S,1372483
...,...,...,...,...
23562550,OO.HYSB1.,2015-12-28T02:27:18.963651Z,S,23562550
23562551,OO.HYSB1.,2015-12-28T02:20:46.603711Z,S,23562551
23562552,OO.HYSB1.,2015-12-28T00:47:05.304569Z,S,23562552
23562553,OO.HYSB1.,2015-12-28T00:11:33.859894Z,S,23562553


In [21]:
# Stack the P picks on top of the S picks (row-wise; original index labels are kept).
p_then_s_frames = [_p_stas_picks_phase, _s_stas_picks_phase]
picks = pd.concat(p_then_s_frames, axis=0)
picks

Unnamed: 0,station,time,phase,pick_id
1361617,7D.J43A.,2011-10-19T22:42:41.009000Z,P,1361617
1372478,7D.J43A.,2011-10-20T21:31:11.089500Z,P,1372478
1372479,7D.J43A.,2011-10-20T18:30:20.749500Z,P,1372479
1372485,7D.J43A.,2011-10-20T18:30:07.509500Z,P,1372485
1372486,7D.J43A.,2011-10-20T21:30:58.849500Z,P,1372486
...,...,...,...,...
23562550,OO.HYSB1.,2015-12-28T02:27:18.963651Z,S,23562550
23562551,OO.HYSB1.,2015-12-28T02:20:46.603711Z,S,23562551
23562552,OO.HYSB1.,2015-12-28T00:47:05.304569Z,S,23562552
23562553,OO.HYSB1.,2015-12-28T00:11:33.859894Z,S,23562553


In [22]:
# Put the columns in the order station, phase, time, pick_id and renumber the rows.
# Selecting by column name instead of by position makes this cell safe to re-run:
# a positional swap would move phase and time back on a second execution.
picks = picks[['station', 'phase', 'time', 'pick_id']].reset_index(drop=True)
picks

Unnamed: 0,station,phase,time,pick_id
0,7D.J43A.,P,2011-10-19T22:42:41.009000Z,1361617
1,7D.J43A.,P,2011-10-20T21:31:11.089500Z,1372478
2,7D.J43A.,P,2011-10-20T18:30:20.749500Z,1372479
3,7D.J43A.,P,2011-10-20T18:30:07.509500Z,1372485
4,7D.J43A.,P,2011-10-20T21:30:58.849500Z,1372486
...,...,...,...,...
3762596,OO.HYSB1.,S,2015-12-28T02:27:18.963651Z,23562550
3762597,OO.HYSB1.,S,2015-12-28T02:20:46.603711Z,23562551
3762598,OO.HYSB1.,S,2015-12-28T00:47:05.304569Z,23562552
3762599,OO.HYSB1.,S,2015-12-28T00:11:33.859894Z,23562553


In [23]:
# Preview the first 20 rows of the combined picks table.
picks.iloc[:20]

Unnamed: 0,station,phase,time,pick_id
0,7D.J43A.,P,2011-10-19T22:42:41.009000Z,1361617
1,7D.J43A.,P,2011-10-20T21:31:11.089500Z,1372478
2,7D.J43A.,P,2011-10-20T18:30:20.749500Z,1372479
3,7D.J43A.,P,2011-10-20T18:30:07.509500Z,1372485
4,7D.J43A.,P,2011-10-20T21:30:58.849500Z,1372486
5,7D.J43A.,P,2011-10-20T17:28:04.729500Z,1372487
6,7D.J43A.,P,2011-10-20T21:33:45.749500Z,1372488
7,7D.J43A.,P,2011-10-20T21:50:56.449500Z,1372489
8,7D.J43A.,P,2011-10-20T05:56:07.949500Z,1372490
9,7D.J43A.,P,2011-10-20T22:11:09.189500Z,1372491


In [24]:
# Save these picks to the region's data folder.
picks_csv = f"../data/datasets_{region}/picks_{region}.csv"
picks.to_csv(picks_csv)