## Notebook for formatting data frames for phase association
Source:
1. https://chatgpt.com/


In [1]:
import pandas as pd
import numpy as np
import datetime
import glob
import os 

### 1.1 Load the per-year station data frames and merge them

In [2]:
# Study region; it selects the per-year station files read below and is
# reused by later cells to build the region-specific paths.
region = 'or_shelf_trench'


def load_station_table(year):
    """Read the station table of one year for the current ``region``.

    Parameters
    ----------
    year : int
        Year of the dataset; selects the ``../data/datasets_<year>/`` folder.

    Returns
    -------
    pandas.DataFrame
        Contents of ``stas_<year>_<region>.csv`` with the first CSV column
        used as the index.
    """
    return pd.read_csv(f'../data/datasets_{year}/stas_{year}_{region}.csv', index_col=0)


# One frame per year. The individual names are kept because the next cell
# concatenates them by name.
stas_2011 = load_station_table(2011)
stas_2012 = load_station_table(2012)
stas_2013 = load_station_table(2013)
stas_2014 = load_station_table(2014)
stas_2015 = load_station_table(2015)

# Print each yearly table in chronological order.
for yearly_stas in (stas_2011, stas_2012, stas_2013, stas_2014, stas_2015):
    print(yearly_stas)

         id   longitude   latitude  elevation
0  7D.J33A. -124.570801  45.106602     -348.7
1  7D.J41A. -124.537201  45.811901     -175.0
2  7D.J42A. -125.299698  45.933102    -1540.0
3  7D.M08A. -124.895302  44.118698     -126.4
4  7D.M07A. -125.116798  44.898701    -1356.5
5  7D.J25A. -124.621597  44.472900     -142.8
          id   longitude   latitude  elevation
0   7D.J25A. -124.621597  44.472900     -142.8
1   7D.J09B. -124.726997  43.151001     -252.0
2   7D.J42A. -125.299698  45.933102    -1540.0
3   7D.J41A. -124.537201  45.811901     -175.0
4   7D.J25B. -124.621696  44.471298     -147.0
5   7D.M08A. -124.895302  44.118698     -126.4
6   7D.J33B. -124.570602  45.106602     -350.0
7   7D.J18B. -125.466003  44.008301    -3047.0
8   7D.M09B. -125.058899  44.249699     -914.0
9   7D.M07A. -125.116798  44.898701    -1356.5
10  7D.J10B. -125.543503  43.349400    -3093.0
11  7D.J33A. -124.570801  45.106602     -348.7
12  7D.J26A. -125.466400  44.654701    -2864.0
13  7D.M06A. -124.92

In [3]:
# Merge the yearly station tables, keep the first occurrence of every
# station id, and renumber the rows from 0.
yearly_tables = [stas_2011, stas_2012, stas_2013, stas_2014, stas_2015]
stas = (
    pd.concat(yearly_tables, ignore_index=True)
    .drop_duplicates(subset='id')
    .reset_index(drop=True)
)
stas

Unnamed: 0,id,longitude,latitude,elevation
0,7D.J33A.,-124.570801,45.106602,-348.7
1,7D.J41A.,-124.537201,45.811901,-175.0
2,7D.J42A.,-125.299698,45.933102,-1540.0
3,7D.M08A.,-124.895302,44.118698,-126.4
4,7D.M07A.,-125.116798,44.898701,-1356.5
5,7D.J25A.,-124.621597,44.4729,-142.8
6,7D.J09B.,-124.726997,43.151001,-252.0
7,7D.J25B.,-124.621696,44.471298,-147.0
8,7D.J33B.,-124.570602,45.106602,-350.0
9,7D.J18B.,-125.466003,44.008301,-3047.0


In [4]:
# Write the merged station table for this region. The target folder is
# created first because DataFrame.to_csv does not create missing directories.
region_dir = f'../data/datasets_{region}'
os.makedirs(region_dir, exist_ok=True)
stas.to_csv(f'{region_dir}/all_stations_{region}.csv')

### 1.2 Load the picks from all regions and keep only this region's stations

In [5]:
# Picks from every region and year; the first CSV column becomes the index.
df = pd.read_csv(
    '../data/datasets_all_regions/all_picks_all_years_for_assoc.csv',
    index_col=0,
)

In [6]:
# Build "<network>.<station>." ids, the same format as the `id` column of
# the station table, then keep only picks recorded at this region's stations.
network_dot_station = df['station_network_code'] + '.' + df['station_code']
df['station_id'] = network_dot_station + '.'
in_region = df['station_id'].isin(stas['id'])
df = df[in_region]

In [7]:
# Number of distinct station ids left after the region filter
df['station_id'].nunique(dropna=False)

38

In [8]:
# Number of picks left after the region filter
df.shape[0]

3662214

In [9]:
# df.to_csv('../data/datasets_all_years/all_picks_all_years_pnsn_jdf_for_picking.csv')

In [10]:
# # Remove picks from the following stations because they are noisy
# df = df[~df['station_code'].isin(['FN05A', 'YOUB', 'MGB','FN14A','FN07A','J41A','J49A',
# 'J25B','J33B','FN18A','FN08A','M09B','G17B'])]

In [11]:
# # Remove picks with the following stations due to the noise
# df = df[~df['station_code'].isin(['FN05A', 'YOUB', 'MGB',
#                                   'J41A','J33B','M09B','FN01A','FN08A',
#                                   'FN12A','FN18A','J26A','J34A', 'J41A',
#                                   'M02A','FS03B','FS05B','FS08B','G33B',
#                                   'J17B','M10B','M13B','M18B'
#                                  ])]

In [12]:
# Row count again; unchanged, since the station-removal cells above are commented out
df.shape[0]

3662214

In [13]:
# Write the region-filtered picks; the next section reads this file back in.
df.to_csv(
    f'../data/datasets_{region}/all_picks_all_years_for_assoc_{region}.csv'
)

### 1.3 Create a CSV file for picks for the association

In [14]:
# Reload the region-filtered picks written at the end of the previous section.
df = pd.read_csv(
    f'../data/datasets_{region}/all_picks_all_years_for_assoc_{region}.csv',
    index_col=0,
)

In [15]:
# Label every row that carries a P arrival time as a "P" pick.
has_p_arrival = df['trace_p_arrival'].notna()
df.loc[has_p_arrival, 'phase'] = "P"

In [16]:
# Pull out the rows with a P arrival, keeping the station id, the arrival
# time, the phase label and the pick id.
p_rows = df['trace_p_arrival'].notna()
p_columns = ['station_id', 'trace_p_arrival', 'phase', 'pick_id']
p_stas_picks_phase = df.loc[p_rows, p_columns]
p_stas_picks_phase

Unnamed: 0,station_id,trace_p_arrival,phase,pick_id
619458,7D.J41A.,2011-07-26T10:54:58.807000Z,P,619458
619490,7D.J41A.,2011-07-26T09:43:46.095000Z,P,619490
619491,7D.J41A.,2011-07-26T09:43:20.487000Z,P,619491
619492,7D.J41A.,2011-07-26T09:42:54.903000Z,P,619492
619493,7D.J41A.,2011-07-26T09:39:56.711000Z,P,619493
...,...,...,...,...
23562560,OO.HYSB1.,2015-12-28T18:47:37.759676Z,P,23562560
23562561,OO.HYSB1.,2015-12-28T18:10:53.320013Z,P,23562561
23562562,OO.HYSB1.,2015-12-28T17:26:54.490415Z,P,23562562
23562563,OO.HYSB1.,2015-12-28T17:18:58.090488Z,P,23562563


In [17]:
# Give the P picks the generic column names that the S picks also use
p_column_names = {"station_id": "station", "trace_p_arrival": "time"}
_p_stas_picks_phase = p_stas_picks_phase.rename(columns=p_column_names)
_p_stas_picks_phase

Unnamed: 0,station,time,phase,pick_id
619458,7D.J41A.,2011-07-26T10:54:58.807000Z,P,619458
619490,7D.J41A.,2011-07-26T09:43:46.095000Z,P,619490
619491,7D.J41A.,2011-07-26T09:43:20.487000Z,P,619491
619492,7D.J41A.,2011-07-26T09:42:54.903000Z,P,619492
619493,7D.J41A.,2011-07-26T09:39:56.711000Z,P,619493
...,...,...,...,...
23562560,OO.HYSB1.,2015-12-28T18:47:37.759676Z,P,23562560
23562561,OO.HYSB1.,2015-12-28T18:10:53.320013Z,P,23562561
23562562,OO.HYSB1.,2015-12-28T17:26:54.490415Z,P,23562562
23562563,OO.HYSB1.,2015-12-28T17:18:58.090488Z,P,23562563


In [18]:
# Label every row that carries an S arrival time as an "S" pick.
# This overwrites whatever label the row already had (a row with both
# arrivals ends up as "S"), so it must run after the P picks have been
# extracted from df.
has_s_arrival = df['trace_s_arrival'].notna()
df.loc[has_s_arrival, 'phase'] = "S"

In [19]:
# Pull out the rows with an S arrival, keeping the station id, the arrival
# time, the phase label and the pick id.
s_rows = df['trace_s_arrival'].notna()
s_columns = ['station_id', 'trace_s_arrival', 'phase', 'pick_id']
s_stas_picks_phase = df.loc[s_rows, s_columns]
s_stas_picks_phase

Unnamed: 0,station_id,trace_s_arrival,phase,pick_id
618083,7D.J41A.,2011-07-25T23:51:13.295000Z,S,618083
618084,7D.J41A.,2011-07-25T23:52:36.007000Z,S,618084
619442,7D.J41A.,2011-07-26T04:39:50.983000Z,S,619442
619443,7D.J41A.,2011-07-26T04:41:09.479000Z,S,619443
619444,7D.J41A.,2011-07-26T04:53:35.639000Z,S,619444
...,...,...,...,...
23562550,OO.HYSB1.,2015-12-28T02:27:18.963651Z,S,23562550
23562551,OO.HYSB1.,2015-12-28T02:20:46.603711Z,S,23562551
23562552,OO.HYSB1.,2015-12-28T00:47:05.304569Z,S,23562552
23562553,OO.HYSB1.,2015-12-28T00:11:33.859894Z,S,23562553


In [20]:
# Give the S picks the generic column names that the P picks also use
s_column_names = {"station_id": "station", "trace_s_arrival": "time"}
_s_stas_picks_phase = s_stas_picks_phase.rename(columns=s_column_names)
_s_stas_picks_phase

Unnamed: 0,station,time,phase,pick_id
618083,7D.J41A.,2011-07-25T23:51:13.295000Z,S,618083
618084,7D.J41A.,2011-07-25T23:52:36.007000Z,S,618084
619442,7D.J41A.,2011-07-26T04:39:50.983000Z,S,619442
619443,7D.J41A.,2011-07-26T04:41:09.479000Z,S,619443
619444,7D.J41A.,2011-07-26T04:53:35.639000Z,S,619444
...,...,...,...,...
23562550,OO.HYSB1.,2015-12-28T02:27:18.963651Z,S,23562550
23562551,OO.HYSB1.,2015-12-28T02:20:46.603711Z,S,23562551
23562552,OO.HYSB1.,2015-12-28T00:47:05.304569Z,S,23562552
23562553,OO.HYSB1.,2015-12-28T00:11:33.859894Z,S,23562553


In [21]:
# Stack the P picks on top of the S picks (row-wise); the original row
# labels are kept at this stage.
picks = pd.concat([_p_stas_picks_phase, _s_stas_picks_phase])
picks

Unnamed: 0,station,time,phase,pick_id
619458,7D.J41A.,2011-07-26T10:54:58.807000Z,P,619458
619490,7D.J41A.,2011-07-26T09:43:46.095000Z,P,619490
619491,7D.J41A.,2011-07-26T09:43:20.487000Z,P,619491
619492,7D.J41A.,2011-07-26T09:42:54.903000Z,P,619492
619493,7D.J41A.,2011-07-26T09:39:56.711000Z,P,619493
...,...,...,...,...
23562550,OO.HYSB1.,2015-12-28T02:27:18.963651Z,S,23562550
23562551,OO.HYSB1.,2015-12-28T02:20:46.603711Z,S,23562551
23562552,OO.HYSB1.,2015-12-28T00:47:05.304569Z,S,23562552
23562553,OO.HYSB1.,2015-12-28T00:11:33.859894Z,S,23562553


In [22]:
# Put the columns in station, phase, time, pick_id order and renumber the
# rows from 0. Selecting by name instead of by position makes the cell safe
# to re-run (a positional swap would flip time and phase back) and raises a
# KeyError if an expected column is missing.
picks = picks[['station', 'phase', 'time', 'pick_id']].reset_index(drop=True)
picks

Unnamed: 0,station,phase,time,pick_id
0,7D.J41A.,P,2011-07-26T10:54:58.807000Z,619458
1,7D.J41A.,P,2011-07-26T09:43:46.095000Z,619490
2,7D.J41A.,P,2011-07-26T09:43:20.487000Z,619491
3,7D.J41A.,P,2011-07-26T09:42:54.903000Z,619492
4,7D.J41A.,P,2011-07-26T09:39:56.711000Z,619493
...,...,...,...,...
3662209,OO.HYSB1.,S,2015-12-28T02:27:18.963651Z,23562550
3662210,OO.HYSB1.,S,2015-12-28T02:20:46.603711Z,23562551
3662211,OO.HYSB1.,S,2015-12-28T00:47:05.304569Z,23562552
3662212,OO.HYSB1.,S,2015-12-28T00:11:33.859894Z,23562553


In [23]:
# Preview the first 20 picks
picks.head(20)

Unnamed: 0,station,phase,time,pick_id
0,7D.J41A.,P,2011-07-26T10:54:58.807000Z,619458
1,7D.J41A.,P,2011-07-26T09:43:46.095000Z,619490
2,7D.J41A.,P,2011-07-26T09:43:20.487000Z,619491
3,7D.J41A.,P,2011-07-26T09:42:54.903000Z,619492
4,7D.J41A.,P,2011-07-26T09:39:56.711000Z,619493
5,7D.J41A.,P,2011-07-26T09:38:40.095000Z,619494
6,7D.J41A.,P,2011-07-26T09:38:14.607000Z,619495
7,7D.J41A.,P,2011-07-26T09:37:49.087000Z,619496
8,7D.J41A.,P,2011-07-26T09:35:14.967000Z,619497
9,7D.J41A.,P,2011-07-26T09:33:03.279000Z,619498


In [24]:
# Save these picks to the region's data folder
picks_path = f"../data/datasets_{region}/picks_{region}.csv"
picks.to_csv(picks_path)