In [1]:
import numpy as np
import matplotlib.pyplot as plt
import scipy
import scipy.stats as stats
import pickle
import obspy
from scipy.signal import find_peaks
import os
import pandas as pd

import cmcrameri.cm as cmc

from matplotlib import patches
import seaborn as sns


In [2]:
def get_scardec_stf(scardec_name, wanted_type = 'fctopt',
                    data_path = '/home/earthquakes1/homes/Rebecca/phd/stf/data/scardec'):
    """Read one SCARDEC source time function (STF) file for an event.

    Parameters
    ----------
    scardec_name : str
        Name of the event sub-directory inside ``data_path``.
    wanted_type : str, optional
        File-name prefix selecting which STF to read (the callers in this
        notebook use 'fctopt' and 'fctmoy').
    data_path : str, optional
        Directory holding one sub-directory per event. Defaults to the
        location that used to be hard-coded.

    Returns
    -------
    momentrate, time : numpy.ndarray
        Second and first whitespace-separated column of the file, in that
        order, with the first two lines of the file skipped.

    Raises
    ------
    FileNotFoundError
        If the event directory has no file starting with ``wanted_type``
        (previously an uninformative IndexError).
    """
    event_dir = os.path.join(data_path, str(scardec_name))

    # os.listdir returns entries in arbitrary order; sorting makes the choice
    # reproducible when more than one file shares the prefix.
    candidates = sorted(f for f in os.listdir(event_dir) if f.startswith(wanted_type))
    if not candidates:
        raise FileNotFoundError(
            f"no file starting with '{wanted_type}' found in {event_dir}")

    with open(os.path.join(event_dir, candidates[0])) as f:
        lines = f.read().splitlines()

    time = []
    momentrate = []
    # The first two lines are not data (presumably a header - TODO confirm).
    for line in lines[2:]:
        fields = line.split()
        if not fields:
            # A blank line used to raise IndexError; skip it instead.
            continue
        time.append(float(fields[0]))
        momentrate.append(float(fields[1]))

    return np.array(momentrate), np.array(time)

In [3]:
def get_ye_stf(ye_name, data_path = '/home/earthquakes1/homes/Rebecca/phd/stf/data/Ye_et_al_2016/'):
    """Read one STF file from the Ye et al. (2016) data directory.

    Parameters
    ----------
    ye_name : str
        File name inside ``data_path``.
    data_path : str, optional
        Directory containing the STF files. Defaults to the location that
        used to be hard-coded.

    Returns
    -------
    momentrate, time : numpy.ndarray
        Second and first whitespace-separated column of every line whose
        first non-blank character is a digit.
    """
    momentrate = []
    time = []

    with open(os.path.join(data_path, str(ye_name)), 'r') as f:
        for line in f:
            line = line.strip()
            # Only rows that start with a digit are data. The emptiness check
            # comes first because indexing a blank line raised IndexError.
            if not line or line[0] not in '0123456789':
                continue
            fields = line.split()
            time.append(float(fields[0]))
            momentrate.append(float(fields[1]))

    return np.array(momentrate), np.array(time)

In [4]:
def get_usgs_stf(usgs_name, data_path = '/home/earthquakes1/homes/Rebecca/phd/stf/data/USGS/'):
    """Read one USGS STF file and return its moment rate in N m where needed.

    Parameters
    ----------
    usgs_name : str
        File name inside ``data_path``. Characters 0-3 are parsed as the
        year and characters 4-5 as the month to decide on unit conversion.
    data_path : str, optional
        Directory containing the STF files. Defaults to the location that
        used to be hard-coded.

    Returns
    -------
    momentrate, time : numpy.ndarray
        Second and first whitespace-separated column of every line whose
        first non-blank character is a digit. ``momentrate`` is divided by
        1e7 (dyne cm -> N m) for files dated before May 2021, except for the
        two files listed in ``unconverted_files``.
    """
    momentrate = []
    time = []

    with open(os.path.join(data_path, str(usgs_name)), 'r') as f:
        for line in f:
            line = line.strip()
            # Only rows that start with a digit are data. The emptiness check
            # comes first because indexing a blank line raised IndexError.
            if not line or line[0] not in '0123456789':
                continue
            fields = line.split()
            time.append(float(fields[0]))
            momentrate.append(float(fields[1]))

    momentrate = np.array(momentrate)
    time = np.array(time)

    # These two files are returned as read despite their early date
    # (presumably they are already in N m - TODO confirm).
    unconverted_files = ('19950205_225105.txt', '20041226_005853.txt')
    if usgs_name in unconverted_files:
        return momentrate, time

    year = int(usgs_name[0:4])
    # The month is only parsed for 2021, exactly as before.
    before_may_2021 = year < 2021 or (year == 2021 and int(usgs_name[4:6]) < 5)
    if before_may_2021:
        momentrate = momentrate / 10**7 # convert to Nm from dyne cm
    return momentrate, time

In [5]:
def get_sigloch_stf(sigloch_name, data_path = '/home/siglochnas1/shared/AmplitudeProjects/pdata_processed/psdata_events/'):
    """Read every STF stored in one event's ``outfiles/ampinv.stf.xy`` file.

    The file is read as a sequence of data lines separated by marker lines.
    One STF is allocated per line that is exactly '>', and every line that is
    exactly '<', '>' or empty advances to the next STF. The second
    whitespace-separated column of each data line is treated as a base-10
    exponent: the stored value is ``10**column``.

    Parameters
    ----------
    sigloch_name : str
        Name of the event directory inside ``data_path``.
    data_path : str, optional
        Directory holding one sub-directory per event. Defaults to the
        location that used to be hard-coded.

    Returns
    -------
    momentrate : list of list of float
        One list of moment-rate values per STF.
    time : list of list of float
        One copy of ``np.arange(0, 25.6, 0.1)`` per STF. The time column of
        the file is not used.

    Raises
    ------
    ValueError
        If a data line falls outside every allocated STF, e.g. when the file
        has data but no '>' line (previously an opaque IndexError).
    """
    file_path = os.path.join(data_path, str(sigloch_name), 'outfiles', 'ampinv.stf.xy')

    with open(file_path, 'r') as file:
        content = file.read().split('\n')

    n_stf = content.count('>')
    time = [list(np.arange(0, 25.6, 0.1)) for _ in range(n_stf)]
    momentrate = [[] for _ in range(n_stf)]

    stf_count = 0
    for c in content:
        if c in ('<', '>', ''):
            # Marker and blank lines both move on to the next STF.
            stf_count += 1
            continue
        if stf_count >= n_stf:
            raise ValueError(
                f"{file_path}: data line {c!r} belongs to STF {stf_count} "
                f"but only {n_stf} '>' separators were found")
        momentrate[stf_count].append(10**float(c.split()[1]))

    return momentrate, time

In [6]:
def get_isc_stf(isc_name, data_path = '/home/earthquakes1/homes/Rebecca/phd/stf/data/isc/'):
    """Load one pickled ISC STF and rescale it with its stored normalisation.

    Two pickle files are read from ``<data_path>/<isc_name>/``:
    ``<isc_name>.txt`` (the STF samples) and ``<isc_name>_norm_info.txt``
    (a mapping with a 'mo_norm' entry).

    Parameters
    ----------
    isc_name : str
        Event name; used for both the sub-directory and the file stem.
    data_path : str, optional
        Directory holding one sub-directory per event. Defaults to the
        location that used to be hard-coded.

    Returns
    -------
    momentrate : numpy.ndarray
        ``np.array(stf) * norm['mo_norm'] * 10**8``.
    time : numpy.ndarray
        ``np.arange(0, 25.6, 0.1)``.
    """
    event_dir = os.path.join(data_path, str(isc_name))

    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at files produced by a trusted pipeline.
    with open(os.path.join(event_dir, f'{isc_name}.txt'), 'rb') as f:
        stf_list = pickle.load(f)
    with open(os.path.join(event_dir, f'{isc_name}_norm_info.txt'), 'rb') as f:
        norm_dict = pickle.load(f)

    time = np.arange(0, 25.6, 0.1)
    # A stray trailing comma used to wrap this array in a 1-tuple that was
    # unwrapped again on return; the returned value is unchanged.
    momentrate = np.array(stf_list) * norm_dict['mo_norm'] * 10**8
    return momentrate, time

In [7]:
# Read the combined catalogue CSV into a DataFrame.
combined = pd.read_csv('/home/earthquakes1/homes/Rebecca/phd/stf/data/combined_scardec_ye_usgs_sigloch_isc_mag.csv')

In [8]:
# Assign column names by position; this assumes the CSV has exactly these
# seven columns in this order - TODO confirm against the file.
combined.columns = ['event', 'scardec', 'ye', 'isc', 'sigloch', 'usgs', 'mag']

In [9]:
def find_end_stf(momentrate, time, dataset = ''):
    """Pick the start and end sample of an STF.

    The series is first cut just before its last strictly positive sample.
    The start is the first strictly positive sample, or for
    ``dataset == 'sigloch'`` the first sample above 5 % of the peak of the cut
    series. The end is the first later sample that is at or below 10 % of the
    peak and by which at least half of the Simpson-integrated moment of the
    cut series has accumulated. If no sample qualifies, the end is the last
    strictly positive sample of the input.

    Parameters
    ----------
    momentrate, time : numpy.ndarray
        Moment-rate samples and their time axis; the spacing is taken from
        the first two time samples.
    dataset : str, optional
        Only the value 'sigloch' changes behaviour (see above).

    Returns
    -------
    tuple
        ``(end_time, end_index, start_time, start_index)``.
    """
    positive_idx = np.where(momentrate > 0)[0]
    first_positive = min(positive_idx)
    end = max(positive_idx)

    # Fallback answer, taken from the untruncated time axis.
    detected_end, detected_end_time = end, time[end]

    # Everything below works on the series cut just before `end`.
    time, momentrate = time[:end], momentrate[:end]
    peak = max(momentrate)

    quiet_idx = np.where(momentrate <= 10*peak/100)[0]

    start = np.where(momentrate > 0.05 * peak)[0][0] if dataset == 'sigloch' else first_positive

    dx = time[1]-time[0]
    total_moment = scipy.integrate.simpson(momentrate[start:end], dx = dx)

    # Candidate ends are quiet samples strictly after the start.
    for i in quiet_idx[quiet_idx > start]:
        released = scipy.integrate.simpson(momentrate[start:i], dx = dx)
        if released >= 0.5 * total_moment:
            detected_end, detected_end_time = i, time[i]
            break

    return detected_end_time, detected_end, time[start], start

In [10]:
# looks for time value of root
def f3(end_time, total_moment, time_opt, momentrate_opt, start, points_before_zero, proportion = 0.1):
    """Residual between the moment integrated up to ``end_time`` and a target.

    ``end_time`` is converted to a sample index using the spacing of
    ``time_opt`` plus the offset ``points_before_zero``. The moment rate is
    Simpson-integrated from ``start`` up to that index, and
    ``proportion * total_moment`` is subtracted, so the result is zero when
    the window holds exactly the requested share of the moment.
    """
    step = time_opt[1]-time_opt[0]
    stop = int(np.floor((end_time/step)+points_before_zero))
    # Never integrate an empty slice: widen a zero-length window by one sample.
    stop = stop + 1 if stop == start else stop
    partial_moment = scipy.integrate.simpson(momentrate_opt[start:stop], dx = step)
    return partial_moment-(total_moment*proportion)

In [11]:
def moment_in_different_windows(window = None, window_prop = None, combined=None):
    """Integrate every catalogued STF over its full length and over a short initial window.

    For each row of ``combined`` and each dataset column with an entry other
    than 0/'0', the STF is loaded, trimmed with ``find_end_stf`` and
    integrated with Simpson's rule, once over the whole trimmed STF and once
    over a window starting at its first sample.

    Parameters
    ----------
    window : float, optional
        Fixed window length in the units of the STF time axis. Used when
        ``window_prop`` is None.
    window_prop : float, optional
        Window length as a fraction of the trimmed STF duration. Takes
        precedence over ``window``. If both are None it defaults to 1.
    combined : pandas.DataFrame, optional
        Catalogue with columns 'event', 'scardec', 'ye', 'isc', 'sigloch',
        'usgs', 'mag'. Read from the default CSV when omitted.

    Returns
    -------
    events, names, simpson, simpson_short, durations, magnitudes, datasets : list
        Parallel lists with one entry per processed STF: catalogue event id,
        dataset-specific name, full-length moment, short-window moment,
        trimmed duration, catalogue magnitude and dataset label. Sigloch
        labels carry the STF index within the file, e.g. 'sigloch_0'.

    Notes
    -----
    Fixes relative to the previous version:

    * The sigloch STF counter was never incremented, so every STF was
      labelled ``_0``. ``to_ignore`` keys ending in ``_1``/``_2`` could never
      match, and a ``_0`` key dropped every STF of that event.
    * ``window < dx`` was evaluated even when ``window`` was None
      (proportional mode), which fails when comparing None with a number.
    * The sub-sample branch passed the float ``window`` as the sample count
      of ``np.linspace`` and integrated a single sample. It now integrates
      the linear interpolant between the first two samples over ``window``.
    * The proportional window used seconds as a sample count. It is now
      converted to samples with ``dx``, like the fixed window.
    """
    if combined is None:
        combined = pd.read_csv('/home/earthquakes1/homes/Rebecca/phd/stf/data/combined_scardec_ye_usgs_sigloch_isc_mag.csv')
        combined.columns = ['event', 'scardec', 'ye', 'isc', 'sigloch', 'usgs', 'mag']
    if window is None and window_prop is None:
        window_prop = 1

    simpson = []
    simpson_short = []
    durations = []
    magnitudes = []
    datasets = []
    names = []
    events = []

    # Keys are '<event>_<index of the STF within the sigloch file>'.
    to_ignore = ['20051203_1610_1', '20071226_2204_2', '20030122_0206_1', '20090929_1748_0', '20120421_0125_1', '20110311_2011_2']

    # (dataset label, column of `combined` holding the name, loader, extra loader args)
    sources = [
        ('scardec_opt', 'scardec', get_scardec_stf, ('fctopt',)),
        ('scardec_moy', 'scardec', get_scardec_stf, ('fctmoy',)),
        ('ye', 'ye', get_ye_stf, ()),
        ('usgs', 'usgs', get_usgs_stf, ()),
        ('sigloch', 'sigloch', get_sigloch_stf, ()),
        ('isc', 'isc', get_isc_stf, ()),
    ]

    for _, row in combined.iterrows():
        for dataset, column, get_stf, extra_args in sources:
            name = row[column]
            # 0 / '0' marks "no STF for this event in this dataset".
            if name == '0' or name == 0:
                continue

            momentrate, time = get_stf(name, *extra_args)

            # Only the sigloch loader returns several STFs per event.
            if dataset == 'sigloch':
                momentrate_list, time_list = momentrate, time
            else:
                momentrate_list, time_list = [momentrate], [time]

            for count, (momentrate, time) in enumerate(zip(momentrate_list, time_list)):
                # NOTE(review): only `time` is shortened here, not `momentrate`,
                # so the two series are offset by one sample afterwards -
                # confirm this is intended.
                if time[0] == time[1]:
                    time = time[1:]

                if dataset == 'sigloch':
                    dataset_name = dataset + '_' + str(count)
                    save_key = row.event + '_' + str(count)
                else:
                    dataset_name = dataset
                    save_key = row.event

                if save_key in to_ignore:
                    continue

                momentrate = np.array(momentrate)
                time = np.array(time)

                _, detected_end, _, detected_start = find_end_stf(momentrate, time, dataset)
                # Keep only the detected STF; the time axis is sliced, not re-zeroed.
                time = time[detected_start:detected_end]
                momentrate = momentrate[detected_start:detected_end]

                duration = time[-1] - time[0]
                dx = time[1] - time[0]
                durations.append(duration)

                simpson.append(scipy.integrate.simpson(momentrate, dx = dx))

                if window_prop is None and window < dx:
                    # Window shorter than one sample: integrate the straight
                    # line between the first two samples over `window`.
                    rate_at_window = np.interp(time[0] + window, time[0:2], momentrate[0:2])
                    short_moment = 0.5 * window * (momentrate[0] + rate_at_window)
                else:
                    if window_prop is None: # using static time window
                        window_length = window
                    else: # based on proportion of duration
                        window_length = duration * window_prop
                    # NOTE(review): n samples span (n - 1) * dx, so this slice
                    # covers one dx less than window_length. Kept as in the
                    # original fixed-window path - confirm whether intended.
                    end_window = int(round(window_length / dx))
                    short_moment = scipy.integrate.simpson(momentrate[:end_window], dx = dx)
                simpson_short.append(short_moment)

                magnitudes.append(row.mag)
                datasets.append(dataset_name)
                names.append(name)
                events.append(row.event)
    return events, names, simpson, simpson_short, durations, magnitudes, datasets

In [12]:
# Run the integration with a fixed window of 1 time unit. `combined` is not
# passed, so the function re-reads the catalogue CSV itself.
events, names, simpson, simpson_short, durations, magnitudes, datasets = moment_in_different_windows(window = 1)

In [13]:
# Turn the per-STF result lists into NumPy arrays.
simpson, simpson_short, durations, magnitudes = (
    np.array(values)
    for values in (simpson, simpson_short, durations, magnitudes)
)


In [14]:
# Keep only the text before the first underscore of each dataset label
# (e.g. 'scardec_opt' -> 'scardec').
datasets_for_colors = [label.split('_')[0] for label in datasets]

In [15]:
# Assemble one row per processed STF from the parallel result sequences.
db = pd.DataFrame(
    dict(
        zip(
            ['event', 'name', 'magnitude', 'simpson', 'simpson_short', 'dataset'],
            [events, names, magnitudes, simpson, simpson_short, datasets_for_colors],
        )
    )
)

In [16]:
# Display the assembled table.
db

Unnamed: 0,event,name,magnitude,simpson,simpson_short,dataset
0,20170818_0259,FCTs_20170818_025925_NORTH_OF_ASCENSION_ISLAND,6.591,9.240153e+18,1.169831e+17,scardec
1,20170818_0259,FCTs_20170818_025925_NORTH_OF_ASCENSION_ISLAND,6.591,9.906913e+18,9.409279e+16,scardec
2,20110126_1542,FCTs_20110126_154229_NORTHERN_SUMATRA__INDONESIA,6.114,2.067641e+18,2.577540e+16,scardec
3,20110126_1542,FCTs_20110126_154229_NORTHERN_SUMATRA__INDONESIA,6.114,1.956907e+18,1.345204e+15,scardec
4,20110126_1542,0706.2011.026.a,6.114,1.741252e+18,7.574988e+16,sigloch
...,...,...,...,...,...,...
11760,20230206_1024,20230206_102448.txt,,4.412318e+20,5.974812e+17,usgs
11761,20231202_1437,20231202_143704.txt,,2.924153e+20,1.270926e+17,usgs
11762,19971108_1002,19971108_100252.txt,,2.005195e+20,2.193716e+17,usgs
11763,20181220_1701,20181220_170155.txt,,1.169586e+20,2.681520e+17,usgs


In [17]:
# Each name is both a label in db['dataset'] and a column of `combined`.
unique_datasets = ['scardec', 'usgs', 'sigloch', 'ye', 'isc']
for dataset in unique_datasets:
    just_this_dataset = db[db['dataset'] == dataset]

    # Keep STFs whose total moment exceeds 10**16 and whose short-window
    # moment is positive.
    subset = just_this_dataset[just_this_dataset['simpson'] > 10**16]
    subset = subset[subset['simpson_short'] > 0]

    if dataset != 'usgs':
        # Drop rows with any missing value, then require the catalogue
        # magnitude to lie within 1 unit of 2/3 * (log10(moment) - 9.1).
        subset_no_nan = subset.dropna()
        moment_magnitude = 2/3*(np.log10(subset_no_nan['simpson'])-9.1)
        subset = subset_no_nan[abs(subset_no_nan['magnitude']-moment_magnitude) < 1]

    # Events of this dataset with no surviving STF get their entry in
    # `combined` set to 0, in place.
    has_no_valid_stf = ~just_this_dataset['event'].isin(subset['event'])
    events_not_in_subset = just_this_dataset[has_no_valid_stf]
    combined.loc[combined['event'].isin(events_not_in_subset['event']), dataset] = 0
	

In [18]:
# Write the filtered catalogue back to disk.
# NOTE(review): this is the same path the catalogue was read from, so the
# input file is overwritten and the filtering cannot be re-run on the
# original data - consider writing to a new file.
combined.to_csv('/home/earthquakes1/homes/Rebecca/phd/stf/data/combined_scardec_ye_usgs_sigloch_isc_mag.csv', index=False)

In [19]:
# Re-read the catalogue CSV and assign column names by position.
# NOTE(review): when cells run top to bottom this file has just been
# overwritten by the to_csv call above, so `combined_old` holds the filtered
# catalogue, not the pre-filter one - confirm the intended source.
combined_old = pd.read_csv('/home/earthquakes1/homes/Rebecca/phd/stf/data/combined_scardec_ye_usgs_sigloch_isc_mag.csv')
combined_old.columns = ['event', 'scardec', 'ye', 'isc', 'sigloch', 'usgs', 'mag']

In [22]:
# Inspect the first 20 full-length moment values.
simpson[0:20]

array([9.24015296e+18, 9.90691313e+18, 2.06764064e+18, 1.95690672e+18,
       1.74125241e+18, 1.00985595e+18, 9.76981134e+17, 1.26560920e+18,
       1.33378169e+18, 2.18479541e+18, 2.18061190e+18, 3.54473325e+19,
       3.17616210e+19, 1.49480350e+19, 1.82377773e+14, 1.52383794e+10,
       2.51584462e+18, 2.47855446e+18, 1.52972598e+20, 1.44053923e+20])

In [26]:
# Inspect the first 20 rows of the per-STF table.
db.iloc[0:20]

Unnamed: 0,event,name,magnitude,simpson,simpson_short,dataset
0,20170818_0259,FCTs_20170818_025925_NORTH_OF_ASCENSION_ISLAND,6.591,9.240153e+18,1.169831e+17,scardec
1,20170818_0259,FCTs_20170818_025925_NORTH_OF_ASCENSION_ISLAND,6.591,9.906913e+18,9.409279e+16,scardec
2,20110126_1542,FCTs_20110126_154229_NORTHERN_SUMATRA__INDONESIA,6.114,2.067641e+18,2.57754e+16,scardec
3,20110126_1542,FCTs_20110126_154229_NORTHERN_SUMATRA__INDONESIA,6.114,1.956907e+18,1345204000000000.0,scardec
4,20110126_1542,0706.2011.026.a,6.114,1.741252e+18,7.574988e+16,sigloch
5,19981010_1629,FCTs_19981010_162908_MINAHASSA_PENINSULA__SULA...,5.927,1.009856e+18,1.314002e+17,scardec
6,19981010_1629,FCTs_19981010_162908_MINAHASSA_PENINSULA__SULA...,5.927,9.769811e+17,6.207362e+16,scardec
7,19920730_0824,FCTs_19920730_082446_XIZANG,6.017,1.265609e+18,6.496706e+16,scardec
8,19920730_0824,FCTs_19920730_082446_XIZANG,6.017,1.333782e+18,72931620000000.0,scardec
9,19960327_2052,FCTs_19960327_205206_NEAR_COAST_OF_NICARAGUA,6.157,2.184795e+18,1.769802e+16,scardec


In [None]:
# Show the event identifiers of the catalogue. The column label must be the
# string 'event'; the bare name `event` is not defined at notebook level and
# raised NameError.
combined['event']