### Notebook untuk evaluasi
Jalankan notebook ini dengan kernel Python versi 3.6 dan install EQTransformer terlebih dahulu.

In [None]:
# Install EQTransformer into the environment of the running kernel.
# NOTE(review): no version is pinned here; consider pinning one so the evaluation stays reproducible.
%pip install EQTransformer

### Contoh Data

Download folder datasets di Google Drive: <br>
<br>
folder datasets berisi data sebagai berikut :
- data seismic waveform 20 Hz sebelum di-upsample untuk channel BH
- data waveform 100 Hz setelah meng-upsample data 20 Hz di poin pertama
- model EQTransformer yang sudah di-train (pre-trained)
- metadata csv

In [None]:
import h5py

path = r"datasets/100hz/waveform.hdf5"

# Inspect a single sample trace: its shape, dtype, attributes and values.
# The file is opened read-only because this cell never writes; mode "a" would
# silently create an empty file if the path were wrong.
with h5py.File(path, "r") as f:
    g = f["data"]

    # Take the first key in the group's iteration order without building a
    # list of every key in the group.
    first_key = next(iter(g), None)
    if first_key is None:
        raise ValueError(f'group "data" in {path} contains no datasets')
    dset = g[first_key]

    print("First key:", first_key)
    print("Shape:", dset.shape)
    print("Dtype:", dset.dtype)

    print("\nAttributes:")
    for k, v in dset.attrs.items():
        print(f"{k}: {v}")
    # Read the whole dataset into memory and print it.
    print(dset[...])


Last key: 109C.TA_20060723155859_EV
Shape: (6000, 3)
Dtype: float32

Attributes:
back_azimuth_deg: 159.3
coda_end_sample: [[2895]]
network_code: TA
p_arrival_sample: 700.0
p_status: manual
p_travel_sec: 17.079999923706055
p_weight: 0.5
receiver_code: 109C
receiver_elevation_m: 150.0
receiver_latitude: 32.8889
receiver_longitude: -117.1051
receiver_type: BH
s_arrival_sample: 1894.0
s_status: manual
s_weight: 0.5
snr_db: [56.79999924 55.40000153 47.40000153]
source_depth_km: 0.45
source_depth_uncertainty_km: 
source_distance_deg: 0.92
source_distance_km: 102.09
source_error_sec: 1.1119
source_gap_deg: 107.466
source_horizontal_uncertainty_km: 4.6403
source_id: 8556349
source_latitude: 33.7496
source_longitude: -117.4938
source_magnitude: 3.6
source_magnitude_author: 
source_magnitude_type: ml
source_mechanism_strike_dip_rake: 
source_origin_time: 2006-07-23 15:58:50.88
source_origin_uncertainty_sec: 0.47
trace_category: earthquake_local
trace_start_time: 2006-07-23 15:59:00.960000
[[ 7.2

### Evaluasi Model Versi Konservatif

In [None]:
from EQTransformer.core.tester import tester

# Evaluate the conservative pre-trained model on the 100 Hz waveform file.
# The arguments are collected in one mapping so the configuration of this run
# can be read (and compared with other runs) at a glance.
conservative_run = dict(
    # Inputs: waveform file, list of test trace names, and trained model.
    input_hdf5='datasets/100hz/waveform.hdf5',
    input_testset='datasets/trace_name.npy',
    input_model='artifacts/EqT_model_conservative.h5',
    # The recorded run log shows results written to "conservative_tester_outputs".
    output_name='conservative_tester',
    # Thresholds handed to the tester
    # (presumably probability cut-offs; confirm against the EQTransformer docs).
    detection_threshold=0.20,
    P_threshold=0.1,
    S_threshold=0.1,
    number_of_plots=3,
    estimate_uncertainty=True,
    number_of_sampling=2,
    # NOTE(review): keyword spelling kept exactly as in the original call.
    input_dimention=(6000, 3),
    normalization_mode='std',
    mode='generator',
    batch_size=100,
    gpuid=None,
    gpu_limit=None,
)

tester(**conservative_run)

Using TensorFlow backend.


Loading the model ...
Loading is complete!
Testing ...
Writting results into: " conservative_tester_outputs "


100%|██████████| 2981/2981 [1:06:54<00:00,  1.35s/it]


### Hasil Evaluasi model versi konservatif

In [2]:
import pandas as pd

# Per-trace results written by the conservative-model tester run.
df = pd.read_csv("conservative_tester_outputs/X_test_results.csv")

# Force the metric columns to a numeric dtype; values that cannot be parsed
# become NaN. Columns missing from the CSV are skipped.
cols_numeric = [
    "P_error",
    "S_error",
    "detection_probability",
    "P_probability",
    "S_probability",
]
for name in [c for c in cols_numeric if c in df.columns]:
    df[name] = pd.to_numeric(df[name], errors="coerce")

# Summary statistics of the pick errors; rows without a value are excluded.
p_stats = df["P_error"].dropna().describe()
print("\nP-PICK ERROR:", p_stats, sep="\n")

s_stats = df["S_error"].dropna().describe()
print("\nS-PICK ERROR:", s_stats, sep="\n")

# Ground truth: any trace whose category mentions "earthquake" is an event;
# a missing category counts as a non-event (na=False).
# Prediction: detection probability strictly above the threshold; NaN compares
# as False, so traces without a probability count as "no event".
threshold = 0.20
df["is_event_true"] = df["trace_category"].str.contains("earthquake", na=False)
df["is_event_pred"] = df["detection_probability"].gt(threshold)

# Fraction of traces where prediction and ground truth agree.
accuracy = df["is_event_true"].eq(df["is_event_pred"]).mean()

print("\nDETECTION ACCURACY:")
print(f"Accuracy (threshold {threshold}): {accuracy:.4f}")

# Rows are the true labels, columns the predicted labels.
cm = pd.crosstab(
    index=df["is_event_true"],
    columns=df["is_event_pred"],
    rownames=["True"],
    colnames=["Predicted"],
)
print("\n=== CONFUSION MATRIX ===")
print(cm)



P-PICK ERROR:
count    257133.000000
mean          2.117783
std          19.452874
min        -870.000000
25%           0.000000
50%           0.000000
75%           2.000000
max         558.000000
Name: P_error, dtype: float64

S-PICK ERROR:
count    256905.000000
mean          1.627255
std          38.605039
min       -4011.000000
25%          -5.000000
50%           1.000000
75%           8.000000
max        3538.000000
Name: S_error, dtype: float64

DETECTION ACCURACY:
Accuracy (threshold 0.2): 0.9991

=== CONFUSION MATRIX ===
Predicted  False   True 
True                    
False      39759     208
True          48  258008


### Evaluasi model versi original

In [None]:
from EQTransformer.core.tester import tester

# Evaluate the original pre-trained model on the 100 Hz waveform file.
# The arguments are collected in one mapping so the configuration of this run
# can be read (and compared with other runs) at a glance.
original_run = dict(
    # Inputs: waveform file, list of test trace names, and trained model.
    input_hdf5='datasets/100hz/waveform.hdf5',
    input_testset='datasets/trace_name.npy',
    input_model='artifacts/EqT_model_original.h5',
    # The recorded run log shows results written to "original_tester_outputs".
    output_name='original_tester',
    # Thresholds handed to the tester
    # (presumably probability cut-offs; confirm against the EQTransformer docs).
    detection_threshold=0.20,
    P_threshold=0.1,
    S_threshold=0.1,
    number_of_plots=3,
    estimate_uncertainty=True,
    number_of_sampling=2,
    # NOTE(review): keyword spelling kept exactly as in the original call.
    input_dimention=(6000, 3),
    normalization_mode='std',
    mode='generator',
    batch_size=10,
    gpuid=None,
    gpu_limit=None,
)

tester(**original_run)

Using TensorFlow backend.


Loading the model ...
Loading is complete!
Testing ...
Writting results into: " original_tester_outputs "


100%|██████████| 29803/29803 [2:03:20<00:00,  4.03it/s]  


### Hasil Evaluasi model versi original

In [3]:
import pandas as pd

# Per-trace results written by the original-model tester run.
df = pd.read_csv("original_tester_outputs/X_test_results.csv")

# Force the metric columns to a numeric dtype; values that cannot be parsed
# become NaN. Columns missing from the CSV are skipped.
cols_numeric = [
    "P_error",
    "S_error",
    "detection_probability",
    "P_probability",
    "S_probability",
]
for name in [c for c in cols_numeric if c in df.columns]:
    df[name] = pd.to_numeric(df[name], errors="coerce")

# Summary statistics of the pick errors; rows without a value are excluded.
p_stats = df["P_error"].dropna().describe()
print("\nP-PICK ERROR:", p_stats, sep="\n")

s_stats = df["S_error"].dropna().describe()
print("\nS-PICK ERROR:", s_stats, sep="\n")

# Ground truth: any trace whose category mentions "earthquake" is an event;
# a missing category counts as a non-event (na=False).
# Prediction: detection probability strictly above the threshold; NaN compares
# as False, so traces without a probability count as "no event".
threshold = 0.2
df["is_event_true"] = df["trace_category"].str.contains("earthquake", na=False)
df["is_event_pred"] = df["detection_probability"].gt(threshold)

# Fraction of traces where prediction and ground truth agree.
accuracy = df["is_event_true"].eq(df["is_event_pred"]).mean()

print("\nDETECTION ACCURACY:")
print(f"Accuracy (threshold {threshold}): {accuracy:.4f}")

# Rows are the true labels, columns the predicted labels.
cm = pd.crosstab(
    index=df["is_event_true"],
    columns=df["is_event_pred"],
    rownames=["True"],
    colnames=["Predicted"],
)
print("\n=== CONFUSION MATRIX ===")
print(cm)



P-PICK ERROR:
count    256557.000000
mean          1.659245
std          21.689313
min       -1300.000000
25%           0.000000
50%           0.000000
75%           0.000000
max         584.000000
Name: P_error, dtype: float64

S-PICK ERROR:
count    256289.000000
mean          1.305198
std          41.371162
min       -3759.000000
25%          -5.000000
50%           2.000000
75%           8.000000
max        2849.000000
Name: S_error, dtype: float64

DETECTION ACCURACY:
Accuracy (threshold 0.2): 0.9061

=== CONFUSION MATRIX ===
Predicted  False   True 
True                    
False      11982   27985
True           1  258055
