# Preprocessing

### Préparation des colonnes pré existantes

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import kurtosis
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from torch.utils.data import TensorDataset, DataLoader
import torch


# ---------------------------
# CONFIG
# ---------------------------
file_path = "pf_2020-03-30_filtered_downsampled.csv"

time_col = "time"        # nom de la colonne datetime dans le csv
amp_col = "amplitude"           # si None, automatiquement détecté ou calculé
fs = 1/60                   # 1/60 Hz (1 minute)
win_min = 10         # window pour features (10 minutes)
step_minuts = 10        # step (ici non chevauché). mettre < win_min pour chevauchement
env_window = 200         # smoothing median window (200 valeurs)
min_rows_for_env = env_window
seq_length = 60       # longueur des séquences pour le modèle (en minutes)
batch_size = 64
device = "cuda" if torch.cuda.is_available() else "cpu"

# Classe mapping (modifiable)
# Par défaut : classes (0..4) :
# 0 => delay > 24h
# 1 => 16h < delay <= 24h
# 2 => 1h < delay <= 12h
# 3 => 0h < delay <= 1h
# 4 => delay <= 0h (en cours)
# NOTE: ceci est paramétrable ci-dessous
thresholds_hours = {
    "0": 0,
    "1": 1,
    "2": 12,
    "3": 16,
    "4": 24
}

# ---------------------------
# 1. Lecture, homogénéisation titre colonnes et colonnes temporelles
# ---------------------------
df = pd.read_csv(file_path)
df.columns = [c.strip().lower() for c in df.columns]
# ---------------------------
# Division de la colonne "time"
# ---------------------------
df[time_col] = pd.to_datetime(df[time_col], errors="coerce")
if df[time_col].isna().any():
    raise ValueError("Des valeurs non parsables dans la colonne time. Vérifier le format.")

# ajouter year/month/day/hour pour le modèle (toujours numériques)
df["year"]  = df[time_col].dt.year.astype(np.int16)
df["month"] = df[time_col].dt.month.astype(np.int8)
df["day"]   = df[time_col].dt.day.astype(np.int8)
df["hour"]  = df[time_col].dt.hour.astype(np.int8)
df["minute"]= df[time_col].dt.minute.astype(np.int8)
df["seconde"]= df[time_col].dt.second.astype(np.int8)

# NOUVEAU : Rendre la colonne temps tz-naive (sans information de fuseau horaire)
if df[time_col].dt.tz is not None:
    df[time_col] = df[time_col].dt.tz_localize(None)

# Rendre les variables start et end tz-naive si elles ont une information de fuseau horaire
start = pd.to_datetime('2020-04-02T08:20:00.000000Z', utc=True).tz_localize(None) #Date de début de l'éruption
end = pd.to_datetime('2020-04-06T09:30:00.000000Z', utc=True).tz_localize(None) #Date de fin de l'éruption

default_case = pd.NaT

# Définition des conditions de marquage pour le délai (Timedelta)
conditions = [
    (df[time_col] > end),                           # 1. APRÈS l'intervalle
    (df[time_col] >= start) & (df[time_col] <= end), # 2. PENDANT l'intervalle
    (df[time_col] < start)                           # 3. AVANT l'intervalle (CORRECTION : c'est la condition logique)
]
choices = [
    pd.NaT,          # 1. Si APRÈS -> NaN
    pd.Timedelta(0),  # 2. Si PENDANT -> 0
    start - df[time_col]      # 3. Si AVANT -> Délai avant l'éruption (Timedelta positif)
]

# Création de la colonne 'delai_eruption' (Timedelta)
df['delai_eruption'] = np.select(conditions, choices, default=pd.NaT)

# NOUVEAU : Conversion explicite en Timedelta avant d'utiliser .dt
df['delai_eruption'] = pd.to_timedelta(df['delai_eruption'])

# NOUVEAU : Calculer directement delai_hours (Float) à partir du Timedelta
# Cela convertit le Timedelta (même s'il est affiché en nanosecondes) en un nombre correct d'heures.
df['delai_hours'] = df['delai_eruption'].dt.total_seconds() / 3600.0
# Les NaT sont automatiquement convertis en NaN, ce qui est géré plus tard.

# ---------------------------
#Extraction du type de composante : horizontale ou verticale
# ---------------------------
def type_component(channel):
    c = str(channel).upper()

    # EHZ, SHZ, BHZ, HHZ → verticale
    if c.endswith("Z"):
        return "vertical"

    # HHE, HHN, EHE, etc → horizontale
    return "horizontal"

df["component_type"] = df["channel"].apply(type_component)

# Encodage numérique pour le modèle
df["component_flag"] = df["component_type"].map({
    "horizontal": 0,
    "vertical": 1
})




### Création et ajout des nouvelles features dans le dataframe

In [65]:
# ---------------------------
# 3. Fonctions features
# ---------------------------
def shannon_entropy(segment, bins=50):
    p, _ = np.histogram(segment, bins=bins, density=True)
    p = p[p > 0]
    if p.size == 0:
        return 0.0
    return -np.sum(p * np.log2(p))

def frequency_index_proxy(segment):
    # proxy amplitude-based for fs=1Hz
    med = np.median(np.abs(segment))
    high = segment[np.abs(segment) > med]
    low  = segment[np.abs(segment) <= med]
    E_high = np.sum(high**2)
    E_low  = np.sum(low**2)
    if E_low == 0:
        return np.nan
    return float(E_high) / float(E_low)

# ---------------------------
# 4. Sliding windows: calcul des features
#    WARNING: sur des millions de lignes, cette boucle est lente.
#    Pour des datasets massifs, vectoriser ou utiliser numba/parallel est recommandé.
# ---------------------------
signal = df[amp_col].values.astype(float)
time_values = df[time_col].values # NOUVEAU : Accès rapide aux horodatages
channel_values = df["channel"].values # NOUVEAU : Accès rapide aux valeurs de canal
component_type_values = df["component_type"].values # NOUVEAU
n = len(signal)
win = int(win_min)             # ex 60
step = int(step_minuts)
indices = range(0, n - win + 1, step)

feat_list = []
times_out = []

for i in indices:
    seg = signal[i:i+win]
    t_center = time_values[i + win - 1]  # CORRECTION: Utilise time_values pour l'efficacité
    cat_channel = channel_values[i + win - 1] # NOUVEAU: Valeur catégorielle
    cat_component = component_type_values[i + win - 1] # NOUVEAU: Valeur catégorielle
    SE = shannon_entropy(seg)
    K  = float(kurtosis(seg, fisher=True, bias=False))
    FI = frequency_index_proxy(seg)
    std = float(np.std(seg))
    mean = float(np.mean(seg))
    med = float(np.median(seg))
    per90 = float(np.percentile(seg,90))
    per10 = float(np.percentile(seg,10))
    tension = per90 - per10

    feat_list.append((t_center, cat_channel, cat_component, SE, K, FI, std, mean, med, per90, per10, tension))
    times_out.append(t_center)

# DataFrame features
df_feat = pd.DataFrame(feat_list, columns=[
    "time", "channel", "component_type", "SE","Kurtosis","FI","std","mean","median","per90","per10","tension"
])

df_feat = df_feat.set_index("time")

# NOUVEAU : Assurer que l'index de df_feat est tz-naive
if df_feat.index.tz is not None:
    df_feat.index = df_feat.index.tz_localize(None)

# conserver la colonne time dans le dataset final (sous forme datetime index + une colonne time si nécessaire)
df_feat[time_col] = df_feat.index

# ---------------------------
# 5. Enveloppe median smoothing (200 valeurs)
# ---------------------------
if len(df_feat) < env_window:
    # option : utiliser min_periods=1 pour avoir valeurs même si < env_window
    df_feat["SE_env"] = df_feat["SE"].rolling(env_window, min_periods=1).median()
    df_feat["FI_env"] = df_feat["FI"].rolling(env_window, min_periods=1).median()
    df_feat["Kurt_env"] = df_feat["Kurtosis"].rolling(env_window, min_periods=1).median()
    # idem pour autres stats si souhaité
    df_feat["std_env"] = df_feat["std"].rolling(env_window, min_periods=1).median()
else:
    df_feat["SE_env"] = df_feat["SE"].rolling(env_window).median()
    df_feat["FI_env"] = df_feat["FI"].rolling(env_window).median()
    df_feat["Kurt_env"] = df_feat["Kurtosis"].rolling(env_window).median()
    df_feat["std_env"] = df_feat["std"].rolling(env_window).median()

# ---------------------------
# 6. Joindre colonnes date/year/month/day/hour depuis la table d'origine
#    (pour que modèle ait ces features temporelles)
# ---------------------------
df_feat[time_col] = df_feat.index
df_feat["year"]  = df_feat.index.year.astype(np.int16)
df_feat["month"] = df_feat.index.month.astype(np.int8)
df_feat["day"]   = df_feat.index.day.astype(np.int8)
df_feat["hour"]  = df_feat.index.hour.astype(np.int8)
df_feat["minute"]= df_feat.index.minute.astype(np.int8)


# ---------------------------
# 7. Création du label à partir de 'delai_eruption' si présente dans df original
#    else lever erreur ou construire via table fournie
# ---------------------------
# On accepte différents formats : Timedelta / seconds / hours / NaN

# Si df original contient 'delai_eruption' (Timedelta) aligned on original time,
# il faut produire une colonne delai_eruption pour df_feat.
# Approche : si original df contient 'delai_eruption', faire forward fill / resample:
if "delai_eruption" in df.columns:
    # CORRECTION : Utiliser drop_duplicates pour gérer les index en double dans df temporaire
    df_temp = df[[time_col,"delai_eruption"]].drop_duplicates(subset=[time_col], keep='last').set_index(time_col)
    df_feat = df_feat.join(df_temp, how="left")
    # si delai_eruption est Timedelta, le garder ; sinon convertir en secondes si numeric
else:
    raise ValueError("Aucune colonne 'delai_eruption' trouvée dans le CSV d'origine. Fournir la table d'éruptions ou la colonne delai_eruption.")

# Normaliser le format de delai_eruption : convertir tout en float seconds


# ---------------------------
# 8. Mapping labels (paramétrable)
# ---------------------------
def label_from_delay_hours(h):
    # h: float hours or nan
    if pd.isna(h):
        return 0   # class 0 = "pas de risque (>24h)" par défaut pour NaN
    if h <= 0:
        return 4   # en cours
    if h <= 1:
        return 3   # <1h
    if h <= 12:
        return 2   # <12h
    if h <= 16:
        return 1   # >16h class (intermédiaire)
    # else h > 16 -> class 0 (>24h / no risk) ; ajuster si besoin
    return 0

# CORRECTION: Retrait de la fonction delai_to_hours et de son appel

if "delai_hours" in df.columns:
    # Joindre la colonne delai_hours (numérique)
    df_temp = df[[time_col,"delai_hours"]].drop_duplicates(subset=[time_col], keep='last').set_index(time_col)
    
    # Assurer l'alignement tz-naive
    if df_temp.index.tz is not None:
        df_temp.index = df_temp.index.tz_localize(None)

    df_feat = df_feat.join(df_temp, how="left")
    
    # Créer le label à partir de la colonne delai_hours fraîchement jointe
    df_feat['label'] = df_feat['delai_hours'].apply(label_from_delay_hours).astype(np.int8)

    # Retirer delai_eruption (optionnel, car non utilisé)
    df_feat.drop("delai_eruption", axis=1, errors='ignore', inplace=True)
else:
    raise ValueError("Aucune colonne 'delai_hours' trouvée dans le DataFrame.")

df_feat["label"] = df_feat["delai_hours"].apply(label_from_delay_hours).astype(np.int64)


### Verification de df_feat

In [66]:
# Nouvelle Cellule de Vérification (après In[2])
print("--- Inspection de df_feat ---")

# Afficher les 5 premières lignes et les types de données
print("\n[A] df_feat.head() et dtypes:")
print(df_feat.head())
print(df_feat.dtypes)

# Afficher la taille du DataFrame
print(f"\n[B] Taille de df_feat : {df_feat.shape[0]} lignes")

# Vérifier la présence de NaN (doit être minimal ou nul après le remplissage)
nan_count = df_feat.isna().sum()
print(f"\n[C] Nombre de NaN par colonne:\n{nan_count[nan_count > 0]}")

# Vérifier que l'index est tz-naive comme prévu
print(f"\n[D] Fuseau horaire de l'index : {df_feat.index.tz}")

--- Inspection de df_feat ---

[A] df_feat.head() et dtypes:
                    channel component_type        SE  Kurtosis         FI  \
time                                                                        
2020-03-30 00:01:30     HHE     horizontal  0.663807  0.476227  11.350228   
2020-03-30 00:03:10     HHE     horizontal  0.600959 -0.576031  11.629134   
2020-03-30 00:04:50     HHE     horizontal  0.620373 -1.066237   7.176878   
2020-03-30 00:06:30     HHE     horizontal  0.375461  0.950616  40.675349   
2020-03-30 00:08:10     HHE     horizontal  0.741771 -0.029760   5.175879   

                            std       mean      median       per90  \
time                                                                 
2020-03-30 00:01:30  123.100520 -37.565111  -43.303673   68.307201   
2020-03-30 00:03:10  161.805842 -55.421499  -51.182505  110.381080   
2020-03-30 00:04:50  170.512009  25.619871   30.244040  209.834037   
2020-03-30 00:06:30  261.956437   7.998256   12.9

### Vérification de la Distribution des Labels

In [67]:
# Nouvelle Cellule de Vérification (après In[2])
print("\n--- Distribution des Labels ---")
print(df_feat['label'].value_counts(normalize=True).sort_index())


--- Distribution des Labels ---
label
0    0.423106
1    0.023643
2    0.065018
3    0.005911
4    0.482323
Name: proportion, dtype: float64


### Encodage

In [68]:
# ---------------------------
# 9. Features finales et encodage
# ---------------------------
# Colonnes numériques à garder
numeric_features = ["SE","Kurtosis","FI","std","mean","median","per90","per10","tension",
                    "SE_env","FI_env","Kurt_env","std_env",
                    "year","month","day","hour","minute"]

# garder uniquement celles existantes
numeric_features = [c for c in numeric_features if c in df_feat.columns]

# Colonnes catégorielles à encoder (ex: station, channel, component)
categorical_candidates_raw = ["channel", "component_type", "station", "network"]
categorical_features_final = [c for c in categorical_candidates_raw if c in df_feat.columns]


# Build preprocess pipeline
numeric_transformer = StandardScaler()
categorical_transformer = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features_final)
    ],
    remainder="drop"
)

# Remplir NaN numériques par median avant scaler
df_feat[numeric_features] = df_feat[numeric_features].fillna(df_feat[numeric_features].median())

# Remplir catégoriques na par 'unk' dans la table d'origine (préventif)
# Correction : Retirer le remplissage dans df[c] car nous travaillons avec df_feat
# 
for c in categorical_features_final:
    df_feat[c] = df_feat[c].fillna("unk")
    # aligner au df_feat times: map nearest value from original df if needed
    # simple approach: forward fill in original df then join earlier would have done it.

# Fit transformer on entire dataset (ou training only, best practice: fit on train)
df_for_transform = df_feat[numeric_features + categorical_features_final]
X_combined = preprocessor.fit_transform(df_for_transform)

# final feature names count
n_features = X_combined.shape[1]


### Vérification de l'Encodage et du Scaling

In [69]:
# Nouvelle Cellule de Vérification (après In[3])
print("--- Inspection de la Matrice Encodée (X_combined) ---")

# Afficher la taille
print(f"\n[A] X_combined shape : {X_combined.shape}")
print(f"    (Nombre de features finales : {n_features})")

# Vérifier la standardisation des premières colonnes (numériques)
# Les premières colonnes correspondent aux features numériques standardisées.
if X_combined.shape[1] > 0:
    print("\n[B] Standardisation des premières colonnes (Numériques):")
    # Calcule la moyenne et l'écart-type des 5 premières colonnes
    print(f"    Moyenne de la 1ère colonne: {np.mean(X_combined[:, 0]):.4f}")
    print(f"    Écart-type de la 1ère colonne: {np.std(X_combined[:, 0]):.4f}")

    # Vérifiez que les valeurs des features one-hot (dernières colonnes) sont bien 0 ou 1
    # Le nombre de colonnes catégorielles dépend de vos données. Si vous en avez 10,
    # la 15e colonne devrait être une colonne One-Hot.
    print("\n[C] Valeurs des dernières colonnes (Encodage One-Hot):")
    # Affiche les 10 dernières colonnes (doit être 0 ou 1)
    print(f"    Valeurs uniques des 5 dernières colonnes: {np.unique(X_combined[:, -5:])}")

--- Inspection de la Matrice Encodée (X_combined) ---

[A] X_combined shape : (36544, 23)
    (Nombre de features finales : 23)

[B] Standardisation des premières colonnes (Numériques):
    Moyenne de la 1ère colonne: -0.0000
    Écart-type de la 1ère colonne: 1.0000

[C] Valeurs des dernières colonnes (Encodage One-Hot):
    Valeurs uniques des 5 dernières colonnes: [0. 1.]


### Création des séquences pour le modèle transformer et subdivision en train/test/val

In [70]:
# ---------------------------
# 10. Construction des sequences (sliding) pour le modèle
# ---------------------------
# On construit sequences non chevauchantes (ou chevauchantes selon step_sequence)
step_seq = 1  # si 1 -> séquences glissantes à chaque point; si seq_length -> non chevauchées
X = X_combined
y = df_feat["label"].values
T = len(X)

seqs = []
labels = []
times_seq = []

for i in range(0, T - seq_length, step_seq):
    seq = X[i:i+seq_length]
    # label associé au temps de fin de séquence (alignement)
    lab = y[i+seq_length-1]
    seqs.append(seq)
    labels.append(lab)
    times_seq.append(df_feat.index[i+seq_length-1])

X_seq = np.stack(seqs)            # shape (N_seq, seq_length, n_features)
y_seq = np.array(labels)          # shape (N_seq,)

# ---------------------------
# 11. Train/Val/Test split temporel
# ---------------------------
N = len(X_seq)
train_frac = 0.7
val_frac = 0.15
test_frac = 0.15

i_train_end = int(N * train_frac)
i_val_end   = int(N * (train_frac + val_frac))

X_train = X_seq[:i_train_end]
y_train = y_seq[:i_train_end]
X_val   = X_seq[i_train_end:i_val_end]
y_val   = y_seq[i_train_end:i_val_end]
X_test  = X_seq[i_val_end:]
y_test  = y_seq[i_val_end:]

# Convert to torch tensors
X_train_t = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_t = torch.tensor(y_train, dtype=torch.long).to(device)
X_val_t   = torch.tensor(X_val, dtype=torch.float32).to(device)
y_val_t   = torch.tensor(y_val, dtype=torch.long).to(device)
X_test_t  = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_t  = torch.tensor(y_test, dtype=torch.long).to(device)

train_ds = TensorDataset(X_train_t, y_train_t)
val_ds   = TensorDataset(X_val_t, y_val_t)
test_ds  = TensorDataset(X_test_t, y_test_t)

train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_ds, batch_size=batch_size, shuffle=False)
test_loader  = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

### Vérification des Séquences et du Split Train/Test

In [71]:
# Nouvelle Cellule de Vérification (après In[11])
print("--- Inspection des Séquences et Splits ---")

# Dimensions des séquences
print(f"\n[A] X_seq final shape (N_seq, seq_length, n_features): {X_seq.shape}")
print(f"    y_seq final shape (N_seq,): {y_seq.shape}")

# Dimensions des splits
print(f"\n[B] Taille des splits (N_seq):")
print(f"    Train: {X_train.shape[0]} séquences")
print(f"    Validation: {X_val.shape[0]} séquences")
print(f"    Test: {X_test.shape[0]} séquences")

# Vérification des dimensions des Tenseurs PyTorch
print(f"\n[C] Dimension des tenseurs PyTorch (Train): {X_train_t.shape}")

--- Inspection des Séquences et Splits ---

[A] X_seq final shape (N_seq, seq_length, n_features): (36484, 60, 23)
    y_seq final shape (N_seq,): (36484,)

[B] Taille des splits (N_seq):
    Train: 25538 séquences
    Validation: 5473 séquences
    Test: 5473 séquences

[C] Dimension des tenseurs PyTorch (Train): torch.Size([25538, 60, 23])


### Distribution des Labels dans les Splits

In [72]:
# Nouvelle Cellule de Vérification (après In[11])
def check_label_distribution(y_array, name):
    counts = pd.Series(y_array).value_counts(normalize=True).sort_index()
    print(f"\n[D] Distribution des labels dans {name}:")
    print(counts)

check_label_distribution(y_train, "Train")
check_label_distribution(y_val, "Validation")
check_label_distribution(y_test, "Test")


[D] Distribution des labels dans Train:
0    0.421764
1    0.022555
2    0.062025
3    0.005639
4    0.488018
Name: proportion, dtype: float64

[D] Distribution des labels dans Validation:
0    0.423168
1    0.026311
2    0.072355
3    0.006578
4    0.471588
Name: proportion, dtype: float64

[D] Distribution des labels dans Test:
0    0.423168
1    0.026311
2    0.072355
3    0.006578
4    0.471588
Name: proportion, dtype: float64


In [74]:
df_feat

Unnamed: 0_level_0,channel,component_type,SE,Kurtosis,FI,std,mean,median,per90,per10,...,FI_env,Kurt_env,std_env,year,month,day,hour,minute,delai_hours,label
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-30 00:01:30.000000,HHE,horizontal,0.663807,0.476227,1.135023e+01,123.100520,-37.565111,-43.303673,68.307201,-177.854938,...,10.472313,-0.263814,513.328879,2020,3,30,0,1,80.308333,0
2020-03-30 00:03:10.000000,HHE,horizontal,0.600959,-0.576031,1.162913e+01,161.805842,-55.421499,-51.182505,110.381080,-278.973581,...,10.472313,-0.263814,513.328879,2020,3,30,0,3,80.280556,0
2020-03-30 00:04:50.000000,HHE,horizontal,0.620373,-1.066237,7.176878e+00,170.512009,25.619871,30.244040,209.834037,-204.720460,...,10.472313,-0.263814,513.328879,2020,3,30,0,4,80.252778,0
2020-03-30 00:06:30.000000,HHE,horizontal,0.375461,0.950616,4.067535e+01,261.956437,7.998256,12.949737,314.964549,-252.770878,...,10.472313,-0.263814,513.328879,2020,3,30,0,6,80.225000,0
2020-03-30 00:08:10.000000,HHE,horizontal,0.741771,-0.029760,5.175879e+00,124.305987,75.616784,100.710240,186.933463,-113.865500,...,10.472313,-0.263814,513.328879,2020,3,30,0,8,80.197222,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-04-05 02:15:17.758394,HHZ,vertical,0.002718,1.707906,1.178977e+01,71581.711883,19683.932295,34605.385712,63261.252592,-67165.641491,...,9.825745,-0.432077,29508.382755,2020,4,5,2,15,0.000000,4
2020-04-05 02:16:57.758394,HHZ,vertical,0.005361,2.069695,1.017574e+01,37787.309504,-6033.397339,-17386.601734,39445.047021,-47197.606798,...,9.869511,-0.421334,29641.212506,2020,4,5,2,16,0.000000,4
2020-04-05 02:18:37.758394,HHZ,vertical,0.005513,0.656211,8.063585e+00,36084.339923,22580.198304,27449.901264,60784.683980,-17238.516472,...,9.869511,-0.408950,29641.212506,2020,4,5,2,18,0.000000,4
2020-04-05 02:20:17.758394,HHZ,vertical,0.005964,-1.390318,2.628291e+01,43513.139430,-26249.679116,-13320.731957,22325.215313,-86672.218653,...,9.910574,-0.421334,29641.212506,2020,4,5,2,20,0.000000,4
