In [18]:
import ast
import os

import numpy as np
import pandas as pd

In [19]:
# Locations of the annotation table and of the two directories that are
# searched for the per-file pressure recordings.
ground_truth_path = "../src/raw_data/ground_truth.csv"
train_dir = "../src/raw_data/train"
test_dir = "../src/raw_data/test"

# Load the annotation table and collect the names listed in its "file" column;
# these are the recordings processed by the statistics loop.
gt_df = pd.read_csv(ground_truth_path)
annotated_files = gt_df["file"].tolist()

In [20]:
# Names of the statistic columns added to gt_df: three whole-series pressure
# statistics followed by min/max pressure differences for the "recovery" and
# "drop" intervals. The order here must match the order of the values written
# for each file.
new_columns = [
    "min_pressure", "avg_pressure", "max_pressure",
    "min_difference_recovery", "max_difference_recovery",
    "min_difference_drop", "max_difference_drop"
]

# Create all statistic columns up front with a 0.0 placeholder in every row.
gt_df[new_columns] = 0.0

def analyze_intervals(event_type, intervals, df):
    """Return the smallest and largest pressure change over the given intervals.

    For each ``(start, end)`` pair, the rows of ``df`` whose ``"time"`` lies in
    the closed range ``[start, end]`` are selected and the change is computed as
    the last selected ``"pressure"`` value minus the first one. Intervals that
    select no rows are ignored.

    Args:
        event_type: Label of the event kind. When it equals ``"drop"`` the
            absolute value of each change is used; for any other value the
            signed change is kept.
        intervals: Iterable of ``(start, end)`` pairs.
        df: DataFrame with ``"time"`` and ``"pressure"`` columns.

    Returns:
        A ``(min_change, max_change)`` tuple, or ``(None, None)`` when no
        interval selected any rows.
    """
    use_magnitude = event_type == "drop"
    changes = []

    for start, end in intervals:
        in_window = (df["time"] >= start) & (df["time"] <= end)
        window_pressure = df.loc[in_window, "pressure"]
        if window_pressure.empty:
            # Nothing was recorded inside this interval.
            continue

        change = window_pressure.iloc[-1] - window_pressure.iloc[0]
        if use_magnitude:
            change = abs(change)
        changes.append(change)

    if not changes:
        return None, None
    return min(changes), max(changes)

# For every annotated file, compute whole-series pressure statistics and the
# min/max pressure change over its annotated "recovery" and "drop" intervals,
# then store them in the matching row(s) of gt_df.
for file in annotated_files:
    # Look for the recording in the training directory first, then fall back
    # to the test directory.
    file_path = os.path.join(train_dir, file)

    if not os.path.exists(file_path):
        file_path = os.path.join(test_dir, file)
        if not os.path.exists(file_path):
            # Not present in either directory: skip it, leaving the 0.0
            # placeholders in its statistic columns.
            continue

    # Recordings are headerless, whitespace-separated (time, pressure) pairs.
    df = pd.read_csv(file_path, sep=r"\s+", header=None, names=["time", "pressure"], engine="python")

    min_pressure = df["pressure"].min()
    avg_pressure = df["pressure"].mean()
    max_pressure = df["pressure"].max()

    # Compute the row selector once; it is used for both the read and the write.
    row_mask = gt_df["file"] == file

    # The interval columns hold list literals stored as text (e.g. "[[0.0, 42.75]]").
    # ast.literal_eval parses them without executing arbitrary code, unlike eval().
    # Non-string cells (e.g. missing values) are treated as "no intervals".
    events = gt_df.loc[row_mask, ["recovery", "drop"]].values[0]
    recovery_intervals = ast.literal_eval(events[0]) if isinstance(events[0], str) else []
    drop_intervals = ast.literal_eval(events[1]) if isinstance(events[1], str) else []

    # Each call yields (None, None) when no interval selected any samples.
    min_diff_recovery, max_diff_recovery = analyze_intervals("recovery", recovery_intervals, df)
    min_diff_drop, max_diff_drop = analyze_intervals("drop", drop_intervals, df)

    # Value order must match new_columns.
    gt_df.loc[row_mask, new_columns] = [
        min_pressure, avg_pressure, max_pressure,
        min_diff_recovery, max_diff_recovery,
        min_diff_drop, max_diff_drop
    ]


In [21]:
# Replace every missing value in gt_df with 0.0, then display the table.
# NOTE(review): fillna is applied to all columns, not only the statistic
# columns, so a missing "recovery"/"drop" cell would also become 0.0 -- confirm
# that this is intended.
gt_df = gt_df.fillna(0.0)
gt_df

Unnamed: 0,file,recovery,drop,min_pressure,avg_pressure,max_pressure,min_difference_recovery,max_difference_recovery,min_difference_drop,max_difference_drop
0,00e03657-8e1e-4c8c-a724-1d3c77b48510,"[[2420.9805555555554, 2438.4241666666667], [31...","[[3454.6875, 3764.9605555555554]]",78.161000,96.178186,168.924915,0.842020,45.507776,43.020432,43.020432
1,00e4dba2-36d2-42b4-beb1-c55aed75f506,[],"[[13285.465, 19439.800555555557]]",34.087519,41.875554,91.190003,0.000000,0.000000,34.842194,34.842194
2,00f035b7-ad7a-4f30-9081-522a3c10805b,[],"[[0.0, 42.75]]",36.936633,59.735903,279.535676,0.000000,0.000000,227.099341,227.099341
3,01a0c034-6afc-4e73-95fa-621f702a0b7d,[],"[[0.0, 491.98305555555555]]",71.051991,89.535937,152.147197,0.000000,0.000000,70.100558,70.100558
4,01a530d3-6496-4515-9fbb-4f44e298fd29,[],"[[4921.376666666667, 6209.231666666667]]",36.399626,54.579204,132.215660,0.000000,0.000000,58.070323,58.070323
...,...,...,...,...,...,...,...,...,...,...
95,1dfaf03c-e297-4d92-a0bf-40b1a829391f,[],[],142.357346,154.066963,181.557049,0.000000,0.000000,0.000000,0.000000
96,1e149fbd-41c6-4779-b87d-c5dc17fbb4c0,[],"[[0.0, 635.3127777777778]]",75.362475,83.645728,160.532492,0.000000,0.000000,85.102268,85.102268
97,1e19b77c-8a0e-4749-a384-9c1e679035bf,[],[],106.249800,136.863865,229.862162,0.000000,0.000000,0.000000,0.000000
98,1e4b4c18-1e32-45eb-917a-5760e33fbaca,"[[9541.77638888889, 10288.5075]]","[[10339.343055555555, 10739.613055555556], [13...",17.691097,62.917351,236.550942,186.899796,186.899796,125.520280,147.599640


In [22]:
# Summary statistics (count, mean, std, quartiles, min/max) of the numeric columns.
gt_df.describe()

Unnamed: 0,min_pressure,avg_pressure,max_pressure,min_difference_recovery,max_difference_recovery,min_difference_drop,max_difference_drop
count,100.0,100.0,100.0,100.0,100.0,100.0,100.0
mean,52.607548,74.502172,173.401571,17.61481,20.547245,45.526771,49.208662
std,40.413953,43.880408,76.506256,43.978796,49.244652,56.466418,57.981041
min,6.068235,11.443732,24.390433,0.0,0.0,0.0,0.0
25%,26.53837,44.407921,119.368806,0.0,0.0,0.0,0.0
50%,37.293754,59.861411,164.670797,0.0,0.0,26.803325,33.917908
75%,71.442524,90.269859,215.346664,0.0,0.0,83.018786,89.025725
max,188.400956,206.70095,400.378974,189.822208,247.679606,258.838787,258.838787


In [23]:
# Write the enriched table as CSV without the index column.
# NOTE(review): the target path "../src/stats" has no file extension -- confirm
# that downstream readers expect exactly this name.
gt_df.to_csv("../src/stats", index=False)