In [1]:
import os
import pandas as pd
import numpy as np
import plotly.express as px

In [46]:
label_df = pd.read_csv('src/raw_data/ground_truth.csv', index_col = False)
label_df = label_df[['file', 'recovery']]
label_df = label_df[label_df['recovery'] != '[]'].reset_index(drop = True)
label_df

Unnamed: 0,file,recovery
0,00e03657-8e1e-4c8c-a724-1d3c77b48510,"[[2420.9805555555554, 2438.4241666666667], [31..."
1,0a269af2-4455-4897-8a01-308ff137b15b,"[[10045.820833333333, 10165.977777777778]]"
2,0a9816bb-2a0f-44fa-82ad-cdb6e0d0eeaf,"[[1172.6930555555555, 1173.3916666666667], [12..."
3,0ad8ecc0-2995-4ee0-832c-5b5b4fa76f36,"[[2329.0, 3457.0]]"
4,0ae3634c-2236-42bc-94c4-571ca1b37673,"[[2305.0, 2953.0]]"
5,0bb24bd2-b325-40ae-8de6-a60782494f72,"[[2248.9280555555556, 2340.244166666667]]"
6,0c0f4c97-aedd-42e6-9a3c-b5f628833c86,"[[497.48305555555555, 671.8]]"
7,0c31a588-734e-4641-b48c-cd46604c47a2,"[[2329.0, 3457.0]]"
8,0c7cfac7-5aac-457c-9d7b-c4b87685cb3f,"[[234.14305555555555, 1341.3944444444444]]"
9,0cf09d9d-2504-4989-ad68-62d41d151eff,"[[16.816666666666666, 28.825], [28.825, 53.033..."


In [107]:
import ast 

def extract_recovery(recovery: str):
    data = np.array(ast.literal_eval(recovery))
    if data.size == 2:
        data = data.reshape(2)
    
    return data

In [150]:
def recognize_log(x_l, y_l, x_r, y_r, df):
    if x_l <= 0 or x_r <= 0:
        return

    a = (y_r - y_l) / (np.log(x_r) - np.log(x_l))
    b = y_l - a * np.log(x_l)

    mask = (df['time'] > x_l) & (df['time'] < x_r)
    intermediate_times = df.loc[mask, 'time']
    intermediate_pressures = df.loc[mask, 'pressure']

    if len(intermediate_times) == 0:
        return

    predicted_pressures = a * np.log(intermediate_times) + b

    ss_total = np.sum((intermediate_pressures - np.mean(intermediate_pressures))**2)
    ss_residual = np.sum((intermediate_pressures - predicted_pressures)**2)
    r2 = 1 - (ss_residual / ss_total) if ss_total != 0 else 0

    print(f"Bounds: ({x_l}, {y_l}) - ({x_r}, {y_r})")
    print(f"Log model: y = {a:.4f} ln(x) + {b:.4f}")
    print(f"R2: {r2:.4f}")
    print("-" * 30)

In [151]:
def recognize_limits(filename: str, recovery: list):
    if filename in ['1c9db047-e335-46ac-8039-effd8589b25b', '1cbce6e5-9f0b-419f-9527-7add4e255217']:
        return

    df = pd.read_csv(f'src/train_reduced/{filename}', sep='\\s+')

    if recovery.size == 2:
        recovery_l, recovery_r = recovery[0], recovery[1]
        pressure_l = min(df[df['time'] >= recovery_l]['pressure'])
        pressure_r = min(df[df['time'] <= recovery_r]['pressure'])
        recognize_log(recovery_l, pressure_l, recovery_r, pressure_r, df)

    elif recovery.size > 2:
        for i in range(len(recovery)):
            recovery_l, recovery_r = recovery[i][0], recovery[i][1]
            pressure_l = min(df[df['time'] >= recovery_l]['pressure'])
            pressure_r = min(df[df['time'] <= recovery_r]['pressure'])
            recognize_log(recovery_l, pressure_l, recovery_r, pressure_r, df)

In [152]:
for idx in range(23):
    recovery = extract_recovery(label_df['recovery'][idx])
    recognize_limits(label_df['file'][idx], recovery)

Bounds: (2420.9805555555554, 78.712668) - (2438.4241666666667, 78.238427)
Log model: y = -66.0562 ln(x) + 593.4182
R2: -26.7793
------------------------------
Bounds: (3178.7994444444444, 78.712668) - (3454.6875, 78.238427)
Log model: y = -5.6981 ln(x) + 124.6633
R2: -39.6469
------------------------------
Bounds: (3764.9605555555554, 78.712668) - (3771.1033333333335, 78.238427)
Log model: y = -290.9034 ln(x) + 2473.8635
R2: -274.6318
------------------------------
Bounds: (10045.820833333333, 37.645149) - (10165.977777777778, 35.680339)
Log model: y = -165.2500 ln(x) + 1560.4096
R2: -9.7892
------------------------------
Bounds: (1172.6930555555555, 202.910941) - (1173.3916666666667, 203.291302)
Log model: y = 638.6666 ln(x) + -4310.5827
R2: -5.8683
------------------------------
Bounds: (1268.9944444444445, 203.141287) - (1439.7258333333334, 202.910941)
Log model: y = -1.8248 ln(x) + 216.1816
R2: -360.6110
------------------------------
Bounds: (2329.0, 44.493273) - (3457.0, 53.10703