In [None]:
import ast
import os
import random
import warnings

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Work from the raw-data directory so the cells below can use short relative paths.
# The target is relative to the current directory, so running this cell a second
# time would try to chdir from inside raw_data and fail (or land somewhere else).
# Skip the move when we are already there to keep the cell safe to re-run.
if os.path.basename(os.getcwd()) != 'raw_data':
    os.chdir('../src/raw_data')

# NOTE: this silences every warning category for the rest of the kernel session.
warnings.filterwarnings('ignore')

In [4]:
# Read the semicolon-delimited label table; the path is resolved against the
# current working directory.
label_df = pd.read_csv(filepath_or_buffer='ground_truth.csv', delimiter=';')
# Bare last expression -> rich (truncated) display of the frame.
label_df

Unnamed: 0,file,mark,recovery,drop
0,00e03657-8e1e-4c8c-a724-1d3c77b48510,"[0.0,235.9225,237.06666666666666,2076.06055555...","[[2419.9805555555554,2437.4241666666667],[3177...","[[3453.6875,3763.9605555555554]]"
1,00e4dba2-36d2-42b4-beb1-c55aed75f506,"[0.0,7979.234444444444,13284.465,19439.8005555...",[],"[[13284.465,19439.800555555557]]"
2,00f035b7-ad7a-4f30-9081-522a3c10805b,"[0.0,42.75,2438.3330555555553]",[],"[[0.0,42.75]]"
3,01a0c034-6afc-4e73-95fa-621f702a0b7d,"[0.0,491.98305555555555,1439.9830555555557,154...",[],"[[0.0,491.98305555555555]]"
4,01a530d3-6496-4515-9fbb-4f44e298fd29,"[0.0,1287.0341666666666,1288.0483333333334,156...",[],"[[4920.376666666667,6208.231666666667]]"
...,...,...,...,...
95,1dfaf03c-e297-4d92-a0bf-40b1a829391f,"[0.0,7.4,7.933055555555556,14.466666666666667,...",[],[]
96,1e149fbd-41c6-4779-b87d-c5dc17fbb4c0,"[0.0,635.3127777777778]",[],"[[0.0,635.3127777777778]]"
97,1e19b77c-8a0e-4749-a384-9c1e679035bf,"[0.0,82.16555555555556,216.66027777777776,229....",[],[]
98,1e4b4c18-1e32-45eb-917a-5760e33fbaca,"[0.0,1217.8258333333333,1223.6030555555556,125...","[[9541.77638888889,10288.5075]]","[[10339.343055555555,10739.613055555556],[1311..."


In [None]:
def _parse_intervals(raw) -> list:
    """Convert one label cell into a list of ``[start, end]`` pairs.

    Non-string cells (e.g. NaN for a missing value) yield an empty list.
    Strings are parsed with ``ast.literal_eval`` so only Python literals are
    accepted; unlike ``eval`` it cannot execute code embedded in the CSV.
    """
    if not isinstance(raw, str):
        return []
    return ast.literal_eval(raw)


def _shade_intervals(fig, intervals, level, color: str, name: str) -> None:
    """Shade each interval on ``fig`` and draw a thick marker line at ``level``."""
    for position, (start, end) in enumerate(intervals):
        fig.add_vrect(
            x0=start, x1=end, fillcolor=color, opacity=0.3, layer="below", line_width=0
        )
        fig.add_trace(go.Scatter(
            x=[start, end],
            y=[level, level],
            mode="lines",
            line=dict(color=color, width=4),
            name=name,
            # One legend entry per event type instead of one per interval.
            legendgroup=name,
            showlegend=position == 0,
        ))


def plot_data(df: pd.DataFrame, indicators_df: pd.DataFrame, filename: str,
              threshold: float = 0.8, trail: int = 20):
    """Scatter-plot pressure over time and overlay the labelled intervals.

    Points whose pressure exceeds ``threshold`` are drawn in red together with
    the ``trail`` rows that follow each of them; all other points are blue.
    Intervals from the "recovery" column are shaded green with a marker line
    at the maximum pressure, intervals from the "drop" column are shaded red
    with a marker line at the minimum pressure, and ``threshold`` is drawn as
    a dashed horizontal line.

    Parameters
    ----------
    df : pd.DataFrame
        Signal with "time" and "pressure" columns. It is not modified; the
        colouring is done on a copy.
    indicators_df : pd.DataFrame
        Label table with "file", "recovery" and "drop" columns, where the
        interval cells are string-encoded lists of ``[start, end]`` pairs.
    filename : str
        Value of the "file" column selecting the label row; also used in the
        figure title.
    threshold : float, default 0.8
        Pressure level above which a point counts as high.
    trail : int, default 20
        Number of rows after each high point that are also coloured red.

    Raises
    ------
    IndexError
        If ``indicators_df`` has no row for ``filename``.
    """
    label_rows = indicators_df.loc[indicators_df["file"] == filename, ["recovery", "drop"]]
    if label_rows.empty:
        raise IndexError(f"no row for file {filename!r} in indicators_df")
    recovery_raw, drop_raw = label_rows.to_numpy()[0]
    recovery_intervals = _parse_intervals(recovery_raw)
    drop_intervals = _parse_intervals(drop_raw)

    # Work on a copy so the caller's frame does not silently gain a column.
    plot_df = df.copy()

    # A row is red when it, or any of the `trail` rows before it, is above the
    # threshold. The rolling window is positional, so this does not depend on
    # the frame having a 0..n-1 integer index.
    above = plot_df["pressure"] > threshold
    flagged = (
        above.astype(int)
        .rolling(window=trail + 1, min_periods=1)
        .max()
        .astype(bool)
    )
    plot_df["color"] = flagged.map({True: "red", False: "blue"})

    fig = px.scatter(plot_df, x="time", y="pressure", title=f'Visualization of {filename}', color="color")

    fig.update_layout(
        xaxis_title="Time",
        yaxis_title="Pressure",
        template="plotly_white"
    )

    # Computed once: these do not change between intervals.
    top_level = plot_df["pressure"].max()
    bottom_level = plot_df["pressure"].min()

    _shade_intervals(fig, recovery_intervals, top_level, "green", "Recovery")
    _shade_intervals(fig, drop_intervals, bottom_level, "red", "Drop")

    fig.add_hline(
        y=threshold,
        line=dict(color="black", width=2, dash="dash"),
    )

    fig.update_layout(
        legend=dict(title="Legend", x=0.99, y=0.99, xanchor="right", yanchor="top")
    )

    fig.show()

In [91]:
# Pick a random labelled recording, rescale it and plot it with its labels.
#
# Only rows whose file is actually present in train/ are candidates. Drawing
# from the real index (rather than a fixed 0..100 range) cannot select a row
# that does not exist, and filtering first means a missing file can no longer
# leave `file_df` undefined or holding data from a previous run.
train_files = set(os.listdir('train'))
available_df = label_df[label_df['file'].isin(train_files)]
if available_df.empty:
    raise FileNotFoundError("none of the files listed in label_df were found in 'train'")

idx = random.choice(available_df.index.tolist())
filename = label_df.loc[idx, 'file']
scaler = MinMaxScaler()

# Whitespace-separated two-column file without a header row.
file_df = pd.read_csv(f'train/{filename}', sep = '\\s+', names = ['time', 'pressure'])

# The scaler is fitted on both columns, but only the scaled pressure (column 1)
# is written back; "time" keeps its original values.
file_df['pressure'] = scaler.fit_transform(file_df[['time', 'pressure']])[:, 1]
plot_data(file_df, label_df[label_df['file'] == filename], filename)
print(idx, filename)

18 0b13b479-a0cf-4ecc-a7f1-02d9edcbd56c
