In [10]:
import pandas as pd
import numpy as np
import os
import warnings
import random
import plotly.express as px
import plotly.graph_objects as go

In [2]:
# Silence all warnings for the rest of the notebook session.
warnings.filterwarnings('ignore')

# All later cells use paths relative to the raw-data directory.
# The guard makes this cell safe to re-run: without it, a second execution
# would resolve '../src/raw_data' relative to the already-changed cwd.
if not os.path.normpath(os.getcwd()).endswith(os.path.join('src', 'raw_data')):
    os.chdir('../src/raw_data')

In [26]:

# Load the per-file event annotations; the path is relative to the current
# working directory. Each row carries a `file` id plus `recovery` and `drop`
# interval lists stored as strings.
label_df = pd.read_csv(filepath_or_buffer='ground_truth.csv')
label_df

Unnamed: 0,file,recovery,drop
0,00e03657-8e1e-4c8c-a724-1d3c77b48510,"[[2420.9805555555554, 2438.4241666666667], [31...","[[3454.6875, 3764.9605555555554]]"
1,00e4dba2-36d2-42b4-beb1-c55aed75f506,[],"[[13285.465, 19439.800555555557]]"
2,00f035b7-ad7a-4f30-9081-522a3c10805b,[],"[[0.0, 42.75]]"
3,01a0c034-6afc-4e73-95fa-621f702a0b7d,[],"[[0.0, 491.98305555555555]]"
4,01a530d3-6496-4515-9fbb-4f44e298fd29,[],"[[4921.376666666667, 6209.231666666667]]"
...,...,...,...
95,1dfaf03c-e297-4d92-a0bf-40b1a829391f,[],[]
96,1e149fbd-41c6-4779-b87d-c5dc17fbb4c0,[],"[[0.0, 635.3127777777778]]"
97,1e19b77c-8a0e-4749-a384-9c1e679035bf,[],[]
98,1e4b4c18-1e32-45eb-917a-5760e33fbaca,"[[9541.77638888889, 10288.5075]]","[[10339.343055555555, 10739.613055555556], [13..."


In [60]:
# Notebook-level MinMaxScaler instance from scikit-learn.
# NOTE(review): `scaler` is not referenced by any other cell visible in this
# part of the notebook — confirm it is still needed.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

In [61]:
def plot_data(df: pd.DataFrame, indicators_df: pd.DataFrame, filename: str):
    """Plot a pressure curve and highlight its labelled recovery/drop intervals.

    Args:
        df: frame with `time` and `pressure` columns to draw as a line.
        indicators_df: label frame with `file`, `recovery` and `drop` columns;
            the interval columns hold Python-literal strings such as
            "[[start, end], ...]". Non-string cells (e.g. NaN) mean "no intervals".
        filename: value looked up in `indicators_df["file"]`; also used in the title.

    Raises:
        IndexError: if `filename` has no row in `indicators_df`.
        ValueError / SyntaxError: if an interval string is not a plain Python literal.
    """
    # Local import so the function does not depend on which notebook cell imported `ast`.
    import ast

    def _parse_intervals(raw):
        # literal_eval only accepts literals, so text coming from the CSV can
        # never execute code the way eval() would.
        return ast.literal_eval(raw) if isinstance(raw, str) else []

    events = indicators_df[indicators_df["file"] == filename][["recovery", "drop"]].values[0]
    recovery_intervals = _parse_intervals(events[0])
    drop_intervals = _parse_intervals(events[1])

    fig = px.line(df, x = 'time', y = 'pressure', title = f'Visualization of {filename}')
    fig.update_layout(xaxis_title="Time", yaxis_title="Pressure", template="plotly_white")

    # The extremes are loop-invariant: compute them once instead of per interval.
    pressure_max = df["pressure"].max()
    pressure_min = df["pressure"].min()

    # Recovery markers sit on the top edge of the curve, drop markers on the bottom edge.
    for intervals, color, label, level in (
        (recovery_intervals, "green", "Recovery", pressure_max),
        (drop_intervals, "red", "Drop", pressure_min),
    ):
        for start, end in intervals:
            fig.add_vrect(x0=start, x1=end, fillcolor=color, opacity=0.3, layer="below", line_width=0, name=label)
            fig.add_trace(go.Scatter(x=[start, end], y=[level, level],
                                     mode="lines", line=dict(color=color, width=4), name=f"{label} (Bold)"))

    fig.update_layout(
        xaxis=dict(rangeslider=dict(visible=True), type="linear"),
        yaxis=dict(fixedrange=False),
        legend=dict(title="Legend", x=0.99, y=0.99, xanchor="right", yanchor="top")
        )

    fig.show()

### Vanilla Train Data

In [62]:
import ast

# Pick a random labelled recording. randrange(len(...)) stays inside the frame;
# randint(0, 100) is inclusive on both ends and can index past the last row.
idx = random.randrange(len(label_df))
filename = label_df['file'].iloc[idx]

# Fail loudly when the recording is missing instead of plotting a stale or
# undefined `file_df` left over from a previous run.
if filename not in os.listdir('train'):
    raise FileNotFoundError(f'train/{filename}')

file_df = pd.read_csv(f'train/{filename}', sep = '\\s+', names = ['time', 'pressure'])
plot_data(file_df, label_df[label_df['file'] == filename], filename)
print(filename)

0d4db2c6-1795-4e37-bb05-f67f238bb483


## Train data with a moving average (constant window size = 30)

In [63]:
# Build a new frame instead of aliasing `file_df`: assigning into an alias
# would overwrite the raw pressure column that the other cells read.
# 30-sample box kernel; mode 'same' keeps the output as long as the input.
convolved = file_df.assign(
    pressure=np.convolve(file_df['pressure'], np.ones(30) / 30, 'same')
)
plot_data(convolved, label_df[label_df['file'] == filename], filename)
print(filename)

0d4db2c6-1795-4e37-bb05-f67f238bb483


In [64]:
from scipy.ndimage import uniform_filter1d

# Build a new frame instead of aliasing `file_df`: assigning into an alias
# would overwrite the raw pressure column that the other cells read.
uniformed = file_df.assign(
    pressure=uniform_filter1d(file_df['pressure'], size = 30)
)
plot_data(uniformed, label_df[label_df['file'] == filename], filename)
print(filename)

0d4db2c6-1795-4e37-bb05-f67f238bb483


## Kalman filter

In [None]:
from pykalman import KalmanFilter


def kalman_filter(pressure, n_iter = 100):
    """Smooth a pressure series with a Kalman filter fitted by EM.

    Args:
        pressure: observations handed to `KalmanFilter.em` and `KalmanFilter.filter`.
        n_iter: number of EM iterations used to fit the filter parameters.
            Defaults to 100 (the previous hard-coded value); lower it for
            faster, rougher fits.

    Returns:
        The first element returned by `KalmanFilter.filter`, flattened to 1-D.
    """
    kf = KalmanFilter(initial_state_mean = 0, n_dim_obs = 1)
    # EM cost grows with n_iter, so it is exposed as a parameter.
    kf = kf.em(pressure, n_iter = n_iter)
    filtered, _ = kf.filter(pressure)
    return filtered.flatten()


# Build a new frame instead of aliasing `file_df`: assigning into an alias
# would overwrite the raw pressure column that the other cells read.
kalmaned = file_df.assign(pressure=kalman_filter(file_df['pressure']))
plot_data(kalmaned, label_df[label_df['file'] == filename], filename)
print(filename)


0d4db2c6-1795-4e37-bb05-f67f238bb483


## Fourier filter

In [None]:
from scipy.fft import fft, ifft 


def fourier_filter(pressure, threshold = 0.1):
    """Low-pass filter a signal by zeroing its high-frequency FFT bins.

    Args:
        pressure: 1-D sequence of samples.
        threshold: cutoff in cycles per sample (the unit `np.fft.fftfreq`
            returns with its default spacing); bins whose absolute frequency
            exceeds it are dropped. TODO: the right cutoff is still open.

    Returns:
        Real part of the inverse FFT of the truncated spectrum.
    """
    spectrum = fft(pressure)
    bin_freqs = np.fft.fftfreq(len(pressure))
    too_high = np.abs(bin_freqs) > threshold
    # Keep the low-frequency bins and replace the rest with zero.
    truncated = np.where(too_high, 0, spectrum)
    return np.real(ifft(truncated))



# Build a new frame instead of aliasing `file_df`: assigning into an alias
# would overwrite the raw pressure column that the other cells read.
fourierd = file_df.assign(pressure=fourier_filter(file_df['pressure']))
plot_data(fourierd, label_df[label_df['file'] == filename], filename)
print(filename)

0d4db2c6-1795-4e37-bb05-f67f238bb483
