In [1]:
# NOTE: `!` runs the command in a throwaway subshell, so this line does not switch the
# environment of the already-running kernel; select the DSU-FIN environment/kernel when
# launching Jupyter instead.
!conda activate DSU-FIN
import polars as pl
import polars.selectors as cs
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm.notebook import trange, tqdm
from TCN_AE_model_3 import CustomDataset, encoder_decoder_tcn
import holoviews as hv
# from holoviews.operation.datashader import rasterize
import panel as pn
import joblib
from sklearn.preprocessing import StandardScaler
import joblib

# Prefer the first CUDA GPU and fall back to the CPU when no GPU is visible.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# torch.cuda.get_device_name() raises on CPU-only machines, so query it only when
# CUDA was actually selected above.
device_name = torch.cuda.get_device_name(0) if device.type == 'cuda' else 'CPU'
print(f' Device: {device_name}. Now using: {device.type}', '\n',
      f'Torch Version: {torch.version.__version__}')

# The relative file names used by the following cells are resolved against this folder.
PROJECT_DIR = 'e:\\python_projects\\notebooks\\Deep learning\\FIN'
if os.path.isdir(PROJECT_DIR):
    if os.getcwd() != PROJECT_DIR:
        os.chdir(PROJECT_DIR)
else:
    # Do not crash on machines where the hard-coded folder does not exist; the notebook
    # then expects its data files relative to the current working directory.
    print(f' Project folder not found: {PROJECT_DIR}. Staying in: {os.getcwd()}')

 Device: NVIDIA GeForce RTX 4070 Ti. Now using: cuda 
 Torch Version: 2.7.1+cu128


In [3]:
# Number of windows per forward pass.
BATCH_SIZE = 1

# NOTE(review): `hour` looks like "samples per hour" (4 * 60 matches the 15 s sampling in
# the file names) — confirm against CustomDataset.
hour = 4 * 60
win_size_hours = 24
win_size = win_size_hours * hour

# stride == win_size, so consecutive windows do not overlap; shuffle=False keeps them in
# file order so the reconstructions can be concatenated back into one frame.
test_dataset = CustomDataset('tina_val_15s.parquet', win_size = win_size, stride = win_size, train = False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=0)

# Scaler loaded from disk; used below to map model outputs back with inverse_transform.
rescaler = joblib.load('tina_train_scaler_fit.joblib')

# Load the trained weights straight onto the selected device and run the model there.
# (The original cell left the model and the batches on the CPU, so `device` was unused.)
model_test = encoder_decoder_tcn()
model_test.load_state_dict(
    torch.load(r'model states\TCN-AE_15s_SL1_m3.pth', weights_only=True, map_location=device)
)
model_test = model_test.to(device)
model_test.eval()

y_reconstructed = []
with torch.no_grad():
    # The second item yielded by the loader is not needed for reconstruction.
    for x_batch, y_batch in tqdm(test_loader):
        output = model_test(x_batch.to(device))
        # Flatten (batch, time, features) to (batch * time, features): identical to the
        # former squeeze(dim=0) when BATCH_SIZE == 1, and still valid for larger batches.
        # NOTE(review): assumes the feature axis is last, as inverse_transform requires.
        output = output.reshape(-1, output.shape[-1]).cpu().numpy()
        output = rescaler.inverse_transform(output)
        y_reconstructed.append(pl.DataFrame(output))

  0%|          | 0/88 [00:00<?, ?it/s]

In [4]:
# Stack the per-window frames in loader order and cache the result on disk so the
# analysis cells can be re-run without repeating inference.
reconstructed_df = pl.concat(y_reconstructed, how='vertical')
reconstructed_df.write_parquet('tina_TCN_reconstructed.parquet')

In [2]:
def _load_inverted_flag(none_column, flag_column):
    """Read `Timestamp` plus `none_column` flipped (0 -> 1, 1 -> 0) and named `flag_column`.

    `replace_strict` raises if the column holds any value other than 0 or 1.
    """
    flipped = pl.col(none_column).replace_strict([0, 1], [1, 0]).alias(flag_column)
    return (
        pl.scan_parquet('tina_categorical_resampled.parquet')
        .select('Timestamp', flipped)
        .collect()
    )


# The source columns mark the absence of an alarm / maintenance id, so flipping them
# yields "active" flags.
alarms = _load_inverted_flag('alarms_none', 'alarm_active')
maintenence = _load_inverted_flag('m_id_none', 'maintenance_active')

# One frame with both flags per timestamp.
alarms_maintenance = alarms.join(maintenence, on="Timestamp")

In [3]:
reconstructed_df = pl.read_parquet('tina_TCN_reconstructed.parquet')
original_df = pl.read_parquet('tina_val_15s.parquet')

# Only the first 104 columns of the validation file are compared. After dropping
# Timestamp they must match the reconstructed columns one-to-one, otherwise the
# column rename below raises.
original_df = original_df.select(list(original_df.schema)[:104])
reconstructed_df.columns = original_df.drop('Timestamp').columns

# The reconstruction can be shorter than the source, so trim the original to the
# same number of rows before borrowing its timestamps.
original_df_adjusted = original_df.head(len(reconstructed_df))
reconstructed_df.insert_column(0, original_df_adjusted['Timestamp'])

# Polars joins return a new frame. The original cell also joined `reconstructed_df`
# but discarded the result, so that statement was dead work and has been removed.
# The reconstructed frame stays feature-only on purpose: later cells treat every
# column after Timestamp as a feature.
original_df_adjusted = original_df_adjusted.join(alarms_maintenance, on="Timestamp")

# The inner join drops timestamps missing from `alarms_maintenance`; fail here with a
# clear message instead of a length-mismatch error in the error computation below.
if len(original_df_adjusted) != len(reconstructed_df):
    raise ValueError(
        f'Row mismatch after joining alarm/maintenance flags: '
        f'{len(original_df_adjusted)} original rows vs {len(reconstructed_df)} reconstructed rows'
    )

In [13]:
# Every column after Timestamp is treated as a feature.
feature_columns = list(reconstructed_df.schema)[1:]
percentile_value = 99.9

# Per-feature squared reconstruction error, one NumPy array per column.
all_feature_reconstruction_errors = {
    name: (original_df_adjusted[name] - reconstructed_df[name]).pow(2).to_numpy()
    for name in feature_columns
}

# Detection threshold = the chosen percentile of each feature's squared error.
thresholds_for_detection = {}
for feature_name, errors_list in all_feature_reconstruction_errors.items():
    if len(errors_list) == 0:
        # Nothing to take a percentile of; leave this feature without a threshold.
        continue
    threshold = np.percentile(errors_list, percentile_value)
    thresholds_for_detection[feature_name] = threshold
    print(f"Порог ({percentile_value}%) для {feature_name}: {threshold}")

Порог (99.9%) для mean_FEATURE0: 0.6587525010108948
Порог (99.9%) для mean_FEATURE1: 0.2750788927078247
Порог (99.9%) для mean_FEATURE2: 0.22118327021598816
Порог (99.9%) для mean_FEATURE3: 0.03698326647281647
Порог (99.9%) для mean_FEATURE4: 2.236074209213257
Порог (99.9%) для mean_FEATURE5: 1.3859519958496094
Порог (99.9%) для mean_FEATURE6: 1.6557111740112305
Порог (99.9%) для mean_FEATURE7: 0.51762455701828
Порог (99.9%) для mean_FEATURE8: 0.8475821018218994
Порог (99.9%) для mean_FEATURE9: 6.821884632110596
Порог (99.9%) для mean_FEATURE10: 26.414798736572266
Порог (99.9%) для mean_FEATURE11: 1.1644054651260376
Порог (99.9%) для mean_FEATURE12: 0.8414915204048157
Порог (99.9%) для mean_FEATURE13: 1.8693240880966187
Порог (99.9%) для mean_FEATURE14: 3.614064931869507
Порог (99.9%) для mean_FEATURE15: 2.418714761734009
Порог (99.9%) для mean_FEATURE16: 1.233705759048462
Порог (99.9%) для mean_FEATURE17: 1.0184953212738037
Порог (99.9%) для mean_FEATURE18: 0.6365823745727539
Порог (9

In [6]:
# Hard-coded per-feature values keyed by column name, presumably squared-error thresholds
# pasted from an earlier run — TODO confirm provenance.
# There are no entries for mean_FEATURE76 and mean_FEATURE87.
# NOTE(review): these values differ from the 99.9th-percentile thresholds printed by the
# threshold cell (e.g. mean_FEATURE0: 0.4215 here vs 0.6588 there). No cell visible in this
# notebook reads `thresholds`; the plotting call passes `thresholds_for_detection` instead.
thresholds = {'mean_FEATURE0': np.float32(0.42150515),
 'mean_FEATURE1': np.float32(0.20319732),
 'mean_FEATURE2': np.float32(0.19615193),
 'mean_FEATURE3': np.float32(0.022218227),
 'mean_FEATURE4': np.float32(1.6782992),
 'mean_FEATURE5': np.float32(0.9842625),
 'mean_FEATURE6': np.float32(1.238128),
 'mean_FEATURE7': np.float32(0.3900614),
 'mean_FEATURE8': np.float32(0.8386512),
 'mean_FEATURE9': np.float32(4.7902703),
 'mean_FEATURE10': np.float32(25.071886),
 'mean_FEATURE11': np.float32(0.68052346),
 'mean_FEATURE12': np.float32(0.580897),
 'mean_FEATURE13': np.float32(1.0724338),
 'mean_FEATURE14': np.float32(3.0758564),
 'mean_FEATURE15': np.float32(2.3383162),
 'mean_FEATURE16': np.float32(1.0236775),
 'mean_FEATURE17': np.float32(0.77520007),
 'mean_FEATURE18': np.float32(0.44730085),
 'mean_FEATURE19': np.float32(3.2971532),
 'mean_FEATURE20': np.float32(12.365843),
 'mean_FEATURE21': np.float32(0.6211181),
 'mean_FEATURE22': np.float32(1.1344229),
 'mean_FEATURE23': np.float32(0.8430721),
 'mean_FEATURE24': np.float32(0.63013124),
 'mean_FEATURE25': np.float32(3.816558),
 'mean_FEATURE26': np.float32(2.782583),
 'mean_FEATURE27': np.float32(0.47515142),
 'mean_FEATURE28': np.float32(1.0020258),
 'mean_FEATURE29': np.float32(0.6994351),
 'mean_FEATURE30': np.float32(1.3310556),
 'mean_FEATURE31': np.float32(1.3016282),
 'mean_FEATURE32': np.float32(0.6494755),
 'mean_FEATURE33': np.float32(1.7902198),
 'mean_FEATURE34': np.float32(0.84186566),
 'mean_FEATURE35': np.float32(1.1881871),
 'mean_FEATURE36': np.float32(0.8173567),
 'mean_FEATURE37': np.float32(0.9609527),
 'mean_FEATURE38': np.float32(0.70542216),
 'mean_FEATURE39': np.float32(0.9593457),
 'mean_FEATURE40': np.float32(0.24280973),
 'mean_FEATURE41': np.float32(6.729699),
 'mean_FEATURE42': np.float32(65.7806),
 'mean_FEATURE43': np.float32(29.497036),
 'mean_FEATURE44': np.float32(73.27882),
 'mean_FEATURE45': np.float32(72.04436),
 'mean_FEATURE46': np.float32(0.12973951),
 'mean_FEATURE47': np.float32(39.81871),
 'mean_FEATURE48': np.float32(0.6874012),
 'mean_FEATURE49': np.float32(2.631487),
 'mean_FEATURE50': np.float32(8.569691),
 'mean_FEATURE51': np.float32(16.772371),
 'mean_FEATURE52': np.float32(4.852936),
 'mean_FEATURE53': np.float32(4.394761),
 'mean_FEATURE54': np.float32(1.4145783),
 'mean_FEATURE55': np.float32(1.4421047),
 'mean_FEATURE56': np.float32(1.7880461),
 'mean_FEATURE57': np.float32(1.4248106),
 'mean_FEATURE58': np.float32(1.2754639),
 'mean_FEATURE59': np.float32(0.5794825),
 'mean_FEATURE60': np.float32(0.6251785),
 'mean_FEATURE61': np.float32(0.46940377),
 'mean_FEATURE62': np.float32(6.2848806),
 'mean_FEATURE63': np.float32(10.612053),
 'mean_FEATURE64': np.float32(0.38072827),
 'mean_FEATURE65': np.float32(0.48745218),
 'mean_FEATURE66': np.float32(1.2376535),
 'mean_FEATURE67': np.float32(1.0539542),
 'mean_FEATURE68': np.float32(6.6541386),
 'mean_FEATURE69': np.float32(6.602502),
 'mean_FEATURE70': np.float32(0.064529434),
 'mean_FEATURE71': np.float32(3.0184615),
 'mean_FEATURE72': np.float32(5.8108697),
 'mean_FEATURE73': np.float32(1.5436399),
 'mean_FEATURE74': np.float32(1.5069355),
 'mean_FEATURE75': np.float32(1.4054474),
 'mean_FEATURE77': np.float32(13.545437),
 'mean_FEATURE78': np.float32(3.8061817),
 'mean_FEATURE79': np.float32(4.2149167),
 'mean_FEATURE80': np.float32(0.7022148),
 'mean_FEATURE81': np.float32(4.649055),
 'mean_FEATURE82': np.float32(3.7959857),
 'mean_FEATURE83': np.float32(0.64080125),
 'mean_FEATURE84': np.float32(0.34190345),
 'mean_FEATURE85': np.float32(5.2414594),
 'mean_FEATURE86': np.float32(9.276079),
 'mean_FEATURE88': np.float32(1.0083756),
 'mean_FEATURE89': np.float32(1.7892221),
 'mean_FEATURE90': np.float32(1.2886363),
 'mean_FEATURE91': np.float32(6.958557),
 'mean_FEATURE92': np.float32(6.505038),
 'mean_FEATURE93': np.float32(12.650433),
 'mean_FEATURE94': np.float32(1.8494279),
 'mean_FEATURE95': np.float32(2.8939722),
 'mean_FEATURE96': np.float32(2.5196197),
 'mean_FEATURE97': np.float32(4.460838),
 'mean_FEATURE98': np.float32(3.183247),
 'mean_FEATURE99': np.float32(6.1216874),
 'mean_FEATURE100': np.float32(7.7625403),
 'mean_FEATURE101': np.float32(26.114592),
 'mean_FEATURE102': np.float32(2.2858553),
 'mean_FEATURE103': np.float32(8.826485),
 'mean_FEATURE104': np.float32(1.4738482)}

In [5]:
def plot_error(true_df, reconstructed_df, feature, threshholds, save_path=False):
    """Overlay a feature's original signal, its reconstruction and the squared error.

    Rows whose squared error reaches the feature's threshold are drawn as vertical
    lines, and maintenance periods as grey bands spanning +/- the maximum error.

    Parameters
    ----------
    true_df : pl.DataFrame
        Must contain `Timestamp`, `feature`, `alarm_active` and `maintenance_active`.
    reconstructed_df : pl.DataFrame
        Must contain `feature` and have the same number of rows as `true_df`.
    feature : str
        Name of the column to plot.
    threshholds : mapping
        Feature name -> squared-error threshold. Plain floats and NumPy scalars
        are both accepted.
    save_path : bool or str or os.PathLike, default False
        False: return the overlay. True: save to
        ``reconst errors/<feature>_reconstruction_error``. A string or path: save to
        that base path (HoloViews appends the ``.html`` / ``.png`` extension).

    Returns
    -------
    hv.Overlay or None
        The overlay when nothing is saved, otherwise None.

    Raises
    ------
    KeyError
        If `feature` has no entry in `threshholds`.
    """
    hv.extension('bokeh')
    # float() handles NumPy scalars as well as plain Python numbers (`.item()` did not).
    thresh = float(threshholds[feature])
    orig = f'original {feature}'
    reconst = f'reconstructed {feature}'

    new_df = pl.DataFrame({'Timestamp': true_df['Timestamp'],
                           orig: true_df[feature],
                           reconst: reconstructed_df[feature],
                           'alarm_active': true_df['alarm_active'],
                           'maintenance_active': true_df['maintenance_active']
                           })

    # `anomaly` depends on `error`, so it needs its own with_columns call.
    new_df = new_df.with_columns(
        error=(pl.col(orig) - pl.col(reconst)).pow(2))
    new_df = new_df.with_columns(
        anomaly=(pl.col('error') >= thresh)
    )

    curve_true = hv.Curve((new_df['Timestamp'], new_df[orig]),
                          label=orig).opts(color='green', alpha=0.5)
    curve_reconstructed = hv.Curve((new_df['Timestamp'], new_df[reconst]),
                                   label=reconst).opts(color='red', alpha=0.5)
    curve_error = hv.Curve((new_df['Timestamp'], new_df['error']),
                           label='Reconstruction squared error').opts(color='purple', alpha=0.2)

    anomaly_timestamps = new_df.filter(pl.col('anomaly')).select('Timestamp')
    anomaly_lines = hv.VLines(anomaly_timestamps['Timestamp'].to_list(), label='anomaly').opts(
        color='purple',
        line_width=1.5,
        alpha=0.3
    )

    # Scale the 0/1 maintenance flag to the largest error so the band spans the plot.
    max_error = new_df['error'].max()
    new_df = new_df.with_columns(
        scaled_maintenance_active=pl.when(pl.col('maintenance_active') == 1)
                                    .then(max_error)
                                    .otherwise(0)
    )
    maint = hv.Area(
        (new_df['Timestamp'], new_df['scaled_maintenance_active']),
        label='maintenance_active').opts(color='grey', alpha=0.2)
    # Mirror the band below zero.
    maint_neg = hv.Area(
        (new_df['Timestamp'], -new_df['scaled_maintenance_active']),
        label='maintenance_active').opts(color='grey', alpha=0.2)

    overlay = (curve_true * curve_reconstructed * anomaly_lines * maint * maint_neg * curve_error).opts(
        width=1600, height=600, show_grid=True)

    if not save_path:
        return overlay

    # An explicit path is honoured; any other truthy value selects the default location.
    if isinstance(save_path, (str, os.PathLike)):
        target = os.fspath(save_path)
    else:
        target = os.path.join('reconst errors', f'{feature}_reconstruction_error')

    # hv.save does not create missing folders.
    target_dir = os.path.dirname(target)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    hv.save(overlay, target, fmt='html')
    hv.save(overlay, target, fmt='png')
    print(f'File saved at {target}')
    return None

    

In [None]:
# Interactive preview of a single feature; the returned overlay is the cell's output.
feature = 'mean_FEATURE7'
plot_error(
    true_df=original_df_adjusted,
    reconstructed_df=reconstructed_df,
    feature=feature,
    threshholds=thresholds_for_detection,
    save_path=False,
)

In [None]:
# Export the reconstruction-error plot of every feature column (everything after Timestamp).
# `threshholds` is a required argument of plot_error; the original call omitted it and
# raised TypeError. `thresholds_for_detection` is keyed by this same column list.
for feature in list(reconstructed_df.schema)[1:]:
    plot_error(original_df_adjusted, reconstructed_df, feature, thresholds_for_detection, save_path=True)