In [49]:
import pandas as pd
import numpy as np
import os
import warnings
import torch 
import torch.nn as nn 
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm

In [53]:
os.getcwd()

'c:\\Users\\shara\\Projects\\2025_Siam-ML-Hack\\src\\raw_data'

In [54]:
device = "cuda" if torch.cuda.is_available() else "cpu"
torch.cuda.empty_cache()
torch.cuda.ipc_collect()
torch.cuda.set_per_process_memory_fraction(0.7, device = 0)
warnings.filterwarnings('ignore')
torch.manual_seed(52)
np.random.seed(52)

In [55]:
label_df = pd.read_csv('ground_truth.csv', sep = ';')
label_df

Unnamed: 0,file,mark,recovery,drop
0,00e03657-8e1e-4c8c-a724-1d3c77b48510,"[0.0,235.9225,237.06666666666666,2076.06055555...","[[2419.9805555555554,2437.4241666666667],[3177...","[[3453.6875,3763.9605555555554]]"
1,00e4dba2-36d2-42b4-beb1-c55aed75f506,"[0.0,7979.234444444444,13284.465,19439.8005555...",[],"[[13284.465,19439.800555555557]]"
2,00f035b7-ad7a-4f30-9081-522a3c10805b,"[0.0,42.75,2438.3330555555553]",[],"[[0.0,42.75]]"
3,01a0c034-6afc-4e73-95fa-621f702a0b7d,"[0.0,491.98305555555555,1439.9830555555557,154...",[],"[[0.0,491.98305555555555]]"
4,01a530d3-6496-4515-9fbb-4f44e298fd29,"[0.0,1287.0341666666666,1288.0483333333334,156...",[],"[[4920.376666666667,6208.231666666667]]"
...,...,...,...,...
95,1dfaf03c-e297-4d92-a0bf-40b1a829391f,"[0.0,7.4,7.933055555555556,14.466666666666667,...",[],[]
96,1e149fbd-41c6-4779-b87d-c5dc17fbb4c0,"[0.0,635.3127777777778]",[],"[[0.0,635.3127777777778]]"
97,1e19b77c-8a0e-4749-a384-9c1e679035bf,"[0.0,82.16555555555556,216.66027777777776,229....",[],[]
98,1e4b4c18-1e32-45eb-917a-5760e33fbaca,"[0.0,1217.8258333333333,1223.6030555555556,125...","[[9541.77638888889,10288.5075]]","[[10339.343055555555,10739.613055555556],[1311..."


In [56]:
SEQUENCE_LENGTH = 100  
BATCH_SIZE = 32
NOISE_FACTOR = 0.05
TRAIN_DIR = "../train_reduced/"
CHECKPOINT_PATH = '../../models/checkpoint.pth'

In [57]:
def load_raw_data(file_path: str, sequence_length = SEQUENCE_LENGTH, noise_factor = NOISE_FACTOR):
    df = pd.read_csv(file_path, sep="\\s+", names=["time", "pressure"])  
    
    if df.empty or "pressure" not in df:
        return torch.empty(0), torch.empty(0) 
    
    scaler = MinMaxScaler()
    df["pressure"] = scaler.fit_transform(df[["pressure"]]) 

    sequences, noisy_sequences = [], []
    
    for i in range(len(df) - sequence_length):
        seq = df["pressure"].iloc[i : i + sequence_length].values
        noisy_seq = seq + noise_factor * np.random.normal(0, 1, seq.shape) 
        
        sequences.append(seq)
        noisy_sequences.append(noisy_seq)

    return torch.tensor(noisy_sequences, dtype=torch.float32).unsqueeze(-1), torch.tensor(sequences, dtype=torch.float32).unsqueeze(-1)

In [58]:
class TorchTypeDataset(Dataset):
    def __init__(self, file_paths, sequence_length = SEQUENCE_LENGTH, noise_factor = NOISE_FACTOR):
        self.noisy_data, self.clean_data = [], []
        
        for file_path in file_paths:
            if os.path.exists(file_path): 
                noisy, clean = load_raw_data(file_path, sequence_length, noise_factor)

                if noisy.shape[0] > 1 and clean.shape[0] > 1:  
                    noisy = noisy.unsqueeze(-1) if noisy.dim() == 2 else noisy
                    clean = clean.unsqueeze(-1) if clean.dim() == 2 else clean

                    self.noisy_data.append(noisy)
                    self.clean_data.append(clean)

        self.noisy_data = torch.cat(self.noisy_data, dim=0)  
        self.clean_data = torch.cat(self.clean_data, dim=0)

    def __len__(self):
        return len(self.clean_data)

    def __getitem__(self, idx):
        return self.noisy_data[idx], self.clean_data[idx]

In [59]:
class CharbonnierLoss(nn.Module):
    def __init__(self, epsilon = 1e-3):
        super(CharbonnierLoss, self).__init__()
        self.epsilon = epsilon
    
    def forward(self, x, y):
        return torch.mean(torch.sqrt((x - y) ** 2 + self.epsilon ** 2))

class Attention(nn.Module):
    def __init__(self, dim):
        super(Attention, self).__init__()
        self.attn = nn.MultiheadAttention(embed_dim = dim, num_heads = 4, batch_first = True)
    
    def forward(self, x):
        attn_output, _ = self.attn(x, x, x)
        return attn_output

class DAE(nn.Module):
    def __init__(self, input_dim = 1, hidden_dim = 64, bottleneck_dim = 32):
        super(DAE, self).__init__()  
      
        self.conv1 = nn.Conv1d(in_channels = input_dim, out_channels = 32, kernel_size = 5, padding = 2)
        self.lstm1 = nn.LSTM(input_size = 32, hidden_size = hidden_dim, batch_first = True, bidirectional = False)
        self.attn = Attention(hidden_dim)
        self.bottleneck = nn.Linear(hidden_dim, bottleneck_dim)
        
        self.fc = nn.Linear(bottleneck_dim, hidden_dim)
        self.lstm2 = nn.LSTM(input_size = hidden_dim, hidden_size = hidden_dim, batch_first = True, bidirectional = False)
        self.conv2 = nn.Conv1d(in_channels = hidden_dim, out_channels = input_dim, kernel_size = 5, padding = 2)
    
    def forward(self, x):
        x = self.conv1(x.transpose(1, 2))
        x = F.relu(x).transpose(1, 2)
        x, _ = self.lstm1(x)
        x = self.attn(x)
        x = self.bottleneck(x)
        
        x = self.fc(x)
        x, _ = self.lstm2(x)
        x = self.conv2(x.transpose(1, 2)).transpose(1, 2)
        return x

In [69]:
def DAE_train(model, dataloader, epochs=50, lr=1e-3, device='cuda',
              save_path="../../models/dae_checkpoint.pth",
              final_save_path="../../models/dae_final.pth",
              patience=7):  # Добавили patience
    
    model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = CharbonnierLoss()

    best_loss = float("inf")  
    patience_counter = 0  # Счетчик для Early Stopping

    for epoch in range(epochs):
        total_loss = 0.0
        progress_bar = tqdm(dataloader, desc=f"Epoch {epoch + 1} / {epochs}") 
        
        for noisy, clean in progress_bar:
            noisy, clean = noisy.to(device), clean.to(device)
            optimizer.zero_grad()
            output = model(noisy)
            loss = criterion(output, clean)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            progress_bar.set_postfix(loss=loss.item())

        avg_loss = total_loss / len(dataloader)
        print(f"Epoch {epoch + 1} / {epochs}, Avg Loss: {avg_loss:.6f}")

        if avg_loss < best_loss:
            best_loss = avg_loss
            patience_counter = 0  # Сбрасываем счетчик, так как модель улучшилась
            torch.save(model.state_dict(), save_path)  
            print(f"Model checkpoint saved at {save_path} (Loss improved: {best_loss:.6f})")
        else:
            patience_counter += 1  # Увеличиваем счетчик, так как улучшения нет
            print(f"⚠ No improvement. Patience: {patience_counter}/{patience}")

        if patience_counter >= patience:
            print(f"⏹ Early Stopping triggered at epoch {epoch + 1}. Best loss: {best_loss:.6f}")
            break  # Останавливаем обучение

    torch.save(model.state_dict(), final_save_path)
    print(f"Final model saved at {final_save_path}")

    return model

In [70]:
def run_inference(model, input_tensor, scaler, chunk_size=500, device='cuda'):
    model.to(device)
    input_tensor = input_tensor.to(device)
    denoised_chunks = []
    
    with torch.no_grad():
        for chunk in torch.split(input_tensor, chunk_size, dim=0):
            denoised_chunk = model(chunk)
            denoised_chunks.append(denoised_chunk.cpu())
    
    denoised_output = torch.cat(denoised_chunks, dim=0)
    return scaler.inverse_transform(denoised_output.numpy().squeeze())


In [71]:
file_paths = [os.path.join(TRAIN_DIR, file) for file in label_df['file'][:100] if file in os.listdir(TRAIN_DIR)]

dataset = TorchTypeDataset(file_paths)
dataloader = DataLoader(dataset, batch_size = BATCH_SIZE, shuffle = True)

In [72]:
for noisy_batch, clean_batch in dataloader:
    print("Noisy batch shape:", noisy_batch.shape, end = ' ')  
    print("Clean batch shape:", clean_batch.shape)
    break

Noisy batch shape: torch.Size([32, 100, 1]) Clean batch shape: torch.Size([32, 100, 1])


In [73]:
dae_model = DAE()

if os.path.exists(CHECKPOINT_PATH):
    dae_model.load_state_dict(torch.load(CHECKPOINT_PATH))
    print(f"Checkpoint loaded from {CHECKPOINT_PATH}")

trained_model = DAE_train(dae_model, dataloader, epochs = 50, lr=1e-3, device='cuda')

Epoch 1 / 50: 100%|██████████| 13539/13539 [01:47<00:00, 125.53it/s, loss=0.00647]


Epoch 1 / 50, Avg Loss: 0.012635
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.012635)


Epoch 2 / 50: 100%|██████████| 13539/13539 [01:51<00:00, 121.00it/s, loss=0.0109] 


Epoch 2 / 50, Avg Loss: 0.010472
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.010472)


Epoch 3 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.08it/s, loss=0.00771]


Epoch 3 / 50, Avg Loss: 0.010101
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.010101)


Epoch 4 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 124.43it/s, loss=0.00934]


Epoch 4 / 50, Avg Loss: 0.009939
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009939)


Epoch 5 / 50: 100%|██████████| 13539/13539 [01:47<00:00, 125.76it/s, loss=0.0101] 


Epoch 5 / 50, Avg Loss: 0.009780
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009780)


Epoch 6 / 50: 100%|██████████| 13539/13539 [01:47<00:00, 125.60it/s, loss=0.00927]


Epoch 6 / 50, Avg Loss: 0.009708
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009708)


Epoch 7 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 125.28it/s, loss=0.0101] 


Epoch 7 / 50, Avg Loss: 0.009631
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009631)


Epoch 8 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 124.97it/s, loss=0.0111] 


Epoch 8 / 50, Avg Loss: 0.009580
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009580)


Epoch 9 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 124.61it/s, loss=0.00778]


Epoch 9 / 50, Avg Loss: 0.009521
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009521)


Epoch 10 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 124.77it/s, loss=0.00746]


Epoch 10 / 50, Avg Loss: 0.009485
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009485)


Epoch 11 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 125.24it/s, loss=0.0104] 


Epoch 11 / 50, Avg Loss: 0.009448
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009448)


Epoch 12 / 50: 100%|██████████| 13539/13539 [01:47<00:00, 126.21it/s, loss=0.00823]


Epoch 12 / 50, Avg Loss: 0.009417
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009417)


Epoch 13 / 50: 100%|██████████| 13539/13539 [01:50<00:00, 122.55it/s, loss=0.0102] 


Epoch 13 / 50, Avg Loss: 0.009391
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009391)


Epoch 14 / 50: 100%|██████████| 13539/13539 [01:51<00:00, 121.53it/s, loss=0.0108] 


Epoch 14 / 50, Avg Loss: 0.009367
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009367)


Epoch 15 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.78it/s, loss=0.00963]


Epoch 15 / 50, Avg Loss: 0.009340
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009340)


Epoch 16 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.57it/s, loss=0.00898]


Epoch 16 / 50, Avg Loss: 0.009330
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009330)


Epoch 17 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 124.93it/s, loss=0.00928]


Epoch 17 / 50, Avg Loss: 0.009313
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009313)


Epoch 18 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 124.94it/s, loss=0.00657]


Epoch 18 / 50, Avg Loss: 0.009308
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009308)


Epoch 19 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 124.04it/s, loss=0.0108] 


Epoch 19 / 50, Avg Loss: 0.009293
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009293)


Epoch 20 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.61it/s, loss=0.00923]


Epoch 20 / 50, Avg Loss: 0.009283
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009283)


Epoch 21 / 50: 100%|██████████| 13539/13539 [01:51<00:00, 121.94it/s, loss=0.00812]


Epoch 21 / 50, Avg Loss: 0.009266
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009266)


Epoch 22 / 50: 100%|██████████| 13539/13539 [01:50<00:00, 122.15it/s, loss=0.00879]


Epoch 22 / 50, Avg Loss: 0.009259
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009259)


Epoch 23 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 124.14it/s, loss=0.0118] 


Epoch 23 / 50, Avg Loss: 0.009246
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009246)


Epoch 24 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.20it/s, loss=0.00849]


Epoch 24 / 50, Avg Loss: 0.009223
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009223)


Epoch 25 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.74it/s, loss=0.00823]


Epoch 25 / 50, Avg Loss: 0.009216
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009216)


Epoch 26 / 50: 100%|██████████| 13539/13539 [01:50<00:00, 122.83it/s, loss=0.0101] 


Epoch 26 / 50, Avg Loss: 0.009202
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009202)


Epoch 27 / 50: 100%|██████████| 13539/13539 [01:50<00:00, 122.99it/s, loss=0.00959]


Epoch 27 / 50, Avg Loss: 0.009200
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009200)


Epoch 28 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.17it/s, loss=0.0127] 


Epoch 28 / 50, Avg Loss: 0.009191
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009191)


Epoch 29 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.37it/s, loss=0.0115] 


Epoch 29 / 50, Avg Loss: 0.009183
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009183)


Epoch 30 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.16it/s, loss=0.00857]


Epoch 30 / 50, Avg Loss: 0.009181
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009181)


Epoch 31 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.23it/s, loss=0.00765]


Epoch 31 / 50, Avg Loss: 0.009165
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009165)


Epoch 32 / 50: 100%|██████████| 13539/13539 [01:50<00:00, 122.90it/s, loss=0.00873]


Epoch 32 / 50, Avg Loss: 0.009166
⚠ No improvement. Patience: 1/7


Epoch 33 / 50: 100%|██████████| 13539/13539 [01:51<00:00, 121.93it/s, loss=0.00946]


Epoch 33 / 50, Avg Loss: 0.009159
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009159)


Epoch 34 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.71it/s, loss=0.00778]


Epoch 34 / 50, Avg Loss: 0.009150
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009150)


Epoch 35 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.60it/s, loss=0.0086] 


Epoch 35 / 50, Avg Loss: 0.009148
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009148)


Epoch 36 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.15it/s, loss=0.00816]


Epoch 36 / 50, Avg Loss: 0.009143
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009143)


Epoch 37 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.43it/s, loss=0.00737]


Epoch 37 / 50, Avg Loss: 0.009132
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009132)


Epoch 38 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.47it/s, loss=0.00709]


Epoch 38 / 50, Avg Loss: 0.009135
⚠ No improvement. Patience: 1/7


Epoch 39 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 124.22it/s, loss=0.00788]


Epoch 39 / 50, Avg Loss: 0.009133
⚠ No improvement. Patience: 2/7


Epoch 40 / 50: 100%|██████████| 13539/13539 [01:48<00:00, 124.24it/s, loss=0.0109] 


Epoch 40 / 50, Avg Loss: 0.009123
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009123)


Epoch 41 / 50: 100%|██████████| 13539/13539 [01:53<00:00, 118.78it/s, loss=0.00898]


Epoch 41 / 50, Avg Loss: 0.009117
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009117)


Epoch 42 / 50: 100%|██████████| 13539/13539 [01:53<00:00, 119.74it/s, loss=0.0128] 


Epoch 42 / 50, Avg Loss: 0.009123
⚠ No improvement. Patience: 1/7


Epoch 43 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.80it/s, loss=0.011]  


Epoch 43 / 50, Avg Loss: 0.009105
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009105)


Epoch 44 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 123.88it/s, loss=0.0114] 


Epoch 44 / 50, Avg Loss: 0.009100
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009100)


Epoch 45 / 50: 100%|██████████| 13539/13539 [01:49<00:00, 124.02it/s, loss=0.00722]


Epoch 45 / 50, Avg Loss: 0.009086
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009086)


Epoch 46 / 50: 100%|██████████| 13539/13539 [01:55<00:00, 117.37it/s, loss=0.00855]


Epoch 46 / 50, Avg Loss: 0.009081
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009081)


Epoch 47 / 50: 100%|██████████| 13539/13539 [01:56<00:00, 115.72it/s, loss=0.00782]


Epoch 47 / 50, Avg Loss: 0.009076
Model checkpoint saved at ../../models/dae_checkpoint.pth (Loss improved: 0.009076)


Epoch 48 / 50: 100%|██████████| 13539/13539 [01:57<00:00, 115.60it/s, loss=0.0214]


Epoch 48 / 50, Avg Loss: 0.015985
⚠ No improvement. Patience: 1/7


Epoch 49 / 50: 100%|██████████| 13539/13539 [01:57<00:00, 115.04it/s, loss=0.0146]


Epoch 49 / 50, Avg Loss: 0.018229
⚠ No improvement. Patience: 2/7


Epoch 50 / 50: 100%|██████████| 13539/13539 [01:58<00:00, 114.59it/s, loss=0.0454]

Epoch 50 / 50, Avg Loss: 0.024982
⚠ No improvement. Patience: 3/7
Final model saved at ../../models/dae_final.pth





In [74]:
trained_model.eval()
trained_model.to(device)

def preprocess_file(file_path, sequence_length=100):
    df = pd.read_csv(file_path, sep="\\s+", names=["time", "pressure"])
    
    scaler = MinMaxScaler()
    df["pressure"] = scaler.fit_transform(df[["pressure"]])
    
    sequences = []
    for i in range(len(df) - sequence_length):
        seq = df["pressure"].iloc[i : i + sequence_length].values
        sequences.append(seq)

    input_tensor = torch.tensor(np.array(sequences), dtype=torch.float32).unsqueeze(-1)  # (batch, seq_len, 1)

    print(f"Размер `df`: {df.shape}")
    print(f"Размер `sequences`: {len(sequences)}")
    print(f"Размер `input_tensor`: {input_tensor.shape}")

    if "time" in df and len(df["time"]) >= len(sequences):
        time_values = df["time"][:len(sequences)].values
        return input_tensor, scaler, time_values
    else:
        return input_tensor, scaler



In [75]:
torch.cuda.empty_cache()
torch.cuda.ipc_collect()

test_file = "../raw_data/test/1cbce6e5-9f0b-419f-9527-7add4e255217" 
result = preprocess_file(test_file)

Размер `df`: (54215, 2)
Размер `sequences`: 54115
Размер `input_tensor`: torch.Size([54115, 100, 1])


In [76]:



if len(result) == 3:
    input_tensor, scaler, time_values = result
else:
    input_tensor, scaler = result
    time_values = None

input_tensor = input_tensor.to(device)

chunk_size = 500  
denoised_chunks = []

with torch.no_grad():
    
    for chunk in torch.split(input_tensor, chunk_size, dim=0):
        denoised_chunk = dae_model(chunk.to(device))  
        denoised_chunks.append(denoised_chunk.cpu()) 

denoised_output = torch.cat(denoised_chunks, dim=0)
denoised_data = scaler.inverse_transform(denoised_output.numpy().squeeze())

In [77]:
denoised_data

array([[45.084717, 46.894352, 44.368584, ..., 36.28862 , 35.009655,
        34.316067],
       [45.101864, 46.939762, 44.44655 , ..., 36.276417, 34.99842 ,
        34.30871 ],
       [45.119556, 46.975174, 44.491104, ..., 36.267742, 34.99256 ,
        34.305954],
       ...,
       [28.904837, 27.34679 , 26.473814, ..., 27.222586, 27.846796,
        29.233868],
       [28.891726, 27.341179, 26.460127, ..., 27.22533 , 27.851019,
        29.243263],
       [28.888218, 27.333359, 26.449112, ..., 27.226812, 27.84957 ,
        29.23902 ]], dtype=float32)

In [78]:
file_path = "../raw_data/test/1cbce6e5-9f0b-419f-9527-7add4e255217" 
orig_df = pd.read_csv(file_path, sep="\\s+", names=["time", "pressure"])
pred_df = orig_df
pred_df['pressure'] = denoised_data

ValueError: Length of values (54115) does not match length of index (54215)

In [80]:
first_100_tensor = torch.tensor(orig_df["pressure"].values[:100], dtype=torch.float32).unsqueeze(0).unsqueeze(-1).to(device)

with torch.no_grad():
    first_100_denoised = dae_model(first_100_tensor).cpu().numpy().squeeze()

# === Приводим к (100, 1) ===
first_100_denoised = first_100_denoised.reshape(-1, 1)

# === Приводим denoised_data к (N, 1) ===
denoised_data = denoised_data[:, -1]  # Берем последнее значение в каждом окне
denoised_data = denoised_data.reshape(-1, 1)

# === Объединяем очищенные данные ===
full_denoised = np.concatenate([first_100_denoised, denoised_data], axis=0)

# Теперь можно добавить в DataFrame
orig_df["denoised_pressure"] = full_denoised

print(f"Размер итогового `orig_df`: {orig_df.shape}")  # Должно быть (54215, 2)


Размер итогового `orig_df`: (54215, 3)


In [81]:
orig_df

Unnamed: 0,time,pressure,denoised_pressure
0,0.000000,45.093716,0.870010
1,0.396667,44.994029,1.127810
2,0.729722,44.994029,1.275829
3,1.148611,44.894341,1.275476
4,1.517500,44.894341,1.246966
...,...,...,...
54210,15767.052500,27.093852,29.230717
54211,15767.360278,27.093852,29.233156
54212,15767.461111,27.093852,29.233868
54213,15767.635556,27.093852,29.243263


In [82]:
import plotly.graph_objects as go

# === Создаем график для оригинального давления ===
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=orig_df["time"],
    y=orig_df["pressure"],
    mode='lines',
    name='Original Pressure',
    line=dict(color='blue')
))

# === Создаем график для очищенного давления ===
fig.add_trace(go.Scatter(
    x=orig_df["time"],
    y=orig_df["denoised_pressure"],
    mode='lines',
    name='Denoised Pressure',
    line=dict(color='red')
))

# === Настройки графика ===
fig.update_layout(
    title="Pressure vs Denoised Pressure Over Time",
    xaxis_title="Time",
    yaxis_title="Pressure",
    legend=dict(x=0, y=1),
)

# === Отображаем график ===
fig.show()

