# In [None]:
### load the sensor data 

import pandas as pd
import numpy as np

# Sliding-window length (in rows) used by the summary-feature step below.
WINDOW = 30

# Raw sensor table that is parsed next.
sensor_path = "../data/sensors/train_FD001.txt"

# Parse as space-delimited text with no header row, then discard every column
# that contains at least one NaN.
df = pd.read_csv(sensor_path, header=None, sep=" ").dropna(axis=1)

print("Sensor data shape:", df.shape)

## feature extraction

# Skip the first two columns (engine id & cycle, per the original note).
sensor_data = df.iloc[:, 2:]

def extract_sensor_features(data, window):
    """Summarise each sliding window of *data* with four scalar statistics.

    For every start row ``i`` in ``range(len(data) - window)`` the rows
    ``i .. i + window - 1`` are reduced column-wise (mean, std, max, min) and
    each of those per-column results is then averaged across columns.

    Note that the start index stops at ``len(data) - window - 1``, so the
    window that ends on the very last row is not emitted; this matches the
    ``create_sequences`` helper used elsewhere in this file.

    Args:
        data: A pandas DataFrame (accessed through ``.iloc``) of numeric
            columns, one row per time step.
        window: Number of consecutive rows per window.

    Returns:
        A float ``numpy.ndarray`` of shape ``(max(len(data) - window, 0), 4)``
        whose columns are, in order: mean of column means, mean of column
        standard deviations (pandas default, ``ddof=1``), mean of column
        maxima, mean of column minima.
    """
    n_windows = max(len(data) - window, 0)

    # Preallocate so that the result is always 2-D with four columns, even
    # when the input is too short to yield a single window (the list-based
    # version collapsed to shape (0,) in that case).
    feats = np.empty((n_windows, 4), dtype=float)

    for i in range(n_windows):
        w = data.iloc[i:i + window]
        feats[i] = (
            w.mean().mean(),
            w.std().mean(),
            w.max().mean(),
            w.min().mean(),
        )
    return feats

# Reduce every WINDOW-row slice of the sensor table to its four summary values.
sensor_features = extract_sensor_features(data=sensor_data, window=WINDOW)

# Report how many windows were produced and how many features each one has.
print("Sensor feature shape:", sensor_features.shape)
## sliding window

import numpy as np


def create_sequences(data, window_size):
    """Cut *data* into overlapping windows of ``window_size`` consecutive items.

    Windows start at indices ``0 .. len(data) - window_size - 1``; the window
    that ends on the final element is therefore not included.

    Args:
        data: Any sliceable sequence with a length (e.g. a NumPy array).
        window_size: Number of consecutive items per window.

    Returns:
        ``numpy.ndarray`` stacking the windows along axis 0. When the input is
        too short to yield a window the result is an empty array.
    """
    n_windows = len(data) - window_size
    return np.array([data[i:i + window_size] for i in range(n_windows)])


# Toy example: the helper must be defined before it is called, otherwise the
# script stops with a NameError when run top to bottom.
data = np.array([1, 2, 3, 4, 5, 6])
window_size = 3

sequences = create_sequences(data, window_size)

# Use the demo's own window length here: WINDOW_SIZE is not assigned until
# much further down the file, so referencing it at this point fails.
X_train = create_sequences(data, window_size)

print(sequences)

## lstm autoencoder

import torch
import torch.nn as nn

# Your model
class LSTMAutoEncoder(nn.Module):
    """Sequence autoencoder made of two single-layer, batch-first LSTMs.

    The encoder compresses the input sequence into its final hidden state;
    that state is copied to every time step and fed to the decoder, whose
    per-step outputs (size ``n_features``) form the reconstruction.

    Args:
        n_features: Size of the last dimension of the input (and output).
        hidden_dim: Size of the encoder's hidden state.
    """

    def __init__(self, n_features, hidden_dim=64):
        super().__init__()
        # n_features -> hidden_dim
        self.encoder = nn.LSTM(n_features, hidden_dim, batch_first=True)
        # hidden_dim -> n_features
        self.decoder = nn.LSTM(hidden_dim, n_features, batch_first=True)

    def forward(self, x):
        """Reconstruct ``x``; input is expected as (batch, seq_len, n_features)."""
        seq_len = x.size(1)

        # Only the final hidden state is kept: (1, batch, hidden_dim).
        _, (h_n, _) = self.encoder(x)

        # Move batch to the front, then tile the state along the time axis so
        # the decoder receives (batch, seq_len, hidden_dim).
        decoder_in = h_n.permute(1, 0, 2).repeat(1, seq_len, 1)

        reconstructed, _ = self.decoder(decoder_in)
        return reconstructed


# --- Build a small random batch shaped (batch, sequence, features) ---
batch_size, seq_len, n_features = 2, 5, 3
x = torch.randn(batch_size, seq_len, n_features)

# --- Push it through a freshly initialised autoencoder ---
model = LSTMAutoEncoder(n_features)
output = model(x)

# --- Show the shapes and the raw reconstruction ---
print("Input shape:", x.shape)
print("Output shape:", output.shape)
print("Output tensor:\n", output)

import numpy as np
import torch
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split

# NOTE: a train/validation split used to sit here, but `train_seq` does not
# exist until the sequences are built further down, so running the file top to
# bottom stopped with a NameError on that line. Splitting has to happen after
# the tensor is created.

from sklearn.preprocessing import MinMaxScaler
from torch.utils.data import DataLoader, TensorDataset

# Re-read the raw table: space-delimited, no header row.
df = pd.read_csv(
    "../data/sensors/train_FD001.txt",
    sep=" ",
    header=None
)

# Drop every column that contains at least one NaN.
df = df.dropna(axis=1)
print("Raw data shape:", df.shape)
df.head()  # notebook-style preview; has no visible effect when run as a script

# Everything after the first two columns is treated as model input.
sensor_cols = df.columns[2:]
sensor_data = df[sensor_cols].values

# Rescale each column to [0, 1]; the fitted scaler is kept so it can be
# persisted alongside the model.
scaler = MinMaxScaler()
sensor_data = scaler.fit_transform(sensor_data)

def create_sequences(data, window_size):
    """Stack overlapping slices of *data*, each ``window_size`` items long.

    Slices begin at ``0, 1, ..., len(data) - window_size - 1`` and are
    returned as one NumPy array with the slice index on axis 0.
    """
    n_windows = len(data) - window_size
    return np.array(
        [data[start:start + window_size] for start in range(n_windows)]
    )

# Number of consecutive rows that make up one training sequence.
WINDOW_SIZE = 30

# Slice the scaled sensor matrix into overlapping windows.
sequences = create_sequences(sensor_data, WINDOW_SIZE)
print("Sequences shape:", sequences.shape)

# Convert to a float32 tensor for PyTorch.
train_seq = torch.tensor(sequences, dtype=torch.float32)
print("Train tensor shape:", train_seq.shape)

# The last axis of the tensor is the per-step feature count.
n_features = train_seq.size(2)
print("Number of sensor features:", n_features)

# Build the model, then place both it and the data on the GPU when one is
# available, otherwise on the CPU.
model = LSTMAutoEncoder(n_features=n_features, hidden_dim=64)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
train_seq = train_seq.to(device)

import joblib
from torch.utils.data import TensorDataset, DataLoader

epochs = 10

# The loop below needs a loss and an optimiser, neither of which was defined
# anywhere above. Mean-squared error matches the reconstruction error computed
# after training; Adam with lr=1e-3 is a conventional starting point and can
# be tuned.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Chronological 80/20 hold-out, done exactly once before training. This split
# previously ran inside the epoch loop, which discarded a further 20% of the
# training data on every epoch. Plain slicing keeps the tensors on whatever
# device they already live on.
n_train = int(len(train_seq) * 0.8)
train_seq, val_seq = train_seq[:n_train], train_seq[n_train:]

model.train()
for epoch in range(epochs):
    # Full-batch step: reconstruct every training sequence at once.
    optimizer.zero_grad()
    output = model(train_seq)
    loss = criterion(output, train_seq)
    loss.backward()
    optimizer.step()

    print(f"Epoch [{epoch+1}/{epochs}] | Loss: {loss.item():.6f}")

# Persist the trained weights and the fitted scaler once, after the final
# epoch, rather than rewriting both files on every iteration.
torch.save(model.state_dict(), "../models/sensor_lstm_ae.pth")
joblib.dump(scaler, "../models/sensor_scaler.pkl")

# Batch the sequences the model was actually trained on. The loader used to be
# built from the toy `X_train` array, whose shape does not match the model's
# input.
train_tensor = train_seq
dataset = TensorDataset(train_tensor)
train_loader = DataLoader(
    dataset,
    batch_size=64,
    shuffle=False
)

# Score every sequence in the loader by its mean squared reconstruction error.
model.eval()
recon_errors = []

# Batches must sit on the same device as the model's weights; otherwise the
# forward pass fails with a device mismatch when the model is on the GPU and
# the loader yields CPU tensors.
model_device = next(model.parameters()).device

with torch.no_grad():
    for batch in train_loader:
        # Each item yielded by the loader is a tuple; the sequences are its
        # first element.
        batch_seq = batch[0].to(model_device)
        output = model(batch_seq)

        # Average the squared error over dims 1 and 2, leaving one value per
        # sequence in the batch.
        mse = torch.mean((batch_seq - output) ** 2, dim=(1, 2))
        recon_errors.extend(mse.cpu().numpy())











