# MSL z pyCLAD

## Dataset

### Załadowanie danych treningowych i testowych

In [9]:
import pandas as pd
import numpy as np

# Anomaly metadata; only the channels of the MSL spacecraft are used below.
metadata = pd.read_csv("labeled_anomalies.csv")
msl_channels = metadata[metadata['spacecraft'] == 'MSL']['chan_id'].tolist()


def load_channel_arrays(channels, split):
    """Load the ``.npy`` array of every channel for one data split.

    Args:
        channels: Iterable of channel ids; each id is the file stem.
        split: Sub-directory of ``data/`` to read from ("train" or "test").

    Returns:
        Dict mapping channel id -> loaded array, in the order of ``channels``.
        Channels whose file does not exist are reported on stdout and left
        out of the dict.
    """
    arrays = {}
    for channel_id in channels:
        try:
            arrays[channel_id] = np.load(f"data/{split}/{channel_id}.npy")
        except FileNotFoundError:
            print(f"{channel_id}: brak danych")
    return arrays


# Loading through a function keeps loop temporaries out of the notebook
# namespace, so no `del` clean-up (which fails when nothing was loaded) is needed.
msl_train_data = load_channel_arrays(msl_channels, "train")
msl_test_data = load_channel_arrays(msl_channels, "test")

### Okna czasowe

In [10]:
import numpy as np

def create_window(data, window_size):
    """Cut ``data`` into overlapping sliding windows (stride 1) and flatten each one.

    Args:
        data: Array-like indexed along its first axis. Any trailing axes are
            flattened into the window in C order.
        window_size: Number of consecutive rows per window; must be positive.

    Returns:
        2-D array with ``len(data) - window_size + 1`` rows, where row ``i`` is
        ``data[i:i + window_size]`` flattened. When ``data`` is shorter than
        ``window_size`` the result has zero rows but still the full flattened
        width, so downstream shape checks keep working.

    Raises:
        ValueError: If ``window_size`` is not positive.
    """
    if window_size <= 0:
        raise ValueError(f"window_size must be positive, got {window_size}")

    data = np.asarray(data)
    n_windows = max(len(data) - window_size + 1, 0)
    # Explicit width: reshape(..., -1) is ambiguous for zero-size arrays.
    flat_width = window_size * int(np.prod(data.shape[1:], dtype=np.int64))

    # Row i of `row_idx` lists the source rows of window i, so a single
    # fancy-indexing call gathers every window without a Python-level loop.
    row_idx = np.arange(n_windows)[:, None] + np.arange(window_size)[None, :]
    return data[row_idx].reshape(n_windows, flat_width)

### Stworzenie konceptów treningowych i testowych z okien czasowych

In [11]:
import ast
from pyclad.data.concept import Concept

window_size = 100


def build_window_labels(n_windows, anomaly_sequences, window_size):
    """Label each sliding window 1 if it overlaps an anomalous range, else 0.

    Window ``i`` covers rows ``i .. i + window_size - 1``, so it overlaps the
    range ``[start, end]`` (``end`` treated as inclusive) exactly when
    ``start - window_size + 1 <= i <= end``.

    Args:
        n_windows: Number of windows to label.
        anomaly_sequences: Iterable of ``(start, end)`` row-index pairs.
        window_size: Number of rows per window.

    Returns:
        Integer array of length ``n_windows`` holding 0/1 labels.
    """
    labels = np.zeros(n_windows, dtype=int)
    for start, end in anomaly_sequences:
        first = max(0, start - window_size + 1)
        last = min(n_windows - 1, end)
        labels[first:last + 1] = 1
    return labels


def build_concepts(train_data, test_data, metadata, window_size):
    """Create one train and one test ``Concept`` per channel.

    Channels that have training data but no test data are reported on stdout
    and skipped, so both returned lists stay aligned.

    Args:
        train_data: Dict channel id -> training array.
        test_data: Dict channel id -> test array.
        metadata: DataFrame with ``chan_id``, ``spacecraft`` and
            ``anomaly_sequences`` columns; the latter holds a string literal
            of ``[start, end]`` pairs.
        window_size: Number of rows per sliding window.

    Returns:
        Tuple ``(train_concepts, test_concepts)``; test concepts carry labels.
    """
    train_concepts = []
    test_concepts = []

    for channel_id, train_series in train_data.items():
        if channel_id not in test_data:
            print(f"{channel_id}: brak danych testowych - kanał pominięty")
            continue

        train_windows = create_window(train_series, window_size)
        test_windows = create_window(test_data[channel_id], window_size)

        # Collect every anomalous range recorded for this MSL channel.
        channel_rows = metadata[(metadata['chan_id'] == channel_id) & (metadata['spacecraft'] == 'MSL')]
        sequences = []
        for raw_sequences in channel_rows['anomaly_sequences']:
            if pd.notna(raw_sequences):
                sequences.extend(ast.literal_eval(raw_sequences))

        labels = build_window_labels(test_windows.shape[0], sequences, window_size)

        train_concepts.append(Concept(name=channel_id, data=train_windows))
        test_concepts.append(Concept(name=channel_id, data=test_windows, labels=labels))

    return train_concepts, test_concepts


# Building inside a function keeps loop temporaries out of the notebook
# namespace, so no `del` clean-up is required afterwards.
train_concepts, test_concepts = build_concepts(msl_train_data, msl_test_data, metadata, window_size)

print(f"Liczba konceptów treningowych: {len(train_concepts)}")
print(f"Liczba konceptów testowych: {len(test_concepts)}")

Liczba konceptów treningowych: 27
Liczba konceptów testowych: 27


### Stworzenie datasetu

In [12]:
from pyclad.data.datasets.concepts_dataset import ConceptsDataset

# Bundle the per-channel train and test concepts into a single named dataset.
dataset = ConceptsDataset(name="MSL_Dataset", train_concepts=train_concepts, test_concepts=test_concepts)

## Model

In [13]:
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from pyclad.models.model import Model

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

class AE(nn.Module):
    """Fully connected autoencoder: input_dim -> 256 -> 128 -> 256 -> input_dim.

    The decoder ends with a sigmoid, so every reconstructed value lies in (0, 1).
    """

    def __init__(self, input_dim):
        super().__init__()
        hidden_dim, latent_dim = 256, 128

        # Encoder layers are created before decoder layers so parameter
        # initialisation consumes the RNG in the same order as before.
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        decoder_layers = [
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
            nn.Sigmoid(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Encode ``x`` to the 128-dimensional code and decode it back."""
        latent = self.encoder(x)
        return self.decoder(latent)

class AEp(Model):
    """Autoencoder-based anomaly detector exposed through the pyCLAD ``Model`` interface.

    Samples are scored by their mean squared reconstruction error under ``AE``;
    a sample is labelled anomalous when that error exceeds ``threshold``.

    Args:
        input_dim: Width of one flattened input sample.
        lr: Learning rate of the Adam optimizer created in ``fit``.
        threshold: Reconstruction-error cut-off above which a sample is labelled 1.
        epochs: Number of passes over the data per ``fit`` call.
        batch_size: Mini-batch size used for both training and inference.
    """

    def __init__(self, input_dim, lr=1e-3, threshold=0.9, epochs=80, batch_size=128):
        super().__init__()
        # Taken from the notebook-level ``window_size``; not used by the
        # methods of this class.
        self.window_size = window_size
        self.input_dim = input_dim
        self.module = AE(self.input_dim).to(device)
        self.lr = lr
        self.threshold = threshold
        self.epochs = epochs
        self.batch_size = batch_size

    def fit(self, data: np.ndarray):
        """Train the autoencoder to reconstruct ``data``.

        Every call creates a fresh Adam optimizer and keeps training the
        existing module weights. Empty input is a no-op.

        Args:
            data: 2-D array of shape (n_samples, input_dim).
        """
        if len(data) == 0:
            return

        samples = torch.as_tensor(data, dtype=torch.float32)
        loader = DataLoader(TensorDataset(samples), batch_size=self.batch_size, shuffle=True)
        optimizer = torch.optim.Adam(self.module.parameters(), lr=self.lr)
        loss_fn = nn.MSELoss()

        self.module.train()
        for _ in range(self.epochs):
            for (batch,) in loader:
                x = batch.to(device)
                optimizer.zero_grad()
                loss = loss_fn(self.module(x), x)
                loss.backward()
                optimizer.step()

    def predict(self, data: np.ndarray):
        """Score ``data`` by reconstruction error.

        Inference runs in chunks of ``batch_size`` rows so that only one
        chunk at a time is moved to the device.

        Args:
            data: 2-D array of shape (n_samples, input_dim).

        Returns:
            Tuple ``(binary_preds, rec_error)``: 0/1 integer labels
            (1 where the error exceeds ``threshold``) and the per-sample mean
            squared reconstruction error.
        """
        data = np.asarray(data)
        self.module.eval()

        chunk_errors = []
        with torch.no_grad():
            for offset in range(0, len(data), self.batch_size):
                chunk = data[offset:offset + self.batch_size]
                x = torch.as_tensor(chunk, dtype=torch.float32).to(device)
                x_hat = self.module(x).cpu().numpy()
                chunk_errors.append(((chunk - x_hat) ** 2).mean(axis=1))

        # np.concatenate rejects an empty list, so handle empty input explicitly.
        rec_error = np.concatenate(chunk_errors) if chunk_errors else np.zeros(0, dtype=float)
        binary_preds = (rec_error > self.threshold).astype(int)
        return binary_preds, rec_error

    def name(self):
        """Return the model's identifier."""
        return "AEp"
# Flattened window width = window_size * number of features per time step.
# The feature count is read from the first channel that was actually loaded,
# so a missing file for the first listed channel no longer causes a KeyError.
# NOTE(review): assumes every channel has the same number of features - confirm.
input_dim = window_size * next(iter(msl_train_data.values())).shape[1]
model = AEp(input_dim)


Using device: cuda


## Strategia

In [14]:
from pyclad.strategies.baselines.cumulative import CumulativeStrategy
from pyclad.strategies.baselines.naive import NaiveStrategy
from pyclad.strategies.replay.replay import ReplayEnhancedStrategy

# Continual-learning strategy used in this experiment: NaiveStrategy wrapping the model.
strategy = NaiveStrategy(model)

## Uruchomienie eksperymentu

### Callbacki

In [15]:
from pyclad.callbacks.evaluation.concept_metric_evaluation import ConceptMetricCallback
from pyclad.callbacks.evaluation.memory_usage import MemoryUsageCallback
from pyclad.callbacks.evaluation.time_evaluation import TimeEvaluationCallback
from pyclad.metrics.base.roc_auc import RocAuc
from pyclad.metrics.continual.average_continual import ContinualAverage
from pyclad.metrics.continual.backward_transfer import BackwardTransfer
from pyclad.metrics.continual.forward_transfer import ForwardTransfer
from pyclad.output.json_writer import JsonOutputWriter

# Metric callback configuration: RocAuc as the base metric plus three
# continual metrics.
base_metric = RocAuc()
continual_metrics = [ContinualAverage(), BackwardTransfer(), ForwardTransfer()]

# Callbacks handed to the scenario: metric evaluation, timing and memory usage.
callbacks = [
    ConceptMetricCallback(base_metric=base_metric, metrics=continual_metrics),
    TimeEvaluationCallback(),
    MemoryUsageCallback(),
]

In [16]:
from pyclad.scenarios.concept_aware import ConceptAwareScenario
from pyclad.scenarios.concept_agnostic import ConceptAgnosticScenario
import pathlib

# Run the experiment with the dataset, strategy and callbacks configured above.
scenario = ConceptAgnosticScenario(dataset=dataset, strategy=strategy, callbacks=callbacks)
scenario.run()

# Write the model, dataset, strategy and every callback to a JSON file.
results_path = pathlib.Path("output-strategy2.json")
output_writer = JsonOutputWriter(results_path)
output_writer.write([model, dataset, strategy] + list(callbacks))
