In [1]:
import pandas as pd
import numpy as np
import json
import os
from os.path import join
from tqdm import tqdm

In [2]:
# Install gdown quietly, then download the shared Google Drive folder that holds
# the train/val/test CSV files (the log below shows them landing in /content/files/).
!pip -q install gdown
!gdown --folder "https://drive.google.com/drive/folders/1bmzxvtIw96SQb0YAwhcZ6_SJlHygv6NE?usp=sharing"

Retrieving folder contents
Processing file 1Ll4ZjcbRTqoFLvbgay_F6K4BFPyzggT1 test_df.csv
Processing file 1e3H2ZFEjg0hQNq33DxB30pU9eJsRqO-T test_target.csv
Processing file 1lMtq0Whm7fq5zLS1z5J2dPCJRs9iaV9e train_df.csv
Processing file 1E4VKq0K38HNNxP1stgGbJrCUXFvdanq8 train_target.csv
Processing file 11R-jDwnZ8xgtVAcYollV5mUqs5WSP1Ao val_df.csv
Processing file 1B-ukExifYunCl6kf8hBil4Wl0H2YcR8_ val_target.csv
Retrieving folder contents completed
Building directory structure
Building directory structure completed
Downloading...
From (original): https://drive.google.com/uc?id=1Ll4ZjcbRTqoFLvbgay_F6K4BFPyzggT1
From (redirected): https://drive.google.com/uc?id=1Ll4ZjcbRTqoFLvbgay_F6K4BFPyzggT1&confirm=t&uuid=9a696fb2-6056-4a8c-902c-fa3709416ef0
To: /content/files/test_df.csv
100% 742M/742M [00:06<00:00, 108MB/s] 
Downloading...
From: https://drive.google.com/uc?id=1e3H2ZFEjg0hQNq33DxB30pU9eJsRqO-T
To: /content/files/test_target.csv
100% 71.0M/71.0M [00:01<00:00, 68.4MB/s]
Downloading...
From

In [3]:
# Training features and labels. The first two CSV columns form the index and
# the second of them is parsed as dates.
train_df = pd.read_csv("/content/files/train_df.csv", index_col=[0, 1], parse_dates=[1])
train_target = pd.read_csv("/content/files/train_target.csv", index_col=[0, 1], parse_dates=[1])

In [4]:
# Validation split: same layout as the training files (two index columns,
# the second one parsed as dates).
val_df = pd.read_csv("/content/files/val_df.csv", index_col=[0, 1], parse_dates=[1])
val_target = pd.read_csv("/content/files/val_target.csv", index_col=[0, 1], parse_dates=[1])

# Test split.
test_df = pd.read_csv("/content/files/test_df.csv", index_col=[0, 1], parse_dates=[1])
test_target = pd.read_csv("/content/files/test_target.csv", index_col=[0, 1], parse_dates=[1])

In [5]:
# Output directory for the model checkpoint; exist_ok makes the cell safe to re-run.
os.makedirs("saved_models", exist_ok=True)

In [6]:
import torch
from torch.utils.data import DataLoader, Dataset

In [7]:
from torch.utils.data import Dataset
import numpy as np
import torch
from tqdm import tqdm
import pandas as pd


class SlidingWindowDataset(Dataset):
    """Fixed-length sliding windows cut from a frame that holds several runs.

    Rows are grouped by level 0 of ``df.index`` (the run id). Windows are
    built inside one run at a time, so a window never mixes rows that belong
    to different runs.

    Args:
        df: Feature frame; level 0 of its index identifies the run. The rows
            of every run must be stored contiguously.
        target: Per-row labels aligned with ``df``, or ``None``.
        window_size: Number of rows in one window.
        stride: Step between the end positions of consecutive windows.
        get_step_next: When true, every window is one row longer
            (``window_size + 1`` rows).

    Raises:
        ValueError: If ``window_size`` or ``stride`` is smaller than 1, or if
            the rows of a run are not contiguous in ``df``.
    """

    def __init__(
        self,
        df: pd.DataFrame,
        target: pd.Series,
        window_size: int,
        stride: int = 1,
        get_step_next: bool = False,
    ):
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")
        if stride < 1:
            raise ValueError(f"stride must be >= 1, got {stride}")

        self.features = list(df.columns)
        self.data = torch.as_tensor(df.values, dtype=torch.float32)
        self.target = torch.as_tensor(target.values, dtype=torch.float32) if target is not None else None

        self.index = df.index
        self.window_size = window_size
        self.get_step_next = get_step_next

        # get_step_next extends every window by one extra row.
        if self.get_step_next:
            self.window_size += 1

        self.valid_windows = self._precompute_valid_windows(stride)

    def _precompute_valid_windows(self, stride: int):
        """Return ``(start_idx, end_idx)`` row positions (both inclusive) of every window."""
        # Hoisted: extracting the level once instead of once per run.
        run_level = self.index.get_level_values(0)
        span = self.window_size - 1
        valid_windows = []

        for run_id in tqdm(run_level.unique(), desc="Building safe windows"):
            run_indices = np.where(run_level == run_id)[0]

            if len(run_indices) < self.window_size:
                continue

            first, last = int(run_indices[0]), int(run_indices[-1])

            # __getitem__ slices data[start:end + 1], which is only a slice of
            # this run if the run occupies one contiguous block of rows.
            if last - first + 1 != len(run_indices):
                raise ValueError(
                    f"Rows of run {run_id!r} are not contiguous; "
                    "windows would cross a run_id boundary."
                )

            # `end` is the position of the last row of the window (inclusive).
            valid_windows.extend(
                (end - span, end) for end in range(first + span, last + 1, stride)
            )

        return valid_windows

    def __len__(self):
        return len(self.valid_windows)

    def __getitem__(self, idx):
        """Return ``(window, label)``.

        The label is the maximum target value inside the window; without a
        target it is the last row of the window itself.
        """
        start_idx, end_idx = self.valid_windows[idx]

        # end_idx is inclusive, hence the +1.
        sample = self.data[start_idx:end_idx + 1]

        target = self.target[start_idx:end_idx + 1].max() if self.target is not None else sample[-1]
        return sample, target

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F


# A = ReLu(W)
class Graph_ReLu_W(nn.Module):
    """Adjacency matrix learned directly as ``A = ReLU(W)``.

    Args:
        n_nodes: Number of graph nodes; the learned matrix is n_nodes x n_nodes.
        k: If truthy, only the ``k`` largest entries of every row are kept.
        device: Device on which the weight matrix is created.
    """
    def __init__(self, n_nodes, k, device):
        super().__init__()
        self.num_nodes = n_nodes
        self.k = k
        self.device = device
        # Wrap the tensor only after it is on its final device: calling .to()
        # on a Parameter may return a plain non-leaf copy that is not trained.
        self.A = nn.Parameter(torch.randn(n_nodes, n_nodes).to(device),
                              requires_grad=True)

    def forward(self, idx):
        """Return the (optionally top-k sparsified) adjacency matrix.

        ``idx`` is accepted for interface parity with the other graph
        learners; the matrix itself does not depend on it.
        """
        adj = F.relu(self.A)
        if self.k:
            # A small random perturbation breaks ties between equal entries.
            noisy = adj + torch.rand_like(adj)*0.01
            _, top_idx = noisy.topk(self.k, 1)
            # The mask takes shape, dtype and device from adj itself.
            mask = torch.zeros_like(adj).scatter_(1, top_idx, 1.0)
            adj = adj*mask
        return adj


# A for Directed graphs:
class Graph_Directed_A(nn.Module):
    """Directed adjacency matrix built from two sets of node embeddings.

    ``A = ReLU(tanh(alpha * M1 @ M2^T))`` with
    ``M_i = tanh(alpha * Linear_i(Embedding_i(idx)))``.

    Args:
        n_nodes: Number of graph nodes (embedding table size).
        window_size: Embedding dimension.
        alpha: Saturation rate applied inside every tanh.
        k: If truthy, only the ``k`` largest entries of every row are kept.
        device: Stored on the instance; the forward pass itself follows the
            device of the module parameters.
    """
    def __init__(self, n_nodes, window_size, alpha, k, device):
        super().__init__()
        self.alpha = alpha
        self.k = k
        self.device = device
        self.e1 = nn.Embedding(n_nodes, window_size)
        self.e2 = nn.Embedding(n_nodes, window_size)
        self.l1 = nn.Linear(window_size, window_size)
        self.l2 = nn.Linear(window_size, window_size)

    def forward(self, idx):
        """Return the adjacency matrix for the nodes listed in ``idx``."""
        m1 = torch.tanh(self.alpha*self.l1(self.e1(idx)))
        m2 = torch.tanh(self.alpha*self.l2(self.e2(idx)))
        adj = F.relu(torch.tanh(self.alpha*torch.mm(m1, m2.transpose(1, 0))))
        if self.k:
            # A small random perturbation breaks ties between equal entries.
            noisy = adj + torch.rand_like(adj)*0.01
            _, top_idx = noisy.topk(self.k, 1)
            # Allocate the mask next to adj so the layer keeps working after
            # the module has been moved with .to()/.cuda()/.cpu().
            mask = torch.zeros_like(adj).scatter_(1, top_idx, 1.0)
            adj = adj*mask
        return adj


# A for Uni-directed graphs:
class Graph_Uni_Directed_A(nn.Module):
    """Uni-directed adjacency matrix: at most one direction per node pair.

    ``A = ReLU(tanh(alpha * (M1 @ M2^T - M2 @ M1^T)))``. The argument is
    antisymmetric, so ReLU keeps an edge in one direction only.

    Args:
        n_nodes: Number of graph nodes (embedding table size).
        window_size: Embedding dimension.
        alpha: Saturation rate applied inside every tanh.
        k: If truthy, only the ``k`` largest entries of every row are kept.
        device: Stored on the instance; the forward pass itself follows the
            device of the module parameters.
    """
    def __init__(self, n_nodes, window_size, alpha, k, device):
        # super() must name this class; naming Graph_Directed_A raised TypeError.
        super().__init__()
        self.alpha = alpha
        self.k = k
        self.device = device
        self.e1 = nn.Embedding(n_nodes, window_size)
        self.e2 = nn.Embedding(n_nodes, window_size)
        self.l1 = nn.Linear(window_size, window_size)
        self.l2 = nn.Linear(window_size, window_size)

    def forward(self, idx):
        """Return the adjacency matrix for the nodes listed in ``idx``."""
        m1 = torch.tanh(self.alpha*self.l1(self.e1(idx)))
        m2 = torch.tanh(self.alpha*self.l2(self.e2(idx)))
        adj = F.relu(torch.tanh(self.alpha*(torch.mm(m1, m2.transpose(1, 0))
                                - torch.mm(m2, m1.transpose(1, 0)))))
        if self.k:
            # A small random perturbation breaks ties between equal entries.
            noisy = adj + torch.rand_like(adj)*0.01
            _, top_idx = noisy.topk(self.k, 1)
            # Allocate the mask next to adj so it always shares adj's device.
            mask = torch.zeros_like(adj).scatter_(1, top_idx, 1.0)
            adj = adj*mask
        return adj


# A for Undirected graphs:
class Graph_Undirected_A(nn.Module):
    """Undirected (symmetric before top-k) adjacency matrix.

    ``A = ReLU(tanh(alpha * M @ M^T))`` with
    ``M = tanh(alpha * Linear(Embedding(idx)))``.

    Args:
        n_nodes: Number of graph nodes (embedding table size).
        window_size: Embedding dimension.
        alpha: Saturation rate applied inside every tanh.
        k: If truthy, only the ``k`` largest entries of every row are kept.
        device: Stored on the instance; the forward pass itself follows the
            device of the module parameters.
    """
    def __init__(self, n_nodes, window_size, alpha, k, device):
        # super() must name this class; naming Graph_Directed_A raised TypeError.
        super().__init__()
        self.alpha = alpha
        self.k = k
        self.device = device
        self.e1 = nn.Embedding(n_nodes, window_size)
        self.l1 = nn.Linear(window_size, window_size)

    def forward(self, idx):
        """Return the adjacency matrix for the nodes listed in ``idx``."""
        # Both factors come from the same embedding and linear layer, so the
        # projection is computed once and reused.
        m = torch.tanh(self.alpha*self.l1(self.e1(idx)))
        adj = F.relu(torch.tanh(self.alpha*torch.mm(m, m.transpose(1, 0))))
        if self.k:
            # A small random perturbation breaks ties between equal entries.
            noisy = adj + torch.rand_like(adj)*0.01
            _, top_idx = noisy.topk(self.k, 1)
            # Allocate the mask next to adj so it always shares adj's device.
            mask = torch.zeros_like(adj).scatter_(1, top_idx, 1.0)
            adj = adj*mask
        return adj


class GSL(nn.Module):
    """
    Graph structure learning block.

    Thin dispatcher that builds one of the adjacency learners and forwards
    calls to it.

    Args:
        gsl_type: One of 'relu', 'directed', 'unidirected', 'undirected'.
        n_nodes: Number of graph nodes.
        window_size: Embedding size used by the embedding-based learners
            (ignored by 'relu').
        alpha: Saturation rate used by the embedding-based learners
            (ignored by 'relu').
        k: Maximum number of edges kept per node, or a falsy value to keep all.
        device: Device handed over to the selected learner.

    Raises:
        ValueError: If ``gsl_type`` is not one of the supported names.
    """
    def __init__(
            self,
            gsl_type,
            n_nodes,
            window_size,
            alpha,
            k,
            device):
        super().__init__()
        if gsl_type == 'relu':
            self.gsl_layer = Graph_ReLu_W(n_nodes, k, device)
        elif gsl_type == 'directed':
            self.gsl_layer = Graph_Directed_A(n_nodes, window_size,
                                              alpha, k, device)
        elif gsl_type == 'unidirected':
            self.gsl_layer = Graph_Uni_Directed_A(n_nodes, window_size,
                                                  alpha, k, device)
        elif gsl_type == 'undirected':
            self.gsl_layer = Graph_Undirected_A(n_nodes, window_size,
                                                alpha, k, device)
        else:
            # Fail at construction time instead of leaving an unusable block
            # that only breaks on the first forward call.
            raise ValueError(
                f"Unknown gsl_type {gsl_type!r}; expected one of "
                "'relu', 'directed', 'unidirected', 'undirected'."
            )

    def forward(self, idx):
        """Return the adjacency matrix produced by the selected learner."""
        return self.gsl_layer(idx)

In [9]:
import torch
import torch.nn as nn


class GCLayer(nn.Module):
    """
    Graph convolution layer.

    Projects the node features with a linear map and propagates them through
    the adjacency matrix (with self-loops added), scaled on both sides by the
    inverse square root of the row sums.
    """
    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.dense = nn.Linear(in_dim, out_dim)

    def forward(self, adj, X):
        # Add self-loops on the same device as the adjacency matrix.
        self_loops = torch.eye(adj.size(0)).to(adj.device)
        adj = adj + self_loops

        projected = self.dense(X)

        # Row sums raised to the power -1/2.
        inv_sqrt_deg = adj.sum(1)**(-1/2)
        propagation = inv_sqrt_deg[None, :] * adj * inv_sqrt_deg[:, None]
        return propagation @ projected


class GNN_TAM(nn.Module):
    """
    Model architecture from the paper "Graph Neural Networks with Trainable
    Adjacency Matrices for Fault Diagnosis on Multivariate Sensor Data".
    https://doi.org/10.1109/ACCESS.2024.3481331

    Each of the ``n_gnn`` branches learns its own adjacency matrix, applies
    two graph-convolution + batch-norm stages, min-pools over the node axis
    and adds a skip connection from the first stage. The branch outputs are
    concatenated and passed to a linear classifier.
    """
    def __init__(
            self,
            n_nodes: int,
            window_size: int,
            n_classes: int,
            n_gnn: int = 1,
            gsl_type: str = 'relu',
            n_hidden: int = 1024,
            alpha: float = 0.1,
            k: int = None,
            device: str = 'cpu'
            ):
        """
        Args:
            n_nodes (int): The number of nodes/sensors.
            window_size (int): The number of timestamps in one sample.
            n_classes (int): The number of classes.
            n_gnn (int): The number of GNN modules.
            gsl_type (str): The type of GSL block.
            n_hidden (int): The number of hidden parameters in GCN layers.
            alpha (float): Saturation rate for GSL block.
            k (int): The maximum number of edges from one node.
            device (str): The name of a device to train the model. `cpu` and
                `cuda` are possible.
        """
        super().__init__()
        self.window_size = window_size
        self.nhidden = n_hidden
        self.device = device
        self.n_gnn = n_gnn

        # Constant tensors are registered as non-persistent buffers: they
        # follow .to()/.cuda()/.cpu() together with the parameters, yet stay
        # out of the state_dict so its keys are unchanged.
        self.register_buffer(
            'idx', torch.arange(n_nodes).to(device), persistent=False)
        # Ones everywhere except the diagonal: removes learned self-loops.
        self.register_buffer(
            'z',
            (torch.ones(n_nodes, n_nodes) - torch.eye(n_nodes)).to(device),
            persistent=False)

        # Per-branch intermediates of the most recent forward pass.
        self.adj = [0 for _ in range(n_gnn)]
        self.h = [0 for _ in range(n_gnn)]
        self.skip = [0 for _ in range(n_gnn)]

        self.gsl = nn.ModuleList()
        self.conv1 = nn.ModuleList()
        self.bnorm1 = nn.ModuleList()
        self.conv2 = nn.ModuleList()
        self.bnorm2 = nn.ModuleList()

        for i in range(self.n_gnn):
            self.gsl.append(GSL(gsl_type, n_nodes,
                                window_size, alpha, k, device))
            self.conv1.append(GCLayer(window_size, n_hidden))
            self.bnorm1.append(nn.BatchNorm1d(n_nodes))
            self.conv2.append(GCLayer(n_hidden, n_hidden))
            self.bnorm2.append(nn.BatchNorm1d(n_nodes))

        self.fc = nn.Linear(n_gnn*n_hidden, n_classes)

    def forward(self, X):
        """Return class logits of shape (batch, n_classes).

        ``X`` is expected as (batch, n_nodes, window_size): the first
        convolution maps the last axis from ``window_size`` to ``n_hidden``
        and batch norm treats axis 1 as the ``n_nodes`` channels.
        """
        # Follow the device the module currently lives on rather than the one
        # named at construction time, which goes stale after .to().
        X = X.to(self.z.device)
        for i in range(self.n_gnn):
            self.adj[i] = self.gsl[i](self.idx)
            self.adj[i] = self.adj[i] * self.z
            self.h[i] = self.conv1[i](self.adj[i], X).relu()
            self.h[i] = self.bnorm1[i](self.h[i])
            # Min-pool over the node axis; kept as the skip connection.
            self.skip[i], _ = torch.min(self.h[i], dim=1)
            self.h[i] = self.conv2[i](self.adj[i], self.h[i]).relu()
            self.h[i] = self.bnorm2[i](self.h[i])
            self.h[i], _ = torch.min(self.h[i], dim=1)
            self.h[i] = self.h[i] + self.skip[i]

        h = torch.cat(self.h, 1)
        output = self.fc(h)

        return output

    def get_adj(self):
        """Return the list of adjacency matrices from the last forward pass."""
        return self.adj

In [20]:
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader
from tqdm.auto import tqdm, trange

# === sliding-window parameters ===
window_size = 100
stride = 1

# === standardise features with statistics fitted on the training frame only ===
scaler = StandardScaler().fit(train_df.values)
train_df_scaled = train_df.copy()
train_df_scaled[:] = scaler.transform(train_df.values)

# === windowed training dataset and its shuffling loader ===
train_dataset = SlidingWindowDataset(
    train_df_scaled, train_target, window_size=window_size, stride=stride
)
train_loader = DataLoader(
    train_dataset, batch_size=512, shuffle=True, drop_last=False, num_workers=0
)

Building safe windows:   0%|          | 0/21 [00:00<?, ?it/s]

In [21]:
# Scale the validation features with the scaler fitted on train_df (no refit),
# writing into a copy so val_df itself stays unscaled.
val_df_scaled = val_df.copy()
val_df_scaled[:] = scaler.transform(val_df.values)

In [22]:
# Validation windows use the same window_size / stride as the training set.
val_ds = SlidingWindowDataset(val_df_scaled, val_target, window_size=window_size, stride=stride)

Building safe windows:   0%|          | 0/21 [00:00<?, ?it/s]

In [23]:
from sklearn.metrics import accuracy_score, f1_score

@torch.no_grad()
def eval_windows_accuracy_f1(
    model,
    window_ds,
    batch_size=512,
    device="cpu",
    average="macro",
):
    """Score ``model`` on every window of ``window_ds``.

    The model is moved to ``device`` and switched to eval mode; gradients are
    disabled by the decorator. Windows are visited in dataset order.

    Returns:
        A tuple ``(metrics, y_true, y_pred)`` where ``metrics`` holds
        ``"accuracy"``, ``"f1_<average>"`` and ``"n_windows"``, and the two
        arrays contain one label / prediction per window.
    """
    model = model.to(device)
    model.eval()

    # shuffle=False is essential: it keeps results aligned with dataset order.
    loader = DataLoader(window_ds, batch_size=batch_size, shuffle=False, num_workers=0)

    label_chunks, pred_chunks = [], []
    for windows, labels in tqdm(loader):
        windows = windows.to(device).transpose(1, 2)  # (B, F, T)
        labels = labels.to(device).long()

        predicted = model(windows).argmax(dim=1)

        label_chunks.append(labels.cpu().numpy())
        pred_chunks.append(predicted.cpu().numpy())

    y_true = np.concatenate(label_chunks)
    y_pred = np.concatenate(pred_chunks)

    return (
        {
            "accuracy": accuracy_score(y_true, y_pred),
            f"f1_{average}": f1_score(y_true, y_pred, average=average),
            "n_windows": len(y_true),
        },
        y_true,
        y_pred,
    )

In [24]:
# === model ===
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

n_epochs = 10

n_nodes = train_df.shape[1]
# train_target is a one-column DataFrame, so DataFrame.nunique() returns a
# Series and int() on a one-element Series is deprecated in pandas. Count the
# distinct labels on the column itself instead.
n_classes = int(train_target.iloc[:, 0].nunique())

model = GNN_TAM(
    n_nodes=n_nodes,
    window_size=window_size,
    n_classes=n_classes,
    n_gnn=1,
    gsl_type="relu",
    n_hidden=1024,
    alpha=0.1,
    k=None,
    device=device,
).to(device)

optimizer = Adam(model.parameters(), lr=1e-3)

# Class weights: 0.5 for class 0, the remaining 0.5 split evenly over the
# other classes.
weight = torch.ones(n_classes, device=device) * 0.5
if n_classes > 1:
    weight[1:] /= (n_classes - 1)

# === training ===
for e in trange(n_epochs, desc="Epochs"):
    # Validation below leaves the model in eval mode, so re-enable training
    # mode at the start of every epoch.
    model.train()
    losses = []
    for ts, y in tqdm(train_loader, desc="Batches", leave=False):
        # ts: (B, T, F) -> (B, F, T)
        ts = ts.to(device).transpose(1, 2)
        y = y.to(device).long()

        logits = model(ts)
        loss = F.cross_entropy(logits, y, weight=weight)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        losses.append(loss.item())

    print(f"Epoch {e+1}/{n_epochs}: avg CE loss = {sum(losses)/len(losses):.4f}")

    # eval_windows_accuracy_f1 switches the model to eval mode itself.
    metrics, y_true, y_pred = eval_windows_accuracy_f1(
        model,
        val_ds,
        batch_size=512,
        device=device,
        average="macro",
    )

    print("Val metrics: ", metrics)

# NOTE: this pickles the whole module (last epoch, not the best one), so
# loading it later needs the class definitions in scope and weights_only=False.
torch.save(model, "saved_models/gnn1.pt")
print("Saved to: saved_models/gnn1.pt")

Using device: cuda


  n_classes = int(train_target.nunique())  # или len(set(train_target))


Epochs:   0%|          | 0/10 [00:00<?, ?it/s]

Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 1/10: avg CE loss = 0.5550


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6113328030440157, 'f1_macro': 0.5943784128013846, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 2/10: avg CE loss = 0.4763


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6267787248811416, 'f1_macro': 0.5983948737076735, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 3/10: avg CE loss = 0.4630


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6168615396878379, 'f1_macro': 0.6105857125129173, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 4/10: avg CE loss = 0.4546


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6169722472698126, 'f1_macro': 0.594948825622579, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 5/10: avg CE loss = 0.4493


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6384766415305102, 'f1_macro': 0.6056395496682498, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 6/10: avg CE loss = 0.4444


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6255476565695817, 'f1_macro': 0.6078073974789767, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 7/10: avg CE loss = 0.4411


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6220670101922935, 'f1_macro': 0.6023717598502547, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 8/10: avg CE loss = 0.4372


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6350956319769994, 'f1_macro': 0.6293471873849347, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 9/10: avg CE loss = 0.4343


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6249752983707718, 'f1_macro': 0.608225435543064, 'n_windows': 1806561}


Batches:   0%|          | 0/12271 [00:00<?, ?it/s]

Epoch 10/10: avg CE loss = 0.4313


  0%|          | 0/3529 [00:00<?, ?it/s]

Val metrics:  {'accuracy': 0.6418482409395531, 'f1_macro': 0.6022515253097926, 'n_windows': 1806561}
Saved to: saved_models/gnn1.pt


In [26]:
device = "cuda" if torch.cuda.is_available() else "cpu"

# The checkpoint is a pickled nn.Module rather than a state_dict, hence
# weights_only=False. Only load such files from a trusted source.
model = torch.load("saved_models/gnn1.pt", map_location=device, weights_only=False)
model.eval()

# Metrics on the validation windows.
metrics, y_true, y_pred = eval_windows_accuracy_f1(
    model, val_ds, batch_size=512, device=device, average="macro"
)

print(metrics)

  0%|          | 0/3529 [00:00<?, ?it/s]

{'accuracy': 0.6418482409395531, 'f1_macro': 0.6022515253097926, 'n_windows': 1806561}


In [27]:
# Scale the test features with the scaler fitted on train_df (no refit),
# writing into a copy so test_df itself stays unscaled.
test_df_scaled = test_df.copy()
test_df_scaled[:] = scaler.transform(test_df.values)

In [29]:
# Test windows use the same window_size / stride as the training set.
test_ds = SlidingWindowDataset(test_df_scaled, test_target, window_size=window_size, stride=stride)

Building safe windows:   0%|          | 0/21 [00:00<?, ?it/s]

In [30]:
# The checkpoint is a pickled nn.Module rather than a state_dict, hence
# weights_only=False. Only load such files from a trusted source.
model = torch.load("saved_models/gnn1.pt", map_location=device, weights_only=False)
model.eval()

# Metrics on the test windows.
metrics, y_true, y_pred = eval_windows_accuracy_f1(
    model, test_ds, batch_size=512, device=device, average="macro"
)

print(metrics)

  0%|          | 0/5321 [00:00<?, ?it/s]

{'accuracy': 0.6105357799754905, 'f1_macro': 0.5774866384137212, 'n_windows': 2723842}


In [32]:
# The checkpoint is a pickled nn.Module rather than a state_dict, hence
# weights_only=False. Only load such files from a trusted source.
model = torch.load("saved_models/gnn1.pt", map_location=device, weights_only=False)
model.eval()

# Metrics on the training windows.
metrics, y_true, y_pred = eval_windows_accuracy_f1(
    model, train_dataset, batch_size=512, device=device, average="macro"
)

print(metrics)

  0%|          | 0/12271 [00:00<?, ?it/s]

{'accuracy': 0.6320823028524285, 'f1_macro': 0.598693200322618, 'n_windows': 6282261}
