In [1]:
%env TORCH_USE_CUDA_DSA=1
%env CUDA_LAUNCH_BLOCKING=1

env: TORCH_USE_CUDA_DSA=1
env: CUDA_LAUNCH_BLOCKING=1


In [16]:
from typing import Tuple
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import pandas as pd

import numpy as np

import torch

from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Optimizer, Adam
from torch.optim.lr_scheduler import LRScheduler

In [17]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [18]:
torch.set_default_dtype(torch.float64)

In [19]:
test_df = pd.read_pickle("test_proc.pkl")
test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1000,element_1,element_2,element_3,element_1_ratio,element_2_ratio,element_3_ratio,temp,pressure,air_ratio
51976,1.429725e-20,3.325318e-20,1.513897e-20,1.119836e-21,1.209848e-21,2.024112e-21,7.172782e-21,4.953834e-21,1.539859e-21,1.285451e-21,...,9.105786e-25,9,12,22,0.136030,0.262144,0.601825,283.0,0.2,0.6
18970,5.940243e-23,7.336601e-23,9.720283e-23,1.447083e-22,2.680262e-22,8.343595e-22,1.010416e-20,6.042984e-21,7.479574e-22,3.643073e-22,...,1.465311e-22,0,21,22,0.188355,0.316809,0.494835,283.0,0.1,0.6
30497,1.193615e-20,6.945339e-21,6.543001e-22,2.529143e-22,1.749403e-22,3.243140e-22,3.550860e-22,1.091960e-22,9.629268e-23,1.882503e-22,...,7.445923e-23,5,16,19,0.444071,0.477905,0.078024,323.0,0.1,0.6
30356,1.624151e-20,5.577240e-21,9.267746e-22,2.168774e-21,5.549700e-21,4.163047e-21,1.670818e-21,8.912111e-22,8.374945e-22,1.444200e-21,...,3.604447e-22,4,21,24,0.509421,0.224513,0.266066,323.0,0.1,0.6
140548,2.003258e-21,1.077312e-21,3.272797e-22,1.452913e-22,9.167639e-23,6.963908e-23,5.832702e-23,5.251748e-23,4.962818e-23,4.851140e-23,...,9.615524e-22,4,13,18,0.227958,0.482563,0.289478,263.0,0.5,0.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
218031,3.535618e-22,4.722553e-22,4.692500e-22,5.307244e-22,6.048017e-22,6.291295e-22,6.453254e-22,6.485575e-22,7.700791e-22,1.061828e-21,...,8.640137e-25,6,9,-1,0.530767,0.469233,0.000000,323.0,0.8,0.6
57587,9.397569e-22,1.173034e-21,2.279960e-21,8.301202e-22,8.418463e-22,9.303921e-22,1.093095e-21,2.257390e-21,3.849821e-21,3.221007e-21,...,1.020158e-21,14,15,18,0.600135,0.375171,0.024694,303.0,0.2,0.6
254241,7.204798e-22,1.141918e-21,2.021069e-21,3.898292e-21,4.167931e-21,3.322392e-21,2.260052e-21,1.356805e-21,9.869838e-22,1.119172e-21,...,1.174975e-22,10,18,20,0.273032,0.469203,0.257765,293.0,1.0,0.6
30894,4.030195e-22,5.592868e-22,9.234086e-22,2.112966e-21,5.189306e-21,3.964872e-21,1.755474e-21,1.090197e-21,1.088081e-21,1.695364e-21,...,1.690917e-23,8,14,24,0.375711,0.384662,0.239627,323.0,0.1,0.6


In [20]:
class CustomTransformerSpectraDataset(Dataset):
    def __init__(
        self,
        data: pd.DataFrame,
        device: str = "cuda:0",
        chunk_size: int = 100,
        normalize_elems: bool = False,
    ) -> None:
        self.data = data
        self.elements = np.unique(
            self.data[["element_1", "element_2", "element_3"]].to_numpy()
        )
        self.chunk_size = chunk_size
        self.air_ratios = data.air_ratio.to_numpy(dtype=np.float64)

        self.spectras = torch.log(
            torch.tensor(
                self.data[[str(i) for i in range(1001)]].to_numpy(dtype=np.float64)
            )
        ).to(device)

        self.ratios = torch.tensor(
            self.data[
                ["element_1_ratio", "element_2_ratio", "element_3_ratio"]
            ].to_numpy(dtype=np.float64)
        ).to(device)

        if normalize_elems:
            elems = np.unique(
                self.data[["element_1", "element_2", "element_3"]].to_numpy()
            )
            elem2id = {-1: -1}
            idx = 0
            for elem in elems:
                if elem != -1:
                    elem2id[elem] = idx
                    idx += 1
            self.data["element_1"] = self.data["element_1"].apply(lambda x: elem2id[x])
            self.data["element_2"] = self.data["element_2"].apply(lambda x: elem2id[x])
            self.data["element_3"] = self.data["element_3"].apply(lambda x: elem2id[x])

        self.element_indices = self.data[
            ["element_1", "element_2", "element_3"]
        ].to_numpy(dtype=np.float64)

        self.elements_distributions = torch.zeros(
            [
                len(self.data),
                len(self.elements) - 1 if -1 in self.elements else len(self.elements),
            ],
            dtype=torch.float64,  # -1 as there is index that shows that there is no element
        ).to(device)

        for idx in range(len(self.data)):
            indices = self.element_indices[idx, :]
            indices = indices[indices != -1]

            self.elements_distributions[idx, indices] = torch.where(self.ratios[idx][
                range(indices.shape[0])
            ] > 0, 1.0, 0.0).double()

        self.elements_distributions = self.elements_distributions[
            ~torch.isnan(self.spectras).any(dim=1)
        ]
        self.spectras = self.spectras[~torch.isnan(self.spectras).any(dim=1)]

    def __len__(self) -> int:
        return len(self.spectras)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        spectra = torch.stack(
            self.spectras[idx, 1:].split(
                self.chunk_size
            )  # input will be split into chunks of self.chunk_size elements in them
        )
        elements_distribution = self.elements_distributions[idx]

        return spectra, elements_distribution

In [21]:
test_dataset = CustomTransformerSpectraDataset(test_df, chunk_size=250)

In [22]:
val_loader = DataLoader(
    test_dataset,
    batch_size=512,
)

In [23]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 1000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``
        """
        x = x + self.pe[:, : x.size(1), :]
        return self.dropout(x)

In [24]:
class THzTransformer(nn.Module):
    def __init__(
        self,
        chunk_size: int = 250,
        nhead: int = 2,
        dim_feedforward: int = 1024,
        dropout: float = 0.0,
        batch_first: bool = True,
        activation: str = "relu",
        num_layers: int = 6,
        linear_head_size: int = 1024,
        output_size: int = 26,
        device: str = "cuda:0",
    ) -> None:
        super().__init__()
        self.cls_token_embedding = nn.Embedding(
            num_embeddings=1, embedding_dim=chunk_size
        )
        self.batch_first = batch_first
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=chunk_size,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=batch_first,
            activation=activation,
        )
        self.positional_encoding = PositionalEncoding(
            d_model=chunk_size, dropout=dropout
        )
        self.encoder = nn.TransformerEncoder(
            encoder_layer=encoder_layer, num_layers=num_layers
        )
        self.linear_head = nn.Sequential(
            nn.Linear(chunk_size, linear_head_size),
            nn.ReLU(),
            nn.Linear(linear_head_size, output_size),
        )
        self.cls_token_index = torch.tensor([0]).to(device)

        self.softmax = nn.Softmax(dim=2)

    def init_weights(self) -> None:
        initrange = 0.1
        self.cls_token_embedding.weight.data.uniform_(-initrange, initrange)
        self.linear_head.bias.data.zero_()
        self.linear_head.weight.data.uniform_(-initrange, initrange)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        cls_token = self.cls_token_embedding(self.cls_token_index)
        cls_token = cls_token.expand(x.shape[0], 1, cls_token.shape[1])

        x_with_cls = torch.cat((cls_token, x), dim=1)
        x_with_pos_encoding = self.positional_encoding(x_with_cls)

        encoder_output = self.encoder(x_with_pos_encoding)[:, self.cls_token_index, :]
        predictions = self.softmax(self.linear_head(encoder_output))
        return predictions

In [25]:
chunk_size = 250
nhead = 2
dim_feedforward = 1024
dropout = 0
num_layers = 15
linear_head_size = 1024

label_smoothing = 0.0

lr = 5e-5
n_epochs = 23
batch_size = 128

In [26]:
net = THzTransformer(
    chunk_size=chunk_size,
    nhead=nhead,
    dim_feedforward=dim_feedforward,
    dropout=dropout,
    num_layers=num_layers,
    linear_head_size=linear_head_size,
    output_size=test_dataset[0][1].shape[0],
    device='cuda:0'
)
net.load_state_dict(torch.load("./transformer-detection-scheduler-50epochs.model"))

net.to(device)


print(device)


pred, y_test = np.empty((0, 25)), np.empty((0, 25))
for spectra, target in tqdm(val_loader, desc="testing"):
    net.eval()
    ans = torch.squeeze(net(spectra), dim=1)
    ans = ans.cpu().detach().numpy()
    pred = np.append(pred, ans, axis=0)
    y_test = np.append(y_test, target.cpu().numpy(), axis=0)

cuda:0


testing: 100%|██████████| 256/256 [01:56<00:00,  2.19it/s]


In [None]:
# for spectra, target in tqdm(val_loader, desc="testing"):
#     ans = net(spectra)
#     ans = nn.Softmax()(ans)
#     break
# ans[5], target[5]

In [13]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    mean_absolute_error,
)

In [14]:
def cross_entropy(predictions, targets, epsilon=1e-12):
    """
    Computes cross entropy between targets (encoded as one-hot vectors)
    and predictions.
    Input: predictions (N, k) ndarray
           targets (N, k) ndarray
    Returns: scalar
    """
    predictions = np.clip(predictions, epsilon, 1.0 - epsilon)
    N = predictions.shape[0]
    ce = -np.sum(targets * np.log(predictions + 1e-9)) / N
    return ce

In [None]:
# reg transformer-reg.model
print(f'MAE: {mean_absolute_error(y_test, pred)}')
print(f"MAE: {mean_absolute_error(y_test, pred, multioutput='raw_values')}")
print(f"Cross Entropy: {cross_entropy(y_test, pred)}")

MAE: 0.010891827321013573
MAE: [0.00481605 0.0207287  0.01958805 0.00459834 0.00471008 0.00496477
 0.00573083 0.00536138 0.02247059 0.00468851 0.0046347  0.00456631
 0.00512989 0.00511868 0.00460899 0.00519681 0.00455206 0.00515543
 0.00484662 0.00510472 0.00471393 0.00514337 0.0045685  0.00460082
 0.00484011 0.11274925]
Cross Entropy: 1.7402779462582796


In [15]:
# reg transformer-reg-scheduler-50epochs.model
print(f"MAE: {mean_absolute_error(y_test, pred)}")
print(f"MAE: {mean_absolute_error(y_test, pred, multioutput='raw_values')}")
print(f"Cross Entropy: {cross_entropy(y_test, pred)}")

MAE: 0.01067634701709007
MAE: [0.00465198 0.0197805  0.01893218 0.0046238  0.00465663 0.00484213
 0.00547316 0.00529517 0.02098098 0.00470893 0.00472482 0.00454156
 0.00503814 0.00487319 0.00461781 0.00502078 0.0045812  0.00499301
 0.00477711 0.00508459 0.00478933 0.00511053 0.00466365 0.00459313
 0.00479964 0.11143107]
Cross Entropy: 1.6899654094326868


In [30]:
# detection
print(f'F1: {f1_score(y_test, np.where(pred > 0.006, 1, 0), average="macro")}')
print(
    f'precision: {precision_score(y_test, np.where(pred > 0.006, 1, 0), average="macro")}'
)
print(f'recall: {recall_score(y_test, np.where(pred > 0.006, 1, 0), average="macro")}')
print(f"accuracy: {accuracy_score(y_test, np.where(pred > 0.006, 1, 0))}")

F1: 0.9268807579372814
precision: 0.9137851666078969
recall: 0.9848391434861555
accuracy: 0.5983973747472049


In [31]:
print(
    f"precision: {precision_score(y_test, np.where(pred > 0.006, 1, 0), average=None)}"
)

print(f"recall: {recall_score(y_test, np.where(pred > 0.006, 1, 0), average=None)}")
print(f"F1: {f1_score(y_test, np.where(pred > 0.006, 1, 0), average=None)}")

precision: [0.99953243 0.30591509 0.31370312 0.9996615  0.99946247 0.99711177
 0.98478114 0.99535697 0.28260015 0.99530579 0.99514759 0.99582716
 0.99973039 0.99986403 0.99918578 0.99478853 0.99952179 0.99993295
 0.99979914 0.9992555  0.9941646  0.99756378 0.99850197 0.99858719
 0.99932832]
recall: [0.98863636 0.99477247 0.99304348 0.98499099 0.98660211 0.98828307
 0.97162979 0.97785417 1.         0.97644737 0.9805432  0.98233966
 0.98623579 0.98612042 0.9840294  0.98897376 0.98207813 0.98820489
 0.98671865 0.983742   0.98126448 0.98483431 0.98001738 0.98441438
 0.97920232]
F1: [0.99405454 0.46793083 0.47678844 0.99227202 0.99299065 0.99267779
 0.97816127 0.98652794 0.44066757 0.9857864  0.98779142 0.98903743
 0.99293724 0.99294467 0.99154968 0.99187263 0.99072319 0.99403433
 0.99321583 0.99143807 0.98767242 0.99115818 0.98917333 0.99145014
 0.98916295]


In [41]:
# detection transformer-detection-scheduler-50epochs.model
print(f'F1: {f1_score(y_test, np.where(pred > 0.07, 1, 0), average="macro")}')
print(
    f'precision: {precision_score(y_test, np.where(pred > 0.07, 1, 0), average="macro")}'
)
print(f'recall: {recall_score(y_test, np.where(pred > 0.07, 1, 0), average="macro")}')
print(f"accuracy: {accuracy_score(y_test, np.where(pred > 0.07, 1, 0))}")

F1: 0.9356594070858323
precision: 0.9243913374570089
recall: 0.9580367279301882
accuracy: 0.6753920708207731


In [42]:
print(
    f"precision: {precision_score(y_test, np.where(pred > 0.07, 1, 0), average=None)}"
)

print(f"recall: {recall_score(y_test, np.where(pred > 0.07, 1, 0), average=None)}")
print(f"F1: {f1_score(y_test, np.where(pred > 0.07, 1, 0), average=None)}")

precision: [1.         0.38522608 0.40905413 0.99986604 1.         0.99793732
 0.98940549 0.99866006 0.34934879 0.99773996 0.9973312  0.99633651
 0.99960024 0.99993278 1.         0.99668259 0.99986532 1.
 0.99980152 1.         0.99607007 0.99845875 0.99993256 0.99920085
 0.99933316]
recall: [0.99881078 0.71543462 0.71016722 0.99579748 0.99761226 0.99846881
 0.97415454 0.98539036 0.66302187 0.9875     0.99262899 0.99309521
 0.99760622 0.99738501 0.99772803 0.99780804 0.99664384 0.99840965
 0.99854632 0.99686834 0.99000331 0.99545697 0.99097775 0.99509219
 0.98631039]
F1: [0.99940504 0.50079752 0.51910524 0.99782761 0.9988047  0.998203
 0.98172079 0.99198083 0.45759107 0.99259357 0.99497454 0.99471322
 0.99860224 0.99865727 0.99886272 0.997245   0.99825198 0.99920419
 0.99917353 0.99843171 0.99302743 0.99695561 0.99543502 0.99714229
 0.99277907]
