In [1]:
import os

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [2]:
from typing import Tuple
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import pandas as pd

import numpy as np

import torch

from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.optim import Optimizer, Adam
from torch.optim.lr_scheduler import LRScheduler
import wandb

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [4]:
torch.set_default_dtype(torch.float64)

In [5]:
# df = pd.concat([pd.read_csv("more-elements/more-elements.csv"), pd.read_csv("new-more-data/new-more-data.csv")])
# df
df = pd.read_pickle("half_data.pkl")
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1000,element_1,element_2,element_3,element_1_ratio,element_2_ratio,element_3_ratio,temp,pressure,air_ratio
0,1.402026e-21,1.650584e-21,1.910661e-21,2.264304e-21,2.829955e-21,3.673546e-21,4.745899e-21,5.773241e-21,6.446840e-21,6.741727e-21,...,3.466256e-20,17,23,-1,0.440157,0.559843,0.000000,283.0,0.775,0.0
1,4.102767e-22,4.371429e-22,4.698476e-22,5.105580e-22,5.626400e-22,6.315813e-22,7.266133e-22,8.610945e-22,1.048941e-21,1.323529e-21,...,2.481710e-21,0,19,-1,0.428047,0.571953,0.000000,303.0,0.800,0.6
2,3.610634e-23,2.739156e-23,1.628379e-23,1.195799e-23,1.069667e-23,1.058083e-23,1.121691e-23,1.274356e-23,1.515703e-23,1.882304e-23,...,2.756785e-21,2,11,13,0.342547,0.376666,0.280788,283.0,0.500,0.6
3,1.016487e-22,1.356304e-22,1.894228e-22,3.406116e-22,8.410447e-22,2.039525e-21,1.635235e-21,6.587985e-22,3.087047e-22,2.124933e-22,...,1.240249e-22,6,10,13,0.053299,0.321178,0.625524,293.0,0.900,0.6
4,1.637227e-21,2.113385e-21,2.633061e-21,3.098701e-21,3.509187e-21,3.569827e-21,3.557713e-21,3.356628e-21,3.389955e-21,3.744591e-21,...,3.614986e-23,9,18,24,0.470739,0.251465,0.277796,273.0,0.500,0.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183548,1.720379e-22,2.933964e-22,4.190668e-22,5.128756e-22,6.537258e-22,6.598291e-22,7.231058e-22,6.818061e-22,8.002978e-22,1.130003e-21,...,8.585612e-23,9,17,23,0.401527,0.454117,0.144356,313.0,0.500,0.6
183549,2.392508e-21,1.998411e-21,1.320957e-21,9.131813e-22,6.948151e-22,5.720024e-22,4.987314e-22,4.533583e-22,4.270046e-22,4.143369e-22,...,2.460996e-21,4,8,16,0.348675,0.389984,0.261341,323.0,1.000,0.6
183550,1.947286e-22,2.092204e-22,2.261748e-22,2.462494e-22,2.703056e-22,2.995569e-22,3.357735e-22,3.814871e-22,4.405433e-22,5.189812e-22,...,1.838447e-23,1,7,14,0.116401,0.430989,0.452610,313.0,0.100,0.6
183551,4.523992e-22,6.372981e-22,1.661855e-21,1.874233e-22,1.226359e-22,1.804612e-22,3.107570e-22,1.559673e-21,3.250324e-21,2.427107e-21,...,1.508687e-21,15,18,23,0.422131,0.441890,0.135979,283.0,0.200,0.3


In [6]:
class CustomTransformerSpectraDataset(Dataset):
    def __init__(self, data: pd.DataFrame, device: str = "cuda:0", chunk_size: int = 100, normalize_elems: bool = False) -> None:
        self.data = data
        self.elements = np.unique(
            self.data[["element_1", "element_2", "element_3"]].to_numpy()
        )
        self.chunk_size = chunk_size
        self.air_ratios = data.air_ratio.to_numpy(dtype=np.float64)

        self.spectras = torch.log(
            torch.tensor(
                self.data[[str(i) for i in range(1001)]].to_numpy(dtype=np.float64)
            )
        ).to(device)

        self.ratios = torch.tensor(
            self.data[
                ["element_1_ratio", "element_2_ratio", "element_3_ratio"]
            ].to_numpy(dtype=np.float64)
        ).to(device)

        if normalize_elems:
            elems = np.unique(
                self.data[["element_1", "element_2", "element_3"]].to_numpy()
            )
            elem2id = {-1: -1}
            idx = 0
            for elem in elems:
                if elem != -1:
                    elem2id[elem] = idx
                    idx += 1
            self.data["element_1"] = self.data["element_1"].apply(lambda x: elem2id[x])
            self.data["element_2"] = self.data["element_2"].apply(lambda x: elem2id[x])
            self.data["element_3"] = self.data["element_3"].apply(lambda x: elem2id[x])

        self.element_indices = self.data[
            ["element_1", "element_2", "element_3"]
        ].to_numpy(dtype=np.float64)

        self.elements_distributions = torch.zeros(
            [len(self.data), len(self.elements) if -1 in self.elements else len(self.elements) + 1], dtype=torch.float64   # -1 as there is index that shows that there is no element
        ).to(device)

        for idx in range(len(self.data)):
            indices = self.element_indices[idx, :]
            indices = indices[indices != -1]

            self.elements_distributions[idx, indices] = self.ratios[idx][
                range(indices.shape[0])
            ] 

        self.elements_distributions = self.elements_distributions[
            ~torch.isnan(self.spectras).any(dim=1)
        ]
        self.spectras = self.spectras[~torch.isnan(self.spectras).any(dim=1)]

    def __len__(self) -> int:
        return len(self.spectras)

    def __getitem__(self, idx: int) -> Tuple[torch.Tensor, torch.Tensor]:
        spectra = torch.stack(
            self.spectras[idx, 1:].split(self.chunk_size)   # input will be split into chunks of self.chunk_size elements in them
        )
        elements_distribution = self.elements_distributions[idx] * (
            1 - self.air_ratios[idx]
        )
        elements_distribution[-1] = self.air_ratios[idx]

        return spectra, elements_distribution

In [7]:
class PositionalEncoding(nn.Module):

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 1000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model)
        )
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(position * div_term)
        pe[0, :, 1::2] = torch.cos(position * div_term)
        self.register_buffer("pe", pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Arguments:
            x: Tensor, shape ``[batch_size, seq_len, embedding_dim]``
        """
        x = x + self.pe[:, : x.size(1), :]
        return self.dropout(x)

In [8]:
class THzTransformer(nn.Module):
    def __init__(
        self,
        chunk_size: int = 100,
        nhead: int = 1,
        dim_feedforward: int = 128,
        dropout: float = 0.0,
        batch_first: bool = True,
        activation: str = "relu",
        num_layers: int = 6,
        linear_head_size: int = 128,
        output_size: int = 25,
        device: str = "cuda:0"
    ) -> None:
        super().__init__()
        self.cls_token_embedding = nn.Embedding(
            num_embeddings=1, embedding_dim=chunk_size
        )
        self.batch_first = batch_first
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=chunk_size,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            dropout=dropout,
            batch_first=batch_first,
            activation=activation
        )
        self.positional_encoding = PositionalEncoding(d_model=chunk_size, dropout=dropout)
        self.encoder = nn.TransformerEncoder(encoder_layer=encoder_layer, num_layers=num_layers)
        self.linear_head = nn.Sequential(
            nn.Linear(chunk_size, linear_head_size),
            nn.ReLU(),
            nn.Linear(linear_head_size, output_size),
        )
        self.cls_token_index = torch.tensor([0]).to(device)
        
        self.softmax = nn.Softmax(dim=2)

    def init_weights(self) -> None:
        initrange = 0.1
        self.cls_token_embedding.weight.data.uniform_(-initrange, initrange)
        self.linear_head.bias.data.zero_()
        self.linear_head.weight.data.uniform_(-initrange, initrange)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        cls_token = self.cls_token_embedding(self.cls_token_index)
        cls_token = cls_token.expand(x.shape[0], 1, cls_token.shape[1])

        x_with_cls = torch.cat((cls_token, x), dim=1)
        x_with_pos_encoding = self.positional_encoding(x_with_cls)

        encoder_output = self.encoder(x_with_pos_encoding)[:, self.cls_token_index, :]
        predictions = self.linear_head(encoder_output)
        return predictions

In [9]:
def train_epoch(
    model: nn.Module,
    dataloader: DataLoader,
    optimizer: Optimizer,
    loss_fn: nn.Module,
    scheduler: LRScheduler = None,
):
    model.train()

    total_loss = 0
    mae = 0
    for spectra, target in tqdm(dataloader, desc="Training"):
        optimizer.zero_grad()
        pred = torch.squeeze(model(spectra), dim=1)
        loss = loss_fn(pred, target)

        mae += float(nn.L1Loss()(nn.Softmax(dim=1)(pred), target).item())
        total_loss += loss.item()

        loss.backward()
        # nn.utils.clip_grad_norm_(model.parameters(), 10)
        optimizer.step()

        if scheduler is not None:
            scheduler.step()

    return total_loss / len(dataloader), mae / len(dataloader)


@torch.no_grad()
def val_epoch(
    model: nn.Module, dataloader: DataLoader, loss_fn: nn.Module
):
    model.eval()

    loss = 0
    mae = 0
    for spectra, target in tqdm(dataloader, desc="Validating"):
        pred = torch.squeeze(model(spectra), dim=1)
        loss += float(loss_fn(pred, target).item())
        mae += float(nn.L1Loss()(nn.Softmax(dim=1)(pred), target).item())

    return loss / len(dataloader), mae / len(dataloader)

In [10]:
def train(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    optimizer: Optimizer,
    n_epochs: int,
    loss_fn: nn.Module = nn.CrossEntropyLoss(reduction="mean"),
    scheduler: LRScheduler = None,
    run = None
) -> None:

    for i in range(1, n_epochs + 1):
        train_loss, train_mae = train_epoch(
            model, train_loader, optimizer, loss_fn, scheduler
        )

        val_loss, val_mae = val_epoch(model, val_loader, loss_fn)

        print(
            f"Epoch {i}: \n   Train loss: {train_loss:.5f}    |   Val loss: {val_loss:.5f}    |   train MAE: {train_mae:.5f}  |   val MAE: {val_mae:.5f}\n"
        )
        if run:
            run.log({"train_loss": train_loss, "val_loss": val_loss, "train_mae": train_mae, "val_mae": val_mae})
        # print(
        #     f"Epoch {i}: \n   Train loss: {train_loss:.8f}\n"
        # )

In [11]:
train_df, test_df = train_test_split(df, test_size=0.15, random_state=42)

In [12]:
chunk_size = 250
nhead = 2
dim_feedforward = 1024
dropout = 0
num_layers = 15
linear_head_size = 1024

label_smoothing = 0.0

lr = 5e-5
n_epochs = 23
batch_size = 128

In [13]:
train_loader = DataLoader(
        CustomTransformerSpectraDataset(train_df, chunk_size=chunk_size),
        batch_size=batch_size,
    )
val_dataset = CustomTransformerSpectraDataset(
        test_df, chunk_size=chunk_size
    )

val_loader = DataLoader(
    val_dataset,
    batch_size=batch_size,
)

In [14]:
run = wandb.init(
    # set the wandb project where this run will be logged
    project="course-work",
    save_code=True,
    group="Transformer",
    name="Change scheduler",
    notes="make warmup 2 epochs, then exponential lr",
    config={
        "learning_rate": lr,
        "architecture": "Transformer",
        "epochs": n_epochs,
        "batch_size": batch_size,
        "chunk_size": chunk_size,
        "nhead": nhead,
        "dim_feedforward": dim_feedforward,
        "dropout": dropout,
        "num_layers": num_layers,
        "linear_head_size": linear_head_size,
        "label_smoothing": label_smoothing,
    },
)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmax23-ost[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [19]:
net = THzTransformer(
    chunk_size=chunk_size,
    nhead=nhead,
    dim_feedforward=dim_feedforward,
    dropout=dropout,
    num_layers=num_layers,
    linear_head_size=linear_head_size,
    output_size=val_dataset[0][1].shape[0],
    device='cuda:0'
)
# net.load_state_dict(torch.load("./transformer-chunk250.model"))
# net.eval()
net.to(device)

THzTransformer(
  (cls_token_embedding): Embedding(1, 250)
  (positional_encoding): PositionalEncoding(
    (dropout): Dropout(p=0, inplace=False)
  )
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-14): 15 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=250, out_features=250, bias=True)
        )
        (linear1): Linear(in_features=250, out_features=1024, bias=True)
        (dropout): Dropout(p=0, inplace=False)
        (linear2): Linear(in_features=1024, out_features=250, bias=True)
        (norm1): LayerNorm((250,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((250,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0, inplace=False)
        (dropout2): Dropout(p=0, inplace=False)
      )
    )
  )
  (linear_head): Sequential(
    (0): Linear(in_features=250, out_features=1024, bias=True)
    (1): ReLU()
    (2): Linear(in_features=1024, out_

In [16]:
from torch.optim.lr_scheduler import SequentialLR, LinearLR, CosineAnnealingWarmRestarts, ExponentialLR
from torch.optim import SGD

In [20]:
optimizer = Adam(net.parameters(), lr=lr)
# optimizer = SGD(net.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
scheduler1 = LinearLR(optimizer, start_factor=0.05, end_factor=1, total_iters=2400)
scheduler2 = ExponentialLR(optimizer, gamma=0.999814)
scheduler = SequentialLR(
    optimizer, schedulers=[scheduler1, scheduler2], milestones=[2400]
)

In [21]:
print(device)

# train(net, val_loader, val_loader, optimizer, n_epochs, scheduler=None, run=run, loss_fn=nn.CrossEntropyLoss(reduction="mean", label_smoothing=label_smoothing))
# train(net, val_loader, val_loader, optimizer, n_epochs, scheduler=None, run=None, loss_fn=nn.MSELoss(reduction="mean"))
train(
    net,
    train_loader,
    val_loader,
    optimizer,
    n_epochs,
    scheduler=scheduler,
    run=run,
)


cuda:0


Training: 100%|██████████| 1219/1219 [07:34<00:00,  2.68it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.91it/s]


Epoch 1: 
   Train loss: 2.17739    |   Val loss: 1.68125    |   train MAE: 0.04258  |   val MAE: 0.02985



Training: 100%|██████████| 1219/1219 [07:31<00:00,  2.70it/s]
Validating: 100%|██████████| 216/216 [00:29<00:00,  7.40it/s]


Epoch 2: 
   Train loss: 1.54750    |   Val loss: 1.41478    |   train MAE: 0.02565  |   val MAE: 0.02107



Training: 100%|██████████| 1219/1219 [07:29<00:00,  2.71it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.93it/s]


Epoch 3: 
   Train loss: 1.36921    |   Val loss: 1.31367    |   train MAE: 0.01959  |   val MAE: 0.01796



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.91it/s]


Epoch 4: 
   Train loss: 1.30176    |   Val loss: 1.27182    |   train MAE: 0.01713  |   val MAE: 0.01656



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.95it/s]


Epoch 5: 
   Train loss: 1.26625    |   Val loss: 1.27863    |   train MAE: 0.01577  |   val MAE: 0.01614



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.87it/s]


Epoch 6: 
   Train loss: 1.23814    |   Val loss: 1.23189    |   train MAE: 0.01463  |   val MAE: 0.01447



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.93it/s]


Epoch 7: 
   Train loss: 1.21997    |   Val loss: 1.20864    |   train MAE: 0.01386  |   val MAE: 0.01373



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.92it/s]


Epoch 8: 
   Train loss: 1.20308    |   Val loss: 1.19397    |   train MAE: 0.01312  |   val MAE: 0.01324



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.92it/s]


Epoch 9: 
   Train loss: 1.19050    |   Val loss: 1.18688    |   train MAE: 0.01253  |   val MAE: 0.01295



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.94it/s]


Epoch 10: 
   Train loss: 1.18098    |   Val loss: 1.17645    |   train MAE: 0.01205  |   val MAE: 0.01212



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.96it/s]


Epoch 11: 
   Train loss: 1.17242    |   Val loss: 1.17120    |   train MAE: 0.01163  |   val MAE: 0.01169



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.89it/s]


Epoch 12: 
   Train loss: 1.16537    |   Val loss: 1.16722    |   train MAE: 0.01126  |   val MAE: 0.01149



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.97it/s]


Epoch 13: 
   Train loss: 1.16046    |   Val loss: 1.16271    |   train MAE: 0.01099  |   val MAE: 0.01138



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.91it/s]


Epoch 14: 
   Train loss: 1.15553    |   Val loss: 1.15655    |   train MAE: 0.01072  |   val MAE: 0.01096



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.93it/s]


Epoch 15: 
   Train loss: 1.15178    |   Val loss: 1.15394    |   train MAE: 0.01051  |   val MAE: 0.01077



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.88it/s]


Epoch 16: 
   Train loss: 1.14902    |   Val loss: 1.15203    |   train MAE: 0.01036  |   val MAE: 0.01066



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.93it/s]


Epoch 17: 
   Train loss: 1.14673    |   Val loss: 1.15045    |   train MAE: 0.01022  |   val MAE: 0.01063



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.89it/s]


Epoch 18: 
   Train loss: 1.14473    |   Val loss: 1.14948    |   train MAE: 0.01011  |   val MAE: 0.01055



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.89it/s]


Epoch 19: 
   Train loss: 1.14317    |   Val loss: 1.14824    |   train MAE: 0.01001  |   val MAE: 0.01040



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.91it/s]


Epoch 20: 
   Train loss: 1.14190    |   Val loss: 1.14736    |   train MAE: 0.00994  |   val MAE: 0.01037



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.96it/s]


Epoch 21: 
   Train loss: 1.14088    |   Val loss: 1.14674    |   train MAE: 0.00988  |   val MAE: 0.01036



Training: 100%|██████████| 1219/1219 [07:28<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.92it/s]


Epoch 22: 
   Train loss: 1.14003    |   Val loss: 1.14583    |   train MAE: 0.00983  |   val MAE: 0.01028



Training: 100%|██████████| 1219/1219 [07:27<00:00,  2.72it/s]
Validating: 100%|██████████| 216/216 [00:27<00:00,  7.96it/s]

Epoch 23: 
   Train loss: 1.13933    |   Val loss: 1.14526    |   train MAE: 0.00979  |   val MAE: 0.01022






In [None]:
# for spectra, target in val_loader:
#     pred = net(spectra)
#     pred = torch.squeeze(net(spectra), dim=1)
#     break

# target[0], nn.Softmax()(pred[0])


In [None]:
torch.save(net.state_dict(), "./transformer-reg-scheduler.model")

In [None]:
# preds, y_val = np.empty((0, 26)), np.empty((0, 26))
# for spectra, target in val_loader:
#     pred = net(spectra)
#     pred = torch.squeeze(net(spectra), dim=1)
#     preds = np.append(preds, nn.Softmax(dim=1)(pred).cpu().detach().numpy(), axis=0)
#     y_val = np.append(y_val, target.cpu().detach().numpy(), axis=0)

# preds.shape, y_val.shape

In [None]:
# from sklearn.metrics import mean_absolute_error

# mean_absolute_error(y_val, preds, multioutput="raw_values")

In [None]:
wandb.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_loss,▄▂▂▂▁▁▁▁▁▁▁▁██████████████████
train_mae,▅▃▂▂▂▂▂▂▁▁▁▁██████████████████
val_loss,▂▂▂▁▁▂▁▁▁▁▁███████████████████
val_mae,▃▂▂▂▁▂▂▂▁▁▁███████████████████

0,1
train_loss,2.64632
train_mae,0.05143
val_loss,2.64502
val_mae,0.05149


In [None]:
# torch.save(net.state_dict(), "./transformer.model")

In [None]:
for param in net.parameters():
    print(param)


Parameter containing:
tensor([[ 0.9981, -0.2479, -0.9899,  1.3833, -0.2871, -0.4596, -2.1525,  0.8887,
         -1.0385,  0.2964,  0.8880,  0.0553,  0.8148, -0.3724,  0.2941, -1.5743,
         -0.3755, -0.8611,  0.9099, -0.4918,  0.4218, -1.0261,  0.0090, -1.0740,
         -0.0639,  0.4602,  1.9664, -0.2385, -0.1446, -1.5913,  0.4268, -0.7112,
         -0.2851,  0.2090, -1.2609, -0.9365,  2.2311, -0.6020, -0.1344,  0.4912,
         -0.3201,  0.5691,  0.1117, -0.8349, -0.7963,  2.5996, -0.7392,  0.4801,
         -1.1806, -0.6762,  0.3063, -0.9796, -1.3238, -1.3681, -0.4913,  0.0392,
          0.9174, -0.5773, -1.2121, -0.5295,  0.4659, -0.3321,  1.0619, -0.1051,
          1.4006,  0.8004,  1.7167, -0.1149, -1.9531, -1.7734,  0.1237,  0.2429,
          0.2804, -0.8194, -1.4675,  0.2095, -0.4832, -2.3460, -1.7322, -1.0878,
         -0.9365, -0.4779,  0.2935,  0.5664,  0.4598, -1.4081,  0.3180,  0.3359,
         -0.4131,  0.1828,  1.7941, -0.0324, -0.3277,  1.5862,  1.0770, -0.8740,
      

In [None]:
total_params = 0
for param in net.parameters():
    total_params += torch.prod(torch.tensor(param.shape))
total_params.item()

11763034

In [None]:
i = 0
for spectra, target in val_loader:
    if i < 0:
        i += 1
        continue
    x = spectra[3]
    y = target[3]
    pred = net(x[None, :, :])
    break
y

tensor([0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0871, 0.4238, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
        0.0000, 0.0000, 0.0000, 0.0000, 0.4891, 0.0000, 0.0000, 0.0000],
       device='cuda:0')

In [None]:
pred = pred.squeeze(dim=(0, 1))
pred

tensor([-0.4874, -0.4284, -0.4372, -0.4161, -0.3863, -0.4363, -0.4065, -0.4263,
        -0.4347, -0.4153, -0.4514, -0.4375, -0.4332, -0.4037, -0.4129, -0.4411,
        -0.4496, -0.3903, -0.4616, -0.4284, -0.4171, -0.4021, -0.4431, -0.4184,
        -0.4103,  2.3153], device='cuda:0', grad_fn=<SqueezeBackward2>)

In [None]:
pred.shape, y.shape

(torch.Size([26]), torch.Size([26]))

In [None]:
nn.CrossEntropyLoss(pred, y)

RuntimeError: Boolean value of Tensor with more than one value is ambiguous

In [None]:
pred - y[None, :]

tensor([[ 0.0100,  0.0155, -0.0839,  0.0107,  0.0138,  0.0148, -0.0827,  0.0183,
          0.0182,  0.0185,  0.0178,  0.0122,  0.0154,  0.0279,  0.0143,  0.0208,
         -0.0870,  0.0254]], device='cuda:0', grad_fn=<SubBackward0>)

In [None]:
torch.sum(torch.abs(pred - y[None, :])) / pred.shape[0]

tensor(0.0115, device='cuda:0', grad_fn=<DivBackward0>)

In [None]:
nn.L1Loss()(pred, y[None, :])

  return F.l1_loss(input, target, reduction=self.reduction)


tensor(0.0115, device='cuda:0', grad_fn=<MeanBackward0>)

In [None]:
loss = val_epoch(net, val_loader, lambda x, y: nn.MSELoss()(nn.Softmax()(x), y))

  return self._call_impl(*args, **kwargs)
Validating: 100%|██████████| 861/861 [01:07<00:00, 12.81it/s]


In [None]:
np.sqrt(loss)

0.02595855770471056