In [1]:
!pip install pytorch-lightning



In [2]:
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import torchvision.transforms as transforms
from torch.utils.data import DataLoader



In [3]:
activation_fun = nn.ReLU()
layer_sizes = [1, 8, 8, 8, 1]


class SamenessModule(nn.Module):
    def __init__(self):
        super().__init__()

        self.run_counter = 0

        layers_list = []
        for i in range(len(layer_sizes) - 1):
            layers_list.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
            if i != len(layer_sizes) - 2:
                layers_list.append(activation_fun)

        self.l1 = nn.Sequential(*layers_list)
        print(self)

        for layer in self.l1:
            if isinstance(layer, nn.Linear):
                layer.weight = nn.Parameter(layer.weight.double())
                layer.bias = nn.Parameter(layer.bias.double())

    def forward(self, x):
        self.run_counter += 1
        return self.l1(x.double())

class SamenessAutoEncoder(pl.LightningModule):
    def __init__(self, encoder: SamenessModule):
        super().__init__()
        self.encoder = encoder

    def training_step(self, batch, batch_idx):
        batch = batch[:, 0, :]
        x, y = batch.reshape(2, len(batch), 1)
        x_hat = self.encoder(x)
        loss = F.mse_loss(x_hat, x)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=5 * 1e-3)
        return optimizer

In [4]:
from torch.utils.data import Dataset
import torch
import os
import torch.random
import pandas as pd

In [5]:
from notebooks.data.SimpleRandomDataset import SimpleRandomDataset

In [12]:
dataset = SimpleRandomDataset(50000, transform=transforms.ToTensor())
train_loader = DataLoader(dataset, batch_size=5000)

# model
autoencoder = SamenessAutoEncoder(SamenessModule())

# train model
trainer = pl.Trainer(max_epochs=100)
# trainer.fit(model=autoencoder, train_dataloaders=train_loader)
trainer.fit(model=autoencoder, train_dataloaders=train_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3050 Ti Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type           | Params
-------------------------------------------
0 | encoder | SamenessModule | 169   
-------------------------------------------
169       Trainable params
0         Non-trainable params
169       Total params
0.001     Total estimated model params size (MB)


SamenessModule(
  (l1): Sequential(
    (0): Linear(in_features=1, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=8, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8, out_features=8, bias=True)
    (5): ReLU()
    (6): Linear(in_features=8, out_features=1, bias=True)
  )
)
Epoch 249: 100%|██████████| 100/100 [00:01<00:00, 96.92it/s, v_num=111]

`Trainer.fit` stopped: `max_epochs=250` reached.


Epoch 249: 100%|██████████| 100/100 [00:01<00:00, 96.17it/s, v_num=111]


In [7]:
from tqdm import tqdm

In [13]:
test_dataset = SimpleRandomDataset(1000, transform=transforms.ToTensor())
test_loader = DataLoader(test_dataset, batch_size=10)

sum_dif = 0
with torch.no_grad():
    # for batch in tqdm(test_loader):
    for index, batch in tqdm(enumerate(test_loader)):
        batch = batch[:, 0, :]
        x, y = batch.reshape(2, 10, 1)
        reconstructed_x = autoencoder.encoder(x)
        if index == 0:
            print("Original x:", x)
            print("Reconstructed x:", reconstructed_x)
        sum_dif += sum((x - reconstructed_x)**2)
print(f"square sum: {(sum_dif / (index + 1) / 10).__float__()}")


100it [00:00, 2273.00it/s]

Original x: tensor([[-4.8214],
        [-4.8214],
        [ 3.6566],
        [ 3.6566],
        [-1.8263],
        [-1.8263],
        [-0.6447],
        [-0.6447],
        [ 4.0437],
        [ 4.0437]], dtype=torch.float64)
Reconstructed x: tensor([[-4.8156],
        [-4.8156],
        [ 3.6582],
        [ 3.6582],
        [-1.8239],
        [-1.8239],
        [-0.6431],
        [-0.6431],
        [ 4.0453],
        [ 4.0453]], dtype=torch.float64)
square sum: 7.98055116988657e-06





In [14]:
n = 1000
s = 0
c = 0
with torch.no_grad():
    for val in test_dataset:
        c += 1
        s += autoencoder.encoder(val)
print(f"avarage diff: {s / c}")

avarage diff: tensor([[[-0.0678],
         [-0.0678]]], dtype=torch.float64)


In [15]:
x = np.linspace(-20, 20, 1000)
x = x.reshape(len(x), 1)
x = torch.tensor(x)

y = autoencoder.encoder(x)

x = x.reshape(len(x)).detach().numpy()
y = y.reshape(len(y)).detach().numpy()
differ = (x - y)**2

In [16]:
import plotly.express as px

In [30]:
fig = px.line(x=x, y=y)
fig.show()

fig = px.line(x=x, y=differ)
fig.show()