In [1]:
!pip install pytorch-lightning



In [2]:
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [3]:
from src.tools import *

In [4]:
torch.set_printoptions(precision=3, edgeitems=20, linewidth=250)   

In [5]:
class SamenessModule(nn.Module):
    def __init__(self, layer_sizes=(1, 8, 8, 8, 1), activation_fun=nn.ReLU()):
        super().__init__()

        self.run_counter = 0

        layers_list = []
        for i in range(len(layer_sizes) - 1):
            layers_list.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
            if i != len(layer_sizes) - 2:
                layers_list.append(activation_fun)

        self.l1 = nn.Sequential(*layers_list)
        print(self)

        for layer in self.l1:
            if isinstance(layer, nn.Linear):
                layer.weight = nn.Parameter(layer.weight.double())
                layer.bias = nn.Parameter(layer.bias.double())

    def forward(self, x):
        self.run_counter += 1
        return self.l1(x.double())


class SamenessAutoEncoder(pl.LightningModule):
    def __init__(self, encoder: SamenessModule):
        super().__init__()
        self.encoder = encoder

    def training_step(self, batch, batch_idx):
        batch = batch[:, 0, :]
        x, y = batch.reshape(2, len(batch), 1)
        x_hat = self.encoder(x)
        loss = F.mse_loss(x_hat, x)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=5 * 1e-3)
        return optimizer

In [6]:
class SamenessBinModule(nn.Module):
    def __init__(self, layer_sizes=(64, 8, 8, 8, 64), activation_fun=nn.ReLU()):
        super().__init__()

        self.run_counter = 0

        layers_list = []
        for i in range(len(layer_sizes) - 1):
            layers_list.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
            if i != len(layer_sizes) - 2:
                layers_list.append(activation_fun)

        self.l1 = nn.Sequential(*layers_list)
        print(self)

        for layer in self.l1:
            if isinstance(layer, nn.Linear):
                layer.weight = nn.Parameter(layer.weight.double())
                layer.bias = nn.Parameter(layer.bias.double())

    def forward(self, x):
        self.run_counter += 1
        return self.l1(x.double())


class SamenessBinAutoEncoder(pl.LightningModule):
    def __init__(self, encoder: SamenessModule):
        super().__init__()
        self.encoder = encoder

    def training_step(self, batch, batch_idx):
        batch = batch[:, 0, :]
        x, y = batch.reshape(2, len(batch), 64)
        x_hat = self.encoder(x)
        loss = F.mse_loss(x_hat, x)
        return loss

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=5 * 1e-3)
        return optimizer

In [7]:
from torch.utils.data import Dataset
import torch
import os
import torch.random
import pandas as pd

In [8]:
from notebooks.data.SimpleRandomDataset import SimpleRandomDataset
from notebooks.data.SimpleRandomDataset import SimpleRandomBitDataset

In [9]:
# Parameters:
activation_fun = nn.ReLU()
layer_sizes = [1, 8, 8, 8, 1]
# layer_sizes = [1, 32, 32, 32, 1]

#### Training with:
n = 50000 batch_size = 5000 max_epochs=100 -> takes 102 sec
n = 50000 batch_size = 2500 max_epochs=100 -> takes 66 sec
n = 50000 batch_size = 1000 max_epochs=100 -> takes 81 sec
n = 50000 batch_size = 500 max_epochs=100 -> takes 107 sec

In [10]:
# dataset = SimpleRandomDataset(50000, transform=transforms.ToTensor())
# train_loader = DataLoader(dataset, batch_size=2500,
#                           # num_workers=8
#                           )
#
# # model
# autoencoder = SamenessAutoEncoder(SamenessModule(layer_sizes, activation_fun))

In [11]:
dataset = SimpleRandomBitDataset(50000, transform=transforms.ToTensor())
train_loader = DataLoader(dataset, batch_size=2500,
                          # num_workers=8
                          )

# model
autoencoder = SamenessBinAutoEncoder(SamenessBinModule())

SamenessBinModule(
  (l1): Sequential(
    (0): Linear(in_features=64, out_features=8, bias=True)
    (1): ReLU()
    (2): Linear(in_features=8, out_features=8, bias=True)
    (3): ReLU()
    (4): Linear(in_features=8, out_features=8, bias=True)
    (5): ReLU()
    (6): Linear(in_features=8, out_features=64, bias=True)
  )
)


In [12]:
# train model
trainer = pl.Trainer(max_epochs=100)
# trainer.fit(model=autoencoder, train_dataloaders=train_loader)
with Timer():
    trainer.fit(autoencoder, train_dataloaders=train_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Running with 


You are using a CUDA device ('NVIDIA GeForce RTX 3050 Ti Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type              | Params
----------------------------------------------
0 | encoder | SamenessBinModule | 1.2 K 
----------------------------------------------
1.2 K     Trainable params
0         Non-trainable params
1.2 K     Total params
0.005     Total estimated model params size (MB)
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=100` reached.


Took 62.9106 seconds


In [20]:
from tqdm import tqdm

In [21]:
# test_dataset = SimpleRandomDataset(10000, transform=transforms.ToTensor())
# test_loader = DataLoader(test_dataset, batch_size=10)

In [22]:
test_dataset = SimpleRandomBitDataset(10000, transform=transforms.ToTensor())
test_loader = DataLoader(test_dataset, batch_size=10)

In [23]:

sum_dif = 0
acc = 0
n = 0
with torch.no_grad():
    # for batch in tqdm(test_loader):
    n += 1
    for index, batch in tqdm(enumerate(test_loader)):
        batch = batch[:, 0, :]
        x, y = batch.reshape(2, len(batch), 64)
        y_hat = autoencoder.encoder(x)
        my_acc = (y == torch.round(y_hat)).all(axis=1).sum().item() / y.shape[0]
        acc += my_acc 
        if index == 0:
            print("Original x:", x)
            print("Reconstructed x:", y_hat)
        sum_dif += ((x - y_hat) ** 2).sum()
print(f"accuracy: {acc /n}")
print(f"square sum: {(sum_dif / (index + 1) / 10)}")

0it [00:00, ?it/s]

Original x: tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 0., 0.,
         0., 0., 0., 0.],
        [0., 0., 1., 0., 1., 1., 1., 0., 0., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 0.

1000it [00:00, 2904.10it/s]

accuracy: 0.0
square sum: 12.320128776078027





In [17]:
n = 1000
s = 0
c = 0
with torch.no_grad():
    for val in test_dataset:
        c += 1
        s += autoencoder.encoder(val)
print(f"avarage diff: {s / c}")

avarage diff: tensor([[[0.510, 0.482, 0.493, 0.466, 0.432, 0.371, 0.268, 0.070, 0.503, 0.491, 0.504, 0.499, 0.505, 0.514, 0.501, 0.515, 0.515, 0.496, 0.522, 0.514, 0.496, 0.491, 0.492, 0.511, 0.497, 0.471, 0.497, 0.501, 0.495, 0.465, 0.503, 0.513, 0.517, 0.500,
          0.500, 0.480, 0.482, 0.487, 0.495, 0.500, 0.506, 0.479, 0.476, 0.494, 0.498, 0.525, 0.491, 0.493, 0.409, 0.374, 0.326, 0.454, 0.393, 0.400, 0.507, 0.510, 0.482, 0.601, 0.397, 0.405, 0.400, 0.404, 0.392, 0.400],
         [0.510, 0.482, 0.493, 0.466, 0.432, 0.371, 0.268, 0.070, 0.503, 0.491, 0.504, 0.499, 0.505, 0.514, 0.501, 0.515, 0.515, 0.496, 0.522, 0.514, 0.496, 0.491, 0.492, 0.511, 0.497, 0.471, 0.497, 0.501, 0.495, 0.465, 0.503, 0.513, 0.517, 0.500,
          0.500, 0.480, 0.482, 0.487, 0.495, 0.500, 0.506, 0.479, 0.476, 0.494, 0.498, 0.525, 0.491, 0.493, 0.409, 0.374, 0.326, 0.454, 0.393, 0.400, 0.507, 0.510, 0.482, 0.601, 0.397, 0.405, 0.400, 0.404, 0.392, 0.400]]], dtype=torch.float64)


In [18]:
x = np.array([FloatBitsConverter.get_float_bits_in_memory(val) for val in np.linspace(-20, 20, 1000)])
x = x.reshape(len(x), 64)
x = torch.tensor(x)

y = autoencoder.encoder(x)

x = x.detach().numpy()
y = y.detach().numpy()
differ = (x - y) ** 2
print(differ.sum(axis=1))
x_to_plot = [FloatBitsConverter.get_float_by_bits(val) for val in x]
y_to_plot = [FloatBitsConverter.get_float_by_bits(val) for val in y]

[ 5.80536684 10.03165292 11.11330816 12.15354934 10.51204465 11.11122781
 13.25821883 13.2355479  11.95338963 12.04109997 11.85705404 12.31332344
 13.15148366 11.80520051 13.50976217 13.00526406 12.09180656 13.0359287
 12.58642785 13.3036195  12.17891203 12.92736482 12.57799643 13.09145882
 13.32986844  9.61204512 13.00832978 13.35025851 13.47797857 12.63046657
 13.16503786 13.33140043 10.18101471 12.42854152 13.15225413 13.10879907
 12.81095544 13.04799235 13.00009067 12.98554396 12.17018801 11.76217339
 13.48539785 13.0423147  13.48100122 13.06633251 12.39957927 13.14063366
 13.43460878 13.26836372  9.9775123  12.63980476 11.9025952  12.81638807
 13.06594308 13.05116897 13.01563688 12.26896345 13.37676964 13.30352229
 13.23204777 12.6082651  13.27649656 12.41895439 10.36455637 12.53265375
 12.24091851 13.43028346 13.06880617 13.13144517 13.15354126 13.22273933
 12.54876306 12.09088759 13.3941249  11.3568514  13.24919882 13.18387379
 13.01389294 12.93813746 11.77303577 13.25052615 12.

In [19]:
import plotly.express as px

ModuleNotFoundError: No module named 'plotly'

In [None]:
print(x[0].astype(int))
# print(y[0])
print(y[0].round().astype(int))

In [None]:
# print(x_to_plot)
print(y_to_plot)

In [None]:
fig = px.line(x=x_to_plot, y=y_to_plot)
fig.show()

fig = px.line(x=x_to_plot, y=differ.sum(axis=1))
fig.show()