In [249]:
import multiprocessing
from typing import Tuple, Any
!pip install pytorch-lightning



In [250]:
import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
import os
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

In [251]:
from src.tools import *

In [252]:
torch.set_printoptions(precision=3, edgeitems=20, linewidth=250)
INT_BITS = 32

In [253]:
class SamenessNaturalBinModule(nn.Module):
    def __init__(self, layer_sizes=(INT_BITS, 64, 64, INT_BITS), activation_fun=nn.ReLU()):
        super().__init__()

        self.run_counter = 0

        layers_list = []
        for i in range(len(layer_sizes) - 1):
            layers_list.append(nn.Linear(layer_sizes[i], layer_sizes[i + 1]))
            # if i != len(layer_sizes) - 2:
            layers_list.append(activation_fun)

        self.l1 = nn.Sequential(*layers_list)

        for layer in self.l1:
            if isinstance(layer, nn.Linear):
                layer.weight = nn.Parameter(layer.weight.double())
                layer.bias = nn.Parameter(layer.bias.double())

    def forward(self, x):
        self.run_counter += 1
        return self.l1(x.double())


class SamenessNaturalBinAutoEncoder(pl.LightningModule):
    def __init__(self, encoder: SamenessNaturalBinModule):
        super().__init__()
        self.encoder = encoder

    def predict(self, x):
        return self.encoder(x)
    
    def training_step(self, batch, batch_idx):
        x, y = batch
        x_hat = self.encoder(x)
        loss = F.mse_loss(x_hat, x)
        return loss

    def _get_accuracy(self, y_hat, y) -> tuple[float, float]:
        eq = (y == torch.round(y_hat))
        return eq.all(axis=1).double().mean(), eq.double().mean()
    

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.encoder(x)
        loss = F.mse_loss(y_hat, y)
        acc, bin_acc = self._get_accuracy(y_hat, y)
        metrics = {"test_acc": acc, "test_loss": loss, "test_bin_acc": bin_acc}
        self.log_dict(metrics)
        return metrics

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=5 * 1e-3)
        return optimizer

In [254]:
from torch.utils.data import Dataset
import torch
import os
from tqdm import tqdm
import torch.random
import numpy as np
import pandas as pd

In [255]:
class SimpleRandomNaturalBinDataset(Dataset):
    def __init__(self, size, transform=None, target_transform=None):
        self.size = size
        self.transform = transform
        self.target_transform = target_transform

        self.X = np.random.randint(low=0, high=2, size=(self.size, INT_BITS)).astype(np.float64)
        self.labels = self.X

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()

        values, labels = self.X[idx], self.labels[idx]

        if self.transform:
            values = self.transform(values)
        if self.target_transform:
            labels = self.target_transform(labels)
        return values, labels


In [256]:
# Parameters:
activation_fun = nn.ReLU()
# layer_sizes = [INT_BITS, 8, 8, 8, INT_BITS]
layer_sizes = [INT_BITS, 64, INT_BITS]
# layer_sizes = [1, 32, 32, 32, 1]

In [257]:
dataset = SimpleRandomNaturalBinDataset(50000)
train_loader = DataLoader(dataset, batch_size=1000,
                          num_workers=multiprocessing.cpu_count(),
                          )

# model
autoencoder = SamenessNaturalBinAutoEncoder(SamenessNaturalBinModule(layer_sizes, activation_fun))

SamenessNaturalBinModule(
  (l1): Sequential(
    (0): Linear(in_features=32, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=32, bias=True)
    (3): ReLU()
  )
)


In [258]:
# train model
trainer = pl.Trainer(max_epochs=50)
# trainer.fit(model=autoencoder, train_dataloaders=train_loader)
with Timer():
    trainer.fit(autoencoder, train_dataloaders=train_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Running with 


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name    | Type                     | Params
-----------------------------------------------------
0 | encoder | SamenessNaturalBinModule | 4.2 K 
-----------------------------------------------------
4.2 K     Trainable params
0         Non-trainable params
4.2 K     Total params
0.017     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Exception ignored in: <function _releaseLock at 0x7f908e9a88b0>
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 228, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


Took 1.1732 seconds


  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


In [259]:
test_dataset = SimpleRandomNaturalBinDataset(10000)
test_loader = DataLoader(test_dataset, batch_size=100, num_workers=multiprocessing.cpu_count())

In [260]:
trainer.test(autoencoder, test_loader)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
Exception ignored in: <function _releaseLock at 0x7f908e9a88b0>
Traceback (most recent call last):
  File "/usr/lib/python3.10/logging/__init__.py", line 228, in _releaseLock
    def _releaseLock():
KeyboardInterrupt: 


Testing: 0it [00:00, ?it/s]

In [261]:
x_rand = torch.tensor(np.random.randint(low=0, high=2, size=(1, INT_BITS)).astype(np.float64))
y_hat = autoencoder.predict(x_rand).round()
print(x_rand == y_hat)


KeyboardInterrupt



In [None]:
# import plotly.express as px

In [None]:
# print(x[0].astype(int))
# print(y[0])
# print(y[0].round().astype(int))

In [None]:
# print(x_to_plot)
# print(y_to_plot)

In [None]:
# fig = px.line(x=x_to_plot, y=y_to_plot)
# fig.show()
# 
# fig = px.line(x=x_to_plot, y=differ.sum(axis=1))
# fig.show()