In [1]:
import import_ipynb
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import numpy as np
import pytorch_lightning as pl
import torch.nn.functional as F
from torch.optim import Adam, SGD

from sampler import *
from data_modules import *
from callbacks import *

In [23]:
n = 100000
local = sample_no_signalling(n, True, output_img=False)

In [3]:
n = 100000
non_local =  sample_no_signalling(n, False, output_img=False)

In [10]:
with open('non_local.txt', 'w') as f:
    f.write(str(non_local))

In [4]:
class Generator(nn.Module):
    def __init__(self, input_size, output_size, network_width, network_depth):
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        self.network_depth = network_depth
        self.l1 = nn.Linear(self.input_size, network_width)
        self.l2 = nn.Linear(network_width, network_width)
        self.l3 = nn.Linear(network_width, self.output_size)
        self.bn = nn.BatchNorm1d(network_width)
        
    def forward(self, x):
        #x = x.flatten(start_dim=1)
        x = self.l1(x)
        x = self.bn(F.leaky_relu(x))
        for _ in range(self.network_depth - 2):
            x = self.l2(x)
            x = self.bn(F.leaky_relu(x))
        x =  F.softmax(self.l3(x))
        return x.view(-1, self.output_size)

class Discriminator(nn.Module):
    def __init__(self, input_size, network_width, network_depth):
        super().__init__()
        self.network_depth = network_depth
        self.input_size = input_size
        self.l11 = nn.Linear(self.input_size, network_width)
        self.l2 = nn.Linear(network_width, network_width)
        self.l3 = nn.Linear(network_width, 1)
        self.sigmoid = nn.Sigmoid()
        self.bn = nn.BatchNorm1d(network_width)

    def forward(self, x):
        #x = x.flatten(start_dim=1)
        x = self.l11(x)
        x = self.bn(F.leaky_relu(x))
        for _ in range(self.network_depth - 2):
            x = self.l2(x)
            x = self.bn(F.leaky_relu(x))
        x = self.sigmoid(self.l3(x))
        return x

In [65]:
gen = Generator(5, 16, 100, 5)
disc= Discriminator(16, 100, 3)

In [66]:
gen.eval()
disc.eval()

Discriminator(
  (l11): Linear(in_features=16, out_features=100, bias=True)
  (l2): Linear(in_features=100, out_features=100, bias=True)
  (l3): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (bn): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

Epoch 59:  59%|████████████▍        | 925/1563 [00:24<00:16, 38.24it/s, v_num=2]

In [73]:
np.sum(gen(torch.Tensor([[1,2,1,1,1]])).detach().numpy())

0.9999999

In [22]:
disc(gen(torch.Tensor([[1,2,2,2,2]])))

tensor([[0.4875]], grad_fn=<SigmoidBackward>)

In [92]:
model.generator.eval()

Generator(
  (l1): Linear(in_features=5, out_features=100, bias=True)
  (l2): Linear(in_features=100, out_features=100, bias=True)
  (l3): Linear(in_features=100, out_features=16, bias=True)
  (bn): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [93]:
model.generator(torch.Tensor([[1,2,2,2,2]]))

  x =  F.softmax(self.l3(x))


tensor([[1.6913e-06, 2.6358e-06, 5.0972e-07, 4.5313e-06, 9.3738e-07, 1.6703e-06,
         1.0966e-05, 5.2563e-07, 1.9234e-06, 4.3404e-06, 4.2747e-06, 2.2593e-06,
         9.9995e-01, 1.8144e-06, 5.5871e-06, 5.9132e-06]],
       grad_fn=<ViewBackward>)

In [98]:
check_locality(model.generator(torch.Tensor([[1,2,2,2,2]])).detach().numpy()[0])

  x =  F.softmax(self.l3(x))


IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed

In [105]:
check_locality(np.reshape(model.generator(torch.Tensor([[1,5,9,2,2]])).detach().numpy()[0], (4,4)))

  x =  F.softmax(self.l3(x))


True

In [120]:
non_local = sample_no_signalling(20, False, output_img=False)

In [123]:
model.discriminator.eval()

Discriminator(
  (l11): Linear(in_features=16, out_features=100, bias=True)
  (l2): Linear(in_features=100, out_features=100, bias=True)
  (l3): Linear(in_features=100, out_features=1, bias=True)
  (sigmoid): Sigmoid()
  (bn): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

In [130]:
torch.Tensor(np.reshape(non_local[0], (4,4)))

tensor([[0.3112, 0.0317, 0.1653, 0.1776],
        [0.2042, 0.4529, 0.4407, 0.2164],
        [0.0106, 0.3266, 0.0656, 0.2716],
        [0.5048, 0.1580, 0.5403, 0.1225]])

In [132]:
torch.Tensor([non_local[0]])

tensor([[0.3112, 0.0317, 0.1653, 0.1776, 0.2042, 0.4529, 0.4407, 0.2164, 0.0106,
         0.3266, 0.0656, 0.2716, 0.5048, 0.1580, 0.5403, 0.1225]])

In [159]:
for i in sample_no_signalling(20, False, output_img=False):
    print(model.discriminator(torch.Tensor([i])))

tensor([[0.5045]], grad_fn=<SigmoidBackward>)
tensor([[0.5044]], grad_fn=<SigmoidBackward>)
tensor([[0.5052]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5044]], grad_fn=<SigmoidBackward>)
tensor([[0.5047]], grad_fn=<SigmoidBackward>)
tensor([[0.5047]], grad_fn=<SigmoidBackward>)
tensor([[0.5048]], grad_fn=<SigmoidBackward>)
tensor([[0.5045]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5051]], grad_fn=<SigmoidBackward>)
tensor([[0.5050]], grad_fn=<SigmoidBackward>)
tensor([[0.5045]], grad_fn=<SigmoidBackward>)
tensor([[0.5051]], grad_fn=<SigmoidBackward>)
tensor([[0.5053]], grad_fn=<SigmoidBackward>)
tensor([[0.5041]], grad_fn=<SigmoidBackward>)
tensor([[0.5051]], grad_fn=<SigmoidBackward>)
tensor([[0.5044]], grad_fn=<SigmoidBackward>)
tensor([[0.5053]], grad_fn=<SigmoidBackward>)
tensor([[0.5052]], grad_fn=<SigmoidBackward>)


In [160]:
for i in sample_no_signalling(20, True, output_img=False):
    print(model.discriminator(torch.Tensor([i])))

tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5049]], grad_fn=<SigmoidBackward>)
tensor([[0.5047]], grad_fn=<SigmoidBackward>)
tensor([[0.5051]], grad_fn=<SigmoidBackward>)
tensor([[0.5048]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5048]], grad_fn=<SigmoidBackward>)
tensor([[0.5044]], grad_fn=<SigmoidBackward>)
tensor([[0.5044]], grad_fn=<SigmoidBackward>)
tensor([[0.5043]], grad_fn=<SigmoidBackward>)
tensor([[0.5044]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5047]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5050]], grad_fn=<SigmoidBackward>)
tensor([[0.5052]], grad_fn=<SigmoidBackward>)
tensor([[0.5041]], grad_fn=<SigmoidBackward>)
tensor([[0.5038]], grad_fn=<SigmoidBackward>)
tensor([[0.5049]], grad_fn=<SigmoidBackward>)


In [151]:
for i in local[:20]:
    print(model.discriminator(torch.Tensor([i])))

tensor([[0.5047]], grad_fn=<SigmoidBackward>)
tensor([[0.5042]], grad_fn=<SigmoidBackward>)
tensor([[0.5044]], grad_fn=<SigmoidBackward>)
tensor([[0.5043]], grad_fn=<SigmoidBackward>)
tensor([[0.5048]], grad_fn=<SigmoidBackward>)
tensor([[0.5043]], grad_fn=<SigmoidBackward>)
tensor([[0.5043]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5046]], grad_fn=<SigmoidBackward>)
tensor([[0.5049]], grad_fn=<SigmoidBackward>)
tensor([[0.5043]], grad_fn=<SigmoidBackward>)
tensor([[0.5045]], grad_fn=<SigmoidBackward>)
tensor([[0.5043]], grad_fn=<SigmoidBackward>)
tensor([[0.5041]], grad_fn=<SigmoidBackward>)
tensor([[0.5051]], grad_fn=<SigmoidBackward>)
tensor([[0.5047]], grad_fn=<SigmoidBackward>)
tensor([[0.5048]], grad_fn=<SigmoidBackward>)
tensor([[0.5040]], grad_fn=<SigmoidBackward>)
tensor([[0.5044]], grad_fn=<SigmoidBackward>)


In [137]:
np.random.uniform(0,1,5)

array([0.39847612, 0.67295759, 0.06251784, 0.05309058, 0.94803463])

In [143]:
for _ in range(100):
    print(check_locality(np.reshape(model.generator(torch.Tensor([np.random.uniform(0,1,5)])).detach().numpy()[0], (4,4))))

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


  x =  F.softmax(self.l3(x))


In [5]:
class GAN(pl.LightningModule):
    def __init__(self, dataset, image_size=28, batch_size=64, generator_width=800, generator_depth=6,
                discriminator_width=400, discriminator_depth=3, discriminator_training_loops=1, num_workers=16):
        super().__init__()
        self.image_size = image_size
        self.batch_size = batch_size
        self.generator_width = generator_width
        self.generator_depth = generator_depth
        self.discriminator_width = discriminator_width
        self.discriminator_depth = discriminator_depth
        self.discriminator_training_loops = discriminator_training_loops
        self.generator = Generator(5, 16, 100, 5)#self.image_size, self.generator_width, self.generator_depth)
        self.discriminator = Discriminator(16, 100, 3)#self.image_size, self.discriminator_width, self.discriminator_depth)
        self.automatic_optimization=False
        self.dataset = dataset
        self.num_workers = num_workers
        
    def train_dataloader(self):
        return DataLoader(self.dataset,
                          self.batch_size * self.discriminator_training_loops,
                          shuffle=True,
                          num_workers=self.num_workers)
    
    def training_step(self, batch, batch_idx):
        X, _ = batch
        generator_optimizer, discriminator_optimizer = self.optimizers()
        for data_batch in torch.split(X, self.batch_size):
            noise_batch = torch.randn(self.batch_size, 5).cuda()
            discriminator_optimizer.zero_grad()
            loss_discriminator = self.discriminator_loss(data_batch, noise_batch)
            self.manual_backward(loss_discriminator)
            
            grad_max = max([torch.max(p.grad) for p in self.discriminator.parameters() if p.grad is not None])
            torch.nn.utils.clip_grad_value_(self.discriminator.parameters(), 0.5)
            grad_max_a = max([torch.max(p.grad) for p in self.discriminator.parameters() if p.grad is not None])
            
            
            discriminator_optimizer.step()
        generator_optimizer.zero_grad()
        noise_batch = torch.randn(self.batch_size, 5).cuda()
        loss_generator = self.generator_loss(noise_batch)
        self.manual_backward(loss_generator)

        grad_max2 = max([torch.max(p.grad) for p in self.generator.parameters() if p.grad is not None])
        torch.nn.utils.clip_grad_value_(self.generator.parameters(), 0.5)
        grad_max2_a = max([torch.max(p.grad) for p in self.generator.parameters() if p.grad is not None])
            
        generator_optimizer.step()
        self.logger.experiment.add_scalar("Generator_loss", loss_generator, self.current_epoch)
        self.logger.experiment.add_scalar("Discriminator_loss", loss_discriminator, self.current_epoch)
        self.logger.experiment.add_scalar("Grad_max_before", grad_max, self.current_epoch)
        self.logger.experiment.add_scalar("Grad_max_after", grad_max_a, self.current_epoch)
        self.logger.experiment.add_scalar("Grad_max_before2", grad_max2, self.current_epoch)
        self.logger.experiment.add_scalar("Grad_max_after2", grad_max2_a, self.current_epoch)
        return 
    
    def configure_optimizers(self):
        generator_optimizer = Adam(self.generator.parameters(), lr=0.0002, betas=(0.5, 0.999))
        discriminator_optimizer = Adam(self.discriminator.parameters(), lr=0.0002, betas=(0.5, 0.999))
        return generator_optimizer, discriminator_optimizer
    
    def generator_loss(self, noise):
        return -torch.mean(torch.log(self.discriminator(self.generator(noise)) + 1e-16))
    
    def discriminator_loss(self, data, noise):
        return -0.5 * torch.mean(torch.log(self.discriminator(data) + 1e-16)) - \
    0.5 * torch.mean(torch.log(1-self.discriminator(self.generator(noise)) + 1e-16))

In [8]:
ds = MyDataset(non_local)

In [9]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger
logger = TensorBoardLogger('my_logs', 'GAN')
tr1 = Trainer(max_epochs=800, gpus=1, logger=logger)

  return torch._C._cuda_getDeviceCount() > 0


MisconfigurationException: You requested GPUs: [0]
 But your machine only has: []

In [168]:
import numpy as np
model = GAN(ds)
tr1.fit(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


RuntimeError: CUDA error: unspecified launch failure

In [84]:
%reload_ext tensorboard
%tensorboard --logdir my_logs/

Reusing TensorBoard on port 6006 (pid 76199), started 0:02:29 ago. (Use '!kill 76199' to kill it.)

In [7]:
class MyDataset(Dataset):
    def __init__(self, data):
        self.data = data
        
    def __getitem__(self, idx):
        #x = Image.fromarray(self.data[idx].astype(np.uint8))
        #x = self.transform(x)
        x = torch.FloatTensor(self.data[idx])
        return x, 0
    
    def __len__(self):
        return len(self.data)

In [87]:
import numpy as np
model = GAN(ds)
tr.fit(model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name          | Type          | Params
------------------------------------------------
0 | generator     | Generator     | 12.5 K
1 | discriminator | Discriminator | 12.1 K
------------------------------------------------
24.6 K    Trainable params
0         Non-trainable params
24.6 K    Total params
0.098     Total estimated model params size (MB)


Epoch 0:   1%|▏                      | 15/1563 [00:00<00:44, 34.69it/s, v_num=4]

  x =  F.softmax(self.l3(x))


Epoch 0:   0%|                                       | 0/1563 [1:54:41<?, ?it/s]
Epoch 59:  59%|███████████▏       | 925/1563 [1:23:37<57:40,  5.42s/it, v_num=2]
Epoch 0:   0%|                                       | 0/1563 [1:53:23<?, ?it/s]
Epoch 499: 100%|███████████████████| 1563/1563 [00:18<00:00, 82.41it/s, v_num=4]


In [None]:
gen()