# Pretrained VAE + GAN

1. Load the pre-trained VAE and freeze its layers (exclude them from gradient updates)
2. Add fully connected layer after VAE latent layer
3. Add Generative model

Refs: https://github.com/lyeoni/pytorch-mnist-GAN/blob/master/pytorch-mnist-GAN.ipynb

In [24]:
import os

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

# Alias used in type annotations. torch.Tensor is the tensor class;
# torch.tensor is a factory function and is not a type.
Tensor = torch.Tensor

In [8]:
from models import SVAE, Discriminator
from connections import load_config

In [9]:
# Load the project configuration; later cells read its 'Models' and 'Train' sections.
config = load_config()

## Initialise model from config and load stored weights

In [11]:
# Constructor keyword arguments for the SVAE, read from the loaded config
svae_params = config['Models']['SVAE']['Parameters']
# NOTE(review): vocab_size is hard-coded; presumably it has to match the
# vocabulary the stored checkpoint was trained with — confirm.
svae = SVAE(**svae_params, vocab_size=9840)

In [12]:
# Location of the stored SVAE weights, relative to the working directory
svae_best_model_path = 'best models/svae.pt'
# map_location='cpu' lets a checkpoint that was saved from a GPU be read on a
# CPU-only machine; load_state_dict then copies the values into svae's
# existing parameters.
svae.load_state_dict(torch.load(svae_best_model_path, map_location='cpu'))

<All keys matched successfully>

In [13]:
# Print the module tree to inspect the loaded architecture
print(svae)

SVAE(
  (embedding): Embedding(9840, 512)
  (embedding_dropout): Dropout(p=0.5, inplace=False)
  (encoder_rnn): GRU(512, 512, batch_first=True)
  (decoder_rnn): GRU(512, 512, batch_first=True)
  (hidden2mean): Linear(in_features=512, out_features=16, bias=True)
  (hidden2logv): Linear(in_features=512, out_features=16, bias=True)
  (z2hidden): Linear(in_features=16, out_features=512, bias=True)
  (outputs2vocab): Linear(in_features=512, out_features=9840, bias=True)
  (NLL): NLLLoss()
)


## Remove last layer of SVAE

In [16]:
# Identity layer used as a drop-in replacement for layers we want to remove
# see: https://discuss.pytorch.org/t/how-to-delete-layer-in-pretrained-model/17648
class Identity(nn.Module):
    """Pass-through module: ``forward`` hands back its input untouched.

    Assigning an instance over an existing sub-module keeps the attribute in
    place while removing that sub-module's computation.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` itself (same object, no copy)."""
        return x

In [17]:
# remove outputs2vocab layer in SVAE
# The attribute is kept but replaced by a pass-through module, so whatever
# used to be projected by outputs2vocab is now forwarded unchanged.
svae.outputs2vocab = Identity()
# Print the module tree again to confirm the replacement
print(svae)

## Freeze layers in SVAE
Not required as we'll be passing through the SVAE in the Generator

## Initialise Generator

In [25]:
class Generator(nn.Module):
    """Fully connected generator mapping ``z_dim_in`` features to ``z_dim_out``.

    The network is three linear layers with a ReLU after the first two:
    ``z_dim_in -> fc_dim -> fc_dim -> z_dim_out``. No activation is applied
    to the output.

    Args:
        z_dim_in: Size of the last dimension of the input.
        z_dim_out: Size of the last dimension of the output.
        fc_dim: Width of the two hidden layers.
    """

    def __init__(self, z_dim_in, z_dim_out, fc_dim=128):
        super().__init__()
        self.z_dim_in = z_dim_in
        self.z_dim_out = z_dim_out
        self.fc_dim = fc_dim
        self.net = nn.Sequential(
            nn.Linear(self.z_dim_in, self.fc_dim),
            nn.ReLU(True),  # True -> in-place activation
            nn.Linear(self.fc_dim, self.fc_dim),
            nn.ReLU(True),
            nn.Linear(self.fc_dim, self.z_dim_out),
        )
        # Initialise weights
        self.init_weights()

    def init_weights(self):
        """
        Initialises weights with Xavier method rather than Kaiming (TODO: investigate which is more suitable for LM and RNNs)
        - See: https://pytorch.org/cppdocs/api/function_namespacetorch_1_1nn_1_1init_1ace282f75916a862c9678343dfd4d5ffe.html

        Every ``nn.Linear`` reachable from this module gets Xavier-uniform
        weights and a constant bias of 0.01.
        """
        # self.modules() walks the whole module tree, so this keeps working if
        # a child that is not an iterable container is added later.
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                nn.init.constant_(m.bias, 0.01)

    def forward(self, z: torch.Tensor) -> torch.Tensor:
        """Apply the fully connected stack to ``z`` and return the result."""
        return self.net(z)

In [41]:
# Generator with 512 input features and 64 output features (hidden width left at its default).
# NOTE(review): 512 presumably matches the SVAE GRU hidden size — confirm.
G = Generator(z_dim_in=512, z_dim_out=64)

## Initialise Discriminator

In [44]:
# NOTE(review): z_dim=64 equals the Generator's z_dim_out; presumably the
# discriminator scores Generator outputs — confirm against models.Discriminator.
D = Discriminator(z_dim=64)

## Train Model

In [None]:
# Task and dataset names; _init_data uses them as path components when locating the data files
task_type = 'SEQ'
data_name = 'conll2003'
# Split names; presumably the top-level keys of the pre-processed data.json — confirm
dataset_splits = ['train', 'valid', 'test']

#### Set up data loaders

In [None]:
def _init_data(batch_size=None, data_root='/home/tyler/Desktop/Repos/s-vaal/data'):
    """Build one shuffled DataLoader per dataset split.

    Reads ``<data_root>/<task_type>/<data_name>/data.json`` and, for every
    split in the module-level ``dataset_splits``, stacks the encoded sequences
    and their encoded targets into tensors, wraps them in a ``RealDataset``
    and creates a ``DataLoader`` over it.

    Args:
        batch_size: Batch size for every loader. When ``None`` the value of
            ``config['Train']['batch_size']`` is used.
        data_root: Directory containing the pre-processed data. Defaults to
            the location that was previously hard-coded.

    Returns:
        dict mapping split name to its ``DataLoader``. The underlying dataset
        is available through each loader's ``.dataset`` attribute.
    """
    # Key in dataset - semantically correct for the task at hand.
    x_y_pair_name = 'seq_label_pairs_enc' if data_name == 'ag_news' else 'seq_tags_pairs_enc'

    if batch_size is None:
        batch_size = config['Train']['batch_size']

    # Load pre-processed data
    # NOTE: vocabs.json is not read here because nothing in this function uses
    # the vocabulary; load it where sequences are decoded.
    # NOTE(review): load_json and RealDataset are not imported in the visible
    # notebook cells; they must be imported from the project before calling.
    path_data = os.path.join(data_root, task_type, data_name, 'data.json')
    data = load_json(path_data)

    dataloaders = {}
    for split in dataset_splits:
        # Each entry is an (encoded sequence, encoded targets) pair
        split_pairs = list(data[split][x_y_pair_name].values())
        # Convert lists of encoded sequences into tensors and stack into one large tensor.
        # torch.stack requires all sequences to share one length — assumes
        # the pre-processing padded them; TODO confirm.
        split_seqs = torch.stack([torch.tensor(enc_pair[0]) for enc_pair in split_pairs])
        split_tags = torch.stack([torch.tensor(enc_pair[1]) for enc_pair in split_pairs])
        # Create torch dataset from tensors
        split_dataset = RealDataset(sequences=split_seqs, tags=split_tags)
        # Create torch dataloader generator from dataset
        dataloaders[split] = DataLoader(dataset=split_dataset, batch_size=batch_size,
                                        shuffle=True, num_workers=0)

    return dataloaders

#### Training Routine
X_U = Unlabelled dataset <br>
X_L = Labelled dataset

In [None]:
# NOTE(review): neither value is read by the cells shown here; presumably the
# number of generator / discriminator updates per batch — confirm.
G_rounds = 1
D_rounds = 1

In [None]:
# loss
# Binary cross-entropy; D_train and G_train apply it to the discriminator's output.
# NOTE(review): nn.BCELoss expects inputs in [0, 1], so D presumably ends in a sigmoid — confirm.
criterion = nn.BCELoss()    # TODO: replace with an earth mover's distance (Wasserstein) objective in the future

In [None]:
# optimizer
# One Adam optimiser per network, so a step on one never updates the other.
lr = 0.0002  # learning rate shared by both optimisers
G_optim = optim.Adam(G.parameters(), lr=lr)
D_optim = optim.Adam(D.parameters(), lr=lr)  # fixed typo: `paramaters` raised AttributeError

In [None]:
def D_train(x):
    """Run one discriminator update and return its loss as a Python float.

    Follows the vanilla GAN recipe of the notebook's reference implementation:
    ``x`` is scored as real (target 1) and a batch produced by ``G`` from
    standard-normal noise is scored as fake (target 0).

    Args:
        x: Batch of real samples.
            NOTE(review): assumes ``x`` is a float tensor whose first
            dimension is the batch and whose feature size is what ``D``
            expects — TODO confirm against the Discriminator definition.

    Returns:
        Sum of the real and fake BCE losses for this step.
    """
    D.zero_grad()

    # Train discriminator on real
    D_real_output = D(x)
    # Targets take the shape of D's output, so no output shape is assumed here
    D_real_loss = criterion(D_real_output, torch.ones_like(D_real_output))

    # Train discriminator on fake
    z = torch.randn(x.size(0), G.z_dim_in, device=x.device)
    # detach(): this step must not back-propagate into the generator
    x_fake = G(z).detach()
    D_fake_output = D(x_fake)
    D_fake_loss = criterion(D_fake_output, torch.zeros_like(D_fake_output))

    # Gradient backprop and optimise only D's parameters
    D_loss = D_real_loss + D_fake_loss
    D_loss.backward()
    D_optim.step()

    return D_loss.item()

In [None]:
def G_train(x):
    """Run one generator update and return its loss as a Python float.

    Follows the vanilla GAN recipe of the notebook's reference implementation:
    ``G`` turns standard-normal noise into samples, ``D`` scores them, and the
    generator is trained towards the "real" target (1).

    Args:
        x: Current batch; only its batch size and device are used, to shape
            the noise.
            NOTE(review): assumes ``x`` is a tensor with the batch as its
            first dimension — TODO confirm.

    Returns:
        The generator's BCE loss for this step.
    """
    G.zero_grad()

    z = torch.randn(x.size(0), G.z_dim_in, device=x.device)
    D_output = D(G(z))
    # Target of ones: the generator is rewarded when D scores its samples as real
    G_loss = criterion(D_output, torch.ones_like(D_output))

    # Gradient backprop and optimise only G's parameters.
    # backward() also leaves gradients on D; D_train clears them via
    # D.zero_grad() before they could be used.
    G_loss.backward()
    G_optim.step()

    return G_loss.item()

In [50]:
epochs = 10

# NOTE(review): `dataloader` is not defined in any cell of this notebook
# (running this cell raised NameError). It must be bound to a DataLoader
# yielding (sequences, lengths, tags) batches before this cell is run.

# Feed the SVAE on whatever device its weights live on
svae_device = next(svae.parameters()).device

for epoch in range(1, epochs + 1):

    D_losses, G_losses = [], []
    for batch_seq, batch_len, batch_tags in dataloader:

        # NOTE(review): assumes both are tensors — TODO confirm
        batch_seq = batch_seq.to(svae_device)
        batch_len = batch_len.to(svae_device)

        # pass data through pre-trained VAE
        # NOTE(review): assumes the SVAE forward pass returns a single tensor
        # in the shape D_train / G_train expect — TODO confirm.
        z_vae = svae(batch_seq, batch_len)

        # Train D and G
        D_losses.append(D_train(z_vae))
        G_losses.append(G_train(z_vae))

    # max(..., 1) keeps an empty dataloader from raising ZeroDivisionError
    D_mean = sum(D_losses) / max(len(D_losses), 1)
    G_mean = sum(G_losses) / max(len(G_losses), 1)
    print(f'Epoch {epoch}/{epochs} | D loss: {D_mean:.4f} | G loss: {G_mean:.4f}')

NameError: name 'dataloader' is not defined