In [1]:
import sys
import torch
import os

In [2]:
if "gan.ipynb" in os.listdir():
    os.chdir("..")

In [3]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
from src.data_utils.batch import rebatch
from src.data_utils.data import get_training_iterators
from src.model.loss_optim import MultiGPULossCompute, SimpleLossCompute
from src.model.model import make_model, NoamOpt, LabelSmoothing, translate_sentence
from src.utils.utils import get_tokenizer

In [5]:
tok = get_tokenizer("tr")

In [6]:
train_iter, valid_iter, test_iter, train_idx, dev_idx, test_idx = get_training_iterators("tur", batch_size=512)



In [7]:
# mini dev set
with open("data/tr/tur.dev.tgt", encoding="utf-8") as infile:
    toystrings = [x.strip() for x in infile.readlines()[:20]]

In [8]:
toyset = [torch.LongTensor([1] + tok.Encode(x) + [2])  for x in toystrings]
toyset = torch.nn.utils.rnn.pad_sequence(sequences=toyset, padding_value=3)

In [9]:
toyset

tensor([[    1,     1,     1,  ...,     1,     1,     1],
        [ 5605,     8,  1330,  ...,     8,   771,  2804],
        [27861,  2475, 10284,  ...,  3987,  5057, 11694],
        ...,
        [    3,     3,     3,  ...,     3,     3,     3],
        [    3,     3,     3,  ...,     3,     3,     3],
        [    3,     3,     3,  ...,     3,     3,     3]])

Two critics:
- Input related to output or not
- Classifier into poetry, prose, generated, scrambled poetry

One word/token selector:
- Choose tokens from input sequence to use for topic
- 

In [10]:
from torchtext import data
import torchtext as tt
from src.data_utils.batch import MyIterator
from src.model.model import batch_size_val

def each_line(fname):
    c = 0
    lines = []
    with open(fname, "r", encoding="utf-8") as infile:
        for line in infile:
            if line.count(" ") > 200 or line.count(" ") < 10:
                continue
            lines.append(line.strip())
            c += 1
            if c >= 2000000: 
                break
    return lines

def make_iter(lines, tokenizer, batch_size=256):
    
    def tok(seq):
        return tokenizer.EncodeAsIds(seq)

    field = data.Field(tokenize=tok, init_token=1, eos_token=2, pad_token=3, use_vocab=False)
    #ds = data.TabularDataset(fpath, "tsv", [("src", field)], skip_header=True)

    examples = [tt.data.Example.fromdict({"src": x}, {"src": ("src", field)}) for x in lines]
    ds = tt.data.Dataset(examples, {"src": field})
    iter = MyIterator(ds, batch_size=batch_size, device="cpu",
                             repeat=False, sort_key=lambda x: len(x.src),
                             batch_size_fn=batch_size_val, train=False, sort=True)

    return iter




In [11]:
prose_iter = make_iter(each_line("data/tr/prose/prose_gan.txt"), tok, batch_size=512)




In [12]:
import random

to_scramble = each_line("data/tr/tur.train.tgt")
scrambled = []
for poem in to_scramble:
    new_poem = poem.split("¬")
    random.shuffle(new_poem)
    scrambled.append("¬".join(new_poem))

In [13]:
scrambled_iter = make_iter(scrambled, tok, batch_size=3000)

In [14]:
import copy
from src.model.model import MultiHeadedAttention, PositionwiseFeedForward, \
                    PositionalEncoding, Encoder, EncoderLayer, Generator, Embeddings
import torch.nn as nn

class Critic(nn.Module):

    def __init__(self, encoder, src_embed, generator):
        super(Critic, self).__init__()
        self.encoder = encoder
        self.src_embed = src_embed
        self.generator = generator
        self.steps = 0

        
    def forward(self, x, mask):
        """Pass the input (and mask) through each layer in turn."""
        x = self.src_embed(x)
        for layer in self.encoder.layers:
            x = layer(x, mask)
        return self.encoder.norm(x)    


def make_critic(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    """Helper: Construct a model from hyper-parameters."""
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    generator = Generator(d_model, tgt_vocab)
    embed = nn.Sequential(Embeddings(d_model, src_vocab), c(position))
    encoder = Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N)
    critic = Critic(encoder, embed, generator)
    
    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in critic.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform(p)

    return critic


In [15]:
ntokens = 32000
enc_dec = make_model(ntokens, ntokens, N=6).to(device)
token_selector = make_critic(ntokens, 2, N=2).to(device)
style_critic = make_critic(ntokens, 4, N=2).to(device)
relevance_critic = make_critic(ntokens + 1, 1, N=2).to(device)

  nn.init.xavier_uniform(p)


In [16]:
from torch.autograd import Variable
import numpy as np
def subsequent_mask(size):
    "Mask out subsequent positions."
    attn_shape = (1, size, size)
    subsequent_mask = np.triu(np.ones(attn_shape), k=1).astype('uint8')
    return torch.from_numpy(subsequent_mask) == 0


def prep_tensors( src, trg, pad=3):
    src_mask = (src != pad).unsqueeze(-2)
    trg_in = trg[:, :-1]
    trg_y = trg[:, 1:]
    trg_mask = make_std_mask(trg_in, pad)
    return src, trg_y, src_mask, trg_mask

def make_std_mask(tgt, pad):
    """Create a mask to hide padding and future words."""
    tgt_mask = (tgt != pad).unsqueeze(-2)
    tgt_mask = tgt_mask & Variable(
        subsequent_mask(tgt.size(-1)).type_as(tgt_mask.data))
    return tgt_mask


In [17]:
def get_dae_input(tgt, token_selector):
    select_prob_embeds = token_selector.forward(tgt.to(device), 
                                         (tgt != 3).unsqueeze(-2).to(device))
    select_prob = token_selector.generator(select_prob_embeds)
    select_indices = torch.max(select_prob, dim=2).indices.type(torch.ByteTensor)
    dae_list = []
    for ind, row in zip(select_indices, tgt):
        dae_list.append(torch.masked_select(row, ind)[:15])
    dae_input = torch.nn.utils.rnn.pad_sequence(dae_list, batch_first=False, padding_value=3)
    return dae_input

In [18]:
rebatched = (rebatch(3, b) for b in train_iter)

In [19]:
torch


<module 'torch' from '/auto/praha1/memduh/versetorch/venv/lib/python3.6/site-packages/torch/__init__.py'>

In [20]:
from src.model.adafactor import Adafactor

enc_dec_opt = NoamOpt(enc_dec.src_embed[0].d_model, 1, 2000,
                        torch.optim.Adam(enc_dec.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
#enc_dec_opt = Adafactor(enc_dec.parameters())

style_criterion = nn.BCELoss()
relevance_criterion = nn.BCELoss()

token_optim = Adafactor(token_selector.parameters())
style_optim = Adafactor(style_critic.parameters())
rel_optim = Adafactor(relevance_critic.parameters())

In [21]:
relevance_criterion = torch.nn.BCEWithLogitsLoss()

In [22]:
def get_relevance_input(dae_input, tgt):
    mid_point = torch.ones((tgt.shape[0], 1), dtype=torch.long) * ntokens
    return torch.cat((dae_input, mid_point.to(device), tgt), dim=1)


In [23]:
accumulation_steps = 8

In [24]:
# get validation iterator


def validate_batch(model, src, max_len=256, start_symbol=1, end_symbol=2):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    src_mask = (src != 3).unsqueeze(-2)
    memory = model.encode(src.to(device), src_mask.to(device))
    ys = torch.ones(src.shape[0], 1).fill_(start_symbol).type_as(src.data).to(device)
    finished = torch.zeros((src.shape[0], 1))
    for i in range(max_len-1):
        out = model.decode(memory, src_mask,
                           Variable(ys).to(device),
                           Variable(subsequent_mask(ys.size(1)).type_as(src.data)).to(device))
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim = 1)
        # next_word = next_word.data_utils[0]
        unsqueezed = next_word.unsqueeze(1)
        for c, token in enumerate(unsqueezed):
            if token == end_symbol:
                finished[c] = 1
        if sum(finished) >= src.shape[0]:
            break
        ys = torch.cat([ys, unsqueezed], dim=1)
                        # torch.ones(src.shape[0], 1).type_as(src.data_utils).fill_(next_word).to(device)], dim=1)
    return ys


def greedy_generate(model, src, max_len=256, start_symbol=1, end_symbol=2):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    src_mask = (src != 3).unsqueeze(-2)
    memory = model.encode(src.to(device), src_mask.to(device))
    ys = torch.ones(src.shape[0], 1).fill_(start_symbol).type_as(src.data).to(device)
    finished = torch.zeros((src.shape[0], 1))
    for i in range(max_len-1):
        out = model.decode(memory, src_mask,
                           Variable(ys).to(device),
                           Variable(subsequent_mask(ys.size(1)).type_as(src.data)).to(device))
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim = 1)
        # next_word = next_word.data_utils[0]
        unsqueezed = next_word.unsqueeze(1)
        for c, token in enumerate(unsqueezed):
            if finished[c] == 1:
                unsqueezed[c] = 3
            if token == end_symbol:
                finished[c] = 1
        if sum(finished) >= src.shape[0]:
            break
        ys = torch.cat([ys, unsqueezed], dim=1)
                        # torch.ones(src.shape[0], 1).type_as(src.data_utils).fill_(next_word).to(device)], dim=1)
    return ys

In [25]:

label_smoothing = LabelSmoothing(size=32000, padding_idx=3, smoothing=0.1)



In [26]:
[tok.Decode(x.tolist()) for x in toyset.transpose(0, 1)]

['ayrılığın sınırlı ışığında¬ışıldar gözler¬dar gelir dünya¬sıkıcı karanlıktan¬çıkış başlar¬sevgi akla gelince¬ışık küçücüktür¬sevgi karanlığında¬sonsuzluğu hatırlatır insana¬ışık kandırıcıdır belki¬ayrılığı sevenler için¬orada yuvarlak bir masa¬üç beş sigara biraz da çay¬sevgi karanlığında herşey¬coşkuyla¬zaten aranan da o değil mi¬sınırlı değilmiş hiçbir şey',
 'Şair sözün gösterer,¬Gözel gözün gösterer.¬Payız özün gösterer,¬Saralmış yarpağıyla.¬Uzaqda quzey durar,¬Şa ⁇ ta qelbi hey vurar.¬Önümde muzey qurar,¬Heyat çılpaqlığıyla.¬Yel eser deyinerek,¬ ⁇ um-paltar geyinerek,¬Fe ⁇ r eder öyünərek¬Insan torpaqlığıyla.',
 'Ey yakarış¬Ey uyanış¬Ey sevgi¬Ey aşk¬Ey arayış¬Ey tükeniş¬Ey özleyiş¬Uyanın artık¬O derin uykulardan¬Ve devam edin¬Katıksızca aramaya¬Aşkı,sevdayı,¬Yakarışı,uyanışı¬Arayışı,tükenişi¬Özleyişi,özlemi',
 'ben ölsemde aşkım cümle alemde¬kulakdan kulaga yayılır belki¬adımız gecer her bir kalemde¬gönül nikahımız kıyılır belki¬sevdik sevildik ölürcesine¬başını sonunu bilircesi

In [27]:
!ls checkpoints

cz-acc	     cz-dae   eng-baseline  tur-acc	  tur-dae
cz-baseline  eng-acc  eng-dae	    tur-baseline


In [28]:
last = sorted(os.listdir("checkpoints/tur-dae"), reverse=False, key=lambda x: int(x.partition(".")[0]))
hundred_k_plus = [x for x in last if len(x) > 8]


In [29]:
checkpoint = torch.load("checkpoints/tur-dae/" + hundred_k_plus[0])
token_selector.load_state_dict(checkpoint['selector_state_dict'])
token_optim.load_state_dict(checkpoint['selector_optim_state_dict'])
enc_dec.load_state_dict(checkpoint['model_state_dict'])
#enc_dec_opt.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])

<All keys matched successfully>

In [None]:
import time 

soft = torch.nn.Softmax(dim=1)
start = time.time()
all_tokens = 0
for c, (poetry_batch, prose_batch, scrambled_batch) in enumerate(zip(rebatched, prose_iter, scrambled_iter)):
    enc_dec_opt.optimizer.zero_grad()
    token_optim.zero_grad()
    all_tokens += poetry_batch.ntokens
    tgt, tgt_mask = poetry_batch.trg.to(device), poetry_batch.trg_mask.to(device)
    # classify tokens, get the first 15 tokens selected.
    dae_input = get_dae_input(poetry_batch.trg, token_selector).transpose(0,1).to(device)
    # create src and src mask from selected tokens
    dae_input_mask = (dae_input != 3).unsqueeze(-2)
    
    # get output of poetry generator
    #output_embeds = enc_dec.forward(dae_input, tgt, dae_input_mask, tgt_mask)
    #output = enc_dec.generator(output_embeds)
    #reconstruction_loss = label_smoothing(output.contiguous().view(-1, output.size(-1)),
    #                         poetry_batch.trg_y.to(device).contiguous().view(-1)) / poetry_batch.ntokens
    #reconstruction_loss.backward()
    
    # critic output
    output_selected = greedy_generate(enc_dec, dae_input)
    critic_scores = soft(style_critic.generator(style_critic.forward(output_selected.to(device), 
                                            (output_selected != 3).unsqueeze(-2).to(device)))[:,0,:])

    critic_loss = style_criterion(critic_scores[:, 0], torch.ones((critic_scores.shape[0])).to(device))
    critic_loss.backward()
    
    
    token_optim.step() 
    enc_dec_opt.step()
    
    # train critic (real, fake, scrambled, prose)
    style_optim.zero_grad()

    real_scores = soft(style_critic.generator(style_critic.forward(poetry_batch.trg_y.to(device), 
                                            (poetry_batch.trg_y != 3).unsqueeze(-2).to(device)))[:,0,:])
    crit_bce = style_criterion(real_scores[:, 0], torch.ones((tgt.shape[0])).to(device))
    
    gen_scores = soft(style_critic.generator(style_critic.forward(output_selected.to(device), 
                                            (output_selected != 3).unsqueeze(-2).to(device)))[:,0,:])
    crit_bce += style_criterion(gen_scores[:, 1], torch.ones((tgt.shape[0])).to(device))
    
    scramb_scores = soft(style_critic.generator(style_critic.forward(scrambled_batch.src.transpose(0,1).to(device),
                                            (scrambled_batch.src.transpose(0,1) != 3).unsqueeze(-2).to(device)))[:,0,:])
    crit_bce += style_criterion(scramb_scores[:, 2], torch.ones((scramb_scores.shape[0])).to(device))
    
    prose_scores = soft(style_critic.generator(style_critic.forward(prose_batch.src.transpose(0,1).to(device),
                            (prose_batch.src.transpose(0,1) != 3).unsqueeze(-2).to(device)))[:,0,:])
    crit_bce += style_criterion(prose_scores[:, 3], torch.ones((prose_scores.shape[0])).to(device))
    crit_bce.backward()
    style_optim.step()

    


    
    if c % 10  == 0:
        #print("Reconstruction loss:", reconstruction_loss)
        print("Discriminator loss:", critic_scores)
        print("Discriminator prediction on real poetry", real_scores)
        print("Discriminator prediction on scrambled", scramb_scores)
        print("Discriminator prediction on prose", prose_scores)
        print(all_tokens / (time.time() - start), "tokens processed per second.")
        #print(torch.mean(gen_scores[:, 1]))
        if c % 500 == 0:
            val_src = get_dae_input(toyset.transpose(0, 1), token_selector).transpose(0,1).to(device)
            validated = greedy_generate(enc_dec, val_src)
            print([tok.Decode(x.tolist()) for x in validated])
        

Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.
Skipped overlong sample while batching.


  


Discriminator loss: tensor([[0.0828, 0.2911, 0.4840, 0.1421],
        [0.2397, 0.1369, 0.4136, 0.2098]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[0.4944, 0.0673, 0.3445, 0.0937],
        [0.7420, 0.0362, 0.1320, 0.0898]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[0.5870, 0.1640, 0.0427, 0.2063],
        [0.7260, 0.1334, 0.0985, 0.0422],
        [0.8260, 0.1045, 0.0271, 0.0424],
        [0.6001, 0.2649, 0.0287, 0.1063],
        [0.6293, 0.0960, 0.1530, 0.1218],
        [0.5640, 0.2067, 0.1007, 0.1287],
        [0.8927, 0.0283, 0.0576, 0.0214],
        [0.6823, 0.0909, 0.1616, 0.0652],
        [0.6290, 0.0742, 0.1346, 0.1622],
        [0.4441, 0.1834, 0.3464, 0.0261],
        [0.8813, 0.0410, 0.0211, 0.0565],
        [0.7922, 0.1241, 0.0468, 0.0369],
        [0.6862, 0.1507, 0.1068, 0.0563],
        [0.8664, 0.0621, 0.0254, 0.0461],
        [0.3100, 0.0804, 0.5073, 0.1023],


['ayrılığın sınırlı ışığında¬ışıldar gözler¬dar gelir dünya¬sıkıcıyla¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de¬bir de', 'Şair sözün gösterer,¬Gözel gözün gösterer.¬Payızını ver de ver,¬Şairlik, şiir, şiir.¬Şair sözün göstersin,¬Gözel gözün göstersin,¬Payızını ver de ver,¬Şairlik, şiir, şiir.¬Şair sözün göstersin,¬Gözel gözün göstersin,¬Payızını ver de ver,¬Şairlik, şiir, şiir.¬Şair sözün göstersin,¬Gözel gözün göstersin,¬Payızını ver de ver,¬Şairlik, şiir, şiir.', 'Ey yakarış¬Ey uyan

Discriminator loss: tensor([[1.1806e-02, 9.8800e-01, 1.7730e-05, 1.7706e-04],
        [1.7671e-02, 9.8227e-01, 6.4784e-06, 4.9086e-05],
        [4.7167e-02, 9.5250e-01, 5.7659e-05, 2.7964e-04],
        [9.7555e-03, 9.9008e-01, 1.3133e-05, 1.4860e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[8.1556e-02, 9.1817e-01, 2.5171e-05, 2.5084e-04],
        [2.1096e-01, 7.8872e-01, 1.6285e-05, 2.9764e-04],
        [2.8932e-01, 7.1025e-01, 2.3878e-05, 4.0796e-04],
        [2.2424e-01, 7.7521e-01, 2.5314e-05, 5.3035e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[3.6650e-03, 1.0571e-04, 9.8359e-01, 1.2641e-02],
        [4.0591e-03, 1.6920e-04, 9.9176e-01, 4.0095e-03],
        [1.9687e-03, 4.6551e-05, 9.9389e-01, 4.0977e-03],
        [3.8247e-03, 6.0476e-05, 9.9403e-01, 2.0888e-03],
        [8.8062e-04, 4.7247e-05, 9.9750e-01, 1.5745e-03],
        [2.2452e-03, 5.5609e-05, 9.9435e-01,

Discriminator loss: tensor([[2.6440e-02, 9.7288e-01, 1.9096e-04, 4.9072e-04],
        [1.1504e-03, 9.9869e-01, 4.4109e-05, 1.1613e-04],
        [8.5328e-04, 9.9901e-01, 4.2022e-05, 9.9584e-05],
        [2.7331e-03, 9.9703e-01, 6.5452e-05, 1.7168e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[3.4290e-01, 6.5600e-01, 3.1629e-04, 7.8167e-04],
        [3.6488e-01, 6.3293e-01, 1.4254e-03, 7.7150e-04],
        [2.1037e-01, 7.8794e-01, 1.0147e-03, 6.7770e-04],
        [2.5645e-01, 7.4197e-01, 9.4030e-04, 6.3478e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[6.3158e-04, 1.7394e-04, 9.8933e-01, 9.8680e-03],
        [7.9327e-05, 5.3239e-05, 9.9776e-01, 2.1080e-03],
        [2.4249e-04, 1.8457e-04, 9.9771e-01, 1.8637e-03],
        [4.2956e-04, 1.3731e-04, 9.9476e-01, 4.6747e-03],
        [3.3864e-04, 2.5344e-04, 9.8840e-01, 1.1005e-02],
        [3.8179e-04, 3.0127e-04, 9.9192e-01,

Discriminator loss: tensor([[2.4006e-02, 9.7560e-01, 1.6531e-04, 2.3271e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.5642e-01, 4.3223e-02, 2.5244e-04, 1.0104e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[3.4860e-04, 3.7303e-06, 9.9909e-01, 5.6175e-04],
        [1.2736e-03, 1.2332e-05, 9.9816e-01, 5.5840e-04],
        [2.0904e-04, 2.8680e-06, 9.9918e-01, 6.0321e-04],
        [5.2700e-04, 9.7293e-06, 9.9751e-01, 1.9512e-03],
        [1.2533e-03, 1.7594e-05, 9.9726e-01, 1.4702e-03],
        [5.4188e-04, 1.0750e-05, 9.9742e-01, 2.0243e-03],
        [3.0108e-03, 2.3614e-05, 9.9571e-01, 1.2581e-03],
        [9.8332e-04, 8.8919e-06, 9.9824e-01, 7.6437e-04],
        [3.4173e-04, 7.2285e-06, 9.9688e-01, 2.7703e-03],
        [1.6769e-03, 2.9617e-05, 9.9724e-01, 1.0525e-03],
        [1.1887e-03, 6.5318e-06, 9.9832e-01, 4.8074e-04],
        [8.1905e-04, 2.2493e-05, 9.9811e-01,

Discriminator loss: tensor([[1.4526e-03, 9.9810e-01, 2.2436e-04, 2.2497e-04],
        [4.2723e-05, 9.9992e-01, 7.7605e-06, 2.9658e-05],
        [6.0844e-05, 9.9989e-01, 6.9105e-06, 4.2995e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.4701e-01, 8.5224e-01, 2.3256e-04, 5.1806e-04],
        [4.9795e-01, 5.0104e-01, 3.8318e-04, 6.2260e-04],
        [5.0715e-02, 9.4838e-01, 3.3429e-04, 5.6898e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[2.5767e-05, 3.6686e-04, 9.9687e-01, 2.7365e-03],
        [5.4076e-05, 1.9027e-03, 9.9442e-01, 3.6274e-03],
        [1.1020e-05, 2.4978e-04, 9.9857e-01, 1.1704e-03],
        [4.5386e-05, 4.5835e-04, 9.9654e-01, 2.9586e-03],
        [2.7499e-05, 3.8206e-04, 9.9874e-01, 8.5029e-04],
        [3.8309e-05, 4.1006e-04, 9.9887e-01, 6.8087e-04],
        [2.2512e-05, 1.4283e-03, 9.9723e-01, 1.3150e-03],
        [3.8841e-05, 1.1704e-03, 9.9787e-01,

Discriminator loss: tensor([[2.5402e-04, 9.9962e-01, 4.0318e-05, 8.9839e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9999e-01, 5.2889e-06, 2.2581e-06, 2.2085e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[2.5875e-05, 1.5138e-06, 9.9993e-01, 4.5817e-05],
        [3.6177e-05, 2.8458e-05, 9.9937e-01, 5.6445e-04],
        [2.9943e-05, 2.2463e-06, 9.9990e-01, 6.3999e-05],
        [2.3442e-06, 3.8645e-06, 9.9971e-01, 2.8155e-04],
        [1.0279e-05, 2.3553e-06, 9.9984e-01, 1.4606e-04],
        [3.4368e-06, 8.3575e-07, 9.9996e-01, 3.6950e-05],
        [4.8378e-06, 9.9877e-07, 9.9974e-01, 2.5109e-04],
        [1.1379e-05, 1.5027e-06, 9.9975e-01, 2.3997e-04],
        [1.4082e-05, 8.9755e-07, 9.9990e-01, 8.4831e-05],
        [2.1365e-06, 9.7428e-07, 9.9992e-01, 7.4081e-05],
        [5.0111e-06, 9.2353e-06, 9.9951e-01, 4.7398e-04],
        [4.7585e-06, 9.5430e-07, 9.9992e-01,

Discriminator loss: tensor([[8.0131e-05, 9.9984e-01, 2.3154e-05, 5.4013e-05],
        [8.3484e-05, 9.9983e-01, 8.8944e-06, 7.7343e-05],
        [3.8499e-05, 9.9984e-01, 2.4131e-05, 9.7428e-05],
        [4.2779e-05, 9.9991e-01, 1.2459e-05, 3.2432e-05],
        [2.2105e-04, 9.9972e-01, 3.0290e-05, 2.3752e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9990e-01, 4.6313e-05, 4.1720e-05, 7.4885e-06],
        [9.9992e-01, 2.8538e-05, 3.6417e-05, 1.9057e-05],
        [9.9952e-01, 3.9029e-04, 7.3037e-05, 1.3059e-05],
        [9.9981e-01, 8.3785e-05, 9.8717e-05, 4.0879e-06],
        [9.9997e-01, 1.8063e-05, 8.5872e-06, 4.4256e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[1.1665e-06, 6.6144e-06, 9.9990e-01, 9.4455e-05],
        [9.6647e-07, 7.9271e-06, 9.9989e-01, 1.0370e-04],
        [2.5614e-06, 1.1495e-05, 9.9981e-01, 1.7547e-04],
        [4.0345e-06, 3.1067e-05, 9.9984e-01,

Discriminator loss: tensor([[1.0235e-04, 9.9945e-01, 3.7827e-04, 7.3121e-05],
        [9.2489e-02, 8.8341e-01, 2.3455e-02, 6.4541e-04],
        [9.0804e-05, 9.9953e-01, 3.1867e-04, 6.0670e-05],
        [2.6708e-04, 9.9934e-01, 3.0951e-04, 7.8703e-05],
        [1.0956e-04, 9.9951e-01, 3.2253e-04, 6.1272e-05],
        [2.6686e-04, 9.9923e-01, 4.4016e-04, 6.2015e-05],
        [7.7094e-05, 9.9952e-01, 3.6163e-04, 4.4779e-05],
        [1.4650e-04, 9.9930e-01, 4.4826e-04, 1.0725e-04],
        [1.4181e-03, 9.9714e-01, 1.3391e-03, 9.8950e-05],
        [3.4081e-03, 9.9313e-01, 3.1747e-03, 2.8449e-04],
        [7.1761e-05, 9.9964e-01, 2.4238e-04, 4.5506e-05],
        [1.0434e-04, 9.9940e-01, 3.8524e-04, 1.0988e-04],
        [6.9610e-05, 9.9961e-01, 2.8266e-04, 3.3631e-05],
        [1.9633e-04, 9.9840e-01, 1.3587e-03, 4.8622e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[2.9606e-01, 2.6424e-02, 6.7595e-01, 1.5634e-03],
        [1.5506e-0

Discriminator loss: tensor([[3.2665e-04, 9.8780e-01, 1.1856e-02, 1.4691e-05],
        [5.0781e-05, 9.9747e-01, 2.4541e-03, 2.1400e-05],
        [1.4214e-04, 9.9495e-01, 4.8469e-03, 6.3793e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9980e-01, 6.3059e-05, 1.3606e-04, 2.3529e-06],
        [9.9928e-01, 4.5348e-04, 2.5757e-04, 6.5669e-06],
        [9.9947e-01, 2.3437e-04, 2.9157e-04, 7.2892e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[2.5455e-04, 1.1626e-02, 9.8811e-01, 5.6157e-06],
        [1.8026e-04, 4.5069e-03, 9.9531e-01, 1.8855e-06],
        [1.5744e-04, 5.3350e-03, 9.9450e-01, 2.6146e-06],
        [3.1654e-04, 7.3339e-03, 9.9234e-01, 4.5928e-06],
        [4.1083e-04, 4.8340e-03, 9.9475e-01, 1.0832e-06],
        [9.3084e-05, 5.3872e-03, 9.9452e-01, 3.6405e-06],
        [5.4245e-04, 6.3624e-03, 9.9309e-01, 4.0740e-06],
        [6.1791e-04, 2.0303e-02, 9.7907e-01,

Discriminator loss: tensor([[8.8754e-05, 9.9926e-01, 6.3136e-04, 1.6790e-05],
        [1.6664e-02, 9.2210e-01, 6.1191e-02, 4.0887e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9982e-01, 1.5174e-04, 2.6991e-05, 2.8573e-06],
        [9.9998e-01, 1.0116e-05, 5.0966e-06, 3.5866e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[1.9960e-03, 2.0067e-02, 9.7794e-01, 1.5127e-06],
        [1.0486e-03, 4.7924e-02, 9.5101e-01, 1.2431e-05],
        [3.1374e-04, 2.2705e-03, 9.9741e-01, 2.6030e-06],
        [5.3736e-04, 2.0897e-03, 9.9737e-01, 1.6368e-06],
        [6.8449e-04, 7.0457e-03, 9.9226e-01, 6.0276e-06],
        [1.7386e-03, 5.4300e-03, 9.9283e-01, 5.5648e-06],
        [2.5309e-04, 6.3836e-03, 9.9336e-01, 3.6683e-06],
        [1.3328e-04, 3.3783e-03, 9.9649e-01, 1.3847e-06],
        [1.9257e-04, 3.5635e-03, 9.9624e-01, 1.7235e-06],
        [6.7069e-04, 8.0162e-03, 9.9131e-01,

Discriminator loss: tensor([[6.4538e-05, 9.8852e-01, 1.1363e-02, 5.1498e-05],
        [2.3053e-05, 9.9758e-01, 2.3484e-03, 4.7679e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9283e-01, 4.1740e-03, 2.8997e-03, 9.2759e-05],
        [9.9784e-01, 6.3445e-04, 1.4956e-03, 2.8303e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[5.4524e-05, 2.0499e-03, 9.9788e-01, 1.2580e-05],
        [6.1470e-05, 3.6017e-03, 9.9632e-01, 1.6899e-05],
        [9.3371e-05, 5.3131e-03, 9.9458e-01, 1.3508e-05],
        [3.6928e-05, 3.1683e-03, 9.9678e-01, 1.3124e-05],
        [8.8590e-05, 4.0654e-03, 9.9583e-01, 1.7334e-05],
        [6.0706e-05, 3.3451e-03, 9.9657e-01, 2.1481e-05],
        [5.1308e-05, 2.2273e-03, 9.9771e-01, 8.2739e-06],
        [1.0209e-04, 5.0046e-03, 9.9487e-01, 2.2230e-05],
        [2.7516e-04, 1.0327e-02, 9.8938e-01, 1.3409e-05],
        [5.7953e-05, 3.1594e-03, 9.9677e-01,

Discriminator loss: tensor([[1.7255e-05, 9.9985e-01, 1.2292e-04, 8.7108e-06],
        [2.6791e-05, 9.9982e-01, 1.3474e-04, 1.4509e-05],
        [6.2786e-06, 9.9989e-01, 9.8377e-05, 5.6315e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9693e-01, 2.8804e-03, 1.6760e-04, 1.9360e-05],
        [9.9998e-01, 9.7069e-06, 6.0002e-06, 8.5913e-07],
        [9.9985e-01, 1.1477e-04, 3.5163e-05, 2.4407e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[2.4890e-04, 3.1085e-02, 9.6864e-01, 2.7091e-05],
        [4.6396e-04, 6.7390e-02, 9.3209e-01, 5.3129e-05],
        [6.2900e-04, 1.0598e-01, 8.9336e-01, 3.4448e-05],
        [1.5077e-03, 5.0288e-02, 9.4817e-01, 3.7251e-05],
        [6.6407e-04, 4.4876e-02, 9.5441e-01, 4.6185e-05],
        [4.2804e-04, 5.3241e-02, 9.4628e-01, 4.9259e-05],
        [7.8068e-04, 1.0518e-01, 8.9394e-01, 9.3995e-05],
        [8.2511e-04, 5.5545e-02, 9.4360e-01,

Discriminator loss: tensor([[2.1230e-05, 9.9946e-01, 4.9798e-04, 2.4694e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9945e-01, 2.2030e-04, 3.1502e-04, 1.2041e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[4.1817e-05, 9.4396e-04, 9.9901e-01, 4.0407e-06],
        [9.0291e-05, 1.7644e-03, 9.9814e-01, 7.0391e-06],
        [7.2960e-05, 6.6672e-04, 9.9925e-01, 8.3302e-06],
        [2.2490e-05, 5.8183e-04, 9.9939e-01, 4.4881e-06],
        [1.6668e-04, 2.4513e-03, 9.9736e-01, 1.8237e-05],
        [1.0221e-03, 1.1869e-02, 9.8706e-01, 5.0106e-05],
        [6.2861e-05, 5.5386e-04, 9.9938e-01, 7.0760e-06],
        [4.4427e-05, 1.2533e-03, 9.9870e-01, 2.7996e-06],
        [1.0767e-04, 9.5783e-04, 9.9893e-01, 9.3561e-06],
        [7.9823e-05, 6.9873e-04, 9.9922e-01, 4.1478e-06],
        [5.2405e-05, 3.2021e-04, 9.9962e-01, 2.7588e-06],
        [4.3437e-05, 3.1259e-03, 9.9682e-01,

Discriminator loss: tensor([[1.0710e-04, 9.9777e-01, 2.1065e-03, 2.0326e-05],
        [1.6091e-03, 9.9414e-01, 4.1959e-03, 5.6335e-05],
        [1.2293e-04, 9.9948e-01, 3.7221e-04, 2.2172e-05],
        [6.3568e-06, 9.9985e-01, 1.3095e-04, 1.0665e-05],
        [1.0307e-03, 9.9292e-01, 6.0184e-03, 3.5143e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9997e-01, 1.2505e-05, 2.1698e-05, 7.5990e-07],
        [9.9988e-01, 5.0848e-05, 6.8126e-05, 1.6052e-06],
        [9.9980e-01, 9.4572e-05, 9.9772e-05, 2.4230e-06],
        [9.9998e-01, 2.5891e-06, 1.4590e-05, 3.0503e-07],
        [9.9994e-01, 2.4362e-05, 3.9740e-05, 8.1328e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[1.0433e-04, 8.2304e-04, 9.9907e-01, 2.3021e-06],
        [9.7541e-05, 1.6629e-03, 9.9823e-01, 1.0758e-05],
        [1.6425e-04, 1.0699e-03, 9.9876e-01, 9.6005e-06],
        [6.8501e-05, 1.2752e-03, 9.9865e-01,

Discriminator loss: tensor([[7.6816e-07, 1.0000e+00, 1.2029e-06, 2.0321e-06],
        [1.8215e-06, 9.9999e-01, 5.0009e-06, 1.4495e-06],
        [3.1903e-06, 9.9999e-01, 1.7174e-06, 1.3026e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9996e-01, 3.0809e-05, 3.3941e-06, 1.3462e-06],
        [9.9979e-01, 2.0663e-04, 5.6928e-06, 2.2723e-06],
        [9.9986e-01, 1.3075e-04, 1.0833e-05, 1.6257e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[2.0029e-04, 9.9027e-03, 9.8986e-01, 3.5619e-05],
        [6.3349e-05, 4.9019e-03, 9.9503e-01, 5.9827e-06],
        [4.9850e-05, 7.5146e-03, 9.9243e-01, 4.5860e-06],
        [1.0461e-04, 2.7898e-02, 9.7198e-01, 1.6560e-05],
        [2.8352e-05, 3.6988e-03, 9.9626e-01, 9.2986e-06],
        [5.6330e-05, 1.1694e-02, 9.8824e-01, 7.8542e-06],
        [1.7847e-04, 9.2696e-03, 9.9054e-01, 8.9122e-06],
        [1.8160e-04, 2.0994e-02, 9.7880e-01,

Discriminator loss: tensor([[5.9193e-07, 9.9999e-01, 5.7887e-06, 1.8960e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9939e-01, 5.8940e-04, 1.7863e-05, 5.1081e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[1.6123e-05, 1.1625e-04, 9.9987e-01, 9.5334e-07],
        [7.7968e-06, 1.4843e-04, 9.9984e-01, 2.1501e-06],
        [4.5543e-05, 2.1858e-04, 9.9973e-01, 2.7583e-06],
        [9.4432e-06, 8.2805e-05, 9.9991e-01, 9.3591e-07],
        [8.5256e-06, 7.6400e-05, 9.9991e-01, 1.3756e-06],
        [1.6804e-05, 1.2457e-04, 9.9985e-01, 3.6825e-06],
        [1.4452e-05, 2.1269e-04, 9.9977e-01, 3.5224e-06],
        [1.0971e-04, 5.5429e-04, 9.9933e-01, 3.4869e-06],
        [2.9323e-05, 3.2136e-04, 9.9965e-01, 2.7658e-06],
        [1.3113e-05, 1.8336e-04, 9.9980e-01, 1.9132e-06],
        [3.8004e-05, 1.8894e-04, 9.9977e-01, 6.5362e-06],
        [3.7847e-05, 5.0305e-04, 9.9946e-01,

Discriminator loss: tensor([[1.0786e-07, 9.9988e-01, 1.2170e-04, 2.8137e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 1.5136e-07, 3.3532e-07, 3.9074e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[4.3943e-07, 2.1186e-04, 9.9979e-01, 2.4648e-06],
        [4.2621e-08, 3.2929e-05, 9.9997e-01, 1.2945e-06],
        [6.5510e-08, 4.0149e-05, 9.9996e-01, 3.2726e-06],
        [1.9617e-07, 3.8074e-05, 9.9996e-01, 1.1441e-06],
        [1.3588e-07, 1.1208e-04, 9.9988e-01, 3.4951e-06],
        [2.6852e-07, 5.0877e-05, 9.9995e-01, 1.4943e-06],
        [9.4251e-08, 4.9811e-05, 9.9995e-01, 8.7827e-07],
        [2.3099e-07, 9.8722e-05, 9.9990e-01, 2.5219e-06],
        [9.1645e-08, 7.1193e-05, 9.9993e-01, 1.8216e-06],
        [9.3155e-07, 3.8184e-05, 9.9996e-01, 1.2985e-06],
        [6.1665e-08, 8.1848e-05, 9.9992e-01, 1.8025e-06],
        [7.8260e-08, 5.5824e-05, 9.9994e-01,

Discriminator loss: tensor([[1.7988e-06, 9.9999e-01, 1.6448e-06, 5.4496e-06],
        [6.2522e-07, 1.0000e+00, 6.3489e-07, 1.1850e-06],
        [2.7529e-07, 1.0000e+00, 4.5580e-07, 1.3128e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9997e-01, 2.7547e-05, 2.1117e-07, 1.2268e-06],
        [9.9999e-01, 7.4464e-06, 7.0858e-08, 4.0021e-07],
        [9.9991e-01, 7.9318e-05, 3.7585e-07, 7.3616e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[3.1433e-05, 2.4878e-04, 9.9971e-01, 1.1288e-05],
        [8.6646e-06, 1.6704e-04, 9.9981e-01, 1.5279e-05],
        [8.5852e-06, 3.0572e-04, 9.9967e-01, 1.6737e-05],
        [2.7586e-05, 8.1129e-04, 9.9914e-01, 1.9014e-05],
        [8.7870e-06, 1.1338e-03, 9.9881e-01, 4.3179e-05],
        [2.7483e-05, 4.0048e-04, 9.9954e-01, 3.3267e-05],
        [7.7087e-05, 1.6713e-03, 9.9823e-01, 2.5652e-05],
        [4.4235e-05, 1.7062e-03, 9.9821e-01,

Discriminator loss: tensor([[8.1336e-04, 7.8344e-01, 2.1501e-01, 7.3715e-04],
        [1.0496e-04, 9.5815e-01, 4.1110e-02, 6.3157e-04],
        [1.2692e-04, 9.9770e-01, 2.0324e-03, 1.4249e-04]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 2.2974e-08, 2.2054e-07, 2.6276e-07],
        [9.9999e-01, 1.0207e-06, 1.6407e-06, 2.6055e-06],
        [1.0000e+00, 5.6029e-08, 1.0505e-06, 7.9353e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[2.3757e-06, 1.6971e-06, 9.9999e-01, 7.2604e-06],
        [3.6754e-06, 2.4534e-06, 9.9999e-01, 8.4952e-06],
        [2.7617e-06, 2.8348e-06, 9.9998e-01, 1.0662e-05],
        [8.2050e-07, 1.2959e-06, 9.9999e-01, 4.6778e-06],
        [3.2334e-06, 4.1121e-06, 9.9999e-01, 7.4307e-06],
        [1.1295e-05, 5.1734e-06, 9.9997e-01, 1.2667e-05],
        [1.8554e-06, 2.8327e-06, 9.9999e-01, 2.4563e-06],
        [1.8343e-06, 2.6484e-06, 9.9998e-01,

Discriminator loss: tensor([[1.8215e-07, 1.0000e+00, 1.2471e-08, 9.5765e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9995e-01, 4.5998e-05, 1.4376e-08, 5.6121e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[2.3803e-05, 3.4485e-04, 9.9961e-01, 2.1588e-05],
        [1.5664e-04, 7.0469e-04, 9.9911e-01, 3.0028e-05],
        [2.7230e-05, 6.6761e-04, 9.9927e-01, 3.4780e-05],
        [1.1662e-03, 1.0608e-03, 9.9771e-01, 6.8039e-05],
        [5.6180e-05, 3.9458e-04, 9.9952e-01, 3.1535e-05],
        [2.2933e-05, 1.6441e-04, 9.9976e-01, 5.6284e-05],
        [5.5549e-05, 2.8254e-04, 9.9964e-01, 1.9464e-05],
        [6.4140e-05, 6.3760e-04, 9.9920e-01, 9.7401e-05],
        [2.6747e-05, 9.1926e-04, 9.9892e-01, 1.3214e-04],
        [3.7812e-05, 4.1011e-04, 9.9949e-01, 6.2635e-05],
        [3.0783e-04, 5.5130e-04, 9.9905e-01, 8.7996e-05],
        [2.1814e-04, 1.2118e-03, 9.9853e-01,

Discriminator loss: tensor([[2.9258e-06, 9.9999e-01, 9.9166e-07, 3.4020e-06],
        [2.8426e-06, 9.9999e-01, 1.5528e-07, 2.3758e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9867e-01, 1.3252e-03, 3.4418e-08, 6.7789e-06],
        [1.0000e+00, 1.8710e-06, 2.1881e-09, 2.0557e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[1.8233e-05, 3.2920e-04, 9.9957e-01, 8.7531e-05],
        [3.6705e-05, 1.1992e-04, 9.9979e-01, 5.0177e-05],
        [1.4871e-04, 9.5614e-04, 9.9883e-01, 6.5804e-05],
        [8.2992e-06, 1.9732e-04, 9.9976e-01, 3.7963e-05],
        [2.8482e-05, 9.0154e-05, 9.9981e-01, 6.6776e-05],
        [4.1378e-05, 3.3584e-04, 9.9957e-01, 5.5617e-05],
        [2.6048e-05, 3.1609e-04, 9.9960e-01, 5.3587e-05],
        [9.5571e-06, 6.5392e-05, 9.9991e-01, 1.2171e-05],
        [1.4522e-05, 2.0768e-04, 9.9976e-01, 2.2538e-05],
        [1.9162e-05, 2.2392e-04, 9.9969e-01,

Discriminator loss: tensor([[1.5442e-07, 1.0000e+00, 4.0483e-08, 9.2873e-07],
        [6.8829e-08, 1.0000e+00, 2.1747e-08, 4.7678e-07],
        [1.4902e-06, 9.9999e-01, 5.7344e-07, 4.3556e-06],
        [1.1805e-07, 1.0000e+00, 7.5782e-08, 1.4484e-06],
        [3.5844e-06, 1.0000e+00, 1.2626e-07, 9.0562e-07],
        [1.4193e-07, 1.0000e+00, 5.4463e-08, 1.2686e-06],
        [2.1725e-07, 1.0000e+00, 4.8301e-08, 1.1606e-06],
        [5.7954e-04, 9.9930e-01, 9.5151e-05, 2.2602e-05],
        [1.2154e-06, 1.0000e+00, 6.9020e-07, 2.4058e-06],
        [1.3289e-07, 1.0000e+00, 2.9526e-08, 1.0520e-06],
        [3.1136e-07, 1.0000e+00, 1.2365e-07, 9.6583e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 5.8952e-07, 2.8414e-07, 1.7591e-07],
        [9.9999e-01, 8.1403e-06, 3.1972e-07, 3.3316e-07],
        [1.0000e+00, 2.6569e-06, 5.5090e-07, 9.8678e-07],
        [9.9994e-01, 5.3492e-05, 9.4314e-07, 1.2001e-06],
        [1.0000e+0

Discriminator loss: tensor([[1.0776e-06, 1.0000e+00, 2.0206e-07, 1.2891e-06],
        [1.2453e-06, 9.9999e-01, 3.2483e-07, 3.9253e-06],
        [1.2735e-07, 1.0000e+00, 4.6456e-08, 9.4933e-07],
        [2.5413e-06, 9.9999e-01, 7.2620e-07, 2.2959e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[9.9996e-01, 4.2112e-05, 4.8404e-09, 8.1072e-07],
        [1.0000e+00, 5.5191e-08, 2.4229e-10, 4.9339e-08],
        [1.0000e+00, 1.5435e-06, 2.7703e-10, 8.6717e-08],
        [9.9990e-01, 9.6171e-05, 9.5875e-09, 4.0083e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[7.4443e-06, 4.0083e-05, 9.9994e-01, 9.8998e-06],
        [2.2746e-05, 3.3780e-04, 9.9959e-01, 5.3554e-05],
        [6.2077e-06, 7.8806e-05, 9.9990e-01, 1.4838e-05],
        [2.5209e-05, 2.2094e-04, 9.9974e-01, 1.6831e-05],
        [1.0937e-05, 2.6435e-04, 9.9962e-01, 9.9867e-05],
        [2.1808e-05, 2.3811e-04, 9.9971e-01,

Discriminator loss: tensor([[1.6144e-06, 1.0000e+00, 2.6538e-07, 1.9432e-06],
        [1.1396e-06, 1.0000e+00, 3.1033e-07, 7.7344e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 7.1058e-09, 8.2760e-11, 1.8827e-08],
        [1.0000e+00, 2.3386e-09, 7.1561e-11, 7.4099e-09]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[8.0023e-06, 4.5671e-05, 9.9993e-01, 1.5468e-05],
        [7.6271e-05, 4.9050e-04, 9.9940e-01, 2.9056e-05],
        [1.4845e-06, 2.6830e-05, 9.9994e-01, 3.5969e-05],
        [2.5333e-06, 1.5188e-05, 9.9998e-01, 6.3811e-06],
        [1.8708e-06, 1.3661e-05, 9.9998e-01, 5.2860e-06],
        [7.4331e-06, 7.0675e-05, 9.9989e-01, 2.8744e-05],
        [1.2527e-05, 2.0416e-04, 9.9977e-01, 1.0552e-05],
        [1.6051e-06, 3.5555e-05, 9.9995e-01, 9.7800e-06],
        [3.8425e-05, 9.3432e-05, 9.9986e-01, 6.9238e-06],
        [1.6153e-05, 1.6142e-04, 9.9979e-01,

Discriminator loss: tensor([[1.5869e-06, 1.0000e+00, 1.7060e-07, 1.0867e-06],
        [4.3280e-06, 9.9999e-01, 4.3215e-07, 4.9997e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 2.7692e-08, 6.8765e-11, 2.8144e-08],
        [1.0000e+00, 8.9678e-09, 2.0430e-10, 1.2891e-08]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[1.0686e-06, 4.9222e-06, 9.9998e-01, 1.5922e-05],
        [6.1507e-05, 1.0463e-04, 9.9980e-01, 3.0125e-05],
        [1.2814e-06, 8.8556e-06, 9.9999e-01, 3.3014e-06],
        [8.3712e-06, 4.0215e-05, 9.9994e-01, 1.3004e-05],
        [1.1266e-06, 1.6816e-05, 9.9997e-01, 1.0121e-05],
        [9.7844e-07, 1.0908e-05, 9.9999e-01, 1.8425e-06],
        [2.2894e-06, 1.5067e-05, 9.9998e-01, 7.5332e-06],
        [2.9009e-06, 1.9263e-05, 9.9996e-01, 1.5569e-05],
        [5.0538e-06, 2.1111e-05, 9.9997e-01, 3.7519e-06],
        [1.7189e-05, 4.3421e-05, 9.9991e-01,

Discriminator loss: tensor([[1.1588e-04, 9.9900e-01, 7.9689e-04, 8.7865e-05],
        [7.2485e-06, 9.9913e-01, 7.7956e-04, 8.7581e-05],
        [1.7713e-06, 9.9967e-01, 2.6925e-04, 5.8566e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 1.5100e-10, 2.6044e-10, 8.1994e-09],
        [1.0000e+00, 6.5389e-10, 5.8123e-10, 1.9686e-08],
        [1.0000e+00, 5.7927e-10, 6.1406e-10, 5.5458e-08]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[5.3937e-07, 6.3948e-05, 9.9992e-01, 1.2651e-05],
        [2.0479e-06, 7.2251e-05, 9.9991e-01, 1.3883e-05],
        [7.7274e-07, 3.6607e-05, 9.9995e-01, 1.3625e-05],
        [1.9557e-07, 8.8823e-06, 9.9999e-01, 2.7343e-06],
        [1.4635e-07, 1.5155e-05, 9.9998e-01, 3.7065e-06],
        [5.3071e-06, 7.2374e-05, 9.9991e-01, 1.0253e-05],
        [1.4462e-07, 7.5135e-06, 9.9999e-01, 3.5380e-06],
        [2.8173e-06, 1.2456e-04, 9.9985e-01,

Discriminator loss: tensor([[5.3394e-07, 9.9992e-01, 7.1235e-05, 6.2581e-06],
        [1.3936e-07, 9.9997e-01, 1.9392e-05, 8.8123e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 1.6733e-09, 1.9539e-10, 3.9138e-08],
        [1.0000e+00, 4.6382e-09, 2.6144e-10, 5.0771e-08]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[6.1683e-07, 3.9695e-05, 9.9994e-01, 1.7900e-05],
        [4.0450e-07, 4.4971e-05, 9.9994e-01, 1.1517e-05],
        [5.5101e-07, 2.7610e-05, 9.9996e-01, 1.0130e-05],
        [1.0785e-07, 2.5162e-05, 9.9997e-01, 4.4393e-06],
        [3.9600e-07, 1.8212e-05, 9.9997e-01, 1.3090e-05],
        [1.2729e-06, 2.5974e-05, 9.9995e-01, 1.7718e-05],
        [2.2830e-06, 2.5286e-05, 9.9996e-01, 1.3113e-05],
        [2.2112e-06, 5.2875e-05, 9.9994e-01, 5.1816e-06],
        [1.6594e-07, 2.1035e-05, 9.9998e-01, 1.5530e-06],
        [2.3689e-07, 6.7385e-05, 9.9993e-01,

Discriminator loss: tensor([[4.5009e-07, 9.9999e-01, 2.2784e-06, 5.3158e-06],
        [3.5325e-07, 1.0000e+00, 2.8663e-07, 3.2551e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 1.7913e-09, 1.2760e-11, 2.6804e-08],
        [1.0000e+00, 8.1104e-11, 1.0841e-11, 7.6067e-09]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[4.2265e-08, 5.6110e-08, 1.0000e+00, 4.4615e-07],
        [2.9141e-08, 3.0254e-08, 1.0000e+00, 4.9823e-07],
        [1.1807e-07, 1.9260e-07, 1.0000e+00, 1.6950e-06],
        [1.4918e-07, 1.3168e-07, 1.0000e+00, 2.1325e-06],
        [2.0306e-07, 1.8515e-07, 1.0000e+00, 2.1617e-06],
        [1.7925e-08, 4.2046e-08, 1.0000e+00, 1.0065e-06],
        [3.1420e-08, 5.4772e-08, 1.0000e+00, 7.3724e-07],
        [1.2076e-07, 9.2450e-08, 1.0000e+00, 2.4963e-06],
        [1.4499e-07, 1.2620e-07, 9.9999e-01, 5.1398e-06],
        [3.2992e-08, 6.4124e-08, 1.0000e+00,

Discriminator loss: tensor([[1.4248e-04, 9.9972e-01, 5.8430e-05, 7.9935e-05]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 1.2423e-10, 3.2512e-11, 5.4499e-09]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[4.6059e-08, 6.0817e-08, 1.0000e+00, 7.6750e-07],
        [5.4015e-07, 1.6273e-07, 1.0000e+00, 1.5122e-06],
        [6.7776e-08, 4.1190e-08, 1.0000e+00, 9.4586e-07],
        [5.6934e-08, 1.9349e-07, 1.0000e+00, 7.1039e-07],
        [2.3994e-08, 3.7517e-08, 1.0000e+00, 5.3623e-07],
        [1.0730e-07, 6.6338e-08, 1.0000e+00, 1.4116e-06],
        [5.9108e-08, 1.0095e-07, 1.0000e+00, 2.2330e-06],
        [1.0951e-07, 3.7494e-08, 1.0000e+00, 6.1720e-07],
        [5.3857e-08, 5.6998e-08, 1.0000e+00, 1.3046e-06],
        [4.9382e-08, 5.8478e-08, 1.0000e+00, 1.0739e-06],
        [5.1606e-08, 8.6939e-08, 1.0000e+00, 8.0598e-07],
        [1.8418e-08, 3.4589e-08, 1.0000e+00,

Discriminator loss: tensor([[4.7269e-06, 9.9998e-01, 3.5309e-07, 1.3602e-05],
        [1.4645e-06, 9.9999e-01, 3.5923e-07, 3.2764e-06],
        [2.0089e-07, 1.0000e+00, 1.9280e-06, 1.7012e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 7.4186e-12, 9.3044e-12, 1.4971e-09],
        [1.0000e+00, 4.7642e-11, 6.6758e-12, 1.0112e-08],
        [1.0000e+00, 1.0024e-10, 6.1001e-12, 5.4647e-09]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[4.9394e-07, 5.0346e-07, 9.9999e-01, 4.8297e-06],
        [5.9197e-08, 9.4650e-08, 1.0000e+00, 1.3271e-06],
        [2.7995e-08, 5.5543e-08, 1.0000e+00, 1.3369e-06],
        [5.9779e-08, 8.3317e-08, 1.0000e+00, 1.9869e-06],
        [4.3427e-08, 5.8876e-08, 1.0000e+00, 7.6465e-07],
        [1.2849e-07, 2.6227e-07, 1.0000e+00, 2.8429e-06],
        [5.4323e-08, 6.8787e-08, 1.0000e+00, 7.1887e-07],
        [1.9902e-07, 1.8888e-07, 1.0000e+00,

Discriminator loss: tensor([[5.2095e-07, 9.9999e-01, 1.8046e-06, 3.6355e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 1.6103e-10, 4.4605e-11, 8.8813e-09]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[8.4751e-06, 1.3468e-04, 9.9972e-01, 1.3399e-04],
        [8.9186e-06, 1.9929e-04, 9.9972e-01, 6.8610e-05],
        [1.1092e-05, 4.3977e-04, 9.9951e-01, 4.2837e-05],
        [2.0118e-05, 3.9566e-04, 9.9951e-01, 7.1567e-05],
        [9.2637e-06, 4.4976e-04, 9.9949e-01, 5.5932e-05],
        [2.4086e-05, 2.4586e-04, 9.9965e-01, 7.8611e-05],
        [1.6969e-05, 1.4872e-04, 9.9980e-01, 3.7534e-05],
        [5.4942e-06, 3.5548e-04, 9.9961e-01, 2.7844e-05],
        [1.9884e-05, 1.6892e-04, 9.9969e-01, 1.2142e-04],
        [1.6485e-05, 5.7311e-04, 9.9933e-01, 8.4492e-05],
        [1.1201e-05, 2.5575e-04, 9.9968e-01, 5.2174e-05],
        [3.6477e-05, 5.0739e-04, 9.9936e-01,

Discriminator loss: tensor([[1.1070e-06, 1.0000e+00, 3.2225e-07, 1.8137e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 1.7630e-10, 5.1918e-11, 2.4248e-09]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[1.1476e-06, 2.6608e-05, 9.9997e-01, 3.4912e-06],
        [7.9701e-06, 9.3104e-05, 9.9989e-01, 1.2094e-05],
        [4.7126e-06, 5.0973e-05, 9.9994e-01, 3.8268e-06],
        [1.6626e-05, 1.9080e-04, 9.9977e-01, 1.9525e-05],
        [1.6310e-06, 3.6919e-05, 9.9995e-01, 7.5080e-06],
        [1.5118e-05, 2.4007e-04, 9.9966e-01, 8.6256e-05],
        [2.3570e-06, 2.0429e-05, 9.9996e-01, 2.1814e-05],
        [1.3578e-05, 7.2134e-05, 9.9989e-01, 2.2338e-05],
        [5.7694e-06, 5.6295e-05, 9.9993e-01, 1.2578e-05],
        [1.8940e-05, 2.1279e-04, 9.9971e-01, 5.9244e-05],
        [1.3792e-06, 1.2921e-05, 9.9998e-01, 8.2097e-06],
        [2.6287e-07, 1.9577e-05, 9.9998e-01,

Discriminator loss: tensor([[6.5964e-06, 9.9999e-01, 1.0020e-06, 4.3576e-06],
        [1.2198e-07, 1.0000e+00, 8.5718e-07, 5.1309e-07],
        [7.5644e-07, 9.9999e-01, 1.1730e-05, 3.2769e-07],
        [1.3659e-07, 9.9997e-01, 2.7431e-05, 1.1061e-06],
        [2.7320e-06, 9.9999e-01, 5.2171e-06, 3.6604e-07],
        [5.4293e-07, 9.9999e-01, 5.5642e-06, 5.4437e-07],
        [9.7248e-06, 9.9995e-01, 3.7482e-05, 1.3198e-06]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 3.3162e-06, 2.2240e-07, 5.4766e-08],
        [1.0000e+00, 1.2546e-08, 2.0772e-08, 4.2853e-09],
        [1.0000e+00, 1.3654e-08, 2.8900e-09, 4.5572e-09],
        [1.0000e+00, 1.5778e-07, 7.2570e-08, 1.5378e-08],
        [1.0000e+00, 1.3705e-06, 1.7641e-07, 8.5396e-08],
        [1.0000e+00, 1.1220e-07, 5.4359e-08, 3.3153e-08],
        [1.0000e+00, 1.6533e-06, 3.3109e-08, 1.5325e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction

Discriminator loss: tensor([[3.4629e-07, 1.0000e+00, 2.7159e-06, 2.7547e-07],
        [1.3693e-05, 9.9975e-01, 2.3034e-04, 7.1837e-06],
        [8.5408e-08, 9.9999e-01, 5.8753e-06, 5.7797e-07]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on real poetry tensor([[1.0000e+00, 3.5141e-11, 4.8570e-11, 3.9571e-09],
        [1.0000e+00, 1.4033e-11, 6.1986e-12, 1.6762e-09],
        [1.0000e+00, 7.6656e-11, 1.1023e-11, 2.1188e-09]], device='cuda:0',
       grad_fn=<SoftmaxBackward>)
Discriminator prediction on scrambled tensor([[5.5091e-07, 2.2387e-05, 9.9997e-01, 2.4822e-06],
        [1.3421e-06, 3.5875e-05, 9.9996e-01, 3.4647e-06],
        [1.4542e-06, 1.6555e-05, 9.9998e-01, 2.5591e-06],
        [3.0038e-07, 1.7039e-05, 9.9998e-01, 1.3524e-06],
        [5.3318e-07, 4.4128e-05, 9.9995e-01, 6.1301e-06],
        [1.3596e-06, 5.0304e-05, 9.9995e-01, 2.5272e-06],
        [1.0718e-06, 2.7573e-05, 9.9997e-01, 1.8713e-06],
        [2.4175e-06, 1.0826e-04, 9.9989e-01,