In [1]:
import sys
import torch
import os

In [2]:
# Work from the repository root so that `src.*` imports and `data/...` paths resolve.
# Guarded on the `src` package directory so that re-running this cell does not climb
# one directory further each time.
if not os.path.isdir("src"):
    os.chdir("..")

In [3]:
!ls

README.md	  jupyter.o7284871  out.txt		translations
checkpoints	  jupyter.o7285616  prose_translations	venv
data		  jupyter.o7288049  requirements.txt	vt-tr.o7146959
jupyter.o7250076  jupyter.o7301470  runs		vt-tr.o7209762
jupyter.o7272725  jupyter.o7315071  scripts		vt-tr.o7242335
jupyter.o7277176  logs		    src
jupyter.o7280433  notebooks	    training


In [4]:
# Use CUDA when it is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
from src.data_utils.batch import rebatch
from src.data_utils.data import get_training_iterators
from src.model.loss_optim import MultiGPULossCompute, SimpleLossCompute
from src.model.model import make_model, NoamOpt, LabelSmoothing, translate_sentence
from src.utils.utils import get_tokenizer

In [6]:
# Sub-word tokenizer; later cells call its Encode / EncodeAsIds / Decode methods.
# NOTE(review): "tr" presumably selects the Turkish model — confirm in src.utils.utils.
tok = get_tokenizer("tr")

In [7]:
# Training / validation / test iterators plus the three *_idx values returned with them.
# Only train_iter is used by the cells shown in this notebook.
train_iter, valid_iter, test_iter, train_idx, dev_idx, test_idx = get_training_iterators("tur", batch_size=1000)



In [8]:
# Mini dev set: the first 20 lines of the dev targets, used for sample decoding during training.
with open("data/tr/tur.dev.tgt", encoding="utf-8") as infile:
    # zip(range(20), ...) stops after 20 lines instead of loading the whole file with readlines().
    toystrings = [line.strip() for _, line in zip(range(20), infile)]

In [9]:
# Wrap every encoded poem in the start id (1) and end id (2), then pad to a common length with id 3.
# pad_sequence defaults to batch_first=False, so the result is laid out as (max_len, n_poems).
toyset = [torch.LongTensor([1] + tok.Encode(x) + [2])  for x in toystrings]
toyset = torch.nn.utils.rnn.pad_sequence(sequences=toyset, padding_value=3)

In [10]:
toyset

tensor([[    1,     1,     1,  ...,     1,     1,     1],
        [ 5605,     8,  1330,  ...,     8,   771,  2804],
        [27861,  2475, 10284,  ...,  3987,  5057, 11694],
        ...,
        [    3,     3,     3,  ...,     3,     3,     3],
        [    3,     3,     3,  ...,     3,     3,     3],
        [    3,     3,     3,  ...,     3,     3,     3]])

Two critics:
- Relevance critic: decides whether the input is related to the output or not.
- Style critic: classifies a text as poetry, prose, generated, or scrambled poetry.

One word/token selector:
- Chooses which tokens of the input sequence are used as the topic.

In [11]:
from torchtext import data
import torchtext as tt
from src.data_utils.batch import MyIterator
from src.model.model import batch_size_val

def each_line(fname, min_spaces=10, max_spaces=200, max_lines=2000000):
    """Read a text file and return its stripped lines, filtered by length.

    Length is measured as the number of space characters in the raw line,
    i.e. before surrounding whitespace is stripped.

    Args:
        fname: Path of a UTF-8 text file with one example per line.
        min_spaces: Lines containing fewer spaces than this are skipped.
        max_spaces: Lines containing more spaces than this are skipped.
        max_lines: Stop once this many lines have been kept; ``None`` reads
            the whole file.

    Returns:
        List of the kept lines with leading/trailing whitespace removed.
    """
    lines = []
    with open(fname, "r", encoding="utf-8") as infile:
        for line in infile:
            # Checked before processing so that max_lines=0 yields an empty list.
            if max_lines is not None and len(lines) >= max_lines:
                break
            n_spaces = line.count(" ")
            if n_spaces < min_spaces or n_spaces > max_spaces:
                continue
            lines.append(line.strip())
    return lines

def make_iter(lines, tokenizer, batch_size=256):
    """Build a length-sorted, non-repeating batch iterator over raw text lines.

    Every line is encoded with ``tokenizer.EncodeAsIds``; the field adds id 1
    at the start and id 2 at the end, and pads with id 3. Batches expose the
    ids under the ``src`` attribute and are created on the CPU. ``batch_size``
    is handed to ``MyIterator`` together with ``batch_size_fn=batch_size_val``.
    """
    def encode(text):
        return tokenizer.EncodeAsIds(text)

    src_field = data.Field(use_vocab=False, tokenize=encode,
                           init_token=1, eos_token=2, pad_token=3)
    field_map = {"src": ("src", src_field)}

    examples = []
    for line in lines:
        examples.append(tt.data.Example.fromdict({"src": line}, field_map))
    dataset = tt.data.Dataset(examples, {"src": src_field})

    return MyIterator(dataset, batch_size=batch_size, device="cpu", repeat=False,
                      sort=True, sort_key=lambda ex: len(ex.src),
                      batch_size_fn=batch_size_val, train=False)




In [12]:
# Prose lines (after each_line's length filter); the training loop feeds these to the
# style critic as its "prose" class.
prose_iter = make_iter(each_line("data/tr/prose/prose_gan.txt"), tok, batch_size=1000)




In [13]:
import random

def _shuffle_segments(poem, sep="¬"):
    """Return `poem` with its `sep`-delimited segments in random order."""
    segments = poem.split(sep)
    random.shuffle(segments)
    return sep.join(segments)


# Training poems with their "¬"-separated segments shuffled; used as the
# "scrambled poetry" examples.
to_scramble = each_line("data/tr/tur.train.tgt")
scrambled = [_shuffle_segments(poem) for poem in to_scramble]

In [14]:
# Iterator over the segment-shuffled poems; the training loop feeds these to the style
# critic as its "scrambled" class.
scrambled_iter = make_iter(scrambled, tok, batch_size=1000)

In [15]:
import copy
from src.model.model import MultiHeadedAttention, PositionwiseFeedForward, \
                    PositionalEncoding, Encoder, EncoderLayer, Generator, Embeddings
import torch.nn as nn

class Critic(nn.Module):
    """Encoder-only network: embeds token ids and returns normalised hidden states.

    ``generator`` is stored on the module but is not applied in ``forward``;
    callers project the returned states themselves.
    """

    def __init__(self, encoder, src_embed, generator):
        super().__init__()
        # Sub-modules are registered in this order, which fixes the parameter order.
        self.encoder = encoder
        self.src_embed = src_embed
        self.generator = generator
        self.steps = 0

    def forward(self, x, mask):
        """Embed `x`, apply each encoder layer with `mask`, then the encoder's final norm."""
        hidden = self.src_embed(x)
        for block in self.encoder.layers:
            hidden = block(hidden, mask)
        normalised = self.encoder.norm(hidden)
        return normalised


def make_critic(src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1):
    """Helper: construct an encoder-only critic from hyper-parameters.

    Args:
        src_vocab: Size of the input embedding table.
        tgt_vocab: Output width of the generator head.
        N: Number of encoder layers.
        d_model: Hidden size.
        d_ff: Inner size of the position-wise feed-forward blocks.
        h: Number of attention heads.
        dropout: Dropout rate handed to the sub-modules.

    Returns:
        A `Critic` whose parameters with more than one dimension have been
        Xavier-uniform initialised.
    """
    c = copy.deepcopy
    attn = MultiHeadedAttention(h, d_model)
    ff = PositionwiseFeedForward(d_model, d_ff, dropout)
    position = PositionalEncoding(d_model, dropout)
    generator = Generator(d_model, tgt_vocab)
    embed = nn.Sequential(Embeddings(d_model, src_vocab), c(position))
    encoder = Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N)
    critic = Critic(encoder, embed, generator)

    # This was important from their code.
    # Initialize parameters with Glorot / fan_avg.
    for p in critic.parameters():
        if p.dim() > 1:
            # In-place variant: the un-suffixed `xavier_uniform` is deprecated and
            # emits a warning when called.
            nn.init.xavier_uniform_(p)

    return critic


In [16]:
# Vocabulary size shared by all models below.
ntokens = 32000
# Encoder-decoder used to generate the poem from the selected tokens.
enc_dec = make_model(ntokens, ntokens, N=6).to(device)
# 2-way head per token; get_dae_input keeps the tokens whose argmax is non-zero.
token_selector = make_critic(ntokens, 2, N=2).to(device)
# 4-way head; the training loop reads column 0 for real poetry, 1 for generated,
# 2 for scrambled poetry and 3 for prose.
style_critic = make_critic(ntokens, 4, N=2).to(device)
# One extra input id (== ntokens) is the separator inserted by get_relevance_input;
# the head produces a single score.
relevance_critic = make_critic(ntokens + 1, 1, N=2).to(device)

  nn.init.xavier_uniform(p)


In [17]:
from torch.autograd import Variable
import numpy as np
def subsequent_mask(size):
    "Return a (1, size, size) mask that is true where column <= row, hiding later positions."
    strictly_upper = np.triu(np.ones((1, size, size)), k=1)
    future = torch.from_numpy(strictly_upper.astype('uint8'))
    return future == 0


def prep_tensors(src, trg, pad=3):
    """Derive the masks and the shifted labels from a padded source/target pair.

    The target is split into a decoder-input slice (all but the last position)
    and a label slice (all but the first). Only the labels are returned; the
    decoder-input slice is used solely to build the target mask.

    Returns:
        Tuple ``(src, trg_y, src_mask, trg_mask)``.
    """
    decoder_in, trg_y = trg[:, :-1], trg[:, 1:]
    trg_mask = make_std_mask(decoder_in, pad)
    src_mask = (src != pad).unsqueeze(-2)
    return src, trg_y, src_mask, trg_mask

def make_std_mask(tgt, pad):
    """Create a mask to hide padding and future words.

    Args:
        tgt: Tensor of target ids whose last dimension is the sequence.
        pad: Id of the padding token.

    Returns:
        The padding mask ``(tgt != pad)`` with a singleton axis inserted before
        the sequence axis, AND-ed with ``subsequent_mask`` of the sequence
        length. For a (batch, seq) input this broadcasts to (batch, seq, seq).
    """
    pad_mask = (tgt != pad).unsqueeze(-2)
    # `Variable` is a deprecated no-op wrapper and `.data` is unnecessary here:
    # type_as only needs the mask's dtype/device.
    causal_mask = subsequent_mask(tgt.size(-1)).type_as(pad_mask)
    return pad_mask & causal_mask


In [18]:
def get_dae_input(tgt, token_selector, pad=3, max_tokens=15):
    """Pick the tokens of each target that the selector marks as "keep".

    The selector's generator produces per-token class scores; a token is kept
    when the arg-max class index is non-zero. At most ``max_tokens`` kept tokens
    per row are retained, in their original order.

    Args:
        tgt: (batch, seq) tensor of token ids.
        token_selector: Module exposing ``forward(ids, mask)`` and ``generator``.
        pad: Padding id, used both for the input mask and to pad the result.
        max_tokens: Upper bound on the number of selected tokens per row.

    Returns:
        (longest_selection, batch) tensor on ``tgt``'s device, padded with
        ``pad`` (``pad_sequence`` is called with ``batch_first=False``).

    NOTE(review): the selection is a hard arg-max followed by indexing integer
    ids, so no gradient can flow back into ``token_selector`` through the
    returned tensor.
    """
    # Run the selector on the device its weights live on rather than on a notebook global.
    first_param = next(token_selector.parameters(), None)
    model_device = first_param.device if first_param is not None else tgt.device
    ids = tgt.to(model_device)
    select_prob_embeds = token_selector.forward(ids, (ids != pad).unsqueeze(-2))
    select_prob = token_selector.generator(select_prob_embeds)
    # Boolean mask on tgt's own device: uint8 masks are deprecated for masked_select,
    # and the mask must sit on the same device as the rows it indexes.
    keep = (torch.max(select_prob, dim=2).indices != 0).to(tgt.device)
    dae_list = [torch.masked_select(row, row_keep)[:max_tokens]
                for row, row_keep in zip(tgt, keep)]
    return torch.nn.utils.rnn.pad_sequence(dae_list, batch_first=False, padding_value=pad)

In [19]:
# Lazy, single-pass generator over the training batches. Once the training loop has
# consumed it, this cell must be re-run before training again, otherwise the loop's
# zip() yields nothing.
# NOTE(review): the literal 3 is presumably the padding id — confirm against rebatch.
rebatched = (rebatch(3, b) for b in train_iter)

In [20]:
from src.model.adafactor import Adafactor

#enc_dec_opt = NoamOpt(enc_dec.src_embed[0].d_model, 1, 2000,
#                        torch.optim.Adam(enc_dec.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
# Optimiser for the encoder-decoder (replaces the commented-out NoamOpt/Adam setup above).
enc_dec_opt = Adafactor(enc_dec.parameters())

# NOTE(review): BCELoss expects probabilities; the training loop feeds it single
# columns of a 4-way softmax rather than using a multi-class loss.
style_criterion = nn.BCELoss()
# NOTE(review): reassigned to BCEWithLogitsLoss in the following cell, so this value is never used.
relevance_criterion = nn.BCELoss()

# One optimiser per auxiliary network.
token_optim = Adafactor(token_selector.parameters())
style_optim = Adafactor(style_critic.parameters())
rel_optim = Adafactor(relevance_critic.parameters())

In [21]:
# Supersedes the earlier BCELoss assignment: the training loop passes the relevance
# critic's generator output to this criterion without applying a sigmoid first.
relevance_criterion = torch.nn.BCEWithLogitsLoss()

In [22]:
def get_relevance_input(dae_input, tgt, sep_id=None):
    """Concatenate selector tokens and target into one sequence for the relevance critic.

    Args:
        dae_input: (batch, n_selected) tensor of selected token ids.
        tgt: (batch, seq) tensor of target ids on the same device as ``dae_input``.
        sep_id: Id placed between the two parts. Defaults to the notebook-level
            ``ntokens``, which is only looked up when ``sep_id`` is None.

    Returns:
        (batch, n_selected + 1 + seq) tensor ``[dae_input | sep_id | tgt]``.
    """
    if sep_id is None:
        sep_id = ntokens
    # Build the separator column directly on tgt's device instead of allocating it on
    # the CPU and moving it to a global device on every call.
    mid_point = torch.full((tgt.shape[0], 1), sep_id, dtype=torch.long, device=tgt.device)
    return torch.cat((dae_input, mid_point, tgt), dim=1)


In [23]:
# get validation iterator


def validate_batch(model, src, max_len=256, start_symbol=1, end_symbol=2):
    """Greedy-decode a batch of source sequences.

    Args:
        model: Encoder-decoder exposing ``encode``, ``decode`` and ``generator``.
        src: (batch, src_len) tensor of token ids; id 3 is treated as padding.
        max_len: Maximum output length, counting the start symbol.
        start_symbol: Id every output row begins with.
        end_symbol: Id that marks a row as finished.

    Returns:
        (batch, out_len) tensor of ids. Decoding stops as soon as every row has
        produced ``end_symbol`` at least once; the tokens of that final step are
        not appended, and rows that finished earlier keep receiving tokens
        until then.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Move the source once so that the mask handed to decode() is on the same device
    # as the encoder memory.
    src = src.to(device)
    src_mask = (src != 3).unsqueeze(-2)
    batch_size = src.shape[0]
    # Pure inference: no autograd graph is needed for greedy decoding.
    with torch.no_grad():
        memory = model.encode(src, src_mask)
        ys = torch.full((batch_size, 1), start_symbol, dtype=src.dtype, device=device)
        finished = torch.zeros(batch_size, dtype=torch.bool, device=device)
        for _ in range(max_len - 1):
            tgt_mask = subsequent_mask(ys.size(1)).type_as(src).to(device)
            out = model.decode(memory, src_mask, ys, tgt_mask)
            prob = model.generator(out[:, -1])
            _, next_word = torch.max(prob, dim=1)
            # A row stays finished once it has emitted the end symbol.
            finished |= next_word == end_symbol
            if bool(finished.all()):
                break
            ys = torch.cat([ys, next_word.unsqueeze(1)], dim=1)
    return ys


def validate(model, selector, iterator):
    """Placeholder: not implemented yet; the body is empty and returns None."""
    pass

In [24]:

# Criterion for the reconstruction loss. Sized with the shared vocabulary constant so
# it cannot drift from the width of the encoder-decoder's output layer.
label_smoothing = LabelSmoothing(size=ntokens, padding_idx=3, smoothing=0.1)



In [25]:
[tok.Decode(x.tolist()) for x in toyset.transpose(0, 1)]

['ayrılığın sınırlı ışığında¬ışıldar gözler¬dar gelir dünya¬sıkıcı karanlıktan¬çıkış başlar¬sevgi akla gelince¬ışık küçücüktür¬sevgi karanlığında¬sonsuzluğu hatırlatır insana¬ışık kandırıcıdır belki¬ayrılığı sevenler için¬orada yuvarlak bir masa¬üç beş sigara biraz da çay¬sevgi karanlığında herşey¬coşkuyla¬zaten aranan da o değil mi¬sınırlı değilmiş hiçbir şey',
 'Şair sözün gösterer,¬Gözel gözün gösterer.¬Payız özün gösterer,¬Saralmış yarpağıyla.¬Uzaqda quzey durar,¬Şa ⁇ ta qelbi hey vurar.¬Önümde muzey qurar,¬Heyat çılpaqlığıyla.¬Yel eser deyinerek,¬ ⁇ um-paltar geyinerek,¬Fe ⁇ r eder öyünərek¬Insan torpaqlığıyla.',
 'Ey yakarış¬Ey uyanış¬Ey sevgi¬Ey aşk¬Ey arayış¬Ey tükeniş¬Ey özleyiş¬Uyanın artık¬O derin uykulardan¬Ve devam edin¬Katıksızca aramaya¬Aşkı,sevdayı,¬Yakarışı,uyanışı¬Arayışı,tükenişi¬Özleyişi,özlemi',
 'ben ölsemde aşkım cümle alemde¬kulakdan kulaga yayılır belki¬adımız gecer her bir kalemde¬gönül nikahımız kıyılır belki¬sevdik sevildik ölürcesine¬başını sonunu bilircesi

In [None]:
# Joint training loop. For every aligned (poetry, prose, scrambled) batch triple it runs:
#   1. a reconstruction update of the encoder-decoder from the selector-chosen tokens,
#   2. an update of the relevance critic,
#   3. an update of the style critic,
#   4. periodic logging and sample decoding of the toy set.
# zip() stops as soon as the shortest of the three iterators is exhausted.
soft = torch.nn.Softmax(dim=1)
for c, (poetry_batch, prose_batch, scrambled_batch) in enumerate(zip(rebatched, prose_iter, scrambled_iter)):
    # Clear gradients of the generator and the token selector.
    enc_dec_opt.zero_grad()
    token_optim.zero_grad()


    
    tgt, tgt_mask = poetry_batch.trg.to(device), poetry_batch.trg_mask.to(device)
    # classify tokens, get the first 15 tokens selected.
    # NOTE(review): get_dae_input selects via arg-max + masked_select on integer ids, so the
    # reconstruction loss cannot back-propagate into token_selector — confirm that
    # token_optim.step() below has any gradient to apply.
    dae_input = get_dae_input(poetry_batch.trg, token_selector).transpose(0,1).to(device)
    # create src and src mask from selected tokens
    dae_input_mask = (dae_input != 3).unsqueeze(-2)
    
    #output_selected = validate_batch(enc_dec, dae_input)
    # get output of poetry generator
    output_embeds = enc_dec.forward(dae_input, tgt, dae_input_mask, tgt_mask)
    # put through its generator, choose likeliest token
    output = enc_dec.generator(output_embeds)
    # Normaliser: number of positions in the selector input plus in the target
    # (padding positions included).
    norm = dae_input.shape[0] * dae_input.shape[1] + tgt.shape[0] * tgt.shape[1]
    reconstruction_loss = label_smoothing(output.contiguous().view(-1, output.size(-1)),
                             poetry_batch.trg_y.to(device).contiguous().view(-1)) / norm
    reconstruction_loss.backward()
    token_optim.step() # I don't want GAN loss touching this

    # torch.max that stuff
    # NOTE(review): these arg-max ids are integers, so nothing computed from
    # output_selected can send gradients back into enc_dec.
    _, output_selected = torch.max(output, 2)
    
    #create rel critic input by concatenating dae input and tgt
    # NOTE(review): built from the selector tokens and the reference target, not from the
    # generator's output.
    rel_input = get_relevance_input(dae_input, tgt)
    # get critic losses
    # [:,0,:] keeps only the first position's scores of every sequence.
    style_scores = soft(style_critic.generator(style_critic.forward(output_selected.to(device), 
                                        (output_selected != 3).unsqueeze(-2).to(device)))[:,0,:])
    rel_output = relevance_critic.generator(relevance_critic.forward(rel_input.to(device), 
                                        (rel_input != 3).unsqueeze(-2).to(device)))
    relevance_scores = rel_output[:,0,:]
    
    # "Generator" objectives: generated text should score as class 0, pair should score as relevant.
    style_loss = style_criterion(style_scores[:, 0], torch.ones((style_scores.shape[0])).to(device))
    try:
        relevance_loss = relevance_criterion(relevance_scores.squeeze(), 
                                         torch.ones(relevance_scores.shape[0]).to(device))
    except ValueError:
        # Skips the rest of this step (including enc_dec_opt.step()) for the batch.
        # NOTE(review): presumably guards a single-row batch, where squeeze() drops the batch
        # axis and the shapes no longer match — confirm; other ValueErrors are hidden too.
        continue
        
    
    # NOTE(review): these two backward passes only reach the critics' parameters, whose
    # gradients are zeroed a few lines below before the critics' own updates; the step on
    # enc_dec therefore applies the gradient left by reconstruction_loss.backward().
    style_loss.backward() 
    relevance_loss.backward()
    enc_dec_opt.step()

    rel_optim.zero_grad()
    style_optim.zero_grad()
    # samples and classes for relevance critic
    
    # Rows of dae_input are permuted so each target is paired with another row's tokens (label 0).
    # NOTE(review): the critic's own update only ever sees these negative pairs.
    scramb_rel_input = get_relevance_input(dae_input[torch.randperm(dae_input.shape[0]), :], tgt)
    scrambled_relevance_scores = relevance_critic.generator(relevance_critic.forward(scramb_rel_input.to(device), 
                                        (scramb_rel_input != 3).unsqueeze(-2).to(device)))[:,0,:]
    rel_crit_loss = relevance_criterion(scrambled_relevance_scores.squeeze(), 
                                         torch.zeros(scrambled_relevance_scores.shape[0]).to(device))
    rel_crit_loss.backward()
    rel_optim.step()
    
    # samples and classes for style critic
    # trg is poetry, prose batch is prose, model output is generated, scramble poetry lines for scrambled.
    # Each class is trained by pushing its own softmax column towards 1 with BCE.
    
    # generated poetry
    gen_scores = soft(style_critic.generator(style_critic.forward(output_selected.to(device), 
                                        (output_selected != 3).unsqueeze(-2).to(device)))[:,0,:])
    # style_scores comes from the same batch, so its first dimension matches gen_scores'.
    gen_loss = style_criterion(gen_scores[:, 1], torch.ones((style_scores.shape[0])).to(device))

    # real poetry 
    real_scores = soft(style_critic.generator(style_critic.forward(tgt.to(device), 
                                        (tgt != 3).unsqueeze(-2).to(device)))[:,0,:])
    
    real_loss = style_criterion(real_scores[:, 0], torch.ones((real_scores.shape[0])).to(device))
    
    
    # scrambled poetry
    scramb_scores = soft(style_critic.generator(style_critic.forward(scrambled_batch.src.transpose(0,1).to(device), 
                                        (scrambled_batch.src.transpose(0,1) != 3).unsqueeze(-2).to(device)))[:,0,:])
    scramb_loss = style_criterion(scramb_scores[:, 2], torch.ones((scramb_scores.shape[0])).to(device))

    # prose
    prose_scores = soft(style_critic.generator(style_critic.forward(prose_batch.src.transpose(0,1).to(device), 
                                        (prose_batch.src.transpose(0,1) != 3).unsqueeze(-2).to(device)))[:,0,:])
    prose_loss = style_criterion(prose_scores[:, 3], torch.ones((prose_scores.shape[0])).to(device))
    
    # Gradients of the four class losses accumulate before the single optimiser step.
    gen_loss.backward()
    real_loss.backward()
    scramb_loss.backward()
    prose_loss.backward()
    style_optim.step()
    
    # Every 50 steps: log the reconstruction loss and the mean "generated" probability that
    # the style critic assigns to the model's output.
    if c% 50 == 0:
        print("Reconstruction loss:", reconstruction_loss)
        print(torch.mean(gen_scores[:, 1]))
        # Every 500 steps: greedy-decode the toy poems from their selected tokens.
        if c% 500 == 0:
            val_src = get_dae_input(toyset.transpose(0, 1), token_selector).transpose(0,1).to(device)
            validated = validate_batch(enc_dec, val_src)
            print([tok.Decode(x.tolist()) for x in validated])
        

  


Reconstruction loss: tensor(4.8275, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
['Bir gün bir bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬Bir gün¬', 'Bir bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün bir gün,¬Bir gün,¬Bir bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün,¬Bir gün

Reconstruction loss: tensor(6.3027, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9006, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.1292, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8865, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.0495, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.3806, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3633, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7083, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., de

Reconstruction loss: tensor(5.0039, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.6138, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9994, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7967, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9344, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.3843, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.6754, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4170, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.3818, device='cuda:0', grad_fn=<DivBackw

Reconstruction loss: tensor(5.2499, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8934, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.4336, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8826, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.3269, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.3523, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9736, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8532, device='cuda:0', grad_fn=<

Reconstruction loss: tensor(6.0239, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.2990, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9999, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7660, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.1463, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7006, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.6209, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7021, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2008, device='cuda:0', grad_fn=<DivBackward0>)
t

Reconstruction loss: tensor(5.1742, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.8016, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.6673, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.4065, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9932, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.4152, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4644, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9735, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9107, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.

Reconstruction loss: tensor(4.0653, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9983, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8702, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5563, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2570, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5524, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3274, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4891, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.1975, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.

Reconstruction loss: tensor(5.3302, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.0152, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3471, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7333, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3805, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3315, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4787, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8791, device='cuda:0', grad_fn=<DivBackward0>)
tenso

Reconstruction loss: tensor(5.8882, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.1384, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4177, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5292, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.9892, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9233, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.0542, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4181, device='cuda:0', grad_fn=<DivBackward0

Reconstruction loss: tensor(5.6052, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9765, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(6.0069, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.6223, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9260, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8556, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9998, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.0563, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2283, device='cuda:0', grad_fn=<DivBackward0

Reconstruction loss: tensor(5.4539, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8921, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2623, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.0582, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7610, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8964, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7441, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9013, device='cuda:0', grad_fn=<DivBackward0>)
tenso

Reconstruction loss: tensor(5.4308, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5921, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5128, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5107, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2781, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2781, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.0398, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8384, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.

Reconstruction loss: tensor(4.6896, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8505, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5722, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.1699, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4469, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.6135, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9395, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.0242, device='cuda:0', grad_fn=<DivBackward0>)
tenso

Reconstruction loss: tensor(5.4727, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4532, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3317, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7440, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.6255, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5730, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(3.6088, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.7045, device='cuda:0', grad_fn=<DivBackward0>)
t

Reconstruction loss: tensor(5.2700, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4185, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2864, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8019, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.7831, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.6030, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2851, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.1555, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.

Reconstruction loss: tensor(5.9149, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.6281, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9999, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.1282, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2565, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3922, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3058, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9648, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.1447, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.

Reconstruction loss: tensor(5.7174, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.4058, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3454, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.1282, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3369, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3065, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.4700, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.0747, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.

Reconstruction loss: tensor(4.7619, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.7096, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.9974, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.0177, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.3390, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2746, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4485, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.6508, device='cuda:0', grad_fn=<DivBackward0>)
tenso

Reconstruction loss: tensor(5.3833, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5920, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(3.8675, device='cuda:0', grad_fn=<DivBackward0>)
tensor(0.9992, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.4707, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.9096, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.2649, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5463, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.9061, device='cuda:0', grad_fn=<DivBackward0>)
t

Reconstruction loss: tensor(5.4689, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.8914, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.8723, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.6076, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.0964, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(5.5791, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1., device='cuda:0', grad_fn=<MeanBackward0>)
Reconstruction loss: tensor(4.1343, device='cuda:0', grad_fn=<DivBackward0>)
tensor(1.0000, device='cuda:0', grad_fn=<MeanBackward0>)
