In [1]:
import pandas as pd
import torch as torch
from transformers import BertModel
from pytorch_pretrained_bert import BertTokenizer

In [2]:
from sklearn.metrics import accuracy_score

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
import statistics

In [5]:
import time

In [6]:
from functools import partial
# from dataclasses import dataclass
from collections import OrderedDict

In [7]:
import torch.nn as nn

In [8]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler


In [9]:
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [10]:
bert_tokenizer=BertTokenizer.from_pretrained('bertPytorch/bert-base-cased')
embedding_model=BertModel.from_pretrained('bertPytorch/bert-base-cased')

In [11]:
embedding_model.cuda()

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(28996, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [12]:
def BertEmbedding(ids,masks):
#         input_token = ['[CLS]'] + bert_tokenizer.tokenize(text_as_input) + ['[SEP]']
#         input_id = bert_tokenizer.convert_tokens_to_ids(input_token)
#         input_id=[input_id]
#         input_ids = pad_sequences(input_id, maxlen=128, dtype="long", truncating="post", padding="post")
#         attention_masks = []
#         for seq in text_as_input_ids:
#             for i in seq:
#                 if i > 0:
#                     attention_masks.append(1)
#                 else:
#                     attention_masks.append(0)
#         masks=[attention_masks]
#         masks=torch.LongTensor(masks)
#         input_ids=torch.LongTensor(text_as_input_ids)
        embeddings = embedding_model(ids,masks)
        embeddings = embeddings[0]
        
        return embeddings

In [13]:
class Conv2dAuto(nn.Conv2d):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.padding =  (self.kernel_size[0] // 2, self.kernel_size[1] // 2) # dynamic add padding based on the kernel_size
        
conv3x3 = partial(Conv2dAuto, kernel_size=3, bias=False) 

In [14]:
conv = conv3x3(in_channels=32, out_channels=64)
print(conv)
del conv

Conv2dAuto(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)


In [15]:
def activation_func(activation):
    return  nn.ModuleDict([
        ['relu', nn.ReLU(inplace=True)],
        ['leaky_relu', nn.LeakyReLU(negative_slope=0.01, inplace=True)],
        ['selu', nn.SELU(inplace=True)],
        ['sigmoid',nn.Sigmoid()],
        ['none', nn.Identity()]
    ])[activation]

In [16]:
class ResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, activation='relu'):
        super().__init__()
        self.in_channels, self.out_channels, self.activation = in_channels, out_channels, activation
        self.blocks = nn.Identity()
        self.activate = activation_func(activation)
        self.shortcut = nn.Identity()   
    
    def forward(self, x):
        residual = x
        if self.should_apply_shortcut: residual = self.shortcut(x)
        x = self.blocks(x)
        x += residual
        x = self.activate(x)
        return x
    
    @property
    def should_apply_shortcut(self):
        return self.in_channels != self.out_channels

In [17]:
dummy = torch.ones((1, 1, 1, 1))

block = ResidualBlock(1, 64)
block(dummy)

tensor([[[[2.]]]])

In [18]:
class ResNetResidualBlock(ResidualBlock):
    def __init__(self, in_channels, out_channels, expansion=1, downsampling=1, conv=conv3x3, *args, **kwargs):
        super().__init__(in_channels, out_channels, *args, **kwargs)
        self.expansion, self.downsampling, self.conv = expansion, downsampling, conv
        self.shortcut = nn.Sequential(
            nn.Conv2d(self.in_channels, self.expanded_channels, kernel_size=1,
                      stride=self.downsampling, bias=False),
            nn.BatchNorm2d(self.expanded_channels)) if self.should_apply_shortcut else None
        
        
    @property
    def expanded_channels(self):
        return self.out_channels * self.expansion
    
    @property
    def should_apply_shortcut(self):
        return self.in_channels != self.expanded_channels


In [19]:
def conv_bn(in_channels, out_channels, conv, *args, **kwargs):
    return nn.Sequential(conv(in_channels, out_channels, *args, **kwargs), nn.BatchNorm2d(out_channels))


In [20]:
class ResNetBasicBlock(ResNetResidualBlock):
    """
    Basic ResNet block composed by two layers of 3x3conv/batchnorm/activation
    """
    expansion = 1
    def __init__(self, in_channels, out_channels, *args, **kwargs):
        super().__init__(in_channels, out_channels, *args, **kwargs)
        self.blocks = nn.Sequential(
            conv_bn(self.in_channels, self.out_channels, conv=self.conv, bias=False, stride=self.downsampling),
            activation_func(self.activation),
            conv_bn(self.out_channels, self.expanded_channels, conv=self.conv, bias=False),
        )

In [21]:
class ResNetBottleNeckBlock(ResNetResidualBlock):
    expansion = 4
    def __init__(self, in_channels, out_channels, *args, **kwargs):
        super().__init__(in_channels, out_channels, expansion=4, *args, **kwargs)
        self.blocks = nn.Sequential(
           conv_bn(self.in_channels, self.out_channels, self.conv, kernel_size=1),
             activation_func(self.activation),
             conv_bn(self.out_channels, self.out_channels, self.conv, kernel_size=3, stride=self.downsampling),
             activation_func(self.activation),
             conv_bn(self.out_channels, self.expanded_channels, self.conv, kernel_size=1),
        )

In [22]:
class ResNetLayer(nn.Module):
    """
    A ResNet layer composed by `n` blocks stacked one after the other
    """
    def __init__(self, in_channels, out_channels, block=ResNetBasicBlock, n=1, *args, **kwargs):
        super().__init__()
        # 'We perform downsampling directly by convolutional layers that have a stride of 2.'
        downsampling = 2 if in_channels != out_channels else 1
        self.blocks = nn.Sequential(
            block(in_channels , out_channels, *args, **kwargs, downsampling=downsampling),
            *[block(out_channels * block.expansion, 
                    out_channels, downsampling=1, *args, **kwargs) for _ in range(n - 1)]
        )

    def forward(self, x):
        x = self.blocks(x)
        return x

In [23]:
class ResNetEncoder(nn.Module):
    """
    ResNet encoder composed by layers with increasing features.
    """
    def __init__(self, in_channels=3, blocks_sizes=[64, 128, 256, 512], deepths=[2,2,2,2], 
                 activation='relu', block=ResNetBasicBlock, *args, **kwargs):
        super().__init__()
        self.blocks_sizes = blocks_sizes
        
        self.gate = nn.Sequential(
            nn.Conv2d(in_channels, self.blocks_sizes[0], kernel_size=7, stride=2, padding=3, bias=False),
            nn.BatchNorm2d(self.blocks_sizes[0]),
            activation_func(activation),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        )
        
        self.in_out_block_sizes = list(zip(blocks_sizes, blocks_sizes[1:]))
        self.blocks = nn.ModuleList([ 
            ResNetLayer(blocks_sizes[0], blocks_sizes[0], n=deepths[0], activation=activation, 
                        block=block,*args, **kwargs),
            *[ResNetLayer(in_channels * block.expansion, 
                          out_channels, n=n, activation=activation, 
                          block=block, *args, **kwargs) 
              for (in_channels, out_channels), n in zip(self.in_out_block_sizes, deepths[1:])]       
        ])
        
        
    def forward(self, x):
        x = self.gate(x)
        for block in self.blocks:
            x = block(x)
        return x

In [24]:
class ResnetDecoder(nn.Module):
    """
    This class represents the tail of ResNet. It performs a global pooling and maps the output to the
    correct class by using a fully connected layer.
    """
    def __init__(self, in_features, n_classes):
        super().__init__()
        self.avg = nn.AdaptiveAvgPool2d((1, 1))
        self.decoder = nn.Linear(in_features, n_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.avg(x)
        x = x.view(x.size(0), -1)
        x = self.decoder(x)
        x = self.sigmoid(x)
        return x


In [25]:
class ResNet(nn.Module):
    
    def __init__(self, in_channels, n_classes, *args, **kwargs):
        super().__init__()
        self.encoder = ResNetEncoder(in_channels, *args, **kwargs)
        self.decoder = ResnetDecoder(self.encoder.blocks[-1].blocks[-1].expanded_channels, n_classes)
        
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [26]:
def resnet152(in_channels, n_classes, block=ResNetBottleNeckBlock, *args, **kwargs):
    return ResNet(in_channels, n_classes, block=block, deepths=[3, 8, 36, 3], *args, **kwargs)

In [27]:
model = resnet152(2, 1, activation='sigmoid')
print(model.cuda(), (3, 224, 224))

ResNet(
  (encoder): ResNetEncoder(
    (gate): Sequential(
      (0): Conv2d(2, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Sigmoid()
      (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (blocks): ModuleList(
      (0): ResNetLayer(
        (blocks): Sequential(
          (0): ResNetBottleNeckBlock(
            (blocks): Sequential(
              (0): Sequential(
                (0): Conv2dAuto(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (1): Sigmoid()
              (2): Sequential(
                (0): Conv2dAuto(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=Tru

In [28]:
class FakeNewsClassifier(nn.Module):
    def __init__(self, resnet_architecture, embedding_func):
        super(FakeNewsClassifier, self).__init__()
        self.resnet=resnet_architecture
    def forward(self, x):
        output = self.resnet(x)
        return output

In [29]:
model = FakeNewsClassifier(resnet152(128, 1, activation='sigmoid'), BertEmbedding)

In [30]:
def getIds(inp):
    ids=[]
    masks=[]
    for sent in inp:
        input_token = ['[CLS]'] + bert_tokenizer.tokenize(sent) + ['[SEP]']
        input_id = bert_tokenizer.convert_tokens_to_ids(input_token)
        input_id=[input_id]
        input_ids = pad_sequences(input_id, maxlen=128, dtype="long", truncating="post", padding="post")
        attention_masks = []
        for seq in input_ids:
            for i in seq:
                if i > 0:
                    attention_masks.append(1)
                else:
                    attention_masks.append(0)
        maks=[attention_masks]
        masks.append(maks)
        ids.append(input_ids)
    return(ids,masks)

In [31]:
def getIds(inp):
        ids=[]
        masks=[]
        for sent in inp:
            input_token = ['[CLS]'] + bert_tokenizer.tokenize(sent) + ['[SEP]']
            input_id = bert_tokenizer.convert_tokens_to_ids(input_token)
            input_id=[input_id]
            input_ids = pad_sequences(input_id, maxlen=128, dtype="long", truncating="post", padding="post")
            attention_masks = []
            for seq in input_ids:
                for i in seq:
                    if i > 0:
                        attention_masks.append(1)
                    else:
                        attention_masks.append(0)
            maks=[attention_masks]
            masks.append(maks)
            ids.append(input_ids)
        return(ids,masks)

In [32]:
class DataGenerator:
    def __init__(self,data):
        MAX_LEN = 128
        batch_size=40
        data['split'].value_counts()
        trainDf=data[data['split']=='TRAIN']
        valDf=data[data['split']=='VALID']
        testDf=data[data['split']=='TEST']
        print(trainDf.head())
        print(valDf.head())
        print(testDf.head())

        
        trainText = trainDf['text'].values
        trainLabels = trainDf['truth'].values
        valText = valDf['text'].values
        valLabels = valDf['truth'].values
        testText = testDf['text'].values
        testLabels = testDf['truth'].values


        train_ids, train_attention_masks = getIds(trainText)
        val_ids, val_attention_masks = getIds(valText)
        test_ids, test_attention_masks = getIds(testText)

        train_data_inputs = torch.tensor(train_ids)
        train_data_labels = torch.tensor(trainLabels)
        train_data_masks = torch.tensor(train_attention_masks)
        val_data_inputs = torch.tensor(val_ids)
        val_data_labels = torch.tensor(valLabels)
        val_data_masks = torch.tensor(val_attention_masks)
        test_data_inputs = torch.tensor(test_ids)
        test_data_labels = torch.tensor(testLabels)
        test_data_masks = torch.tensor(test_attention_masks)

        trainDataset = TensorDataset(train_data_inputs, train_data_masks, train_data_labels)
        testDataset = TensorDataset(test_data_inputs, test_data_masks, test_data_labels)
        valDataset = TensorDataset(val_data_inputs, val_data_masks, val_data_labels)
        train_data_sampler=RandomSampler(trainDataset)
        test_data_sampler=RandomSampler(testDataset)
        val_data_sampler=RandomSampler(valDataset)

        self.train_dataloader = DataLoader(trainDataset, sampler=train_data_sampler, batch_size=batch_size)
        self.valid_dataloader = DataLoader(valDataset, sampler=test_data_sampler, batch_size=batch_size)
        self.test_dataloader = DataLoader(testDataset,sampler=test_data_sampler, batch_size=batch_size)

        
    def generate_train_batch_data(self):
        for i, batch in enumerate(self.train_dataloader):
            batch = tuple(t.to(device) for t in batch)
            yield batch

    def generate_valid_batch_data(self):
        for i, batch in enumerate(self.valid_dataloader):
            batch = tuple(t.to(device) for t in batch)
            yield batch

    def generate_test_batch_data(self):
        for i, batch in enumerate(self.test_dataloader):
            batch = tuple(t.to(device) for t in batch)
            yield batch

In [33]:
class DataGeneratorv2:
    def __init__(self):
        MAX_LEN = 128
        batch_size=10
        data=pd.read_csv('/home/gridsan/svattam/upama/FakeNews/combined_relevant_data.csv')
        text=data[['text']]
        labels=data[['truth']].to_numpy()
        tokenizer= BertTokenizer.from_pretrained('/home/gridsan/svattam/bert/bert-base-cased', do_lower_case=True)
        x_train, X_test, Y_train, y_test = train_test_split(text,labels, test_size=0.3)
        x_val, x_test, Y_val, Y_test = train_test_split(X_test, y_test, test_size=0.5)
        
        train_inputs, train_attention_masks = prepare_for_bert(x_train, tokenizer, MAX_LEN)
        val_inputs, val_attention_masks = prepare_for_bert(x_val, tokenizer, MAX_LEN)
        test_inputs, test_attention_masks = prepare_for_bert(x_test, tokenizer, MAX_LEN)
        
        train_data_inputs = torch.tensor(train_inputs)
        train_data_labels = torch.tensor(Y_train)
        train_data_masks = torch.tensor(train_attention_masks)
        val_data_inputs = torch.tensor(val_inputs)
        val_data_labels = torch.tensor(Y_val)
        val_data_masks = torch.tensor(val_attention_masks)
        test_data_inputs = torch.tensor(test_inputs)
        test_data_labels = torch.tensor(Y_test)
        test_data_masks = torch.tensor(test_attention_masks)
       
        trainDataset = TensorDataset(train_data_inputs, train_data_masks, train_data_labels)
        testDataset = TensorDataset(test_data_inputs, test_data_masks, test_data_labels)
        valDataset = TensorDataset(val_data_inputs, val_data_masks, val_data_labels)
        train_data_sampler=RandomSampler(trainDataset)
        test_data_sampler=RandomSampler(testDataset)
        val_data_sampler=RandomSampler(valDataset)


        # self.train_data = 
        # self.train_masks = 
        # self.train_labels = 

        # self.valid_data = 
        # self.valid_masks = 
        # self.valid_labels = 

        self.train_dataloader = DataLoader(trainDataset, sampler=train_data_sampler, batch_size=batch_size)
        self.valid_dataloader = DataLoader(valDataset, sampler=test_data_sampler, batch_size=batch_size)
        self.test_dataloader = DataLoader(testDataset,sampler=test_data_sampler, batch_size=batch_size)

    # def sampleFromClass(ds, k):
    #     class_counts = {}
    #     train_data = []
    #     train_label = []
    #     test_data = []
    #     test_label = []
    #     for data, label in ds:
    #         c = label.item()
    #         class_counts[c] = class_counts.get(c, 0) + 1
    #         if class_counts[c] <= k:
    #             train_data.append(data)
    #             train_label.append(torch.unsqueeze(label, 0))
    #         else:
    #             test_data.append(data)
    #             test_label.append(torch.unsqueeze(label, 0))
    #     train_data = torch.cat(train_data)
    #     for ll in train_label:
    #         print(ll)
    #     train_label = torch.cat(train_label)
    #     test_data = torch.cat(test_data)
    #     test_label = torch.cat(test_label)

    #     return (TensorDataset(train_data, train_label), 
    #         TensorDataset(test_data, test_label))
        
    def generate_train_batch_data(self):
        for i, batch in enumerate(self.train_dataloader):
            batch = tuple(t.to(device) for t in batch)
            yield batch

    def generate_valid_batch_data(self):
        for i, batch in enumerate(self.valid_dataloader):
            batch = tuple(t.to(device) for t in batch)
            yield batch

    def generate_test_batch_data(self):
        for i, batch in enumerate(self.test_dataloader):
            batch = tuple(t.to(device) for t in batch)
            yield batch


In [34]:
data=pd.read_csv('/home/gridsan/svattam/upama/FakeNews/combined_relevant_data.csv')

In [35]:
df = data.sample(frac=1).reset_index(drop=True)


In [36]:
sample_data=df.iloc[0:100]

In [37]:
def prepare_for_bert(df, tokenizer, MAX_LEN):
    sentences=df['text'].values
    sentences = ["[CLS] " + sentence + " [SEP]" for sentence in sentences]
    tokenized_texts = [tokenizer.tokenize(sent) for sent in sentences]
    input_ids = [tokenizer.convert_tokens_to_ids(x) for x in tokenized_texts]
    input_ids = pad_sequences(input_ids, maxlen=MAX_LEN, dtype="long", truncating="post", padding="post")

    attention_masks = []
    for seq in input_ids:
        seq_mask = [float(i>0) for i in seq]
        attention_masks.append(seq_mask)
    return input_ids, attention_masks

In [38]:
data_gen=DataGeneratorv2()

In [39]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [40]:
class Embedder:
    def __init__(self):
        self.embedding_model = BertModel.from_pretrained('/home/gridsan/svattam/bert/bert-base-cased', from_tf=False)
        self.embedding_model.cuda()

In [41]:
embedder = Embedder()


In [42]:
model.cuda()

FakeNewsClassifier(
  (resnet): ResNet(
    (encoder): ResNetEncoder(
      (gate): Sequential(
        (0): Conv2d(128, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): Sigmoid()
        (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      )
      (blocks): ModuleList(
        (0): ResNetLayer(
          (blocks): Sequential(
            (0): ResNetBottleNeckBlock(
              (blocks): Sequential(
                (0): Sequential(
                  (0): Conv2dAuto(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
                  (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                )
                (1): Sigmoid()
                (2): Sequential(
                  (0): Conv2dAuto(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
                  (1): Bat

In [43]:
n_epochs = 10
batch_size = 40
lr = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr=lr)
loss_fn = nn.BCELoss()
metrics=[]
for epoch in range(n_epochs):
    start_time = time.time()
    train_loss = []
    val_loss=[]
    train_acc=[]
    val_acc=[]
    for i, batch in enumerate(data_gen.generate_train_batch_data()):
        print(i)
        model.train(True)
        b_input_ids, b_input_mask, labels = batch
        print(b_input_ids.size(),b_input_mask.size())
        with torch.no_grad():
            embeddings = embedder.embedding_model(b_input_ids, attention_mask=b_input_mask)
        X_batch = embeddings[0].float()
        X_batch = X_batch.unsqueeze(3)
        y_batch = labels.float()
        y_pred = model(X_batch)
        print("=================")
        print(y_batch)
        print(y_pred)
        print("=================")
        exit()
        optimizer.zero_grad()
        loss = loss_fn(y_pred, y_batch)
        train_loss.append(loss.item())
        loss.backward()
        optimizer.step()
        y_labels=y_pred.cpu().detach().numpy()
        cat=np.argmax(y_labels, axis=1)
        clabels=np.argmax(labels.cpu().detach().numpy(), axis=1)
        train_acc.append(accuracy_score(clabels,cat))
    for i, batch in enumerate(data_gen.generate_valid_batch_data()):
        print(i)
        correct=0
        model.eval()
        b_input_ids, b_input_mask, labels = batch
        with torch.no_grad():
            embeddings = embedder.embedding_model(b_input_ids, attention_mask=b_input_mask)
        X_batch = embeddings[0].float()
        y_batch = labels.float()
        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)
        val_loss.append(loss.item())
        y_labels=y_pred.cpu().detach().numpy()
        cat=np.argmax(y_labels, axis=1)
        clabels=np.argmax(labels.cpu().detach().numpy(), axis=1)
        val_acc.append(accuracy_score(clabels,cat))   
    trainingAccuracy=statistics.mean(train_acc)
    validationAccuracy=statistics.mean(val_acc)
    trainingLoss=statistics.mean(train_loss)
    validationLoss=statistics.mean(val_loss)
    metrics.append([trainingAccuracy,trainingLoss,validationAccuracy,validationLoss])
print(metrics)
stats = np.array(metrics)
np.savez("trainingstats.npz", stats)


0
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.4786],
        [0.4720],
        [0.4882],
        [0.4708],
        [0.4657],
        [0.4797],
        [0.4828],
        [0.4851],
        [0.4909],
        [0.4871]], device='cuda:0', grad_fn=<SigmoidBackward>)
1
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.7720],
        [0.7312],
        [0.8079],
        [0.7906],
        [0.7616],
        [0.7453],
        [0.7763],
        [0.7917],
        [0.7898],
        [0.7633]], device='cuda:0', grad_fn=<SigmoidBackward>)
2
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]

18
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.7110],
        [0.7632],
        [0.7424],
        [0.6682],
        [0.6067],
        [0.7500],
        [0.7660],
        [0.7182],
        [0.4720],
        [0.7310]], device='cuda:0', grad_fn=<SigmoidBackward>)
19
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6865],
        [0.4863],
        [0.6012],
        [0.6853],
        [0.6962],
        [0.8042],
        [0.6540],
        [0.5625],
        [0.7022],
        [0.6909]], device='cuda:0', grad_fn=<SigmoidBackward>)
20
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [

36
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.6759],
        [0.6660],
        [0.6727],
        [0.6888],
        [0.6810],
        [0.6623],
        [0.6612],
        [0.6668],
        [0.6957],
        [0.6583]], device='cuda:0', grad_fn=<SigmoidBackward>)
37
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.7507],
        [0.7556],
        [0.7566],
        [0.7465],
        [0.7522],
        [0.7570],
        [0.7549],
        [0.7532],
        [0.7493],
        [0.7449]], device='cuda:0', grad_fn=<SigmoidBackward>)
38
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [

54
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.8124],
        [0.7905],
        [0.8215],
        [0.8005],
        [0.8027],
        [0.8165],
        [0.7955],
        [0.7889],
        [0.8092],
        [0.8322]], device='cuda:0', grad_fn=<SigmoidBackward>)
55
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.8289],
        [0.8020],
        [0.8193],
        [0.8125],
        [0.8064],
        [0.8011],
        [0.8135],
        [0.8205],
        [0.8181],
        [0.8270]], device='cuda:0', grad_fn=<SigmoidBackward>)
56
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [

72
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.4738],
        [0.5193],
        [0.4552],
        [0.4947],
        [0.5066],
        [0.4711],
        [0.4675],
        [0.5290],
        [0.4517],
        [0.5241]], device='cuda:0', grad_fn=<SigmoidBackward>)
73
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5890],
        [0.5535],
        [0.5479],
        [0.5392],
        [0.5940],
        [0.5616],
        [0.5153],
        [0.5505],
        [0.5094],
        [0.5671]], device='cuda:0', grad_fn=<SigmoidBackward>)
74
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [

90
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.5040],
        [0.4093],
        [0.4947],
        [0.5240],
        [0.4163],
        [0.3856],
        [0.4829],
        [0.3950],
        [0.5473],
        [0.5214]], device='cuda:0', grad_fn=<SigmoidBackward>)
91
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.4618],
        [0.3961],
        [0.4550],
        [0.4248],
        [0.4403],
        [0.4228],
        [0.4472],
        [0.4737],
        [0.4622],
        [0.4613]], device='cuda:0', grad_fn=<SigmoidBackward>)
92
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [

108
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.4590],
        [0.5412],
        [0.4945],
        [0.5143],
        [0.5038],
        [0.5763],
        [0.4668],
        [0.5790],
        [0.4703],
        [0.4180]], device='cuda:0', grad_fn=<SigmoidBackward>)
109
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.3676],
        [0.4599],
        [0.3777],
        [0.4125],
        [0.3614],
        [0.3888],
        [0.4656],
        [0.4264],
        [0.4299],
        [0.4410]], device='cuda:0', grad_fn=<SigmoidBackward>)
110
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
      

126
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5992],
        [0.5999],
        [0.5485],
        [0.5710],
        [0.6958],
        [0.5267],
        [0.5463],
        [0.5734],
        [0.5296],
        [0.6419]], device='cuda:0', grad_fn=<SigmoidBackward>)
127
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.6375],
        [0.6052],
        [0.5741],
        [0.5178],
        [0.5574],
        [0.6053],
        [0.6369],
        [0.5940],
        [0.5767],
        [0.5932]], device='cuda:0', grad_fn=<SigmoidBackward>)
128
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
      

144
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5171],
        [0.5206],
        [0.4902],
        [0.5040],
        [0.5344],
        [0.5182],
        [0.5268],
        [0.4978],
        [0.5365],
        [0.5147]], device='cuda:0', grad_fn=<SigmoidBackward>)
145
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5238],
        [0.5045],
        [0.5169],
        [0.4582],
        [0.5223],
        [0.5096],
        [0.4928],
        [0.5187],
        [0.5048],
        [0.4990]], device='cuda:0', grad_fn=<SigmoidBackward>)
146
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
      

162
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.3254],
        [0.3037],
        [0.3153],
        [0.3106],
        [0.2853],
        [0.3195],
        [0.3289],
        [0.3318],
        [0.3657],
        [0.3830]], device='cuda:0', grad_fn=<SigmoidBackward>)
163
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.3594],
        [0.3246],
        [0.3852],
        [0.3872],
        [0.3452],
        [0.3468],
        [0.3591],
        [0.3891],
        [0.3231],
        [0.3602]], device='cuda:0', grad_fn=<SigmoidBackward>)
164
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
      

180
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6142],
        [0.5070],
        [0.5930],
        [0.5482],
        [0.6105],
        [0.5459],
        [0.6336],
        [0.4784],
        [0.6006],
        [0.5314]], device='cuda:0', grad_fn=<SigmoidBackward>)
181
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5767],
        [0.6138],
        [0.6016],
        [0.6129],
        [0.5877],
        [0.6704],
        [0.6760],
        [0.5973],
        [0.6809],
        [0.6725]], device='cuda:0', grad_fn=<SigmoidBackward>)
182
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
      

198
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5472],
        [0.5527],
        [0.5447],
        [0.5557],
        [0.5568],
        [0.5526],
        [0.5370],
        [0.5658],
        [0.5486],
        [0.5419]], device='cuda:0', grad_fn=<SigmoidBackward>)
199
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6689],
        [0.6411],
        [0.6455],
        [0.6452],
        [0.6523],
        [0.6603],
        [0.6557],
        [0.6510],
        [0.6458],
        [0.6499]], device='cuda:0', grad_fn=<SigmoidBackward>)
200
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
      

216
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5850],
        [0.6443],
        [0.5954],
        [0.5616],
        [0.5772],
        [0.5865],
        [0.5443],
        [0.5553],
        [0.5667],
        [0.5968]], device='cuda:0', grad_fn=<SigmoidBackward>)
217
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6320],
        [0.6911],
        [0.6417],
        [0.7078],
        [0.7057],
        [0.6741],
        [0.6573],
        [0.7140],
        [0.6729],
        [0.6478]], device='cuda:0', grad_fn=<SigmoidBackward>)
218
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
      

234
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.8516],
        [0.8439],
        [0.8364],
        [0.8493],
        [0.8454],
        [0.8363],
        [0.8256],
        [0.8501],
        [0.8414],
        [0.8430]], device='cuda:0', grad_fn=<SigmoidBackward>)
235
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.8807],
        [0.8682],
        [0.8714],
        [0.8650],
        [0.8818],
        [0.8712],
        [0.8740],
        [0.8677],
        [0.8711],
        [0.8771]], device='cuda:0', grad_fn=<SigmoidBackward>)
236
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
      

252
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.8371],
        [0.8319],
        [0.8328],
        [0.8306],
        [0.8262],
        [0.8318],
        [0.8361],
        [0.8288],
        [0.8327],
        [0.8375]], device='cuda:0', grad_fn=<SigmoidBackward>)
253
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.8591],
        [0.8637],
        [0.8628],
        [0.8563],
        [0.8650],
        [0.8571],
        [0.8604],
        [0.8646],
        [0.8610],
        [0.8622]], device='cuda:0', grad_fn=<SigmoidBackward>)
254
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
      

270
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.7060],
        [0.7067],
        [0.7049],
        [0.7066],
        [0.7038],
        [0.7068],
        [0.7047],
        [0.7030],
        [0.7015],
        [0.6977]], device='cuda:0', grad_fn=<SigmoidBackward>)
271
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.7216],
        [0.7212],
        [0.7185],
        [0.7265],
        [0.7228],
        [0.7226],
        [0.7242],
        [0.7204],
        [0.7245],
        [0.7243]], device='cuda:0', grad_fn=<SigmoidBackward>)
272
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
      

288
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6267],
        [0.6053],
        [0.6621],
        [0.5951],
        [0.5956],
        [0.6043],
        [0.6731],
        [0.6333],
        [0.6248],
        [0.6149]], device='cuda:0', grad_fn=<SigmoidBackward>)
289
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6774],
        [0.6472],
        [0.6185],
        [0.6401],
        [0.6876],
        [0.6293],
        [0.6282],
        [0.6249],
        [0.6148],
        [0.6659]], device='cuda:0', grad_fn=<SigmoidBackward>)
290
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
      

306
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.7216],
        [0.6880],
        [0.7385],
        [0.7056],
        [0.6950],
        [0.7514],
        [0.7664],
        [0.7255],
        [0.6795],
        [0.6879]], device='cuda:0', grad_fn=<SigmoidBackward>)
307
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.7230],
        [0.7310],
        [0.7275],
        [0.7510],
        [0.7017],
        [0.7721],
        [0.7076],
        [0.7631],
        [0.7495],
        [0.6593]], device='cuda:0', grad_fn=<SigmoidBackward>)
308
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
      

324
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6718],
        [0.6734],
        [0.7093],
        [0.6570],
        [0.6921],
        [0.6558],
        [0.6188],
        [0.7667],
        [0.6608],
        [0.6598]], device='cuda:0', grad_fn=<SigmoidBackward>)
325
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5741],
        [0.6352],
        [0.6471],
        [0.5669],
        [0.5749],
        [0.6159],
        [0.6439],
        [0.6551],
        [0.5344],
        [0.6681]], device='cuda:0', grad_fn=<SigmoidBackward>)
326
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
      

342
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5259],
        [0.6045],
        [0.4989],
        [0.4880],
        [0.3789],
        [0.4874],
        [0.5090],
        [0.5542],
        [0.4146],
        [0.5333]], device='cuda:0', grad_fn=<SigmoidBackward>)
343
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.4195],
        [0.4456],
        [0.4625],
        [0.4882],
        [0.3865],
        [0.4330],
        [0.5834],
        [0.4074],
        [0.3414],
        [0.3351]], device='cuda:0', grad_fn=<SigmoidBackward>)
344
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
      

360
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5345],
        [0.5368],
        [0.5777],
        [0.6114],
        [0.5388],
        [0.5685],
        [0.6951],
        [0.5375],
        [0.6948],
        [0.4889]], device='cuda:0', grad_fn=<SigmoidBackward>)
361
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5517],
        [0.6108],
        [0.5682],
        [0.5917],
        [0.5278],
        [0.5163],
        [0.6173],
        [0.5623],
        [0.5744],
        [0.5216]], device='cuda:0', grad_fn=<SigmoidBackward>)
362
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
      

378
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.7744],
        [0.7744],
        [0.7775],
        [0.7627],
        [0.7693],
        [0.7527],
        [0.7716],
        [0.7599],
        [0.7679],
        [0.7692]], device='cuda:0', grad_fn=<SigmoidBackward>)
379
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.7438],
        [0.7288],
        [0.7495],
        [0.7588],
        [0.7394],
        [0.7453],
        [0.7374],
        [0.7480],
        [0.7457],
        [0.7276]], device='cuda:0', grad_fn=<SigmoidBackward>)
380
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
      

396
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6590],
        [0.6604],
        [0.6599],
        [0.6617],
        [0.6614],
        [0.6625],
        [0.6589],
        [0.6621],
        [0.6593],
        [0.6645]], device='cuda:0', grad_fn=<SigmoidBackward>)
397
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.6971],
        [0.6946],
        [0.6979],
        [0.6936],
        [0.6942],
        [0.6999],
        [0.6960],
        [0.6990],
        [0.7012],
        [0.6965]], device='cuda:0', grad_fn=<SigmoidBackward>)
398
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
      

414
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.5609],
        [0.5581],
        [0.5553],
        [0.5566],
        [0.5577],
        [0.5523],
        [0.5597],
        [0.5623],
        [0.5539],
        [0.5574]], device='cuda:0', grad_fn=<SigmoidBackward>)
415
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6135],
        [0.6221],
        [0.6203],
        [0.6180],
        [0.6236],
        [0.6178],
        [0.6155],
        [0.6157],
        [0.6160],
        [0.6122]], device='cuda:0', grad_fn=<SigmoidBackward>)
416
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
      

432
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.7384],
        [0.7729],
        [0.7506],
        [0.7412],
        [0.7545],
        [0.7492],
        [0.7297],
        [0.7459],
        [0.7535],
        [0.7406]], device='cuda:0', grad_fn=<SigmoidBackward>)
433
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.7852],
        [0.7846],
        [0.7892],
        [0.7743],
        [0.7792],
        [0.7843],
        [0.7922],
        [0.7654],
        [0.8050],
        [0.7957]], device='cuda:0', grad_fn=<SigmoidBackward>)
434
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
      

450
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.6166],
        [0.6013],
        [0.6256],
        [0.6210],
        [0.6161],
        [0.6191],
        [0.6143],
        [0.6088],
        [0.6071],
        [0.5949]], device='cuda:0', grad_fn=<SigmoidBackward>)
451
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5990],
        [0.5513],
        [0.6272],
        [0.6291],
        [0.5797],
        [0.6336],
        [0.6148],
        [0.5968],
        [0.5896],
        [0.5896]], device='cuda:0', grad_fn=<SigmoidBackward>)
452
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
      

468
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.7787],
        [0.7360],
        [0.7803],
        [0.7712],
        [0.7871],
        [0.7589],
        [0.7127],
        [0.7702],
        [0.7793],
        [0.7353]], device='cuda:0', grad_fn=<SigmoidBackward>)
469
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.8008],
        [0.7599],
        [0.7612],
        [0.7649],
        [0.7917],
        [0.7701],
        [0.7913],
        [0.7795],
        [0.7436],
        [0.7634]], device='cuda:0', grad_fn=<SigmoidBackward>)
470
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
      

486
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.7447],
        [0.7576],
        [0.7718],
        [0.7927],
        [0.7679],
        [0.7548],
        [0.7529],
        [0.7668],
        [0.7635],
        [0.7609]], device='cuda:0', grad_fn=<SigmoidBackward>)
487
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.8182],
        [0.8336],
        [0.8243],
        [0.8394],
        [0.8183],
        [0.8375],
        [0.8166],
        [0.8169],
        [0.8283],
        [0.8284]], device='cuda:0', grad_fn=<SigmoidBackward>)
488
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
      

504
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6528],
        [0.6427],
        [0.6444],
        [0.6397],
        [0.6295],
        [0.6254],
        [0.6569],
        [0.6571],
        [0.6593],
        [0.6559]], device='cuda:0', grad_fn=<SigmoidBackward>)
505
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.6834],
        [0.6491],
        [0.6853],
        [0.6691],
        [0.6724],
        [0.6462],
        [0.6530],
        [0.7050],
        [0.6730],
        [0.6673]], device='cuda:0', grad_fn=<SigmoidBackward>)
506
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
      

522
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5556],
        [0.5894],
        [0.5382],
        [0.5381],
        [0.5506],
        [0.5419],
        [0.5361],
        [0.5295],
        [0.5599],
        [0.5415]], device='cuda:0', grad_fn=<SigmoidBackward>)
523
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.6293],
        [0.6408],
        [0.6454],
        [0.6236],
        [0.6086],
        [0.6139],
        [0.6216],
        [0.6267],
        [0.6274],
        [0.6217]], device='cuda:0', grad_fn=<SigmoidBackward>)
524
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
      

540
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.5779],
        [0.5588],
        [0.5551],
        [0.5436],
        [0.5624],
        [0.5642],
        [0.5386],
        [0.5588],
        [0.5682],
        [0.5485]], device='cuda:0', grad_fn=<SigmoidBackward>)
541
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5630],
        [0.5501],
        [0.5394],
        [0.5632],
        [0.5585],
        [0.5426],
        [0.5567],
        [0.5467],
        [0.5481],
        [0.5542]], device='cuda:0', grad_fn=<SigmoidBackward>)
542
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
      

558
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.4329],
        [0.4312],
        [0.4303],
        [0.4331],
        [0.4396],
        [0.4376],
        [0.4341],
        [0.4417],
        [0.4388],
        [0.4315]], device='cuda:0', grad_fn=<SigmoidBackward>)
559
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.4858],
        [0.4893],
        [0.4821],
        [0.4833],
        [0.4908],
        [0.4890],
        [0.4866],
        [0.4854],
        [0.4864],
        [0.4839]], device='cuda:0', grad_fn=<SigmoidBackward>)
560
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
      

576
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6171],
        [0.6155],
        [0.6145],
        [0.6147],
        [0.6181],
        [0.6198],
        [0.6183],
        [0.6191],
        [0.6172],
        [0.6186]], device='cuda:0', grad_fn=<SigmoidBackward>)
577
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.5820],
        [0.5777],
        [0.5868],
        [0.5891],
        [0.5849],
        [0.5774],
        [0.5820],
        [0.5802],
        [0.5735],
        [0.5884]], device='cuda:0', grad_fn=<SigmoidBackward>)
578
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
      

594
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.4605],
        [0.4781],
        [0.4721],
        [0.4721],
        [0.4752],
        [0.4590],
        [0.4594],
        [0.4681],
        [0.4761],
        [0.4584]], device='cuda:0', grad_fn=<SigmoidBackward>)
595
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5073],
        [0.4944],
        [0.5044],
        [0.5120],
        [0.5205],
        [0.5071],
        [0.4997],
        [0.5029],
        [0.4979],
        [0.5099]], device='cuda:0', grad_fn=<SigmoidBackward>)
596
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
      

612
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.4877],
        [0.4895],
        [0.4885],
        [0.4882],
        [0.4878],
        [0.4894],
        [0.4877],
        [0.4879],
        [0.4886],
        [0.4910]], device='cuda:0', grad_fn=<SigmoidBackward>)
613
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5304],
        [0.5291],
        [0.5277],
        [0.5262],
        [0.5280],
        [0.5308],
        [0.5286],
        [0.5309],
        [0.5259],
        [0.5307]], device='cuda:0', grad_fn=<SigmoidBackward>)
614
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
      

630
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.5878],
        [0.5990],
        [0.6026],
        [0.5965],
        [0.5965],
        [0.5997],
        [0.5993],
        [0.5977],
        [0.5987],
        [0.6001]], device='cuda:0', grad_fn=<SigmoidBackward>)
631
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6180],
        [0.6177],
        [0.6231],
        [0.6178],
        [0.6186],
        [0.6169],
        [0.6222],
        [0.6146],
        [0.6135],
        [0.6134]], device='cuda:0', grad_fn=<SigmoidBackward>)
632
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
      

648
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6575],
        [0.6573],
        [0.6584],
        [0.6609],
        [0.6617],
        [0.6568],
        [0.6617],
        [0.6608],
        [0.6598],
        [0.6570]], device='cuda:0', grad_fn=<SigmoidBackward>)
649
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6687],
        [0.6681],
        [0.6705],
        [0.6671],
        [0.6674],
        [0.6718],
        [0.6734],
        [0.6722],
        [0.6690],
        [0.6710]], device='cuda:0', grad_fn=<SigmoidBackward>)
650
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
      

666
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6756],
        [0.6653],
        [0.6682],
        [0.6763],
        [0.6825],
        [0.6803],
        [0.6663],
        [0.6823],
        [0.6762],
        [0.6757]], device='cuda:0', grad_fn=<SigmoidBackward>)
667
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6985],
        [0.7058],
        [0.7046],
        [0.7049],
        [0.6981],
        [0.7017],
        [0.6978],
        [0.7059],
        [0.7147],
        [0.7062]], device='cuda:0', grad_fn=<SigmoidBackward>)
668
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
      

684
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.8207],
        [0.8012],
        [0.8297],
        [0.8280],
        [0.8228],
        [0.8236],
        [0.8207],
        [0.8195],
        [0.8080],
        [0.8278]], device='cuda:0', grad_fn=<SigmoidBackward>)
685
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.8466],
        [0.8399],
        [0.8349],
        [0.8396],
        [0.8429],
        [0.8447],
        [0.8438],
        [0.8402],
        [0.8407],
        [0.8489]], device='cuda:0', grad_fn=<SigmoidBackward>)
686
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
      

702
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.5973],
        [0.5960],
        [0.6137],
        [0.5935],
        [0.5780],
        [0.5952],
        [0.5864],
        [0.5821],
        [0.5802],
        [0.5878]], device='cuda:0', grad_fn=<SigmoidBackward>)
703
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6089],
        [0.6095],
        [0.6020],
        [0.5893],
        [0.5985],
        [0.5885],
        [0.6143],
        [0.6201],
        [0.5962],
        [0.5782]], device='cuda:0', grad_fn=<SigmoidBackward>)
704
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
      

720
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.6439],
        [0.6659],
        [0.6655],
        [0.6733],
        [0.6641],
        [0.6671],
        [0.6634],
        [0.6683],
        [0.6870],
        [0.6408]], device='cuda:0', grad_fn=<SigmoidBackward>)
721
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6664],
        [0.6893],
        [0.6775],
        [0.6562],
        [0.6591],
        [0.6963],
        [0.6889],
        [0.6809],
        [0.6735],
        [0.6899]], device='cuda:0', grad_fn=<SigmoidBackward>)
722
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
      

738
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.8382],
        [0.7847],
        [0.8324],
        [0.7822],
        [0.8650],
        [0.7783],
        [0.8122],
        [0.8083],
        [0.7653],
        [0.8013]], device='cuda:0', grad_fn=<SigmoidBackward>)
739
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.8445],
        [0.8029],
        [0.8434],
        [0.8063],
        [0.7971],
        [0.7805],
        [0.8368],
        [0.8076],
        [0.8011],
        [0.7797]], device='cuda:0', grad_fn=<SigmoidBackward>)
740
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
      

756
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.3639],
        [0.3638],
        [0.3408],
        [0.3834],
        [0.4459],
        [0.3751],
        [0.3697],
        [0.3189],
        [0.3602],
        [0.3433]], device='cuda:0', grad_fn=<SigmoidBackward>)
757
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.3144],
        [0.3002],
        [0.3448],
        [0.2861],
        [0.3344],
        [0.2834],
        [0.2818],
        [0.2685],
        [0.2684],
        [0.2720]], device='cuda:0', grad_fn=<SigmoidBackward>)
758
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
      

774
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.3942],
        [0.3618],
        [0.3737],
        [0.3783],
        [0.3709],
        [0.3708],
        [0.3723],
        [0.3599],
        [0.3495],
        [0.3723]], device='cuda:0', grad_fn=<SigmoidBackward>)
775
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.4153],
        [0.3958],
        [0.4070],
        [0.3683],
        [0.4130],
        [0.3857],
        [0.4517],
        [0.4002],
        [0.3989],
        [0.3932]], device='cuda:0', grad_fn=<SigmoidBackward>)
776
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
      

792
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.3629],
        [0.3562],
        [0.3600],
        [0.3538],
        [0.3563],
        [0.3487],
        [0.3544],
        [0.3510],
        [0.3446],
        [0.3425]], device='cuda:0', grad_fn=<SigmoidBackward>)
793
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.3836],
        [0.3840],
        [0.3755],
        [0.3720],
        [0.3792],
        [0.3692],
        [0.3825],
        [0.3779],
        [0.3862],
        [0.3824]], device='cuda:0', grad_fn=<SigmoidBackward>)
794
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
      

810
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.7186],
        [0.7363],
        [0.7218],
        [0.7206],
        [0.7244],
        [0.7218],
        [0.7458],
        [0.7351],
        [0.7182],
        [0.7364]], device='cuda:0', grad_fn=<SigmoidBackward>)
811
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.7725],
        [0.7696],
        [0.7690],
        [0.7731],
        [0.7851],
        [0.7609],
        [0.7713],
        [0.7732],
        [0.7885],
        [0.7876]], device='cuda:0', grad_fn=<SigmoidBackward>)
812
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
      

828
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6821],
        [0.6573],
        [0.6739],
        [0.6321],
        [0.6586],
        [0.6640],
        [0.6526],
        [0.6703],
        [0.6621],
        [0.6563]], device='cuda:0', grad_fn=<SigmoidBackward>)
829
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5933],
        [0.6146],
        [0.6299],
        [0.6005],
        [0.6080],
        [0.6193],
        [0.6126],
        [0.5699],
        [0.6201],
        [0.6024]], device='cuda:0', grad_fn=<SigmoidBackward>)
830
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
      

846
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.7438],
        [0.7574],
        [0.7436],
        [0.7491],
        [0.7384],
        [0.7759],
        [0.7791],
        [0.7573],
        [0.7690],
        [0.7472]], device='cuda:0', grad_fn=<SigmoidBackward>)
847
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.8008],
        [0.7771],
        [0.7819],
        [0.7908],
        [0.7939],
        [0.8106],
        [0.7949],
        [0.8064],
        [0.7916],
        [0.8193]], device='cuda:0', grad_fn=<SigmoidBackward>)
848
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
      

864
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.8501],
        [0.8649],
        [0.8582],
        [0.8531],
        [0.8610],
        [0.8622],
        [0.8569],
        [0.8535],
        [0.8533],
        [0.8565]], device='cuda:0', grad_fn=<SigmoidBackward>)
865
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.8754],
        [0.8694],
        [0.8725],
        [0.8679],
        [0.8656],
        [0.8563],
        [0.8617],
        [0.8658],
        [0.8733],
        [0.8661]], device='cuda:0', grad_fn=<SigmoidBackward>)
866
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
      

882
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.6625],
        [0.6835],
        [0.7007],
        [0.6917],
        [0.6868],
        [0.6826],
        [0.6898],
        [0.7053],
        [0.6769],
        [0.6967]], device='cuda:0', grad_fn=<SigmoidBackward>)
883
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6562],
        [0.6352],
        [0.6550],
        [0.6677],
        [0.6626],
        [0.6730],
        [0.6709],
        [0.6494],
        [0.6728],
        [0.6657]], device='cuda:0', grad_fn=<SigmoidBackward>)
884
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
      

900
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5460],
        [0.5532],
        [0.5784],
        [0.5872],
        [0.5765],
        [0.5751],
        [0.5741],
        [0.5815],
        [0.5577],
        [0.5617]], device='cuda:0', grad_fn=<SigmoidBackward>)
901
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.5441],
        [0.5278],
        [0.5448],
        [0.4771],
        [0.5426],
        [0.5330],
        [0.4761],
        [0.5245],
        [0.5417],
        [0.5305]], device='cuda:0', grad_fn=<SigmoidBackward>)
902
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
      

918
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.7522],
        [0.8481],
        [0.8348],
        [0.7445],
        [0.8515],
        [0.8503],
        [0.8310],
        [0.8311],
        [0.8577],
        [0.8937]], device='cuda:0', grad_fn=<SigmoidBackward>)
919
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.8395],
        [0.7725],
        [0.7876],
        [0.7220],
        [0.6804],
        [0.6988],
        [0.8344],
        [0.7877],
        [0.8058],
        [0.8035]], device='cuda:0', grad_fn=<SigmoidBackward>)
920
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
      

936
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.7179],
        [0.7456],
        [0.7068],
        [0.7664],
        [0.6886],
        [0.6968],
        [0.7236],
        [0.6906],
        [0.7531],
        [0.7581]], device='cuda:0', grad_fn=<SigmoidBackward>)
937
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.6509],
        [0.7129],
        [0.6950],
        [0.6948],
        [0.6926],
        [0.6788],
        [0.7192],
        [0.6858],
        [0.7214],
        [0.6682]], device='cuda:0', grad_fn=<SigmoidBackward>)
938
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
      

954
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5178],
        [0.5288],
        [0.5261],
        [0.5194],
        [0.5181],
        [0.5207],
        [0.5268],
        [0.5125],
        [0.5391],
        [0.5111]], device='cuda:0', grad_fn=<SigmoidBackward>)
955
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.5403],
        [0.5501],
        [0.5492],
        [0.5327],
        [0.5314],
        [0.5481],
        [0.5484],
        [0.5431],
        [0.5474],
        [0.5534]], device='cuda:0', grad_fn=<SigmoidBackward>)
956
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
      

972
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6260],
        [0.6473],
        [0.6426],
        [0.6554],
        [0.6588],
        [0.6551],
        [0.6557],
        [0.6500],
        [0.6450],
        [0.6322]], device='cuda:0', grad_fn=<SigmoidBackward>)
973
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.5825],
        [0.6045],
        [0.5803],
        [0.5655],
        [0.6001],
        [0.6003],
        [0.5980],
        [0.5789],
        [0.5781],
        [0.5849]], device='cuda:0', grad_fn=<SigmoidBackward>)
974
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
      

990
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.4595],
        [0.4543],
        [0.4682],
        [0.4520],
        [0.4565],
        [0.4542],
        [0.4337],
        [0.4562],
        [0.4550],
        [0.4599]], device='cuda:0', grad_fn=<SigmoidBackward>)
991
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.4598],
        [0.4790],
        [0.4584],
        [0.4749],
        [0.4463],
        [0.4644],
        [0.4582],
        [0.4734],
        [0.4578],
        [0.4565]], device='cuda:0', grad_fn=<SigmoidBackward>)
992
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
      

1008
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5442],
        [0.5091],
        [0.5376],
        [0.5440],
        [0.5213],
        [0.5281],
        [0.5325],
        [0.5438],
        [0.5356],
        [0.5406]], device='cuda:0', grad_fn=<SigmoidBackward>)
1009
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5398],
        [0.5565],
        [0.5526],
        [0.5616],
        [0.5372],
        [0.5395],
        [0.5452],
        [0.5382],
        [0.5653],
        [0.5214]], device='cuda:0', grad_fn=<SigmoidBackward>)
1010
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
   

1026
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.4565],
        [0.4690],
        [0.4214],
        [0.4763],
        [0.4280],
        [0.4388],
        [0.4546],
        [0.5020],
        [0.4538],
        [0.4570]], device='cuda:0', grad_fn=<SigmoidBackward>)
1027
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.4293],
        [0.4485],
        [0.4219],
        [0.4585],
        [0.4612],
        [0.4011],
        [0.4423],
        [0.4643],
        [0.4290],
        [0.4634]], device='cuda:0', grad_fn=<SigmoidBackward>)
1028
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
   

1044
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.5539],
        [0.5139],
        [0.5067],
        [0.5034],
        [0.5647],
        [0.5581],
        [0.4937],
        [0.4993],
        [0.4981],
        [0.5531]], device='cuda:0', grad_fn=<SigmoidBackward>)
1045
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5850],
        [0.6118],
        [0.5840],
        [0.5933],
        [0.5842],
        [0.5780],
        [0.5564],
        [0.5985],
        [0.5917],
        [0.5573]], device='cuda:0', grad_fn=<SigmoidBackward>)
1046
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
   

1062
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.]], device='cuda:0')
tensor([[0.5051],
        [0.5489],
        [0.5131],
        [0.5399],
        [0.5444],
        [0.4917],
        [0.5523],
        [0.5377],
        [0.5357],
        [0.4973]], device='cuda:0', grad_fn=<SigmoidBackward>)
1063
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.5555],
        [0.5463],
        [0.5572],
        [0.5583],
        [0.5653],
        [0.5783],
        [0.5592],
        [0.5519],
        [0.5356],
        [0.5198]], device='cuda:0', grad_fn=<SigmoidBackward>)
1064
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
   

1080
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.8037],
        [0.7990],
        [0.8050],
        [0.8001],
        [0.8087],
        [0.8102],
        [0.8105],
        [0.8032],
        [0.8101],
        [0.8027]], device='cuda:0', grad_fn=<SigmoidBackward>)
1081
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.7837],
        [0.7969],
        [0.7861],
        [0.7947],
        [0.7910],
        [0.7863],
        [0.7887],
        [0.7949],
        [0.7935],
        [0.7936]], device='cuda:0', grad_fn=<SigmoidBackward>)
1082
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
   

1098
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.6871],
        [0.6910],
        [0.6908],
        [0.6764],
        [0.6950],
        [0.6943],
        [0.6981],
        [0.6958],
        [0.6883],
        [0.6939]], device='cuda:0', grad_fn=<SigmoidBackward>)
1099
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.6179],
        [0.6152],
        [0.6097],
        [0.6097],
        [0.6019],
        [0.6038],
        [0.6038],
        [0.6025],
        [0.6147],
        [0.6129]], device='cuda:0', grad_fn=<SigmoidBackward>)
1100
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
   

1116
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.4281],
        [0.4325],
        [0.4318],
        [0.4332],
        [0.4377],
        [0.4441],
        [0.4268],
        [0.4456],
        [0.4319],
        [0.4295]], device='cuda:0', grad_fn=<SigmoidBackward>)
1117
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.]], device='cuda:0')
tensor([[0.3840],
        [0.3847],
        [0.3861],
        [0.3837],
        [0.3873],
        [0.3877],
        [0.3875],
        [0.3875],
        [0.3825],
        [0.3931]], device='cuda:0', grad_fn=<SigmoidBackward>)
1118
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
   

1134
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.]], device='cuda:0')
tensor([[0.5344],
        [0.5610],
        [0.5507],
        [0.5419],
        [0.5470],
        [0.5298],
        [0.5594],
        [0.5388],
        [0.5513],
        [0.5234]], device='cuda:0', grad_fn=<SigmoidBackward>)
1135
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.]], device='cuda:0')
tensor([[0.5426],
        [0.5764],
        [0.5506],
        [0.5482],
        [0.5665],
        [0.5781],
        [0.5801],
        [0.5421],
        [0.5817],
        [0.5769]], device='cuda:0', grad_fn=<SigmoidBackward>)
1136
torch.Size([10, 128]) torch.Size([10, 128])
tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
   

KeyboardInterrupt: 