In [1]:
import librosa as lr
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import logging

from torch.utils.data import Dataset, DataLoader
from matplotlib import pyplot  


class VoiceSamples(Dataset):
    """Dataset of 256x256 spectrogram tensors cut from numbered recordings.

    Recordings are looked up as ``samples_path + core_name + str(n)`` for
    n = 1, 2, ... and are processed in three steps that must run in order:

    1. ``LoadSoundSamples``       - read the recordings with librosa,
    2. ``ChopToOneSecFragments``  - cut them into one-second fragments taken
       every half second and drop fragments whose energy does not exceed
       ``noiseThreshold``,
    3. ``ChopedSignalsToTenosor`` - convert every fragment into a dB-scaled
       STFT power spectrogram cropped to 256x256 and stored as a tensor.

    ``len()`` and indexing operate on the tensors produced by step 3.
    """

    def __init__(self, core_name, samples_path=None, Automatic=None):
        """Create an empty dataset and optionally run the whole pipeline.

        Args:
            core_name: file name prefix; the recording number is appended.
            samples_path: optional prefix (directory including its trailing
                separator) that is prepended verbatim to ``core_name``.
            Automatic: when truthy, load, chop and convert immediately.
        """
        self.Log = logging.getLogger()
        logging.basicConfig(level=logging.INFO)

        # A fragment is kept only if the sum of its squared samples is
        # strictly greater than this value.
        self.noiseThreshold = 1

        self.core_name = core_name
        self.samples_path = samples_path

        # Step 1 results (parallel lists).
        self.soundSamples = []
        self.sampleRate = []
        self.path = []

        # Step 2 results (parallel lists).
        self.chopedSamples = []
        self.chopedSr = []

        # Step 3 results.
        self.tensorMelgrams = []

        self._log_info(" VoiceSamples Object successfully created ")

        if Automatic:
            self.LoadSoundSamples()
            self.ChopToOneSecFragments()
            self.ChopedSignalsToTenosor()

    def __len__(self):
        """Return the number of spectrogram tensors available."""
        return len(self.tensorMelgrams)

    def __getitem__(self, idx):
        """Return the tensor at ``idx``.

        Raises:
            IndexError: if ``idx`` is out of range (including when no tensor
                has been produced yet) instead of silently returning None.
        """
        return self.tensorMelgrams[idx]

    def _log_info(self, message):
        # Remember the last message on the instance (the public ``info``
        # attribute has always mirrored the latest log line) and log it.
        self.info = message
        self.Log.info(message)

    def LoadSoundSamples(self):
        """Load recordings number 1, 2, ... until one is missing.

        Returns:
            ``(soundSamples, sampleRate, path)`` when at least one recording
            is loaded, otherwise ``None`` (the problem is logged).
        """
        prefix = (self.samples_path or "") + self.core_name
        n = 1

        while True:
            path = prefix + str(n)
            try:
                soundSample, sampleRate = lr.load(path)
            except FileNotFoundError:
                # The first missing number marks the end of the database.
                if self.soundSamples:
                    self._log_info("That's the end of database : " + str(n - 1) + " : Samples added")
                    return self.soundSamples, self.sampleRate, self.path
                self.Log.exception("Files are missing")
                return None
            except Exception:
                self.Log.exception("Unexpected error")
                return None

            self.soundSamples.append(soundSample)
            self.sampleRate.append(sampleRate)
            self.path.append(path)
            self._log_info(" Sample : " + path + " : successfully added")
            n += 1

    def getSoundSample(self, idx):
        """Return ``(signal, sample_rate)`` of the loaded recording ``idx``."""
        return self.soundSamples[idx], self.sampleRate[idx]

    def getSoundSampleLen(self):
        """Return the number of loaded recordings.

        Returns ``None`` (after logging a warning) when the signal and
        sample-rate lists have different lengths.
        """
        try:
            if len(self.soundSamples) == len(self.sampleRate):
                return len(self.soundSamples)
            self.Log.warning("Lists: sundSamples and sampleRate are not equal!")
        except Exception:
            # Log.exception already appends the traceback; concatenating the
            # exception object to the message would itself raise TypeError.
            self.Log.exception("Unexpected error")
        return None

    @staticmethod
    def _fragment_starts(length, sr):
        # Start offsets of every full one-second window: one every half
        # second, plus a final window aligned to the end of the signal so the
        # remainder is not lost. Empty when the signal is under one second.
        if sr <= 0 or length < sr:
            return []
        step = max(1, sr // 2)
        starts = list(range(0, length - sr + 1, step))
        tail = length - sr
        if starts[-1] != tail:
            starts.append(tail)
        return starts

    def ChopToOneSecFragments(self):
        """Cut every loaded recording into one-second fragments.

        Windows start every half second; an extra window aligned to the end
        of the recording covers the remainder. Every emitted fragment is
        exactly ``sr`` samples long. Fragments rejected by
        ``checkIfNotNoise`` are logged and dropped, and recordings shorter
        than one second are skipped with a warning.

        Returns:
            ``(chopedSamples, chopedSr)`` when at least one fragment is
            stored, otherwise ``None``.
        """
        # TODO: make sure the user runs the steps in order
        try:
            if len(self.soundSamples) != len(self.sampleRate):
                self.Log.warning("Lists: sundSamples and sampleRate are not equal!")
                return None

            for idx, (soundSample, sr) in enumerate(zip(self.soundSamples, self.sampleRate)):
                sr = int(sr)
                name = self.path[idx] if idx < len(self.path) else "sample " + str(idx)

                starts = self._fragment_starts(len(soundSample), sr)
                if not starts:
                    self.Log.warning("%s : shorter than one second, skipped", name)
                    continue

                for number, start in enumerate(starts, 1):
                    fragment = soundSample[start:start + sr]
                    if self.checkIfNotNoise(fragment):
                        self.chopedSamples.append(fragment)
                        self.chopedSr.append(sr)
                        self._log_info(name + " : " + str(number) + " : successfully choped")
                    else:
                        self._log_info(name + " : " + str(number) + " : NOISE!")

            if self.chopedSamples:
                self.Log.info("Sucessfully choped all loaded signals and eliminated the noise!")
                return self.chopedSamples, self.chopedSr

        except Exception as e:
            self.e = "Unexpected error : " + str(e)
            self.Log.exception(self.e)
        return None

    def getChoped(self, idx):
        """Return ``(fragment, sample_rate)`` of the chopped fragment ``idx``."""
        return self.chopedSamples[idx], self.chopedSr[idx]

    def getChopedLen(self):
        """Return the number of chopped fragments.

        Returns ``None`` (after logging a warning) when the fragment and
        sample-rate lists have different lengths.
        """
        try:
            if len(self.chopedSamples) == len(self.chopedSr):
                return len(self.chopedSamples)
            self.Log.warning("Lists: sundSamples and sampleRate are not equal!")
        except Exception:
            self.Log.exception("Unexpected error")
        return None

    def ChopedSignalsToTenosor(self):
        """Convert every chopped fragment into a 256x256 spectrogram tensor.

        Each fragment goes through an STFT (``n_fft=512``, hop length of
        about 1/256 of a second), is converted to dB relative to its own
        maximum and cropped to the first 256 frequency bins and 256 frames.
        A fragment whose cropped spectrogram is smaller than 256x256 is
        skipped with a warning so that every stored tensor has the same shape.

        Returns:
            ``tensorMelgrams`` when at least one tensor is stored, otherwise
            ``None``.
        """
        # TODO: make sure the user runs the steps in order
        try:
            if len(self.chopedSamples) != len(self.chopedSr):
                self.Log.warning("Lists: chopedSamples and chopedSr are not equal!")
                return None

            # samples_path defaults to None; it must not be concatenated as is.
            source = self.samples_path or ""

            for idx, (fragment, sr) in enumerate(zip(self.chopedSamples, self.chopedSr)):
                # Hop length adjusted so one second gives roughly 256 frames.
                hop = max(1, round(sr / 256))
                magnitude = np.abs(lr.stft(fragment, n_fft=512, hop_length=hop))
                spectrogram_db = lr.power_to_db(magnitude ** 2, ref=np.max)

                # NOTE: despite the name this is a linear-frequency STFT
                # spectrogram; no mel filter bank is applied.
                Melgram = spectrogram_db[0:256, 0:256]
                if Melgram.shape != (256, 256):
                    self.Log.warning(
                        "ChopedSample %d : spectrogram shape %s is smaller than 256x256, skipped",
                        idx, tuple(Melgram.shape))
                    continue

                self.tensorMelgrams.append(torch.tensor(Melgram))
                self._log_info(" " + source + " : ChopedSample " + str(idx) + " : " + " : converted to tensor")

            if self.tensorMelgrams:
                self.Log.info("Sucessfully converted all ChopedSamples to Tensors!")
                return self.tensorMelgrams

        except Exception as e:
            self.e = "Unexpected error : " + str(e)
            self.Log.exception(self.e)
        return None

    def checkIfNotNoise(self, chopedSample):
        """Return True when the fragment's energy exceeds ``noiseThreshold``.

        The energy is the sum of the squared samples, computed in one
        vectorised NumPy call instead of a per-sample Python loop.
        """
        samples = np.asarray(chopedSample, dtype=np.float64)
        energy = float(np.sum(np.square(samples)))
        return bool(energy > self.noiseThreshold)

In [2]:
# Use torch.save(tensor, 'file.pt') and torch.load('file.pt')

class VoiceSamplesInput():
    """Two-speaker training set that alternates between the speakers.

    Even indices return a Krystian sample, odd indices a Nicia sample, each
    paired with its one-hot target. Index ``2*j`` and ``2*j + 1`` both use
    sample number ``j`` of the respective speaker, so the usable length is
    twice the size of the smaller per-speaker dataset.
    """

    def __init__(self):
        """Load and preprocess both speakers' recordings from ``database/``."""
        self.vsKrystian = VoiceSamples("vsKrystian", samples_path="database/Krystian/" , Automatic=True)
        self.vsNicia = VoiceSamples("vsNicia", samples_path="database/Nicia/" , Automatic=True)

        # One-hot targets: column 0 = Krystian, column 1 = Nicia.
        self.targetKrystian = torch.tensor([[float(1),float(0)]])
        self.targetNicia = torch.tensor([[float(0),float(1)]])

    def __getitem__(self, idx):
        """Return ``(sample, target)`` for position ``idx``.

        Raises:
            IndexError: if ``idx`` is not in ``range(len(self))``.
        """
        if not 0 <= idx < len(self):
            raise IndexError("VoiceSamplesInput index out of range")

        # Both members of a pair share the same per-speaker index; the former
        # (idx + 1) / 2 skipped Nicia's first sample and read one past the
        # paired range on the last odd index.
        pair = idx // 2
        if idx % 2 == 0:
            return self.vsKrystian[pair], self.targetKrystian
        return self.vsNicia[pair], self.targetNicia

    def __len__(self):
        """Return twice the size of the smaller per-speaker dataset."""
        return 2 * min(len(self.vsKrystian), len(self.vsNicia))
            




In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class VoiceRecogModel(nn.Module):
    """Small CNN: two conv + max-pool stages followed by three linear layers.

    The first linear layer expects 16 * 62 * 62 features, which is what the
    two (3x3 conv -> 2x2 max-pool) stages produce for a 1x256x256 input
    (256 -> 254 -> 127 -> 125 -> 62). The network outputs two raw scores.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor: 1 -> 6 -> 16 channels, 3x3 kernels.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # Fully connected head (affine maps y = Wx + b).
        self.fc1 = nn.Linear(16 * 62 * 62, 1000)
        self.fc2 = nn.Linear(1000, 84)
        self.fc3 = nn.Linear(84, 2)

    def forward(self, x):
        """Run the network on a batch shaped (N, 1, H, W) and return (N, 2)."""
        # Each stage: convolution, ReLU, then max pooling over a 2x2 window.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)

        # Collapse everything except the batch dimension.
        x = x.view(-1, self.num_flat_features(x))

        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the number of elements per sample (product of dims 1..)."""
        return x.size()[1:].numel()

In [4]:
import torch.optim as optim

# Build the two-speaker dataset. Constructing VoiceSamplesInput creates both
# VoiceSamples objects with Automatic=True, so this single line loads the
# recordings, chops them into fragments and converts them to tensors.
vsInput = VoiceSamplesInput()


INFO:root: VoiceSamples Object successfully created 
INFO:root: Sample : database/Krystian/vsKrystian1 : successfully added
INFO:root: Sample : database/Krystian/vsKrystian2 : successfully added
INFO:root:That's the end of database : 2 : Samples added
INFO:root:database/Krystian/vsKrystian1 : 1 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 2 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 3 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 4 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 5 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 6 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 7 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 8 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 9 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 10 : successfully choped
INFO:root:database/Krystian/vsKrystian1 : 11 : successfully choped
INFO:root:database/K

INFO:root:database/Krystian/vsKrystian2 : 94 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 95 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 96 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 97 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 98 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 99 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 100 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 101 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 102 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 103 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 104 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 105 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 106 : successfully choped
INFO:root:database/Krystian/vsKrystian2 : 107 : NOISE!
INFO:root:database/Krystian/vsKrystian2 : 108 : successfully choped

INFO:root: database/Krystian/ : ChopedSample 73 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 74 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 75 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 76 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 77 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 78 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 79 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 80 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 81 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 82 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 83 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 84 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 85 :  : converted to tensor
INFO:root: database/Krystian/ : ChopedSample 86 :  

INFO:root:database/Nicia/vsNicia1 : 37 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 38 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 39 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 40 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 41 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 42 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 43 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 44 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 45 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 46 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 47 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 48 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 49 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 50 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 51 : successfully choped
INFO:root:database/Nicia/vsNicia1 : 52 : successfully choped
INFO:root:database/Nicia

INFO:root:database/Nicia/vsNicia2 : 94 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 95 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 96 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 97 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 98 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 99 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 100 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 101 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 102 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 103 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 104 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 105 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 106 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 107 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 108 : successfully choped
INFO:root:database/Nicia/vsNicia2 : 109 : successfully choped
INFO:root:data

INFO:root: database/Nicia/ : ChopedSample 27 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 28 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 29 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 30 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 31 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 32 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 33 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 34 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 35 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 36 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 37 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 38 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 39 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 40 :  : converted to tensor
INFO:root: database/

INFO:root: database/Nicia/ : ChopedSample 144 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 145 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 146 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 147 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 148 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 149 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 150 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 151 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 152 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 153 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 154 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 155 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 156 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 157 :  : converted to tensor
INFO:r

INFO:root: database/Nicia/ : ChopedSample 260 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 261 :  : converted to tensor
INFO:root: database/Nicia/ : ChopedSample 262 :  : converted to tensor
INFO:root:Sucessfully converted all ChopedSamples to Tensors!


In [5]:
# Instantiate the network defined by VoiceRecogModel (freshly initialised weights).
net = VoiceRecogModel()

# Loss function: PyTorch's mean squared error, later applied between the
# network's two outputs and the one-hot target tensor.
criterion = nn.MSELoss()

# Optimizer: plain stochastic gradient descent over all network parameters,
# with the learning rate set to 0.05.
optimizer = optim.SGD(net.parameters(), lr=0.05)

In [6]:
import logging
logging.basicConfig(level=logging.INFO)
trainLog = logging.getLogger()

# Training loop: one optimizer step per sample (batch size 1), visiting the
# samples in dataset order.

# How many times to learn on the same dataset
epoch = 2
n_samples = len(vsInput)   # constant during training, so computed once
for i in range(epoch):
    for k in range(n_samples):

        vs, target = vsInput[k]

        # Rescale the spectrogram from dB to roughly 0..1.
        # NOTE(review): assumes values lie in [-80, 0] dB (librosa's
        # power_to_db default top_db=80 with ref=np.max) - confirm.
        vs_n = (vs+80)/80

        optimizer.zero_grad()   # zero the gradient buffers
        # Named `batch` rather than `input` so the builtin input() is not
        # shadowed for the rest of the notebook.
        batch = vs_n.view(-1,1,256,256)   # (N, channels, height, width)
        output = net(batch)

        loss = criterion(output, target)

        loss.backward()
        optimizer.step()    # Does the update


        info = "Training " + str(k)+"/"+str(n_samples)+" done"

        print("\n")
        print(target)
        print(output)
        print(loss)
        print(info)

INFO:root:Training 0/286 done


tensor([[1., 0.]])
tensor([[ 0.0654, -0.0859]], grad_fn=<AddmmBackward>)
tensor(0.4404, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[ 0.1703, -0.0620]], grad_fn=<AddmmBackward>)
tensor(0.5784, grad_fn=<MseLossBackward>)


INFO:root:Training 1/286 done
INFO:root:Training 2/286 done


tensor([[1., 0.]])
tensor([[0.1909, 0.0858]], grad_fn=<AddmmBackward>)
tensor(0.3310, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.3615, 0.1164]], grad_fn=<AddmmBackward>)
tensor(0.4557, grad_fn=<MseLossBackward>)


INFO:root:Training 3/286 done
INFO:root:Training 4/286 done


tensor([[1., 0.]])
tensor([[0.4580, 0.4778]], grad_fn=<AddmmBackward>)
tensor(0.2610, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5871, 0.2119]], grad_fn=<AddmmBackward>)
tensor(0.4829, grad_fn=<MseLossBackward>)


INFO:root:Training 5/286 done
INFO:root:Training 6/286 done


tensor([[1., 0.]])
tensor([[0.3811, 0.3664]], grad_fn=<AddmmBackward>)
tensor(0.2586, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5483, 0.3266]], grad_fn=<AddmmBackward>)
tensor(0.3771, grad_fn=<MseLossBackward>)


INFO:root:Training 7/286 done
INFO:root:Training 8/286 done


tensor([[1., 0.]])
tensor([[0.4166, 0.4861]], grad_fn=<AddmmBackward>)
tensor(0.2883, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4707, 0.3095]], grad_fn=<AddmmBackward>)
tensor(0.3492, grad_fn=<MseLossBackward>)


INFO:root:Training 9/286 done
INFO:root:Training 10/286 done


tensor([[1., 0.]])
tensor([[0.3971, 0.4056]], grad_fn=<AddmmBackward>)
tensor(0.2640, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.6201, 0.4840]], grad_fn=<AddmmBackward>)
tensor(0.3254, grad_fn=<MseLossBackward>)


INFO:root:Training 11/286 done
INFO:root:Training 12/286 done


tensor([[1., 0.]])
tensor([[0.4012, 0.5014]], grad_fn=<AddmmBackward>)
tensor(0.3050, grad_fn=<MseLossBackward>)


INFO:root:Training 13/286 done


tensor([[0., 1.]])
tensor([[0.4126, 0.3109]], grad_fn=<AddmmBackward>)
tensor(0.3225, grad_fn=<MseLossBackward>)


INFO:root:Training 14/286 done


tensor([[1., 0.]])
tensor([[0.4907, 0.5820]], grad_fn=<AddmmBackward>)
tensor(0.2991, grad_fn=<MseLossBackward>)


INFO:root:Training 15/286 done


tensor([[0., 1.]])
tensor([[0.4884, 0.3292]], grad_fn=<AddmmBackward>)
tensor(0.3442, grad_fn=<MseLossBackward>)


INFO:root:Training 16/286 done


tensor([[1., 0.]])
tensor([[0.4086, 0.4264]], grad_fn=<AddmmBackward>)
tensor(0.2658, grad_fn=<MseLossBackward>)


INFO:root:Training 17/286 done


tensor([[0., 1.]])
tensor([[0.5665, 0.4360]], grad_fn=<AddmmBackward>)
tensor(0.3195, grad_fn=<MseLossBackward>)


INFO:root:Training 18/286 done


tensor([[1., 0.]])
tensor([[0.4666, 0.5423]], grad_fn=<AddmmBackward>)
tensor(0.2893, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.3923, 0.2815]], grad_fn=<AddmmBackward>)
tensor(0.3351, grad_fn=<MseLossBackward>)


INFO:root:Training 19/286 done
INFO:root:Training 20/286 done


tensor([[1., 0.]])
tensor([[0.4892, 0.5052]], grad_fn=<AddmmBackward>)
tensor(0.2581, grad_fn=<MseLossBackward>)


INFO:root:Training 21/286 done


tensor([[0., 1.]])
tensor([[0.5027, 0.3704]], grad_fn=<AddmmBackward>)
tensor(0.3245, grad_fn=<MseLossBackward>)


INFO:root:Training 22/286 done


tensor([[1., 0.]])
tensor([[0.5123, 0.5202]], grad_fn=<AddmmBackward>)
tensor(0.2542, grad_fn=<MseLossBackward>)


INFO:root:Training 23/286 done


tensor([[0., 1.]])
tensor([[0.5172, 0.3803]], grad_fn=<AddmmBackward>)
tensor(0.3258, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4225, 0.4127]], grad_fn=<AddmmBackward>)
tensor(0.2519, grad_fn=<MseLossBackward>)


INFO:root:Training 24/286 done
INFO:root:Training 25/286 done


tensor([[0., 1.]])
tensor([[0.6565, 0.5353]], grad_fn=<AddmmBackward>)
tensor(0.3235, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.3307, 0.3516]], grad_fn=<AddmmBackward>)
tensor(0.2858, grad_fn=<MseLossBackward>)


INFO:root:Training 26/286 done
INFO:root:Training 27/286 done


tensor([[0., 1.]])
tensor([[0.4868, 0.4539]], grad_fn=<AddmmBackward>)
tensor(0.2676, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4595, 0.5621]], grad_fn=<AddmmBackward>)
tensor(0.3040, grad_fn=<MseLossBackward>)


INFO:root:Training 28/286 done
INFO:root:Training 29/286 done


tensor([[0., 1.]])
tensor([[0.4397, 0.3823]], grad_fn=<AddmmBackward>)
tensor(0.2874, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5499, 0.6520]], grad_fn=<AddmmBackward>)
tensor(0.3139, grad_fn=<MseLossBackward>)


INFO:root:Training 30/286 done
INFO:root:Training 31/286 done


tensor([[0., 1.]])
tensor([[0.3250, 0.2266]], grad_fn=<AddmmBackward>)
tensor(0.3519, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4241, 0.3998]], grad_fn=<AddmmBackward>)
tensor(0.2457, grad_fn=<MseLossBackward>)


INFO:root:Training 32/286 done
INFO:root:Training 33/286 done


tensor([[0., 1.]])
tensor([[0.5742, 0.4402]], grad_fn=<AddmmBackward>)
tensor(0.3216, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4631, 0.4780]], grad_fn=<AddmmBackward>)
tensor(0.2584, grad_fn=<MseLossBackward>)


INFO:root:Training 34/286 done
INFO:root:Training 35/286 done


tensor([[0., 1.]])
tensor([[0.4782, 0.3811]], grad_fn=<AddmmBackward>)
tensor(0.3058, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5420, 0.5510]], grad_fn=<AddmmBackward>)
tensor(0.2567, grad_fn=<MseLossBackward>)


INFO:root:Training 36/286 done
INFO:root:Training 37/286 done


tensor([[0., 1.]])
tensor([[0.4007, 0.3086]], grad_fn=<AddmmBackward>)
tensor(0.3193, grad_fn=<MseLossBackward>)


INFO:root:Training 38/286 done


tensor([[1., 0.]])
tensor([[0.5597, 0.5141]], grad_fn=<AddmmBackward>)
tensor(0.2290, grad_fn=<MseLossBackward>)


INFO:root:Training 39/286 done


tensor([[0., 1.]])
tensor([[0.4512, 0.3410]], grad_fn=<AddmmBackward>)
tensor(0.3189, grad_fn=<MseLossBackward>)


INFO:root:Training 40/286 done


tensor([[1., 0.]])
tensor([[0.5197, 0.4548]], grad_fn=<AddmmBackward>)
tensor(0.2188, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5672, 0.4275]], grad_fn=<AddmmBackward>)
tensor(0.3248, grad_fn=<MseLossBackward>)


INFO:root:Training 41/286 done
INFO:root:Training 42/286 done


tensor([[1., 0.]])
tensor([[0.5086, 0.4843]], grad_fn=<AddmmBackward>)
tensor(0.2380, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5201, 0.3788]], grad_fn=<AddmmBackward>)
tensor(0.3282, grad_fn=<MseLossBackward>)


INFO:root:Training 43/286 done
INFO:root:Training 44/286 done


tensor([[1., 0.]])
tensor([[0.4390, 0.4428]], grad_fn=<AddmmBackward>)
tensor(0.2554, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5758, 0.4533]], grad_fn=<AddmmBackward>)
tensor(0.3152, grad_fn=<MseLossBackward>)


INFO:root:Training 45/286 done
INFO:root:Training 46/286 done


tensor([[1., 0.]])
tensor([[0.4046, 0.4420]], grad_fn=<AddmmBackward>)
tensor(0.2749, grad_fn=<MseLossBackward>)


INFO:root:Training 47/286 done


tensor([[0., 1.]])
tensor([[0.5175, 0.4659]], grad_fn=<AddmmBackward>)
tensor(0.2765, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5040, 0.5425]], grad_fn=<AddmmBackward>)
tensor(0.2701, grad_fn=<MseLossBackward>)


INFO:root:Training 48/286 done
INFO:root:Training 49/286 done


tensor([[0., 1.]])
tensor([[0.4613, 0.4201]], grad_fn=<AddmmBackward>)
tensor(0.2746, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4645, 0.4866]], grad_fn=<AddmmBackward>)
tensor(0.2618, grad_fn=<MseLossBackward>)


INFO:root:Training 50/286 done
INFO:root:Training 51/286 done


tensor([[0., 1.]])
tensor([[0.5394, 0.4975]], grad_fn=<AddmmBackward>)
tensor(0.2718, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4436, 0.4692]], grad_fn=<AddmmBackward>)
tensor(0.2649, grad_fn=<MseLossBackward>)


INFO:root:Training 52/286 done
INFO:root:Training 53/286 done


tensor([[0., 1.]])
tensor([[0.5662, 0.5348]], grad_fn=<AddmmBackward>)
tensor(0.2685, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.3927, 0.4091]], grad_fn=<AddmmBackward>)
tensor(0.2681, grad_fn=<MseLossBackward>)


INFO:root:Training 54/286 done
INFO:root:Training 55/286 done


tensor([[0., 1.]])
tensor([[0.5411, 0.5342]], grad_fn=<AddmmBackward>)
tensor(0.2549, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.3472, 0.4522]], grad_fn=<AddmmBackward>)
tensor(0.3153, grad_fn=<MseLossBackward>)


INFO:root:Training 56/286 done
INFO:root:Training 57/286 done


tensor([[0., 1.]])
tensor([[0.4761, 0.4707]], grad_fn=<AddmmBackward>)
tensor(0.2534, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4247, 0.5522]], grad_fn=<AddmmBackward>)
tensor(0.3180, grad_fn=<MseLossBackward>)


INFO:root:Training 58/286 done
INFO:root:Training 59/286 done


tensor([[0., 1.]])
tensor([[0.4987, 0.4828]], grad_fn=<AddmmBackward>)
tensor(0.2581, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4279, 0.5015]], grad_fn=<AddmmBackward>)
tensor(0.2894, grad_fn=<MseLossBackward>)


INFO:root:Training 60/286 done
INFO:root:Training 61/286 done


tensor([[0., 1.]])
tensor([[0.4850, 0.4822]], grad_fn=<AddmmBackward>)
tensor(0.2516, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4437, 0.5238]], grad_fn=<AddmmBackward>)
tensor(0.2920, grad_fn=<MseLossBackward>)


INFO:root:Training 62/286 done
INFO:root:Training 63/286 done


tensor([[0., 1.]])
tensor([[0.4804, 0.4792]], grad_fn=<AddmmBackward>)
tensor(0.2510, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4803, 0.5445]], grad_fn=<AddmmBackward>)
tensor(0.2832, grad_fn=<MseLossBackward>)


INFO:root:Training 64/286 done
INFO:root:Training 65/286 done


tensor([[0., 1.]])
tensor([[0.4849, 0.4585]], grad_fn=<AddmmBackward>)
tensor(0.2642, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4892, 0.5148]], grad_fn=<AddmmBackward>)
tensor(0.2629, grad_fn=<MseLossBackward>)


INFO:root:Training 66/286 done
INFO:root:Training 67/286 done


tensor([[0., 1.]])
tensor([[0.5264, 0.4850]], grad_fn=<AddmmBackward>)
tensor(0.2711, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4082, 0.4407]], grad_fn=<AddmmBackward>)
tensor(0.2722, grad_fn=<MseLossBackward>)


INFO:root:Training 68/286 done
INFO:root:Training 69/286 done


tensor([[0., 1.]])
tensor([[0.5228, 0.5100]], grad_fn=<AddmmBackward>)
tensor(0.2567, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4546, 0.5057]], grad_fn=<AddmmBackward>)
tensor(0.2766, grad_fn=<MseLossBackward>)


INFO:root:Training 70/286 done
INFO:root:Training 71/286 done


tensor([[0., 1.]])
tensor([[0.4675, 0.4315]], grad_fn=<AddmmBackward>)
tensor(0.2709, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4560, 0.5255]], grad_fn=<AddmmBackward>)
tensor(0.2861, grad_fn=<MseLossBackward>)


INFO:root:Training 72/286 done
INFO:root:Training 73/286 done


tensor([[0., 1.]])
tensor([[0.5285, 0.4821]], grad_fn=<AddmmBackward>)
tensor(0.2738, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4457, 0.4950]], grad_fn=<AddmmBackward>)
tensor(0.2762, grad_fn=<MseLossBackward>)


INFO:root:Training 74/286 done
INFO:root:Training 75/286 done


tensor([[0., 1.]])
tensor([[0.5148, 0.4956]], grad_fn=<AddmmBackward>)
tensor(0.2597, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4916, 0.5438]], grad_fn=<AddmmBackward>)
tensor(0.2771, grad_fn=<MseLossBackward>)


INFO:root:Training 76/286 done
INFO:root:Training 77/286 done


tensor([[0., 1.]])
tensor([[0.4764, 0.4517]], grad_fn=<AddmmBackward>)
tensor(0.2638, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4178, 0.4719]], grad_fn=<AddmmBackward>)
tensor(0.2808, grad_fn=<MseLossBackward>)


INFO:root:Training 78/286 done
INFO:root:Training 79/286 done


tensor([[0., 1.]])
tensor([[0.4911, 0.4778]], grad_fn=<AddmmBackward>)
tensor(0.2569, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4634, 0.5185]], grad_fn=<AddmmBackward>)
tensor(0.2784, grad_fn=<MseLossBackward>)


INFO:root:Training 80/286 done
INFO:root:Training 81/286 done


tensor([[0., 1.]])
tensor([[0.4868, 0.4663]], grad_fn=<AddmmBackward>)
tensor(0.2609, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5044, 0.5637]], grad_fn=<AddmmBackward>)
tensor(0.2817, grad_fn=<MseLossBackward>)


INFO:root:Training 82/286 done
INFO:root:Training 83/286 done


tensor([[0., 1.]])
tensor([[0.4851, 0.4183]], grad_fn=<AddmmBackward>)
tensor(0.2868, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4861, 0.4851]], grad_fn=<AddmmBackward>)
tensor(0.2497, grad_fn=<MseLossBackward>)


INFO:root:Training 84/286 done
INFO:root:Training 85/286 done


tensor([[0., 1.]])
tensor([[0.5387, 0.4889]], grad_fn=<AddmmBackward>)
tensor(0.2757, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4510, 0.4756]], grad_fn=<AddmmBackward>)
tensor(0.2638, grad_fn=<MseLossBackward>)


INFO:root:Training 86/286 done
INFO:root:Training 87/286 done


tensor([[0., 1.]])
tensor([[0.5135, 0.4511]], grad_fn=<AddmmBackward>)
tensor(0.2825, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4548, 0.5116]], grad_fn=<AddmmBackward>)
tensor(0.2795, grad_fn=<MseLossBackward>)


INFO:root:Training 88/286 done
INFO:root:Training 89/286 done


tensor([[0., 1.]])
tensor([[0.4804, 0.4228]], grad_fn=<AddmmBackward>)
tensor(0.2820, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4676, 0.5111]], grad_fn=<AddmmBackward>)
tensor(0.2724, grad_fn=<MseLossBackward>)


INFO:root:Training 90/286 done
INFO:root:Training 91/286 done


tensor([[0., 1.]])
tensor([[0.4872, 0.4444]], grad_fn=<AddmmBackward>)
tensor(0.2730, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4653, 0.5007]], grad_fn=<AddmmBackward>)
tensor(0.2683, grad_fn=<MseLossBackward>)


INFO:root:Training 92/286 done
INFO:root:Training 93/286 done


tensor([[0., 1.]])
tensor([[0.5165, 0.4502]], grad_fn=<AddmmBackward>)
tensor(0.2846, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4383, 0.4422]], grad_fn=<AddmmBackward>)
tensor(0.2555, grad_fn=<MseLossBackward>)


INFO:root:Training 94/286 done
INFO:root:Training 95/286 done


tensor([[0., 1.]])
tensor([[0.5594, 0.4865]], grad_fn=<AddmmBackward>)
tensor(0.2883, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4239, 0.4700]], grad_fn=<AddmmBackward>)
tensor(0.2764, grad_fn=<MseLossBackward>)


INFO:root:Training 96/286 done
INFO:root:Training 97/286 done


tensor([[0., 1.]])
tensor([[0.5448, 0.5203]], grad_fn=<AddmmBackward>)
tensor(0.2634, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4367, 0.4729]], grad_fn=<AddmmBackward>)
tensor(0.2705, grad_fn=<MseLossBackward>)


INFO:root:Training 98/286 done
INFO:root:Training 99/286 done


tensor([[0., 1.]])
tensor([[0.4802, 0.4785]], grad_fn=<AddmmBackward>)
tensor(0.2513, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4767, 0.5318]], grad_fn=<AddmmBackward>)
tensor(0.2783, grad_fn=<MseLossBackward>)


INFO:root:Training 100/286 done
INFO:root:Training 101/286 done


tensor([[0., 1.]])
tensor([[0.4457, 0.4308]], grad_fn=<AddmmBackward>)
tensor(0.2613, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5219, 0.5704]], grad_fn=<AddmmBackward>)
tensor(0.2769, grad_fn=<MseLossBackward>)


INFO:root:Training 102/286 done
INFO:root:Training 103/286 done


tensor([[0., 1.]])
tensor([[0.4676, 0.4550]], grad_fn=<AddmmBackward>)
tensor(0.2579, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4848, 0.5046]], grad_fn=<AddmmBackward>)
tensor(0.2601, grad_fn=<MseLossBackward>)


INFO:root:Training 104/286 done
INFO:root:Training 105/286 done


tensor([[0., 1.]])
tensor([[0.5311, 0.4899]], grad_fn=<AddmmBackward>)
tensor(0.2711, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4347, 0.4187]], grad_fn=<AddmmBackward>)
tensor(0.2474, grad_fn=<MseLossBackward>)


INFO:root:Training 106/286 done
INFO:root:Training 107/286 done


tensor([[0., 1.]])
tensor([[0.5815, 0.5570]], grad_fn=<AddmmBackward>)
tensor(0.2672, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4593, 0.4378]], grad_fn=<AddmmBackward>)
tensor(0.2420, grad_fn=<MseLossBackward>)


INFO:root:Training 108/286 done
INFO:root:Training 109/286 done


tensor([[0., 1.]])
tensor([[0.5143, 0.5179]], grad_fn=<AddmmBackward>)
tensor(0.2485, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4723, 0.5380]], grad_fn=<AddmmBackward>)
tensor(0.2840, grad_fn=<MseLossBackward>)


INFO:root:Training 110/286 done
INFO:root:Training 111/286 done


tensor([[0., 1.]])
tensor([[0.4783, 0.4200]], grad_fn=<AddmmBackward>)
tensor(0.2826, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4243, 0.4564]], grad_fn=<AddmmBackward>)
tensor(0.2699, grad_fn=<MseLossBackward>)


INFO:root:Training 112/286 done
INFO:root:Training 113/286 done


tensor([[0., 1.]])
tensor([[0.5138, 0.4975]], grad_fn=<AddmmBackward>)
tensor(0.2583, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4336, 0.5444]], grad_fn=<AddmmBackward>)
tensor(0.3086, grad_fn=<MseLossBackward>)


INFO:root:Training 114/286 done
INFO:root:Training 115/286 done


tensor([[0., 1.]])
tensor([[0.4736, 0.4827]], grad_fn=<AddmmBackward>)
tensor(0.2460, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4694, 0.5556]], grad_fn=<AddmmBackward>)
tensor(0.2951, grad_fn=<MseLossBackward>)


INFO:root:Training 116/286 done
INFO:root:Training 117/286 done


tensor([[0., 1.]])
tensor([[0.4873, 0.4703]], grad_fn=<AddmmBackward>)
tensor(0.2590, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4470, 0.5036]], grad_fn=<AddmmBackward>)
tensor(0.2797, grad_fn=<MseLossBackward>)


INFO:root:Training 118/286 done
INFO:root:Training 119/286 done


tensor([[0., 1.]])
tensor([[0.4891, 0.4540]], grad_fn=<AddmmBackward>)
tensor(0.2687, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4800, 0.5350]], grad_fn=<AddmmBackward>)
tensor(0.2783, grad_fn=<MseLossBackward>)


INFO:root:Training 120/286 done
INFO:root:Training 121/286 done


tensor([[0., 1.]])
tensor([[0.4904, 0.4513]], grad_fn=<AddmmBackward>)
tensor(0.2708, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4540, 0.4773]], grad_fn=<AddmmBackward>)
tensor(0.2629, grad_fn=<MseLossBackward>)


INFO:root:Training 122/286 done
INFO:root:Training 123/286 done


tensor([[0., 1.]])
tensor([[0.5001, 0.4738]], grad_fn=<AddmmBackward>)
tensor(0.2635, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5114, 0.6008]], grad_fn=<AddmmBackward>)
tensor(0.2999, grad_fn=<MseLossBackward>)


INFO:root:Training 124/286 done
INFO:root:Training 125/286 done


tensor([[0., 1.]])
tensor([[0.4402, 0.3510]], grad_fn=<AddmmBackward>)
tensor(0.3075, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4744, 0.5162]], grad_fn=<AddmmBackward>)
tensor(0.2713, grad_fn=<MseLossBackward>)


INFO:root:Training 126/286 done
INFO:root:Training 127/286 done


tensor([[0., 1.]])
tensor([[0.4967, 0.4364]], grad_fn=<AddmmBackward>)
tensor(0.2822, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4478, 0.4511]], grad_fn=<AddmmBackward>)
tensor(0.2542, grad_fn=<MseLossBackward>)


INFO:root:Training 128/286 done
INFO:root:Training 129/286 done


tensor([[0., 1.]])
tensor([[0.5604, 0.5269]], grad_fn=<AddmmBackward>)
tensor(0.2690, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4575, 0.4515]], grad_fn=<AddmmBackward>)
tensor(0.2491, grad_fn=<MseLossBackward>)


INFO:root:Training 130/286 done
INFO:root:Training 131/286 done


tensor([[0., 1.]])
tensor([[0.5332, 0.5046]], grad_fn=<AddmmBackward>)
tensor(0.2649, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4717, 0.4704]], grad_fn=<AddmmBackward>)
tensor(0.2502, grad_fn=<MseLossBackward>)


INFO:root:Training 132/286 done
INFO:root:Training 133/286 done


tensor([[0., 1.]])
tensor([[0.5424, 0.4895]], grad_fn=<AddmmBackward>)
tensor(0.2775, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4578, 0.4591]], grad_fn=<AddmmBackward>)
tensor(0.2524, grad_fn=<MseLossBackward>)


INFO:root:Training 134/286 done
INFO:root:Training 135/286 done


tensor([[0., 1.]])
tensor([[0.4803, 0.4630]], grad_fn=<AddmmBackward>)
tensor(0.2595, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4975, 0.5307]], grad_fn=<AddmmBackward>)
tensor(0.2671, grad_fn=<MseLossBackward>)


INFO:root:Training 136/286 done
INFO:root:Training 137/286 done


tensor([[0., 1.]])
tensor([[0.4705, 0.4518]], grad_fn=<AddmmBackward>)
tensor(0.2609, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4810, 0.5260]], grad_fn=<AddmmBackward>)
tensor(0.2730, grad_fn=<MseLossBackward>)


INFO:root:Training 138/286 done
INFO:root:Training 139/286 done


tensor([[0., 1.]])
tensor([[0.5193, 0.4569]], grad_fn=<AddmmBackward>)
tensor(0.2823, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4486, 0.4992]], grad_fn=<AddmmBackward>)
tensor(0.2766, grad_fn=<MseLossBackward>)


INFO:root:Training 140/286 done
INFO:root:Training 141/286 done


tensor([[0., 1.]])
tensor([[0.4786, 0.4394]], grad_fn=<AddmmBackward>)
tensor(0.2717, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4718, 0.4882]], grad_fn=<AddmmBackward>)
tensor(0.2587, grad_fn=<MseLossBackward>)


INFO:root:Training 142/286 done
INFO:root:Training 143/286 done


tensor([[0., 1.]])
tensor([[0.4998, 0.4900]], grad_fn=<AddmmBackward>)
tensor(0.2550, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4405, 0.4522]], grad_fn=<AddmmBackward>)
tensor(0.2588, grad_fn=<MseLossBackward>)


INFO:root:Training 144/286 done
INFO:root:Training 145/286 done


tensor([[0., 1.]])
tensor([[0.5491, 0.5101]], grad_fn=<AddmmBackward>)
tensor(0.2707, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4721, 0.4824]], grad_fn=<AddmmBackward>)
tensor(0.2557, grad_fn=<MseLossBackward>)


INFO:root:Training 146/286 done
INFO:root:Training 147/286 done


tensor([[0., 1.]])
tensor([[0.5255, 0.5176]], grad_fn=<AddmmBackward>)
tensor(0.2544, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5106, 0.5177]], grad_fn=<AddmmBackward>)
tensor(0.2537, grad_fn=<MseLossBackward>)


INFO:root:Training 148/286 done
INFO:root:Training 149/286 done


tensor([[0., 1.]])
tensor([[0.5159, 0.4875]], grad_fn=<AddmmBackward>)
tensor(0.2644, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4709, 0.4945]], grad_fn=<AddmmBackward>)
tensor(0.2622, grad_fn=<MseLossBackward>)


INFO:root:Training 150/286 done
INFO:root:Training 151/286 done


tensor([[0., 1.]])
tensor([[0.4643, 0.3693]], grad_fn=<AddmmBackward>)
tensor(0.3067, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4920, 0.4890]], grad_fn=<AddmmBackward>)
tensor(0.2486, grad_fn=<MseLossBackward>)


INFO:root:Training 152/286 done
INFO:root:Training 153/286 done


tensor([[0., 1.]])
tensor([[0.4766, 0.3915]], grad_fn=<AddmmBackward>)
tensor(0.2987, grad_fn=<MseLossBackward>)


INFO:root:Training 154/286 done


tensor([[1., 0.]])
tensor([[0.5093, 0.5155]], grad_fn=<AddmmBackward>)
tensor(0.2533, grad_fn=<MseLossBackward>)


INFO:root:Training 155/286 done


tensor([[0., 1.]])
tensor([[0.5203, 0.4457]], grad_fn=<AddmmBackward>)
tensor(0.2890, grad_fn=<MseLossBackward>)


INFO:root:Training 156/286 done


tensor([[1., 0.]])
tensor([[0.4536, 0.4677]], grad_fn=<AddmmBackward>)
tensor(0.2587, grad_fn=<MseLossBackward>)


INFO:root:Training 157/286 done


tensor([[0., 1.]])
tensor([[0.5396, 0.4673]], grad_fn=<AddmmBackward>)
tensor(0.2875, grad_fn=<MseLossBackward>)


INFO:root:Training 158/286 done


tensor([[1., 0.]])
tensor([[0.4180, 0.4465]], grad_fn=<AddmmBackward>)
tensor(0.2690, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5399, 0.5224]], grad_fn=<AddmmBackward>)
tensor(0.2598, grad_fn=<MseLossBackward>)


INFO:root:Training 159/286 done
INFO:root:Training 160/286 done


tensor([[1., 0.]])
tensor([[0.4371, 0.4961]], grad_fn=<AddmmBackward>)
tensor(0.2815, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4989, 0.5144]], grad_fn=<AddmmBackward>)
tensor(0.2424, grad_fn=<MseLossBackward>)


INFO:root:Training 161/286 done
INFO:root:Training 162/286 done


tensor([[1., 0.]])
tensor([[0.4990, 0.4884]], grad_fn=<AddmmBackward>)
tensor(0.2448, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5033, 0.4992]], grad_fn=<AddmmBackward>)
tensor(0.2521, grad_fn=<MseLossBackward>)


INFO:root:Training 163/286 done
INFO:root:Training 164/286 done


tensor([[1., 0.]])
tensor([[0.5371, 0.5091]], grad_fn=<AddmmBackward>)
tensor(0.2367, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5114, 0.4443]], grad_fn=<AddmmBackward>)
tensor(0.2852, grad_fn=<MseLossBackward>)


INFO:root:Training 165/286 done
INFO:root:Training 166/286 done


tensor([[1., 0.]])
tensor([[0.4835, 0.5203]], grad_fn=<AddmmBackward>)
tensor(0.2687, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4861, 0.4434]], grad_fn=<AddmmBackward>)
tensor(0.2731, grad_fn=<MseLossBackward>)


INFO:root:Training 167/286 done
INFO:root:Training 168/286 done


tensor([[1., 0.]])
tensor([[0.4694, 0.5064]], grad_fn=<AddmmBackward>)
tensor(0.2690, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5080, 0.5270]], grad_fn=<AddmmBackward>)
tensor(0.2409, grad_fn=<MseLossBackward>)


INFO:root:Training 169/286 done
INFO:root:Training 170/286 done


tensor([[1., 0.]])
tensor([[0.4152, 0.4299]], grad_fn=<AddmmBackward>)
tensor(0.2634, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5117, 0.5559]], grad_fn=<AddmmBackward>)
tensor(0.2295, grad_fn=<MseLossBackward>)


INFO:root:Training 171/286 done
INFO:root:Training 172/286 done


tensor([[1., 0.]])
tensor([[0.4650, 0.5497]], grad_fn=<AddmmBackward>)
tensor(0.2942, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4683, 0.4648]], grad_fn=<AddmmBackward>)
tensor(0.2528, grad_fn=<MseLossBackward>)


INFO:root:Training 173/286 done
INFO:root:Training 174/286 done


tensor([[1., 0.]])
tensor([[0.5110, 0.5741]], grad_fn=<AddmmBackward>)
tensor(0.2843, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4452, 0.4028]], grad_fn=<AddmmBackward>)

INFO:root:Training 175/286 done



tensor(0.2774, grad_fn=<MseLossBackward>)


INFO:root:Training 176/286 done


tensor([[1., 0.]])
tensor([[0.4689, 0.5206]], grad_fn=<AddmmBackward>)
tensor(0.2766, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4836, 0.3882]], grad_fn=<AddmmBackward>)
tensor(0.3041, grad_fn=<MseLossBackward>)


INFO:root:Training 177/286 done
INFO:root:Training 178/286 done


tensor([[1., 0.]])
tensor([[0.4677, 0.4997]], grad_fn=<AddmmBackward>)
tensor(0.2665, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4814, 0.3864]], grad_fn=<AddmmBackward>)
tensor(0.3042, grad_fn=<MseLossBackward>)


INFO:root:Training 179/286 done
INFO:root:Training 180/286 done


tensor([[1., 0.]])
tensor([[0.4852, 0.4760]], grad_fn=<AddmmBackward>)
tensor(0.2458, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5386, 0.4404]], grad_fn=<AddmmBackward>)
tensor(0.3016, grad_fn=<MseLossBackward>)


INFO:root:Training 181/286 done
INFO:root:Training 182/286 done


tensor([[1., 0.]])
tensor([[0.4720, 0.4750]], grad_fn=<AddmmBackward>)
tensor(0.2522, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4902, 0.4130]], grad_fn=<AddmmBackward>)
tensor(0.2924, grad_fn=<MseLossBackward>)


INFO:root:Training 183/286 done
INFO:root:Training 184/286 done


tensor([[1., 0.]])
tensor([[0.4908, 0.5195]], grad_fn=<AddmmBackward>)
tensor(0.2646, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4658, 0.4101]], grad_fn=<AddmmBackward>)
tensor(0.2825, grad_fn=<MseLossBackward>)


INFO:root:Training 185/286 done
INFO:root:Training 186/286 done


tensor([[1., 0.]])
tensor([[0.5029, 0.5019]], grad_fn=<AddmmBackward>)
tensor(0.2495, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5051, 0.4533]], grad_fn=<AddmmBackward>)
tensor(0.2770, grad_fn=<MseLossBackward>)


INFO:root:Training 187/286 done
INFO:root:Training 188/286 done


tensor([[1., 0.]])
tensor([[0.5040, 0.4903]], grad_fn=<AddmmBackward>)
tensor(0.2432, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5430, 0.5056]], grad_fn=<AddmmBackward>)
tensor(0.2696, grad_fn=<MseLossBackward>)


INFO:root:Training 189/286 done
INFO:root:Training 190/286 done


tensor([[1., 0.]])
tensor([[0.4936, 0.4893]], grad_fn=<AddmmBackward>)
tensor(0.2480, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5464, 0.4988]], grad_fn=<AddmmBackward>)
tensor(0.2749, grad_fn=<MseLossBackward>)


INFO:root:Training 191/286 done
INFO:root:Training 192/286 done


tensor([[1., 0.]])
tensor([[0.4535, 0.4861]], grad_fn=<AddmmBackward>)
tensor(0.2675, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4900, 0.4788]], grad_fn=<AddmmBackward>)
tensor(0.2559, grad_fn=<MseLossBackward>)


INFO:root:Training 193/286 done
INFO:root:Training 194/286 done


tensor([[1., 0.]])
tensor([[0.4742, 0.5237]], grad_fn=<AddmmBackward>)
tensor(0.2754, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4867, 0.4824]], grad_fn=<AddmmBackward>)
tensor(0.2524, grad_fn=<MseLossBackward>)


INFO:root:Training 195/286 done
INFO:root:Training 196/286 done


tensor([[1., 0.]])
tensor([[0.5038, 0.5365]], grad_fn=<AddmmBackward>)
tensor(0.2670, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4924, 0.4712]], grad_fn=<AddmmBackward>)
tensor(0.2610, grad_fn=<MseLossBackward>)


INFO:root:Training 197/286 done
INFO:root:Training 198/286 done


tensor([[1., 0.]])
tensor([[0.5007, 0.5103]], grad_fn=<AddmmBackward>)
tensor(0.2549, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4736, 0.4204]], grad_fn=<AddmmBackward>)
tensor(0.2801, grad_fn=<MseLossBackward>)


INFO:root:Training 199/286 done
INFO:root:Training 200/286 done


tensor([[1., 0.]])
tensor([[0.4979, 0.5096]], grad_fn=<AddmmBackward>)
tensor(0.2559, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5082, 0.4699]], grad_fn=<AddmmBackward>)
tensor(0.2696, grad_fn=<MseLossBackward>)


INFO:root:Training 201/286 done
INFO:root:Training 202/286 done


tensor([[1., 0.]])
tensor([[0.4770, 0.4526]], grad_fn=<AddmmBackward>)
tensor(0.2392, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5439, 0.5400]], grad_fn=<AddmmBackward>)
tensor(0.2537, grad_fn=<MseLossBackward>)


INFO:root:Training 203/286 done
INFO:root:Training 204/286 done


tensor([[1., 0.]])
tensor([[0.4918, 0.4785]], grad_fn=<AddmmBackward>)
tensor(0.2436, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5201, 0.5344]], grad_fn=<AddmmBackward>)
tensor(0.2436, grad_fn=<MseLossBackward>)


INFO:root:Training 205/286 done
INFO:root:Training 206/286 done


tensor([[1., 0.]])
tensor([[0.4675, 0.5398]], grad_fn=<AddmmBackward>)
tensor(0.2875, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4771, 0.4940]], grad_fn=<AddmmBackward>)
tensor(0.2419, grad_fn=<MseLossBackward>)


INFO:root:Training 207/286 done
INFO:root:Training 208/286 done


tensor([[1., 0.]])
tensor([[0.4635, 0.4920]], grad_fn=<AddmmBackward>)
tensor(0.2650, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5168, 0.5353]], grad_fn=<AddmmBackward>)
tensor(0.2415, grad_fn=<MseLossBackward>)


INFO:root:Training 209/286 done
INFO:root:Training 210/286 done


tensor([[1., 0.]])
tensor([[0.4752, 0.4802]], grad_fn=<AddmmBackward>)
tensor(0.2530, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])


INFO:root:Training 211/286 done


tensor([[0.5518, 0.6101]], grad_fn=<AddmmBackward>)
tensor(0.2282, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4489, 0.5648]], grad_fn=<AddmmBackward>)
tensor(0.3113, grad_fn=<MseLossBackward>)


INFO:root:Training 212/286 done
INFO:root:Training 213/286 done


tensor([[0., 1.]])
tensor([[0.4706, 0.4658]], grad_fn=<AddmmBackward>)
tensor(0.2534, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4554, 0.4812]], grad_fn=<AddmmBackward>)
tensor(0.2641, grad_fn=<MseLossBackward>)


INFO:root:Training 214/286 done
INFO:root:Training 215/286 done


tensor([[0., 1.]])
tensor([[0.4791, 0.4721]], grad_fn=<AddmmBackward>)
tensor(0.2541, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5486, 0.5084]], grad_fn=<AddmmBackward>)
tensor(0.2311, grad_fn=<MseLossBackward>)


INFO:root:Training 216/286 done
INFO:root:Training 217/286 done


tensor([[0., 1.]])
tensor([[0.5515, 0.4823]], grad_fn=<AddmmBackward>)
tensor(0.2861, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4272, 0.4866]], grad_fn=<AddmmBackward>)
tensor(0.2825, grad_fn=<MseLossBackward>)


INFO:root:Training 218/286 done
INFO:root:Training 219/286 done


tensor([[0., 1.]])
tensor([[0.4506, 0.4159]], grad_fn=<AddmmBackward>)
tensor(0.2721, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4545, 0.4734]], grad_fn=<AddmmBackward>)
tensor(0.2608, grad_fn=<MseLossBackward>)


INFO:root:Training 220/286 done
INFO:root:Training 221/286 done


tensor([[0., 1.]])
tensor([[0.4883, 0.4874]], grad_fn=<AddmmBackward>)
tensor(0.2506, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4711, 0.5231]], grad_fn=<AddmmBackward>)
tensor(0.2766, grad_fn=<MseLossBackward>)


INFO:root:Training 222/286 done
INFO:root:Training 223/286 done


tensor([[0., 1.]])
tensor([[0.4927, 0.4521]], grad_fn=<AddmmBackward>)
tensor(0.2714, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4552, 0.5305]], grad_fn=<AddmmBackward>)
tensor(0.2891, grad_fn=<MseLossBackward>)


INFO:root:Training 224/286 done
INFO:root:Training 225/286 done


tensor([[0., 1.]])
tensor([[0.4472, 0.3904]], grad_fn=<AddmmBackward>)
tensor(0.2858, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4302, 0.4797]], grad_fn=<AddmmBackward>)
tensor(0.2774, grad_fn=<MseLossBackward>)


INFO:root:Training 226/286 done
INFO:root:Training 227/286 done


tensor([[0., 1.]])
tensor([[0.4653, 0.4322]], grad_fn=<AddmmBackward>)
tensor(0.2695, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4685, 0.4809]], grad_fn=<AddmmBackward>)
tensor(0.2569, grad_fn=<MseLossBackward>)


INFO:root:Training 228/286 done
INFO:root:Training 229/286 done


tensor([[0., 1.]])
tensor([[0.5053, 0.4896]], grad_fn=<AddmmBackward>)
tensor(0.2579, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4617, 0.4890]], grad_fn=<AddmmBackward>)
tensor(0.2645, grad_fn=<MseLossBackward>)


INFO:root:Training 230/286 done
INFO:root:Training 231/286 done


tensor([[0., 1.]])
tensor([[0.5258, 0.5269]], grad_fn=<AddmmBackward>)
tensor(0.2502, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4602, 0.4223]], grad_fn=<AddmmBackward>)
tensor(0.2348, grad_fn=<MseLossBackward>)


INFO:root:Training 232/286 done
INFO:root:Training 233/286 done


tensor([[0., 1.]])
tensor([[0.5354, 0.5553]], grad_fn=<AddmmBackward>)
tensor(0.2422, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5256, 0.5329]], grad_fn=<AddmmBackward>)
tensor(0.2546, grad_fn=<MseLossBackward>)


INFO:root:Training 234/286 done
INFO:root:Training 235/286 done


tensor([[0., 1.]])
tensor([[0.4812, 0.5093]], grad_fn=<AddmmBackward>)
tensor(0.2362, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5726, 0.5170]], grad_fn=<AddmmBackward>)
tensor(0.2250, grad_fn=<MseLossBackward>)


INFO:root:Training 236/286 done
INFO:root:Training 237/286 done


tensor([[0., 1.]])
tensor([[0.5714, 0.5675]], grad_fn=<AddmmBackward>)
tensor(0.2568, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5128, 0.4120]], grad_fn=<AddmmBackward>)
tensor(0.2035, grad_fn=<MseLossBackward>)


INFO:root:Training 238/286 done
INFO:root:Training 239/286 done


tensor([[0., 1.]])
tensor([[0.5941, 0.5286]], grad_fn=<AddmmBackward>)
tensor(0.2876, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4236, 0.4455]], grad_fn=<AddmmBackward>)
tensor(0.2653, grad_fn=<MseLossBackward>)


INFO:root:Training 240/286 done
INFO:root:Training 241/286 done


tensor([[0., 1.]])
tensor([[0.4735, 0.4339]], grad_fn=<AddmmBackward>)
tensor(0.2724, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4135, 0.5366]], grad_fn=<AddmmBackward>)
tensor(0.3160, grad_fn=<MseLossBackward>)


INFO:root:Training 242/286 done
INFO:root:Training 243/286 done


tensor([[0., 1.]])
tensor([[0.4262, 0.4581]], grad_fn=<AddmmBackward>)
tensor(0.2376, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4294, 0.5256]], grad_fn=<AddmmBackward>)
tensor(0.3009, grad_fn=<MseLossBackward>)


INFO:root:Training 244/286 done
INFO:root:Training 245/286 done


tensor([[0., 1.]])
tensor([[0.4386, 0.4436]], grad_fn=<AddmmBackward>)
tensor(0.2510, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4431, 0.4942]], grad_fn=<AddmmBackward>)
tensor(0.2772, grad_fn=<MseLossBackward>)


INFO:root:Training 246/286 done
INFO:root:Training 247/286 done


tensor([[0., 1.]])
tensor([[0.4645, 0.4738]], grad_fn=<AddmmBackward>)
tensor(0.2464, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4633, 0.4586]], grad_fn=<AddmmBackward>)
tensor(0.2492, grad_fn=<MseLossBackward>)


INFO:root:Training 248/286 done
INFO:root:Training 249/286 done


tensor([[0., 1.]])
tensor([[0.4860, 0.5318]], grad_fn=<AddmmBackward>)
tensor(0.2277, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4921, 0.4466]], grad_fn=<AddmmBackward>)
tensor(0.2287, grad_fn=<MseLossBackward>)


INFO:root:Training 250/286 done
INFO:root:Training 251/286 done


tensor([[0., 1.]])
tensor([[0.5276, 0.6394]], grad_fn=<AddmmBackward>)
tensor(0.2042, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5158, 0.5224]], grad_fn=<AddmmBackward>)
tensor(0.2537, grad_fn=<MseLossBackward>)


INFO:root:Training 252/286 done
INFO:root:Training 253/286 done


tensor([[0., 1.]])
tensor([[0.4996, 0.5807]], grad_fn=<AddmmBackward>)
tensor(0.2127, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5557, 0.4774]], grad_fn=<AddmmBackward>)
tensor(0.2126, grad_fn=<MseLossBackward>)


INFO:root:Training 254/286 done
INFO:root:Training 255/286 done


tensor([[0., 1.]])
tensor([[0.5145, 0.5379]], grad_fn=<AddmmBackward>)
tensor(0.2391, grad_fn=<MseLossBackward>)


INFO:root:Training 256/286 done


tensor([[1., 0.]])
tensor([[0.6113, 0.4388]], grad_fn=<AddmmBackward>)
tensor(0.1718, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5026, 0.7015]], grad_fn=<AddmmBackward>)
tensor(0.1708, grad_fn=<MseLossBackward>)


INFO:root:Training 257/286 done
INFO:root:Training 258/286 done


tensor([[1., 0.]])
tensor([[0.4164, 0.6614]], grad_fn=<AddmmBackward>)
tensor(0.3890, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4454, 0.4648]], grad_fn=<AddmmBackward>)
tensor(0.2424, grad_fn=<MseLossBackward>)


INFO:root:Training 259/286 done
INFO:root:Training 260/286 done


tensor([[1., 0.]])
tensor([[0.3966, 0.5573]], grad_fn=<AddmmBackward>)
tensor(0.3374, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4275, 0.4988]], grad_fn=<AddmmBackward>)
tensor(0.2170, grad_fn=<MseLossBackward>)


INFO:root:Training 261/286 done
INFO:root:Training 262/286 done


tensor([[1., 0.]])
tensor([[0.4832, 0.4779]], grad_fn=<AddmmBackward>)
tensor(0.2477, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5964, 0.3396]], grad_fn=<AddmmBackward>)
tensor(0.3959, grad_fn=<MseLossBackward>)


INFO:root:Training 263/286 done
INFO:root:Training 264/286 done


tensor([[1., 0.]])
tensor([[0.4503, 0.3807]], grad_fn=<AddmmBackward>)
tensor(0.2236, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4826, 0.3913]], grad_fn=<AddmmBackward>)
tensor(0.3017, grad_fn=<MseLossBackward>)


INFO:root:Training 265/286 done
INFO:root:Training 266/286 done


tensor([[1., 0.]])
tensor([[0.4649, 0.4047]], grad_fn=<AddmmBackward>)
tensor(0.2251, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5221, 0.4745]], grad_fn=<AddmmBackward>)
tensor(0.2744, grad_fn=<MseLossBackward>)


INFO:root:Training 267/286 done
INFO:root:Training 268/286 done


tensor([[1., 0.]])
tensor([[0.4750, 0.4093]], grad_fn=<AddmmBackward>)
tensor(0.2216, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4982, 0.4801]], grad_fn=<AddmmBackward>)
tensor(0.2592, grad_fn=<MseLossBackward>)


INFO:root:Training 269/286 done
INFO:root:Training 270/286 done


tensor([[1., 0.]])
tensor([[0.4901, 0.4724]], grad_fn=<AddmmBackward>)
tensor(0.2415, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5582, 0.3951]], grad_fn=<AddmmBackward>)
tensor(0.3388, grad_fn=<MseLossBackward>)


INFO:root:Training 271/286 done
INFO:root:Training 272/286 done


tensor([[1., 0.]])
tensor([[0.5272, 0.4440]], grad_fn=<AddmmBackward>)
tensor(0.2103, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.6031, 0.4297]], grad_fn=<AddmmBackward>)
tensor(0.3445, grad_fn=<MseLossBackward>)


INFO:root:Training 273/286 done
INFO:root:Training 274/286 done


tensor([[1., 0.]])
tensor([[0.4388, 0.5236]], grad_fn=<AddmmBackward>)
tensor(0.2946, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4370, 0.5093]], grad_fn=<AddmmBackward>)
tensor(0.2159, grad_fn=<MseLossBackward>)


INFO:root:Training 275/286 done
INFO:root:Training 276/286 done


tensor([[1., 0.]])
tensor([[0.4702, 0.5401]], grad_fn=<AddmmBackward>)
tensor(0.2862, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4511, 0.4469]], grad_fn=<AddmmBackward>)
tensor(0.2547, grad_fn=<MseLossBackward>)


INFO:root:Training 277/286 done
INFO:root:Training 278/286 done


tensor([[1., 0.]])
tensor([[0.4571, 0.4264]], grad_fn=<AddmmBackward>)
tensor(0.2383, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4773, 0.4533]], grad_fn=<AddmmBackward>)
tensor(0.2634, grad_fn=<MseLossBackward>)


INFO:root:Training 279/286 done
INFO:root:Training 280/286 done


tensor([[1., 0.]])
tensor([[0.4516, 0.4642]], grad_fn=<AddmmBackward>)
tensor(0.2581, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4659, 0.4700]], grad_fn=<AddmmBackward>)
tensor(0.2490, grad_fn=<MseLossBackward>)


INFO:root:Training 281/286 done
INFO:root:Training 282/286 done


tensor([[1., 0.]])
tensor([[0.4856, 0.4794]], grad_fn=<AddmmBackward>)
tensor(0.2472, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4712, 0.5443]], grad_fn=<AddmmBackward>)
tensor(0.2148, grad_fn=<MseLossBackward>)


INFO:root:Training 283/286 done
INFO:root:Training 284/286 done


tensor([[1., 0.]])
tensor([[0.5091, 0.4488]], grad_fn=<AddmmBackward>)
tensor(0.2212, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4957, 0.6174]], grad_fn=<AddmmBackward>)
tensor(0.1961, grad_fn=<MseLossBackward>)


INFO:root:Training 285/286 done
INFO:root:Training 0/286 done


tensor([[1., 0.]])
tensor([[0.5572, 0.5889]], grad_fn=<AddmmBackward>)
tensor(0.2714, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5044, 0.5460]], grad_fn=<AddmmBackward>)
tensor(0.2303, grad_fn=<MseLossBackward>)


INFO:root:Training 1/286 done
INFO:root:Training 2/286 done


tensor([[1., 0.]])
tensor([[0.5326, 0.5759]], grad_fn=<AddmmBackward>)
tensor(0.2751, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4358, 0.6374]], grad_fn=<AddmmBackward>)
tensor(0.1607, grad_fn=<MseLossBackward>)


INFO:root:Training 3/286 done
INFO:root:Training 4/286 done


tensor([[1., 0.]])
tensor([[0.6054, 0.4290]], grad_fn=<AddmmBackward>)
tensor(0.1699, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4854, 0.6819]], grad_fn=<AddmmBackward>)
tensor(0.1684, grad_fn=<MseLossBackward>)


INFO:root:Training 5/286 done
INFO:root:Training 6/286 done


tensor([[1., 0.]])
tensor([[0.5011, 0.4319]], grad_fn=<AddmmBackward>)
tensor(0.2177, grad_fn=<MseLossBackward>)


INFO:root:Training 7/286 done


tensor([[0., 1.]])
tensor([[0.3854, 0.6997]], grad_fn=<AddmmBackward>)
tensor(0.1194, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5056, 0.4940]], grad_fn=<AddmmBackward>)
tensor(0.2442, grad_fn=<MseLossBackward>)


INFO:root:Training 8/286 done
INFO:root:Training 9/286 done


tensor([[0., 1.]])
tensor([[0.3998, 0.7684]], grad_fn=<AddmmBackward>)
tensor(0.1067, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4629, 0.3684]], grad_fn=<AddmmBackward>)
tensor(0.2121, grad_fn=<MseLossBackward>)


INFO:root:Training 10/286 done
INFO:root:Training 11/286 done


tensor([[0., 1.]])
tensor([[0.3716, 0.6879]], grad_fn=<AddmmBackward>)
tensor(0.1177, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4203, 0.4906]], grad_fn=<AddmmBackward>)
tensor(0.2884, grad_fn=<MseLossBackward>)


INFO:root:Training 12/286 done
INFO:root:Training 13/286 done


tensor([[0., 1.]])
tensor([[0.4051, 0.5968]], grad_fn=<AddmmBackward>)
tensor(0.1633, grad_fn=<MseLossBackward>)


INFO:root:Training 14/286 done


tensor([[1., 0.]])
tensor([[0.4207, 0.5489]], grad_fn=<AddmmBackward>)
tensor(0.3185, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4195, 0.4562]], grad_fn=<AddmmBackward>)
tensor(0.2358, grad_fn=<MseLossBackward>)


INFO:root:Training 15/286 done
INFO:root:Training 16/286 done


tensor([[1., 0.]])
tensor([[0.5053, 0.4055]], grad_fn=<AddmmBackward>)
tensor(0.2046, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4723, 0.5680]], grad_fn=<AddmmBackward>)
tensor(0.2048, grad_fn=<MseLossBackward>)


INFO:root:Training 17/286 done
INFO:root:Training 18/286 done


tensor([[1., 0.]])
tensor([[0.5173, 0.3384]], grad_fn=<AddmmBackward>)
tensor(0.1738, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.3308, 0.8648]], grad_fn=<AddmmBackward>)
tensor(0.0639, grad_fn=<MseLossBackward>)


INFO:root:Training 19/286 done
INFO:root:Training 20/286 done


tensor([[1., 0.]])
tensor([[0.5298, 0.4676]], grad_fn=<AddmmBackward>)
tensor(0.2199, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5001, 0.5494]], grad_fn=<AddmmBackward>)
tensor(0.2266, grad_fn=<MseLossBackward>)


INFO:root:Training 21/286 done
INFO:root:Training 22/286 done


tensor([[1., 0.]])
tensor([[0.5333, 0.3672]], grad_fn=<AddmmBackward>)
tensor(0.1763, grad_fn=<MseLossBackward>)


INFO:root:Training 23/286 done


tensor([[0., 1.]])
tensor([[0.5394, 0.5766]], grad_fn=<AddmmBackward>)
tensor(0.2351, grad_fn=<MseLossBackward>)


INFO:root:Training 24/286 done


tensor([[1., 0.]])
tensor([[0.3719, 0.5427]], grad_fn=<AddmmBackward>)
tensor(0.3445, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4929, 0.3110]], grad_fn=<AddmmBackward>)
tensor(0.3588, grad_fn=<MseLossBackward>)


INFO:root:Training 25/286 done
INFO:root:Training 26/286 done


tensor([[1., 0.]])
tensor([[0.4661, 0.3693]], grad_fn=<AddmmBackward>)
tensor(0.2107, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4056, 0.5064]], grad_fn=<AddmmBackward>)
tensor(0.2041, grad_fn=<MseLossBackward>)


INFO:root:Training 27/286 done
INFO:root:Training 28/286 done


tensor([[1., 0.]])
tensor([[0.5880, 0.3917]], grad_fn=<AddmmBackward>)
tensor(0.1616, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.3692, 0.7798]], grad_fn=<AddmmBackward>)
tensor(0.0924, grad_fn=<MseLossBackward>)


INFO:root:Training 29/286 done
INFO:root:Training 30/286 done


tensor([[1., 0.]])
tensor([[0.5752, 0.4230]], grad_fn=<AddmmBackward>)
tensor(0.1797, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4093, 0.7966]], grad_fn=<AddmmBackward>)
tensor(0.1044, grad_fn=<MseLossBackward>)


INFO:root:Training 31/286 done
INFO:root:Training 32/286 done


tensor([[1., 0.]])
tensor([[0.5008, 0.4071]], grad_fn=<AddmmBackward>)
tensor(0.2075, grad_fn=<MseLossBackward>)


INFO:root:Training 33/286 done


tensor([[0., 1.]])
tensor([[0.5415, 0.5346]], grad_fn=<AddmmBackward>)
tensor(0.2549, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.3275, 0.5673]], grad_fn=<AddmmBackward>)
tensor(0.3870, grad_fn=<MseLossBackward>)


INFO:root:Training 34/286 done
INFO:root:Training 35/286 done


tensor([[0., 1.]])
tensor([[0.3765, 0.5689]], grad_fn=<AddmmBackward>)
tensor(0.1638, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4522, 0.3899]], grad_fn=<AddmmBackward>)
tensor(0.2261, grad_fn=<MseLossBackward>)


INFO:root:Training 36/286 done
INFO:root:Training 37/286 done


tensor([[0., 1.]])
tensor([[0.4436, 0.6310]], grad_fn=<AddmmBackward>)
tensor(0.1665, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5112, 0.3523]], grad_fn=<AddmmBackward>)
tensor(0.1815, grad_fn=<MseLossBackward>)


INFO:root:Training 38/286 done
INFO:root:Training 39/286 done


tensor([[0., 1.]])
tensor([[0.2575, 0.9489]], grad_fn=<AddmmBackward>)
tensor(0.0345, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6779, 0.2856]], grad_fn=<AddmmBackward>)
tensor(0.0927, grad_fn=<MseLossBackward>)


INFO:root:Training 40/286 done
INFO:root:Training 41/286 done


tensor([[0., 1.]])
tensor([[0.1280, 1.0895]], grad_fn=<AddmmBackward>)
tensor(0.0122, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.8674, 0.2422]], grad_fn=<AddmmBackward>)
tensor(0.0381, grad_fn=<MseLossBackward>)


INFO:root:Training 42/286 done
INFO:root:Training 43/286 done


tensor([[0., 1.]])
tensor([[0.5158, 0.5342]], grad_fn=<AddmmBackward>)
tensor(0.2415, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[-0.2697,  1.8214]], grad_fn=<AddmmBackward>)
tensor(2.4649, grad_fn=<MseLossBackward>)


INFO:root:Training 44/286 done
INFO:root:Training 45/286 done


tensor([[0., 1.]])
tensor([[0.5996, 0.2071]], grad_fn=<AddmmBackward>)
tensor(0.4942, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4486, 0.3018]], grad_fn=<AddmmBackward>)
tensor(0.1976, grad_fn=<MseLossBackward>)


INFO:root:Training 46/286 done
INFO:root:Training 47/286 done


tensor([[0., 1.]])
tensor([[0.4993, 0.2610]], grad_fn=<AddmmBackward>)
tensor(0.3977, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5152, 0.4278]], grad_fn=<AddmmBackward>)
tensor(0.2090, grad_fn=<MseLossBackward>)


INFO:root:Training 48/286 done
INFO:root:Training 49/286 done


tensor([[0., 1.]])
tensor([[0.5243, 0.3261]], grad_fn=<AddmmBackward>)
tensor(0.3645, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5588, 0.3883]], grad_fn=<AddmmBackward>)
tensor(0.1727, grad_fn=<MseLossBackward>)


INFO:root:Training 50/286 done
INFO:root:Training 51/286 done


tensor([[0., 1.]])
tensor([[0.5476, 0.3825]], grad_fn=<AddmmBackward>)
tensor(0.3405, grad_fn=<MseLossBackward>)


INFO:root:Training 52/286 done


tensor([[1., 0.]])
tensor([[0.5515, 0.4902]], grad_fn=<AddmmBackward>)
tensor(0.2207, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5155, 0.3797]], grad_fn=<AddmmBackward>)
tensor(0.3252, grad_fn=<MseLossBackward>)


INFO:root:Training 53/286 done
INFO:root:Training 54/286 done


tensor([[1., 0.]])
tensor([[0.5596, 0.3944]], grad_fn=<AddmmBackward>)
tensor(0.1748, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5525, 0.5138]], grad_fn=<AddmmBackward>)
tensor(0.2708, grad_fn=<MseLossBackward>)


INFO:root:Training 55/286 done
INFO:root:Training 56/286 done


tensor([[1., 0.]])
tensor([[0.4867, 0.3748]], grad_fn=<AddmmBackward>)
tensor(0.2020, grad_fn=<MseLossBackward>)


INFO:root:Training 57/286 done


tensor([[0., 1.]])
tensor([[0.4961, 0.4462]], grad_fn=<AddmmBackward>)
tensor(0.2764, grad_fn=<MseLossBackward>)


INFO:root:Training 58/286 done


tensor([[1., 0.]])
tensor([[0.4433, 0.4556]], grad_fn=<AddmmBackward>)
tensor(0.2587, grad_fn=<MseLossBackward>)


INFO:root:Training 59/286 done


tensor([[0., 1.]])
tensor([[0.4735, 0.4928]], grad_fn=<AddmmBackward>)
tensor(0.2407, grad_fn=<MseLossBackward>)


INFO:root:Training 60/286 done


tensor([[1., 0.]])
tensor([[0.4833, 0.5591]], grad_fn=<AddmmBackward>)
tensor(0.2898, grad_fn=<MseLossBackward>)


INFO:root:Training 61/286 done


tensor([[0., 1.]])
tensor([[0.4791, 0.4166]], grad_fn=<AddmmBackward>)
tensor(0.2849, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5637, 0.4362]], grad_fn=<AddmmBackward>)
tensor(0.1903, grad_fn=<MseLossBackward>)


INFO:root:Training 62/286 done
INFO:root:Training 63/286 done


tensor([[0., 1.]])
tensor([[0.5468, 0.4700]], grad_fn=<AddmmBackward>)
tensor(0.2899, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5384, 0.4807]], grad_fn=<AddmmBackward>)
tensor(0.2221, grad_fn=<MseLossBackward>)


INFO:root:Training 64/286 done
INFO:root:Training 65/286 done


tensor([[0., 1.]])
tensor([[0.5885, 0.4637]], grad_fn=<AddmmBackward>)
tensor(0.3169, grad_fn=<MseLossBackward>)


INFO:root:Training 66/286 done


tensor([[1., 0.]])
tensor([[0.4996, 0.4019]], grad_fn=<AddmmBackward>)
tensor(0.2060, grad_fn=<MseLossBackward>)


INFO:root:Training 67/286 done


tensor([[0., 1.]])
tensor([[0.4686, 0.5075]], grad_fn=<AddmmBackward>)
tensor(0.2311, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.7211, 0.3518]], grad_fn=<AddmmBackward>)
tensor(0.1008, grad_fn=<MseLossBackward>)


INFO:root:Training 68/286 done
INFO:root:Training 69/286 done


tensor([[0., 1.]])
tensor([[0.3768, 0.6919]], grad_fn=<AddmmBackward>)
tensor(0.1184, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5384, 0.5528]], grad_fn=<AddmmBackward>)
tensor(0.2593, grad_fn=<MseLossBackward>)


INFO:root:Training 70/286 done
INFO:root:Training 71/286 done


tensor([[0., 1.]])
tensor([[1.2413, 0.2143]], grad_fn=<AddmmBackward>)
tensor(1.0791, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4489, 0.3109]], grad_fn=<AddmmBackward>)
tensor(0.2002, grad_fn=<MseLossBackward>)


INFO:root:Training 72/286 done
INFO:root:Training 73/286 done


tensor([[0., 1.]])
tensor([[0.4031, 0.3829]], grad_fn=<AddmmBackward>)
tensor(0.2717, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4541, 0.3296]], grad_fn=<AddmmBackward>)
tensor(0.2033, grad_fn=<MseLossBackward>)


INFO:root:Training 74/286 done
INFO:root:Training 75/286 done


tensor([[0., 1.]])
tensor([[0.3878, 0.4846]], grad_fn=<AddmmBackward>)
tensor(0.2080, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4670, 0.3411]], grad_fn=<AddmmBackward>)
tensor(0.2002, grad_fn=<MseLossBackward>)


INFO:root:Training 76/286 done
INFO:root:Training 77/286 done


tensor([[0., 1.]])
tensor([[0.3852, 0.5229]], grad_fn=<AddmmBackward>)
tensor(0.1880, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4792, 0.3488]], grad_fn=<AddmmBackward>)
tensor(0.1964, grad_fn=<MseLossBackward>)


INFO:root:Training 78/286 done
INFO:root:Training 79/286 done


tensor([[0., 1.]])
tensor([[0.3822, 0.5572]], grad_fn=<AddmmBackward>)
tensor(0.1711, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4789, 0.3741]], grad_fn=<AddmmBackward>)
tensor(0.2057, grad_fn=<MseLossBackward>)


INFO:root:Training 80/286 done
INFO:root:Training 81/286 done


tensor([[0., 1.]])
tensor([[0.3949, 0.5463]], grad_fn=<AddmmBackward>)
tensor(0.1809, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5286, 0.3575]], grad_fn=<AddmmBackward>)
tensor(0.1750, grad_fn=<MseLossBackward>)


INFO:root:Training 82/286 done
INFO:root:Training 83/286 done


tensor([[0., 1.]])
tensor([[0.2908, 0.8849]], grad_fn=<AddmmBackward>)
tensor(0.0489, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5470, 0.3443]], grad_fn=<AddmmBackward>)
tensor(0.1619, grad_fn=<MseLossBackward>)


INFO:root:Training 84/286 done
INFO:root:Training 85/286 done


tensor([[0., 1.]])
tensor([[0.2189, 0.9427]], grad_fn=<AddmmBackward>)
tensor(0.0256, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5671, 0.3284]], grad_fn=<AddmmBackward>)
tensor(0.1476, grad_fn=<MseLossBackward>)


INFO:root:Training 86/286 done
INFO:root:Training 87/286 done


tensor([[0., 1.]])
tensor([[0.1785, 0.9341]], grad_fn=<AddmmBackward>)
tensor(0.0181, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5986, 0.3038]], grad_fn=<AddmmBackward>)
tensor(0.1267, grad_fn=<MseLossBackward>)


INFO:root:Training 88/286 done
INFO:root:Training 89/286 done


tensor([[0., 1.]])
tensor([[0.1626, 0.8785]], grad_fn=<AddmmBackward>)
tensor(0.0206, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5926, 0.3369]], grad_fn=<AddmmBackward>)
tensor(0.1397, grad_fn=<MseLossBackward>)


INFO:root:Training 90/286 done
INFO:root:Training 91/286 done


tensor([[0., 1.]])
tensor([[0.3053, 0.6544]], grad_fn=<AddmmBackward>)
tensor(0.1063, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6656, 0.2969]], grad_fn=<AddmmBackward>)
tensor(0.1000, grad_fn=<MseLossBackward>)


INFO:root:Training 92/286 done
INFO:root:Training 93/286 done


tensor([[0., 1.]])
tensor([[0.1649, 0.8940]], grad_fn=<AddmmBackward>)
tensor(0.0192, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.7405, 0.2784]], grad_fn=<AddmmBackward>)
tensor(0.0724, grad_fn=<MseLossBackward>)


INFO:root:Training 94/286 done
INFO:root:Training 95/286 done


tensor([[0., 1.]])
tensor([[0.2832, 0.7484]], grad_fn=<AddmmBackward>)
tensor(0.0718, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5662, 0.3386]], grad_fn=<AddmmBackward>)
tensor(0.1514, grad_fn=<MseLossBackward>)


INFO:root:Training 96/286 done
INFO:root:Training 97/286 done


tensor([[0., 1.]])
tensor([[0.1143, 0.9496]], grad_fn=<AddmmBackward>)
tensor(0.0078, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6873, 0.2644]], grad_fn=<AddmmBackward>)
tensor(0.0838, grad_fn=<MseLossBackward>)


INFO:root:Training 98/286 done
INFO:root:Training 99/286 done


tensor([[0., 1.]])
tensor([[0.1552, 0.8542]], grad_fn=<AddmmBackward>)
tensor(0.0227, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6151, 0.4130]], grad_fn=<AddmmBackward>)
tensor(0.1593, grad_fn=<MseLossBackward>)


INFO:root:Training 100/286 done
INFO:root:Training 101/286 done


tensor([[0., 1.]])
tensor([[0.6759, 0.2300]], grad_fn=<AddmmBackward>)
tensor(0.5249, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5925, 0.3080]], grad_fn=<AddmmBackward>)
tensor(0.1305, grad_fn=<MseLossBackward>)


INFO:root:Training 102/286 done
INFO:root:Training 103/286 done


tensor([[0., 1.]])
tensor([[0.4439, 0.4583]], grad_fn=<AddmmBackward>)
tensor(0.2453, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5828, 0.3418]], grad_fn=<AddmmBackward>)
tensor(0.1454, grad_fn=<MseLossBackward>)


INFO:root:Training 104/286 done
INFO:root:Training 105/286 done


tensor([[0., 1.]])
tensor([[0.2017, 0.8197]], grad_fn=<AddmmBackward>)
tensor(0.0366, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.8380, 0.2614]], grad_fn=<AddmmBackward>)
tensor(0.0473, grad_fn=<MseLossBackward>)


INFO:root:Training 106/286 done
INFO:root:Training 107/286 done


tensor([[0., 1.]])
tensor([[-0.1452,  1.3255]], grad_fn=<AddmmBackward>)
tensor(0.0635, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[1.0330, 0.1790]], grad_fn=<AddmmBackward>)
tensor(0.0166, grad_fn=<MseLossBackward>)


INFO:root:Training 108/286 done
INFO:root:Training 109/286 done


tensor([[0., 1.]])
tensor([[0.3557, 0.5263]], grad_fn=<AddmmBackward>)
tensor(0.1755, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6184, 0.2721]], grad_fn=<AddmmBackward>)
tensor(0.1098, grad_fn=<MseLossBackward>)


INFO:root:Training 110/286 done
INFO:root:Training 111/286 done


tensor([[0., 1.]])
tensor([[0.3786, 0.4940]], grad_fn=<AddmmBackward>)
tensor(0.1997, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6425, 0.2736]], grad_fn=<AddmmBackward>)
tensor(0.1013, grad_fn=<MseLossBackward>)


INFO:root:Training 112/286 done
INFO:root:Training 113/286 done


tensor([[0., 1.]])
tensor([[0.1868, 0.7936]], grad_fn=<AddmmBackward>)
tensor(0.0387, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6332, 0.3712]], grad_fn=<AddmmBackward>)
tensor(0.1362, grad_fn=<MseLossBackward>)


INFO:root:Training 114/286 done
INFO:root:Training 115/286 done


tensor([[0., 1.]])
tensor([[0.0868, 0.9299]], grad_fn=<AddmmBackward>)
tensor(0.0062, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.7803, 0.2130]], grad_fn=<AddmmBackward>)
tensor(0.0468, grad_fn=<MseLossBackward>)


INFO:root:Training 116/286 done
INFO:root:Training 117/286 done


tensor([[0., 1.]])
tensor([[-0.3326,  1.5578]], grad_fn=<AddmmBackward>)
tensor(0.2109, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.7899, 0.1746]], grad_fn=<AddmmBackward>)
tensor(0.0373, grad_fn=<MseLossBackward>)


INFO:root:Training 118/286 done
INFO:root:Training 119/286 done


tensor([[0., 1.]])
tensor([[0.8223, 0.1476]], grad_fn=<AddmmBackward>)
tensor(0.7013, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6499, 0.2536]], grad_fn=<AddmmBackward>)
tensor(0.0934, grad_fn=<MseLossBackward>)


INFO:root:Training 120/286 done
INFO:root:Training 121/286 done


tensor([[0., 1.]])
tensor([[0.6737, 0.2380]], grad_fn=<AddmmBackward>)
tensor(0.5172, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6185, 0.2981]], grad_fn=<AddmmBackward>)
tensor(0.1172, grad_fn=<MseLossBackward>)


INFO:root:Training 122/286 done
INFO:root:Training 123/286 done


tensor([[0., 1.]])
tensor([[0.6532, 0.2711]], grad_fn=<AddmmBackward>)
tensor(0.4790, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6054, 0.3232]], grad_fn=<AddmmBackward>)
tensor(0.1301, grad_fn=<MseLossBackward>)


INFO:root:Training 124/286 done
INFO:root:Training 125/286 done


tensor([[0., 1.]])
tensor([[0.6284, 0.3051]], grad_fn=<AddmmBackward>)
tensor(0.4389, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5896, 0.3491]], grad_fn=<AddmmBackward>)
tensor(0.1451, grad_fn=<MseLossBackward>)


INFO:root:Training 126/286 done
INFO:root:Training 127/286 done


tensor([[0., 1.]])
tensor([[0.5576, 0.3674]], grad_fn=<AddmmBackward>)
tensor(0.3556, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5792, 0.3686]], grad_fn=<AddmmBackward>)
tensor(0.1564, grad_fn=<MseLossBackward>)


INFO:root:Training 128/286 done
INFO:root:Training 129/286 done


tensor([[0., 1.]])
tensor([[0.3931, 0.5060]], grad_fn=<AddmmBackward>)
tensor(0.1993, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6242, 0.3454]], grad_fn=<AddmmBackward>)
tensor(0.1302, grad_fn=<MseLossBackward>)


INFO:root:Training 130/286 done
INFO:root:Training 131/286 done


tensor([[0., 1.]])
tensor([[0.1090, 0.8717]], grad_fn=<AddmmBackward>)
tensor(0.0142, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.7176, 0.3091]], grad_fn=<AddmmBackward>)
tensor(0.0877, grad_fn=<MseLossBackward>)


INFO:root:Training 132/286 done
INFO:root:Training 133/286 done


tensor([[0., 1.]])
tensor([[0.0133, 0.9798]], grad_fn=<AddmmBackward>)
tensor(0.0003, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.8524, 0.2664]], grad_fn=<AddmmBackward>)
tensor(0.0464, grad_fn=<MseLossBackward>)


INFO:root:Training 134/286 done
INFO:root:Training 135/286 done


tensor([[0., 1.]])
tensor([[0.3406, 0.6083]], grad_fn=<AddmmBackward>)
tensor(0.1347, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6380, 0.3126]], grad_fn=<AddmmBackward>)
tensor(0.1144, grad_fn=<MseLossBackward>)


INFO:root:Training 136/286 done
INFO:root:Training 137/286 done


tensor([[0., 1.]])
tensor([[0.2577, 0.6532]], grad_fn=<AddmmBackward>)
tensor(0.0934, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6904, 0.3045]], grad_fn=<AddmmBackward>)
tensor(0.0943, grad_fn=<MseLossBackward>)


INFO:root:Training 138/286 done
INFO:root:Training 139/286 done


tensor([[0., 1.]])
tensor([[0.0363, 0.9700]], grad_fn=<AddmmBackward>)
tensor(0.0011, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.7667, 0.2714]], grad_fn=<AddmmBackward>)
tensor(0.0640, grad_fn=<MseLossBackward>)


INFO:root:Training 140/286 done
INFO:root:Training 141/286 done


tensor([[0., 1.]])
tensor([[-0.3832,  1.5885]], grad_fn=<AddmmBackward>)
tensor(0.2466, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6732, 0.2840]], grad_fn=<AddmmBackward>)
tensor(0.0938, grad_fn=<MseLossBackward>)


INFO:root:Training 142/286 done
INFO:root:Training 143/286 done


tensor([[0., 1.]])
tensor([[0.5266, 0.3771]], grad_fn=<AddmmBackward>)
tensor(0.3327, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6677, 0.2782]], grad_fn=<AddmmBackward>)
tensor(0.0939, grad_fn=<MseLossBackward>)


INFO:root:Training 144/286 done
INFO:root:Training 145/286 done


tensor([[0., 1.]])
tensor([[0.5017, 0.3759]], grad_fn=<AddmmBackward>)
tensor(0.3206, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.9254, 0.2086]], grad_fn=<AddmmBackward>)
tensor(0.0245, grad_fn=<MseLossBackward>)


INFO:root:Training 146/286 done
INFO:root:Training 147/286 done


tensor([[0., 1.]])
tensor([[0.2053, 0.6473]], grad_fn=<AddmmBackward>)
tensor(0.0833, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.7749, 0.3675]], grad_fn=<AddmmBackward>)
tensor(0.0929, grad_fn=<MseLossBackward>)


INFO:root:Training 148/286 done
INFO:root:Training 149/286 done


tensor([[0., 1.]])
tensor([[0.0920, 0.8386]], grad_fn=<AddmmBackward>)
tensor(0.0173, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.8915, 0.2010]], grad_fn=<AddmmBackward>)
tensor(0.0261, grad_fn=<MseLossBackward>)


INFO:root:Training 150/286 done
INFO:root:Training 151/286 done


tensor([[0., 1.]])
tensor([[0.5731, 0.4077]], grad_fn=<AddmmBackward>)
tensor(0.3396, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4429, 0.4052]], grad_fn=<AddmmBackward>)
tensor(0.2373, grad_fn=<MseLossBackward>)


INFO:root:Training 152/286 done
INFO:root:Training 153/286 done


tensor([[0., 1.]])
tensor([[0.5230, 0.4253]], grad_fn=<AddmmBackward>)
tensor(0.3019, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6281, 0.3533]], grad_fn=<AddmmBackward>)
tensor(0.1315, grad_fn=<MseLossBackward>)


INFO:root:Training 154/286 done
INFO:root:Training 155/286 done


tensor([[0., 1.]])
tensor([[0.2996, 0.6486]], grad_fn=<AddmmBackward>)
tensor(0.1066, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6621, 0.3175]], grad_fn=<AddmmBackward>)
tensor(0.1075, grad_fn=<MseLossBackward>)


INFO:root:Training 156/286 done
INFO:root:Training 157/286 done


tensor([[0., 1.]])
tensor([[0.2750, 0.6785]], grad_fn=<AddmmBackward>)
tensor(0.0895, grad_fn=<MseLossBackward>)


INFO:root:Training 158/286 done


tensor([[1., 0.]])
tensor([[0.6882, 0.3144]], grad_fn=<AddmmBackward>)
tensor(0.0980, grad_fn=<MseLossBackward>)


INFO:root:Training 159/286 done


tensor([[0., 1.]])
tensor([[0.1873, 0.7978]], grad_fn=<AddmmBackward>)
tensor(0.0380, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.5352, 0.4557]], grad_fn=<AddmmBackward>)
tensor(0.2119, grad_fn=<MseLossBackward>)


INFO:root:Training 160/286 done
INFO:root:Training 161/286 done


tensor([[0., 1.]])
tensor([[0.3835, 0.5787]], grad_fn=<AddmmBackward>)
tensor(0.1623, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.8430, 0.2756]], grad_fn=<AddmmBackward>)
tensor(0.0503, grad_fn=<MseLossBackward>)


INFO:root:Training 162/286 done
INFO:root:Training 163/286 done


tensor([[0., 1.]])
tensor([[0.3689, 0.5785]], grad_fn=<AddmmBackward>)
tensor(0.1569, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6040, 0.3932]], grad_fn=<AddmmBackward>)
tensor(0.1557, grad_fn=<MseLossBackward>)


INFO:root:Training 164/286 done
INFO:root:Training 165/286 done


tensor([[0., 1.]])
tensor([[0.4218, 0.5462]], grad_fn=<AddmmBackward>)
tensor(0.1919, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[-1.5248,  3.0017]], grad_fn=<AddmmBackward>)
tensor(7.6924, grad_fn=<MseLossBackward>)


INFO:root:Training 166/286 done
INFO:root:Training 167/286 done


tensor([[0., 1.]])
tensor([[0.7936, 0.1463]], grad_fn=<AddmmBackward>)
tensor(0.6793, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.7135, 0.2008]], grad_fn=<AddmmBackward>)
tensor(0.0612, grad_fn=<MseLossBackward>)


INFO:root:Training 168/286 done
INFO:root:Training 169/286 done


tensor([[0., 1.]])
tensor([[0.7499, 0.1881]], grad_fn=<AddmmBackward>)
tensor(0.6107, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.6704, 0.2405]], grad_fn=<AddmmBackward>)
tensor(0.0832, grad_fn=<MseLossBackward>)


INFO:root:Training 170/286 done
INFO:root:Training 171/286 done


tensor([[0., 1.]])
tensor([[0.7102, 0.2250]], grad_fn=<AddmmBackward>)
tensor(0.5525, grad_fn=<MseLossBackward>)


INFO:root:Training 172/286 done


tensor([[1., 0.]])
tensor([[0.6229, 0.2739]], grad_fn=<AddmmBackward>)
tensor(0.1086, grad_fn=<MseLossBackward>)


INFO:root:Training 173/286 done


tensor([[0., 1.]])
tensor([[0.6831, 0.2539]], grad_fn=<AddmmBackward>)
tensor(0.5116, grad_fn=<MseLossBackward>)


INFO:root:Training 174/286 done


tensor([[1., 0.]])
tensor([[0.5903, 0.3057]], grad_fn=<AddmmBackward>)
tensor(0.1307, grad_fn=<MseLossBackward>)


INFO:root:Training 175/286 done


tensor([[0., 1.]])
tensor([[0.6696, 0.2783]], grad_fn=<AddmmBackward>)
tensor(0.4846, grad_fn=<MseLossBackward>)


INFO:root:Training 176/286 done


tensor([[1., 0.]])
tensor([[0.5959, 0.3241]], grad_fn=<AddmmBackward>)
tensor(0.1342, grad_fn=<MseLossBackward>)


INFO:root:Training 177/286 done


tensor([[0., 1.]])
tensor([[0.6567, 0.3002]], grad_fn=<AddmmBackward>)
tensor(0.4605, grad_fn=<MseLossBackward>)


INFO:root:Training 178/286 done


tensor([[1., 0.]])
tensor([[0.5885, 0.3439]], grad_fn=<AddmmBackward>)
tensor(0.1438, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.6365, 0.3221]], grad_fn=<AddmmBackward>)
tensor(0.4323, grad_fn=<MseLossBackward>)


INFO:root:Training 179/286 done
INFO:root:Training 180/286 done


tensor([[1., 0.]])
tensor([[0.5849, 0.3615]], grad_fn=<AddmmBackward>)
tensor(0.1515, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.6172, 0.3399]], grad_fn=<AddmmBackward>)
tensor(0.4083, grad_fn=<MseLossBackward>)


INFO:root:Training 181/286 done
INFO:root:Training 182/286 done


tensor([[1., 0.]])
tensor([[0.5695, 0.3788]], grad_fn=<AddmmBackward>)
tensor(0.1644, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.6124, 0.3553]], grad_fn=<AddmmBackward>)
tensor(0.3954, grad_fn=<MseLossBackward>)


INFO:root:Training 183/286 done
INFO:root:Training 184/286 done


tensor([[1., 0.]])
tensor([[0.5552, 0.3942]], grad_fn=<AddmmBackward>)
tensor(0.1766, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5939, 0.3699]], grad_fn=<AddmmBackward>)
tensor(0.3749, grad_fn=<MseLossBackward>)


INFO:root:Training 185/286 done
INFO:root:Training 186/286 done


tensor([[1., 0.]])
tensor([[0.5511, 0.4071]], grad_fn=<AddmmBackward>)
tensor(0.1836, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5852, 0.3815]], grad_fn=<AddmmBackward>)
tensor(0.3625, grad_fn=<MseLossBackward>)


INFO:root:Training 187/286 done
INFO:root:Training 188/286 done


tensor([[1., 0.]])
tensor([[0.5385, 0.4177]], grad_fn=<AddmmBackward>)
tensor(0.1937, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5774, 0.3941]], grad_fn=<AddmmBackward>)
tensor(0.3502, grad_fn=<MseLossBackward>)


INFO:root:Training 189/286 done
INFO:root:Training 190/286 done


tensor([[1., 0.]])
tensor([[0.5333, 0.4276]], grad_fn=<AddmmBackward>)
tensor(0.2004, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5661, 0.4048]], grad_fn=<AddmmBackward>)
tensor(0.3374, grad_fn=<MseLossBackward>)


INFO:root:Training 191/286 done
INFO:root:Training 192/286 done


tensor([[1., 0.]])
tensor([[0.5268, 0.4373]], grad_fn=<AddmmBackward>)
tensor(0.2076, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5610, 0.4135]], grad_fn=<AddmmBackward>)
tensor(0.3294, grad_fn=<MseLossBackward>)


INFO:root:Training 193/286 done
INFO:root:Training 194/286 done


tensor([[1., 0.]])
tensor([[0.5194, 0.4452]], grad_fn=<AddmmBackward>)
tensor(0.2146, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5550, 0.4219]], grad_fn=<AddmmBackward>)
tensor(0.3211, grad_fn=<MseLossBackward>)


INFO:root:Training 195/286 done
INFO:root:Training 196/286 done


tensor([[1., 0.]])
tensor([[0.5191, 0.4523]], grad_fn=<AddmmBackward>)
tensor(0.2179, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5551, 0.4298]], grad_fn=<AddmmBackward>)
tensor(0.3166, grad_fn=<MseLossBackward>)


INFO:root:Training 197/286 done
INFO:root:Training 198/286 done


tensor([[1., 0.]])
tensor([[0.5172, 0.4614]], grad_fn=<AddmmBackward>)
tensor(0.2230, grad_fn=<MseLossBackward>)


INFO:root:Training 199/286 done


tensor([[0., 1.]])
tensor([[0.5492, 0.4345]], grad_fn=<AddmmBackward>)
tensor(0.3107, grad_fn=<MseLossBackward>)


INFO:root:Training 200/286 done


tensor([[1., 0.]])
tensor([[0.5110, 0.4664]], grad_fn=<AddmmBackward>)
tensor(0.2283, grad_fn=<MseLossBackward>)


INFO:root:Training 201/286 done


tensor([[0., 1.]])
tensor([[0.5416, 0.4394]], grad_fn=<AddmmBackward>)
tensor(0.3038, grad_fn=<MseLossBackward>)


INFO:root:Training 202/286 done


tensor([[1., 0.]])
tensor([[0.5110, 0.4694]], grad_fn=<AddmmBackward>)
tensor(0.2298, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5369, 0.4448]], grad_fn=<AddmmBackward>)
tensor(0.2983, grad_fn=<MseLossBackward>)


INFO:root:Training 203/286 done
INFO:root:Training 204/286 done


tensor([[1., 0.]])
tensor([[0.5105, 0.4754]], grad_fn=<AddmmBackward>)
tensor(0.2328, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5392, 0.4493]], grad_fn=<AddmmBackward>)
tensor(0.2970, grad_fn=<MseLossBackward>)


INFO:root:Training 205/286 done
INFO:root:Training 206/286 done


tensor([[1., 0.]])
tensor([[0.4995, 0.4784]], grad_fn=<AddmmBackward>)
tensor(0.2397, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5349, 0.4528]], grad_fn=<AddmmBackward>)
tensor(0.2928, grad_fn=<MseLossBackward>)


INFO:root:Training 207/286 done
INFO:root:Training 208/286 done


tensor([[1., 0.]])
tensor([[0.5009, 0.4820]], grad_fn=<AddmmBackward>)
tensor(0.2407, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5268, 0.4567]], grad_fn=<AddmmBackward>)
tensor(0.2863, grad_fn=<MseLossBackward>)


INFO:root:Training 209/286 done
INFO:root:Training 210/286 done


tensor([[1., 0.]])
tensor([[0.5025, 0.4863]], grad_fn=<AddmmBackward>)
tensor(0.2420, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5242, 0.4616]], grad_fn=<AddmmBackward>)
tensor(0.2823, grad_fn=<MseLossBackward>)


INFO:root:Training 211/286 done
INFO:root:Training 212/286 done


tensor([[1., 0.]])
tensor([[0.4813, 0.4886]], grad_fn=<AddmmBackward>)
tensor(0.2539, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5271, 0.4627]], grad_fn=<AddmmBackward>)
tensor(0.2833, grad_fn=<MseLossBackward>)


INFO:root:Training 213/286 done
INFO:root:Training 214/286 done


tensor([[1., 0.]])
tensor([[0.4875, 0.4919]], grad_fn=<AddmmBackward>)
tensor(0.2523, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5244, 0.4647]], grad_fn=<AddmmBackward>)
tensor(0.2808, grad_fn=<MseLossBackward>)


INFO:root:Training 215/286 done
INFO:root:Training 216/286 done


tensor([[1., 0.]])
tensor([[0.5050, 0.4962]], grad_fn=<AddmmBackward>)
tensor(0.2456, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5299, 0.4708]], grad_fn=<AddmmBackward>)
tensor(0.2804, grad_fn=<MseLossBackward>)


INFO:root:Training 217/286 done
INFO:root:Training 218/286 done


tensor([[1., 0.]])
tensor([[0.5013, 0.4964]], grad_fn=<AddmmBackward>)
tensor(0.2475, grad_fn=<MseLossBackward>)


INFO:root:Training 219/286 done


tensor([[0., 1.]])
tensor([[0.5313, 0.4703]], grad_fn=<AddmmBackward>)
tensor(0.2814, grad_fn=<MseLossBackward>)


INFO:root:Training 220/286 done


tensor([[1., 0.]])
tensor([[0.4977, 0.4985]], grad_fn=<AddmmBackward>)
tensor(0.2504, grad_fn=<MseLossBackward>)


INFO:root:Training 221/286 done


tensor([[0., 1.]])
tensor([[0.5215, 0.4716]], grad_fn=<AddmmBackward>)
tensor(0.2756, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4822, 0.5000]], grad_fn=<AddmmBackward>)
tensor(0.2591, grad_fn=<MseLossBackward>)


INFO:root:Training 222/286 done
INFO:root:Training 223/286 done


tensor([[0., 1.]])
tensor([[0.5199, 0.4737]], grad_fn=<AddmmBackward>)
tensor(0.2736, grad_fn=<MseLossBackward>)


INFO:root:Training 224/286 done


tensor([[1., 0.]])
tensor([[0.4789, 0.5018]], grad_fn=<AddmmBackward>)
tensor(0.2617, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5280, 0.4738]], grad_fn=<AddmmBackward>)
tensor(0.2778, grad_fn=<MseLossBackward>)


INFO:root:Training 225/286 done
INFO:root:Training 226/286 done


tensor([[1., 0.]])
tensor([[0.4850, 0.5020]], grad_fn=<AddmmBackward>)
tensor(0.2586, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5199, 0.4733]], grad_fn=<AddmmBackward>)
tensor(0.2739, grad_fn=<MseLossBackward>)


INFO:root:Training 227/286 done
INFO:root:Training 228/286 done


tensor([[1., 0.]])
tensor([[0.4952, 0.5017]], grad_fn=<AddmmBackward>)
tensor(0.2533, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5215, 0.4770]], grad_fn=<AddmmBackward>)
tensor(0.2728, grad_fn=<MseLossBackward>)


INFO:root:Training 229/286 done
INFO:root:Training 230/286 done


tensor([[1., 0.]])
tensor([[0.4879, 0.5026]], grad_fn=<AddmmBackward>)
tensor(0.2575, grad_fn=<MseLossBackward>)


INFO:root:Training 231/286 done


tensor([[0., 1.]])
tensor([[0.5176, 0.4775]], grad_fn=<AddmmBackward>)
tensor(0.2704, grad_fn=<MseLossBackward>)


INFO:root:Training 232/286 done


tensor([[1., 0.]])
tensor([[0.5031, 0.5058]], grad_fn=<AddmmBackward>)
tensor(0.2514, grad_fn=<MseLossBackward>)


INFO:root:Training 233/286 done


tensor([[0., 1.]])
tensor([[0.5174, 0.4790]], grad_fn=<AddmmBackward>)
tensor(0.2696, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4953, 0.5092]], grad_fn=<AddmmBackward>)
tensor(0.2570, grad_fn=<MseLossBackward>)


INFO:root:Training 234/286 done
INFO:root:Training 235/286 done


tensor([[0., 1.]])
tensor([[0.5108, 0.4812]], grad_fn=<AddmmBackward>)
tensor(0.2650, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4924, 0.5096]], grad_fn=<AddmmBackward>)
tensor(0.2587, grad_fn=<MseLossBackward>)


INFO:root:Training 236/286 done
INFO:root:Training 237/286 done


tensor([[0., 1.]])
tensor([[0.5298, 0.4812]], grad_fn=<AddmmBackward>)
tensor(0.2749, grad_fn=<MseLossBackward>)


INFO:root:Training 238/286 done


tensor([[1., 0.]])
tensor([[0.5107, 0.5087]], grad_fn=<AddmmBackward>)
tensor(0.2491, grad_fn=<MseLossBackward>)


INFO:root:Training 239/286 done


tensor([[0., 1.]])
tensor([[0.5438, 0.4807]], grad_fn=<AddmmBackward>)
tensor(0.2827, grad_fn=<MseLossBackward>)


INFO:root:Training 240/286 done


tensor([[1., 0.]])
tensor([[0.4914, 0.5080]], grad_fn=<AddmmBackward>)
tensor(0.2584, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5236, 0.4791]], grad_fn=<AddmmBackward>)
tensor(0.2728, grad_fn=<MseLossBackward>)


INFO:root:Training 241/286 done
INFO:root:Training 242/286 done


tensor([[1., 0.]])
tensor([[0.4699, 0.5070]], grad_fn=<AddmmBackward>)
tensor(0.2691, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5030, 0.4798]], grad_fn=<AddmmBackward>)
tensor(0.2618, grad_fn=<MseLossBackward>)


INFO:root:Training 243/286 done
INFO:root:Training 244/286 done


tensor([[1., 0.]])
tensor([[0.4730, 0.5086]], grad_fn=<AddmmBackward>)
tensor(0.2682, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5156, 0.4807]], grad_fn=<AddmmBackward>)
tensor(0.2677, grad_fn=<MseLossBackward>)


INFO:root:Training 245/286 done
INFO:root:Training 246/286 done


tensor([[1., 0.]])
tensor([[0.4786, 0.5080]], grad_fn=<AddmmBackward>)
tensor(0.2649, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5163, 0.4810]], grad_fn=<AddmmBackward>)
tensor(0.2679, grad_fn=<MseLossBackward>)


INFO:root:Training 247/286 done
INFO:root:Training 248/286 done


tensor([[1., 0.]])
tensor([[0.4850, 0.5097]], grad_fn=<AddmmBackward>)
tensor(0.2625, grad_fn=<MseLossBackward>)


INFO:root:Training 249/286 done


tensor([[0., 1.]])
tensor([[0.5147, 0.4822]], grad_fn=<AddmmBackward>)
tensor(0.2665, grad_fn=<MseLossBackward>)


INFO:root:Training 250/286 done


tensor([[1., 0.]])
tensor([[0.4849, 0.5083]], grad_fn=<AddmmBackward>)
tensor(0.2619, grad_fn=<MseLossBackward>)


INFO:root:Training 251/286 done


tensor([[0., 1.]])
tensor([[0.5100, 0.4822]], grad_fn=<AddmmBackward>)
tensor(0.2641, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4790, 0.5086]], grad_fn=<AddmmBackward>)
tensor(0.2651, grad_fn=<MseLossBackward>)


INFO:root:Training 252/286 done
INFO:root:Training 253/286 done


tensor([[0., 1.]])
tensor([[0.5069, 0.4828]], grad_fn=<AddmmBackward>)
tensor(0.2622, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4883, 0.5091]], grad_fn=<AddmmBackward>)
tensor(0.2605, grad_fn=<MseLossBackward>)


INFO:root:Training 254/286 done
INFO:root:Training 255/286 done


tensor([[0., 1.]])
tensor([[0.5110, 0.4828]], grad_fn=<AddmmBackward>)
tensor(0.2643, grad_fn=<MseLossBackward>)
tensor([[1., 0.]])
tensor([[0.4943, 0.5099]], grad_fn=<AddmmBackward>)
tensor(0.2579, grad_fn=<MseLossBackward>)


INFO:root:Training 256/286 done
INFO:root:Training 257/286 done


tensor([[0., 1.]])
tensor([[0.5035, 0.4852]], grad_fn=<AddmmBackward>)
tensor(0.2593, grad_fn=<MseLossBackward>)


INFO:root:Training 258/286 done


tensor([[1., 0.]])
tensor([[0.4734, 0.5127]], grad_fn=<AddmmBackward>)
tensor(0.2701, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5033, 0.4859]], grad_fn=<AddmmBackward>)
tensor(0.2588, grad_fn=<MseLossBackward>)


INFO:root:Training 259/286 done
INFO:root:Training 260/286 done


tensor([[1., 0.]])
tensor([[0.4702, 0.5133]], grad_fn=<AddmmBackward>)
tensor(0.2721, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.4921, 0.4863]], grad_fn=<AddmmBackward>)
tensor(0.2530, grad_fn=<MseLossBackward>)


INFO:root:Training 261/286 done
INFO:root:Training 262/286 done


tensor([[1., 0.]])
tensor([[0.4887, 0.5135]], grad_fn=<AddmmBackward>)
tensor(0.2626, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5317, 0.4855]], grad_fn=<AddmmBackward>)
tensor(0.2737, grad_fn=<MseLossBackward>)


INFO:root:Training 263/286 done
INFO:root:Training 264/286 done


tensor([[1., 0.]])
tensor([[0.4867, 0.5125]], grad_fn=<AddmmBackward>)
tensor(0.2630, grad_fn=<MseLossBackward>)


INFO:root:Training 265/286 done


tensor([[0., 1.]])
tensor([[0.5325, 0.4845]], grad_fn=<AddmmBackward>)
tensor(0.2746, grad_fn=<MseLossBackward>)


INFO:root:Training 266/286 done


tensor([[1., 0.]])
tensor([[0.4855, 0.5124]], grad_fn=<AddmmBackward>)
tensor(0.2636, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5212, 0.4840]], grad_fn=<AddmmBackward>)
tensor(0.2690, grad_fn=<MseLossBackward>)


INFO:root:Training 267/286 done
INFO:root:Training 268/286 done


tensor([[1., 0.]])
tensor([[0.4866, 0.5108]], grad_fn=<AddmmBackward>)
tensor(0.2622, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5117, 0.4849]], grad_fn=<AddmmBackward>)
tensor(0.2636, grad_fn=<MseLossBackward>)


INFO:root:Training 269/286 done
INFO:root:Training 270/286 done


tensor([[1., 0.]])
tensor([[0.4926, 0.5121]], grad_fn=<AddmmBackward>)
tensor(0.2599, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5309, 0.4849]], grad_fn=<AddmmBackward>)
tensor(0.2736, grad_fn=<MseLossBackward>)


INFO:root:Training 271/286 done
INFO:root:Training 272/286 done


tensor([[1., 0.]])
tensor([[0.4913, 0.5097]], grad_fn=<AddmmBackward>)
tensor(0.2593, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5397, 0.4844]], grad_fn=<AddmmBackward>)
tensor(0.2785, grad_fn=<MseLossBackward>)


INFO:root:Training 273/286 done
INFO:root:Training 274/286 done


tensor([[1., 0.]])
tensor([[0.4814, 0.5121]], grad_fn=<AddmmBackward>)
tensor(0.2656, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5087, 0.4849]], grad_fn=<AddmmBackward>)
tensor(0.2620, grad_fn=<MseLossBackward>)


INFO:root:Training 275/286 done
INFO:root:Training 276/286 done


tensor([[1., 0.]])
tensor([[0.4756, 0.5126]], grad_fn=<AddmmBackward>)
tensor(0.2689, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5191, 0.4849]], grad_fn=<AddmmBackward>)
tensor(0.2674, grad_fn=<MseLossBackward>)


INFO:root:Training 277/286 done
INFO:root:Training 278/286 done


tensor([[1., 0.]])
tensor([[0.4909, 0.5110]], grad_fn=<AddmmBackward>)
tensor(0.2601, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5249, 0.4852]], grad_fn=<AddmmBackward>)
tensor(0.2703, grad_fn=<MseLossBackward>)


INFO:root:Training 279/286 done
INFO:root:Training 280/286 done


tensor([[1., 0.]])
tensor([[0.4856, 0.5131]], grad_fn=<AddmmBackward>)
tensor(0.2639, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5119, 0.4840]], grad_fn=<AddmmBackward>)
tensor(0.2641, grad_fn=<MseLossBackward>)


INFO:root:Training 281/286 done
INFO:root:Training 282/286 done


tensor([[1., 0.]])
tensor([[0.4790, 0.5120]], grad_fn=<AddmmBackward>)
tensor(0.2668, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5097, 0.4851]], grad_fn=<AddmmBackward>)
tensor(0.2624, grad_fn=<MseLossBackward>)


INFO:root:Training 283/286 done
INFO:root:Training 284/286 done


tensor([[1., 0.]])
tensor([[0.4876, 0.5118]], grad_fn=<AddmmBackward>)
tensor(0.2622, grad_fn=<MseLossBackward>)
tensor([[0., 1.]])
tensor([[0.5060, 0.4860]], grad_fn=<AddmmBackward>)
tensor(0.2601, grad_fn=<MseLossBackward>)


INFO:root:Training 285/286 done
