# layers.py

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math

class myRNN(nn.Module):
    def __init__(self,input_size,hidden_size,num_layers,dp=0,bd=False):
        super(myRNN,self).__init__()
        self.hidden_dim = hidden_size
        self.n_layers = num_layers
        self.RNN = nn.GRU(input_size = input_size,hidden_size=hidden_size,num_layers=num_layers,dropout=dp,batch_first=True,bidirectional=bd)
       
    def forward(self,x,h0=None):
        out,h = self.RNN(x,h0)
        return out,h

class attention(nn.Module):
    def __init__(self,qembed_dim, kembed_dim=None, vembed_dim=None, hidden_dim=None, out_dim=None, dropout=0):
        super(attention, self).__init__()
        if kembed_dim is None:
            kembed_dim = qembed_dim
        if hidden_dim is None:
            hidden_dim = kembed_dim
        if out_dim is None:
            out_dim = kembed_dim
        if vembed_dim is None:
            vembed_dim = kembed_dim
            
        self.qembed_dim = qembed_dim
        self.kembed_dim = kembed_dim
        self.vembed_dim = vembed_dim
        
        self.hidden_dim = hidden_dim
        self.for_key = nn.Linear(kembed_dim,hidden_dim)
        self.for_query = nn.Linear(qembed_dim,hidden_dim)
        self.for_value = nn.Linear(vembed_dim,hidden_dim)
        self.normalise_factor = hidden_dim**(1/2)
    
    def mask_score(self,s,m):
        for i in range(s.size()[0]):
            for j in range(s.size()[1]):
                for k in range(s.size()[2]):
                    if m[i][j][k] == 0:
                        s[i][j][k] = float('-inf')   #So that after softmax, 0 weight is given to it
        return s
    
    def forward(self,key,query,mask=None):
        if len(query.shape) == 1:
            query = torch.unsqueeze(query, dim=0)
        if len(key.shape) == 1:
            key = torch.unsqueeze(key, dim=0)
            
        if len(query.shape) == 2:
            query = torch.unsqueeze(query, dim=1)
        if len(key.shape) == 2:
            key = torch.unsqueeze(key, dim=1)
            
        new_query = self.for_query(query)
        new_key = self.for_key(key)
        new_value = self.for_value(key)
        
        score = torch.bmm(new_query,new_key.permute(0,2,1))/self.normalise_factor
        
        if mask != None:
            score = self.mask_score(score,mask)
            
        score = F.softmax(score,-1)
        score.data[score!=score] = 0         #removing nan values
        
        output = torch.bmm(score,new_value)
        return output,score

class interact(nn.Module):
    def __init__(self,hidden_dim,weight_matrix,utt2idx):
        super(interact, self).__init__()
        self.hidden_size = hidden_dim

        self.embedding, num_embeddings, embedding_dim = create_emb_layer(weight_matrix,utt2idx)
        self.rnnD = myRNN(embedding_dim, hidden_dim,1)   #Dialogue
        self.drop1 = nn.Dropout()
        
        self.rnnG = myRNN(embedding_dim*3, hidden_dim,1)   #Global level
        self.drop2 = nn.Dropout()
        
        self.attn = attention(embedding_dim)
        
        self.rnnS = myRNN(embedding_dim*2, embedding_dim*2,1)   #Speaker representation
        self.drop3 = nn.Dropout()

    def forward(self, chat_ids, speaker_info, sp_dialogues, sp_ind, inputs):
        whole_dialogue_indices = inputs
        
        bert_embs = self.embedding(whole_dialogue_indices)
               
        dialogue, h1 = self.rnnD(bert_embs)    #Get global level representation
        dialogue = self.drop1(dialogue)

        device = inputs.device
        
        fop = torch.zeros((dialogue.size()[0],dialogue.size()[1],dialogue.size()[2])).to(device)
        fop2 = torch.zeros((dialogue.size()[0],dialogue.size()[1],dialogue.size()[2]*3)).to(device)
        op = torch.zeros((dialogue.size()[0],dialogue.size()[1],dialogue.size()[2])).to(device)
        spop = torch.zeros((dialogue.size()[0],dialogue.size()[1],dialogue.size()[2]*2)).to(device)
                    
        h0 = torch.randn(1, 1, self.hidden_size*2).to(device)
        d_h = torch.randn(1, 1, self.hidden_size).to(device)
        attn_h = torch.randn(1, 1, self.hidden_size).to(device)
        
        for b in range(dialogue.size()[0]):
            d_id = chat_ids[b]
            speaker_hidden_states = {}
            for s in range(dialogue.size()[1]):
                fop = op.clone()
                
                current_utt = dialogue[b][s]
                
                current_speaker = speaker_info[d_id][s]
                
                if current_speaker not in speaker_hidden_states:
                    speaker_hidden_states[current_speaker] = h0
                
                h = speaker_hidden_states[current_speaker]
                current_utt_emb = torch.unsqueeze(torch.unsqueeze(current_utt,0),0)
                
                key = fop[b][:s+1].clone()
                key = torch.unsqueeze(key,0)
                
                if s == 0:
                    tmp = torch.cat([attn_h,current_utt_emb],-1).to(device)
                    spop[b][s], h_new = self.rnnS(tmp,h)
                else:
                    query = current_utt_emb
                    attn_op,_ = self.attn(key,query)
                    
                    tmp = torch.cat([attn_op,current_utt_emb],-1).to(device)
                    spop[b][s], h_new = self.rnnS(tmp,h)
                
                spop[b][s] = spop[b][s].add(tmp)        # Residual Connection        
                speaker_hidden_states[current_speaker] = h_new
                
                fop2[b][s] = torch.cat([spop[b][s],dialogue[b][s]],-1)
                tmp = torch.unsqueeze(torch.unsqueeze(fop2[b][s].clone(),0),0)
                op[b][s],d_h = self.rnnG(tmp,d_h)

        return op,spop
    
class fc_e(nn.Module):
    def __init__(self,inp_dim,op_dim):
        super(fc_e,self).__init__()
        self.linear1 = nn.Linear(inp_dim,int(inp_dim/2))
        self.drop1 = nn.Dropout()
        
        self.linear2 = nn.Linear(int(inp_dim/2),int(inp_dim/4))
        self.drop2 = nn.Dropout(0.6)
        
        self.linear3 = nn.Linear(int(inp_dim/4),op_dim)
        self.drop3 = nn.Dropout(0.7)
    def forward(self,x):
        ip = x.float()
    
        op = self.linear1(ip)
        op = self.drop1(op)
        
        op = self.linear2(op)
        op = self.drop2(op)
        
        op = self.linear3(op)
        op = self.drop3(op)
        
        return op

class fc_t(nn.Module):
    def __init__(self,inp_dim,op_dim):
        super(fc_t,self).__init__()
        self.linear1 = nn.Linear(inp_dim,inp_dim)
        self.drop1 = nn.Dropout(0.7)
        
        self.linear2 = nn.Linear(inp_dim,inp_dim)
        self.drop2 = nn.Dropout(0.7)
        
        self.linear3 = nn.Linear(inp_dim,int(inp_dim/2))
        self.drop3 = nn.Dropout(0.7)
        
        self.linear4 = nn.Linear(int(inp_dim/2),int(inp_dim/4))
        self.drop4 = nn.Dropout(0.7)
        
        self.linear5 = nn.Linear(int(inp_dim/4),op_dim)
        self.drop5 = nn.Dropout(0.7)
    def forward(self,x):
        ip = x.float()
    
        op = self.linear1(ip)
        op = self.drop1(op)
        
        op = self.linear2(ip)
        op = self.drop2(op)
        
        op = self.linear3(ip)
        op = self.drop3(op)
        
        op = self.linear4(op)
        op = self.drop4(op)
        
        op = self.linear5(op)
        op = self.drop5(op)
        
        return op
    
class maskedattn(nn.Module):
    def __init__(self,batch_size, s_len, emb_size):
        super(maskedattn,self).__init__()
        self.b_len = batch_size
        self.s_len = s_len
        self.emb_size = emb_size
        self.attn = attention(emb_size*2, kembed_dim=emb_size, out_dim=emb_size)
    
    def create_mask(self,n):
        mask = torch.zeros((1, self.s_len, self.emb_size), dtype=torch.uint8)
        mask[:n+1] = torch.ones((self.emb_size), dtype=torch.uint8)
        mask = mask.repeat(self.b_len,1,1)
        return mask
        
    def forward(self,key,query):
        device = key.device

        ops = torch.zeros([key.size()[0],key.size()[1], key.size()[2]], dtype=torch.float32).to(device)
        for i in range(key.size()[1]):
          mask = self.create_mask(i)
          op,_ = self.attn(key,query,mask=mask)
          for b in range(op.size()[0]):
            ops[b][i] = op[b][i]
        return ops
    
class memnet(nn.Module):
  def __init__(self,num_hops,hidden_size,batch_size,seq_len):
    super(memnet,self).__init__()
    self.num_hops = num_hops
    self.rnn = myRNN(hidden_size, hidden_size, 1)
    self.masked_attention = maskedattn(batch_size,seq_len,hidden_size)
  
  def forward(self,globl,spl):
    X = globl
    for hop in range(self.num_hops):
      dialogue,h = self.rnn(X)
      X = self.masked_attention(dialogue,spl)
    return X

class pool(nn.Module):
    def __init__(self,mode="mean"):
        super(pool,self).__init__()
        self.mode = mode
    def forward(self,x):
        device = x.device
        op = torch.zeros((x.size()[0],x.size()[1],x.size()[2])).to(device)
        for b in range(x.size()[0]):
            this_tensor = []
            for s in range(x.size()[1]):
                this_tensor.append(x[b][s])
                if self.mode == "mean":
                    op[b][s] = torch.mean(torch.stack(this_tensor),0)
                elif self.mode == "max":
                    op[b][s],_ = torch.max(torch.stack(this_tensor),0)
                elif self.mode == "sum":
                    op[b][s] = torch.sum(torch.stack(this_tensor),0)
                else:
                    print("Error: Mode can be either mean or max only")
        return op

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

# Pickle Data Loader.py

In [2]:
pickle_folder_path = "/kaggle/input/meld-pickles/Pickles/"

import pickle

def load_erc():
    with open(pickle_folder_path + "idx2utt.pickle","rb") as f:
        idx2utt = pickle.load(f)
    with open(pickle_folder_path + "utt2idx.pickle","rb") as f:
        utt2idx = pickle.load(f)
        
    with open(pickle_folder_path + "idx2emo.pickle","rb") as f:
        idx2emo = pickle.load(f)
    with open(pickle_folder_path + "emo2idx.pickle","rb") as f:
        emo2idx = pickle.load(f)
        
    with open(pickle_folder_path + "idx2speaker.pickle","rb") as f:
        idx2speaker = pickle.load(f)
    with open(pickle_folder_path + "speaker2idx.pickle","rb") as f:
        speaker2idx = pickle.load(f)

    with open(pickle_folder_path + "weight_matrix.pickle","rb") as f:
        weight_matrix = pickle.load(f)

    with open(pickle_folder_path + "train_data.pickle","rb") as f:
        my_dataset_train = pickle.load(f)
        
    with open(pickle_folder_path + "test_data.pickle","rb") as f:
        my_dataset_test = pickle.load(f)
        
    with open(pickle_folder_path + "final_speaker_info.pickle","rb") as f:
        final_speaker_info = pickle.load(f)
        
    with open(pickle_folder_path + "final_speaker_dialogues.pickle","rb") as f:
        final_speaker_dialogues = pickle.load(f)
        
    with open(pickle_folder_path + "final_speaker_emotions.pickle","rb") as f:
        final_speaker_emotions = pickle.load(f)
        
    with open(pickle_folder_path + "final_speaker_indices.pickle","rb") as f:
        final_speaker_indices = pickle.load(f)
        
    with open(pickle_folder_path + "final_utt_len.pickle","rb") as f:
        final_utt_len = pickle.load(f)

    return idx2utt, utt2idx, idx2emo, emo2idx, idx2speaker,\
        speaker2idx, weight_matrix, my_dataset_train, my_dataset_test,\
        final_speaker_info, final_speaker_dialogues, final_speaker_emotions,\
        final_speaker_indices, final_utt_len

def load_efr():
    with open(pickle_folder_path + "idx2utt.pickle","rb") as f:
        idx2utt = pickle.load(f)
    with open(pickle_folder_path + "utt2idx.pickle","rb") as f:
        utt2idx = pickle.load(f)
        
    with open(pickle_folder_path + "idx2emo.pickle","rb") as f:
        idx2emo = pickle.load(f)
    with open(pickle_folder_path + "emo2idx.pickle","rb") as f:
        emo2idx = pickle.load(f)
        
    with open(pickle_folder_path + "idx2speaker.pickle","rb") as f:
        idx2speaker = pickle.load(f)
    with open(pickle_folder_path + "speaker2idx.pickle","rb") as f:
        speaker2idx = pickle.load(f)

    with open(pickle_folder_path + "weight_matrix.pickle","rb") as f:
        weight_matrix = pickle.load(f)

    with open(pickle_folder_path + "train_data_trig.pickle","rb") as f:
        my_dataset_train = pickle.load(f)

    with open(pickle_folder_path + "test_data_trig.pickle","rb") as f:
        my_dataset_test = pickle.load(f)
        
    with open(pickle_folder_path + "global_speaker_info_trig.pickle","rb") as f:
        global_speaker_info = pickle.load(f)
        
    with open(pickle_folder_path + "speaker_dialogues_trig.pickle","rb") as f:
        speaker_dialogues = pickle.load(f)
        
    with open(pickle_folder_path + "speaker_emotions_trig.pickle","rb") as f:
        speaker_emotions = pickle.load(f)
        
    with open(pickle_folder_path + "speaker_indices_trig.pickle","rb") as f:
        speaker_indices = pickle.load(f)
        
    with open(pickle_folder_path + "utt_len_trig.pickle","rb") as f:
        utt_len = pickle.load(f)
        
    with open(pickle_folder_path + "global_speaker_info_test_trig.pickle","rb") as f:
        global_speaker_info_test = pickle.load(f)
        
    with open(pickle_folder_path + "speaker_dialogues_test_trig.pickle","rb") as f:
        speaker_dialogues_test = pickle.load(f)
        
    with open(pickle_folder_path + "speaker_emotions_test_trig.pickle","rb") as f:
        speaker_emotions_test = pickle.load(f)
        
    with open(pickle_folder_path + "speaker_indices_test_trig.pickle","rb") as f:
        speaker_indices_test = pickle.load(f)
        
    with open(pickle_folder_path + "utt_len_test_trig.pickle","rb") as f:
        utt_len_test = pickle.load(f)

    return idx2utt, utt2idx, idx2emo, emo2idx, idx2speaker,\
        speaker2idx, weight_matrix, my_dataset_train, my_dataset_test,\
        global_speaker_info, speaker_dialogues, speaker_emotions, \
        speaker_indices, utt_len, global_speaker_info_test, speaker_dialogues_test, \
        speaker_emotions_test, speaker_indices_test, utt_len_test

# utils.py

In [3]:
import torch.nn as nn

##Source: https://medium.com/@martinpella/how-to-use-pre-trained-word-embeddings-in-pytorch-71ca59249f76
def create_emb_layer(weights_matrix, utt2idx, non_trainable=False):
    num_embeddings, embedding_dim = weights_matrix.size()
    emb_layer = nn.Embedding(num_embeddings, embedding_dim, padding_idx=utt2idx["<pad>"])
    emb_layer.load_state_dict({'weight': weights_matrix})
    if non_trainable:
        emb_layer.weight.requires_grad = False
    return emb_layer, num_embeddings, embedding_dim

# models.py

In [4]:
import torch
from torch.nn import TransformerEncoder, TransformerEncoderLayer

class ERC_MMN(nn.Module):
    def __init__(self,hidden_size,weight_matrix,utt2idx,batch_size,seq_len):
        super(ERC_MMN,self).__init__()
        self.ia = interact(hidden_size,weight_matrix,utt2idx)
        self.mn = memnet(4,hidden_size,batch_size,seq_len)
        self.pool = pool()
        
        self.rnn_c = myRNN(hidden_size*3,hidden_size*2,1)
        
        self.rnn_e = myRNN(hidden_size*2,hidden_size*2,1)
                
        self.linear1 = fc_e(hidden_size*2,7)

    def forward(self,c_ids,speaker_info,sp_dialogues,sp_em,sp_ind,x1,mode="train"):
        glob, splvl = self.ia(c_ids,speaker_info,sp_dialogues,sp_ind,x1)

        op = self.mn(glob,splvl)
        op = self.pool(op)

        op = torch.cat([splvl,op],dim=2)

        rnn_c_op,_ = self.rnn_c(op)

        rnn_e_op,_ = self.rnn_e(rnn_c_op)
        fip = rnn_e_op.add(rnn_c_op)      # Residual Connection
        fop1 = self.linear1(fip)

        return fip,fop1

class EFR_TX(nn.Module):
    def __init__(self, weight_matrix, utt2idx, nclass, ninp, count_speakers, nsp, nhead, nhid, nlayers, device, dropout=0.5):
        super(EFR_TX, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder, num_embeddings, embedding_dim = create_emb_layer(weight_matrix, utt2idx)
        self.ninp = ninp
        self.decoder = nn.Linear(2*ninp, nclass)
        self.speakers_embedding = torch.nn.Embedding(count_speakers, nsp)

        self.init_weights()
        self.device = device

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, d_ids, sp_ids, ut_len):
        device = 'cuda'
        torch.set_default_device('cuda')
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask

        # Old Code
        # src = self.encoder(src) * math.sqrt(self.ninp)
        # New
        src = self.encoder(src)
        new_src = torch.zeros(src.shape[0],src.shape[1],self.ninp)
        for ix1,mat in enumerate(src):
            for ix2,vec in enumerate(mat):
                new_src[ix1][ix2] = torch.cat([self.speakers_embedding(torch.tensor(sp_ids[ix1][ix2], device=device, dtype=torch.long)), src[ix1][ix2]],-1)
        src = new_src
        src = src * math.sqrt(self.ninp)        
        
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        
        decoder_ip = torch.zeros(output.size()[0],output.size()[1],output.size()[2]*2).to(self.device)
        for b in range(output.size()[0]):
            d_id = d_ids[b][0]
            main_utt = output[b][ut_len[d_id]-1]
            for s in range(ut_len[d_id]):
                this_utt = output[b][s]
                decoder_ip[b][s] = torch.cat([this_utt,main_utt],-1)
        
        output = self.decoder(decoder_ip)
        
        return decoder_ip,output

class ERC_true_EFR(nn.Module):
    def __init__(self, weight_matrix, utt2idx, nclass, ninp, nhead, nhid, nlayers, dropout=0.5):
        super(ERC_true_EFR, self).__init__()
        self.model_type = 'Transformer'
        self.src_mask = None
        self.pos_encoder = PositionalEncoding(ninp, dropout)
        encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers)
        self.encoder, num_embeddings, embedding_dim = create_emb_layer(weight_matrix,utt2idx)
        
        self.emoGRU = myRNN(7,100,1)
        self.ninp = ninp
        self.decoder = nn.Linear(2*ninp+100, nclass)

        self.init_weights()

    def _generate_square_subsequent_mask(self, sz):
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        self.encoder.weight.data.uniform_(-initrange, initrange)
        self.decoder.bias.data.zero_()
        self.decoder.weight.data.uniform_(-initrange, initrange)

    def forward(self, src, em_seq, d_ids, ut_len):
        if self.src_mask is None or self.src_mask.size(0) != len(src):
            device = src.device
            mask = self._generate_square_subsequent_mask(len(src)).to(device)
            self.src_mask = mask

        src = self.encoder(src) * math.sqrt(self.ninp)
        src = self.pos_encoder(src)
        output = self.transformer_encoder(src, self.src_mask)
        
        emo_seq,_ = self.emoGRU(em_seq.float())
        
        decoder_ip = torch.zeros(output.size()[0],output.size()[1],output.size()[2]*2).cuda()
        for b in range(output.size()[0]):
            d_id = d_ids[b][0]
            main_utt = output[b][ut_len[d_id]-1]
            for s in range(ut_len[d_id]):
                this_utt = output[b][s]
                decoder_ip[b][s] = torch.cat([this_utt,main_utt],-1)
        
        decoder_ip = torch.cat([decoder_ip,emo_seq],-1)
        output = self.decoder(decoder_ip)
        
        return output

class ERC_EFR_multitask(nn.Module):
    def __init__(self,hidden_size,weight_matrix,utt2idx,batch_size,seq_len):
        super(ERC_EFR_multitask,self).__init__()
        self.ia = interact(hidden_size,weight_matrix,utt2idx)
        self.mn = memnet(4,hidden_size,batch_size,seq_len)
        self.pool = pool()
        
        self.rnn_c = myRNN(hidden_size*3,hidden_size*2,1)
        
        self.rnn_e = myRNN(hidden_size*2,hidden_size*2,1)
        self.rnn_t = myRNN(hidden_size*2,hidden_size,1)

        self.linear1 = fc_e(hidden_size*2,7)
        self.linear2 = fc_t(hidden_size*2,2)

    def forward(self,c_ids,speaker_info,sp_dialogues,sp_em,sp_ind,freeze,x1,mode="train"):
        speaker_emo = {}
        speaker_emo_distance = {}
        
        for d_id in c_ids:
            speaker_emo[d_id] = {}
            speaker_emo_distance[d_id] = {}
                    
        if freeze:
            with torch.no_grad():
                glob, splvl = self.ia(c_ids,speaker_info,sp_dialogues,sp_ind,x1)
        
                op = self.mn(glob,splvl)
                op = self.pool(op)

                op = torch.cat([splvl,op],dim=2)

                rnn_c_op,_ = self.rnn_c(op)

                rnn_e_op,_ = self.rnn_e(rnn_c_op)
                rnn_e_op = rnn_e_op.add(rnn_c_op)      # Residual Connection
                fop1 = self.linear1(rnn_e_op)
        else:
            glob, splvl = self.ia(c_ids,speaker_info,sp_dialogues,sp_ind,x1)
        
            op = self.mn(glob,splvl)
            op = self.pool(op)
            
            op = torch.cat([splvl,op],dim=2)

            rnn_c_op,_ = self.rnn_c(op)

            rnn_e_op,_ = self.rnn_e(rnn_c_op)
            rnn_e_op = rnn_e_op.add(rnn_c_op)      # Residual Connection
            fop1 = self.linear1(rnn_e_op)
        
        rnn_t_op,_ = self.rnn_t(rnn_c_op)

        fop2_final = []
        for b in range(rnn_t_op.size()[0]):
            d_id = c_ids[b]
            fop2_final_tmp = []
            for s in range(rnn_t_op.size()[1]):
                fop2_final_tmp_tmp = []
                concerned_utt = rnn_t_op[b][s]
                
                if s < 4:
                    r = s+1
                else:
                    r = 4
                
                for s2 in range(r,-1,-1):
                    this_utt = rnn_t_op[b][s-s2]
                    tmp = torch.cat((concerned_utt,this_utt),-1)
                    fop2 = self.linear2(tmp)

                    fop2_final_tmp_tmp.append(fop2)
                fop2_final_tmp.append(fop2_final_tmp_tmp)
            fop2_final.append(fop2_final_tmp)
        return fop1,fop2_final

class cascade(nn.Module):
    def __init__(self,hidden_size,nclasses):
        super(cascade,self).__init__()        
        self.linear = fc_e(hidden_size*4,nclasses)
    
    def forward(self,x1):
        op = self.linear(x1)
        return op

# Train EFR-TX.py

In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm.auto import tqdm
from torch.utils import data
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

batch_size = 1000
seq_len = 5
seq2_len = seq_len
emb_size = 768
hidden_size = 768
batch_first = True

torch.set_default_device('cuda')
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

idx2utt, utt2idx, idx2emo, emo2idx, idx2speaker,\
        speaker2idx, weight_matrix, my_dataset_train, my_dataset_test,\
        global_speaker_info, speaker_dialogues, speaker_emotions, \
        speaker_indices, utt_len, global_speaker_info_test, speaker_dialogues_test, \
        speaker_emotions_test, speaker_indices_test, utt_len_test = load_efr()
    
def get_train_test_loader(bs):
    print(len(my_dataset_train))
    train_data_iter = data.DataLoader(my_dataset_train, batch_size=bs)
    test_data_iter = data.DataLoader(my_dataset_test, batch_size=bs)
    
    return train_data_iter, test_data_iter
    
def train(model, train_data_loader, epochs):
    class_weights2 = torch.FloatTensor(weights2).to(device)
    criterion2 = nn.CrossEntropyLoss(weight=class_weights2,reduction='none').to(device)
    
    optimizer = torch.optim.Adam(model.parameters(),lr=1e-3)
    
    max_f1_2 = 0
   
    for epoch in tqdm(range(epochs)):
        print(list(model.parameters())[0])
        print("\n\n-------Epoch {}-------\n\n".format(epoch+1))
        model.train()
        
        avg_loss = 0
       
        y_true2 = []
        y_pred2 = []
            
        for i_batch, sample_batched in tqdm(enumerate(train_data_loader)):
            dialogue_ids = sample_batched[0].tolist()
            inputs = sample_batched[1].to(device)
            targets2 = sample_batched[3].to(device)
            
            # Creating the speaker_ids
            speaker_ids = []
            for d_ids_list in dialogue_ids:
              sp_id_list = [0] * len(d_ids_list)
              for ix, d_id in enumerate(d_ids_list):
                sp_id = global_speaker_info[d_id][0]
                sp_id_list[ix] = sp_id
              speaker_ids.append(sp_id_list)
            
            optimizer.zero_grad()
            
            _,outputs = model(inputs,dialogue_ids,speaker_ids,utt_len)
            
            loss = 0
            for b in range(outputs.size()[0]):
              loss2 = 0
              
              for s in range(utt_len[dialogue_ids[b][0]]):
                pred2 = outputs[b][s]
                pred_flip = torch.argmax(F.softmax(pred2.to(device),-1),-1)
                
                truth2 = targets2[b][s]

                y_pred2.append(pred_flip.item())
                y_true2.append(truth2.long().to(device).item())

                pred2_ = torch.unsqueeze(pred2,0)
                truth2_ = torch.unsqueeze(truth2,0)
                
                loss2 += criterion2(pred2_,truth2_)
              loss2 /= utt_len[dialogue_ids[b][0]]
            
            loss += loss2
            loss /= outputs.size()[0]
            avg_loss += loss

            loss.backward()            
            optimizer.step()
            
        avg_loss /= len(train_data_loader)
        
        print("Average Loss = ",avg_loss)
        if epoch%10==0:
            f1_2_cls,v_loss = validate(model, data_iter_test, epoch)
        
        # if f1_2_cls[1] > max_f1_2:
        #     print(f"Saving model at epoch {epoch}")
        #     max_f1_2 = f1_2_cls[1]
        #     torch.save(model.state_dict(), "./best_model.pth")

    return model

def validate(model, test_data_loader,epoch):
    print("\n\n***VALIDATION ({})***\n\n".format(epoch))
    
    class_weights2 = torch.FloatTensor(weights2).to(device)
    criterion2 = nn.CrossEntropyLoss(weight=class_weights2,reduction='none').to(device)

    model.eval()

    with torch.no_grad():
      avg_loss = 0
      y_true2 = []
      y_pred2 = []

      for i_batch, sample_batched in tqdm(enumerate(test_data_loader)):
            dialogue_ids = sample_batched[0].tolist()           
            inputs = sample_batched[1].to(device)
            targets2 = sample_batched[3].to(device)
            
            # Creating the speaker_ids
            speaker_ids = []
            for d_ids_list in dialogue_ids:
              sp_id_list = [0] * len(d_ids_list)
              for ix, d_id in enumerate(d_ids_list):
                sp_id = global_speaker_info[d_id][0]
                sp_id_list[ix] = sp_id
              speaker_ids.append(sp_id_list)
                       
            _,outputs = model(inputs,dialogue_ids,speaker_ids,utt_len)
            
            loss = 0
            for b in range(outputs.size()[0]):
              loss2 = 0
              
              for s in range(utt_len_test[dialogue_ids[b][0]]):
                pred2 = outputs[b][s]
                pred_flip = torch.argmax(F.softmax(pred2.to(device),-1),-1)
                
                truth2 = targets2[b][s]

                y_pred2.append(pred_flip.item())
                y_true2.append(truth2.long().to(device).item())

                pred2_ = torch.unsqueeze(pred2,0)
                truth2_ = torch.unsqueeze(truth2,0)
                
                loss2 += criterion2(pred2_,truth2_)
              loss2 /= utt_len_test[dialogue_ids[b][0]]
            
            loss += loss2
            loss /= outputs.size()[0]
            avg_loss += loss

      avg_loss /= len(test_data_loader)

      class_report = classification_report(y_true2,y_pred2)
      conf_mat2 = confusion_matrix(y_true2,y_pred2)

      print(class_report)
      print("Confusion Matrix: \n",conf_mat2)
    
      f1 = f1_score(y_true2,y_pred2)
      return f1,avg_loss

nclass = 2
utt_emsize = 768
personality_size = 100
nhid = 768
nlayers = 6
nhead = 2
dropout = 0.2
count_speakers = len(speaker2idx)
model = EFR_TX(weight_matrix, utt2idx, nclass, personality_size + utt_emsize, count_speakers, personality_size, nhead, nhid, nlayers, device, dropout).to(device)

weights2 = [1.0, 2.5]
data_iter_train, data_iter_test = get_train_test_loader(batch_size)
model = train(model, data_iter_train, epochs = 300)



4000


  0%|          | 0/300 [00:00<?, ?it/s]

Parameter containing:
tensor([[ 0.0314,  0.0320,  0.0376,  ..., -0.0166,  0.0076, -0.0198],
        [-0.0256, -0.0315,  0.0331,  ...,  0.0132, -0.0072, -0.0407],
        [ 0.0372,  0.0107,  0.0310,  ...,  0.0129,  0.0166,  0.0234],
        ...,
        [-0.0360,  0.0230, -0.0051,  ...,  0.0309,  0.0294,  0.0178],
        [ 0.0352, -0.0325,  0.0056,  ...,  0.0242,  0.0087, -0.0280],
        [-0.0259,  0.0272, -0.0300,  ...,  0.0019, -0.0361, -0.0124]],
       device='cuda:0', requires_grad=True)


-------Epoch 1-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0225], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (0)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0345,  0.0288,  0.0400,  ..., -0.0134,  0.0091, -0.0167],
        [-0.0294, -0.0327,  0.0347,  ...,  0.0131, -0.0079, -0.0413],
        [ 0.0376,  0.0137,  0.0273,  ...,  0.0121,  0.0156,  0.0203],
        ...,
        [-0.0389,  0.0262, -0.0028,  ...,  0.0286,  0.0259,  0.0160],
        [ 0.0333, -0.0304,  0.0027,  ...,  0.0224,  0.0049, -0.0255],
        [-0.0292,  0.0294, -0.0336,  ..., -0.0017, -0.0394, -0.0158]],
       device='cuda:0', requires_grad=True)


-------Epoch 2-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0074], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0360,  0.0272,  0.0404,  ..., -0.0117,  0.0099, -0.0154],
        [-0.0318, -0.0323,  0.0344,  ...,  0.0116, -0.0070, -0.0434],
        [ 0.0365,  0.0160,  0.0243,  ...,  0.0126,  0.0167,  0.0185],
        ...,
        [-0.0401,  0.0279, -0.0019,  ...,  0.0278,  0.0239,  0.0158],
        [ 0.0327, -0.0298,  0.0016,  ...,  0.0222,  0.0027, -0.0246],
        [-0.0312,  0.0303, -0.0357,  ..., -0.0038, -0.0413, -0.0178]],
       device='cuda:0', requires_grad=True)


-------Epoch 3-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0055], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0369,  0.0263,  0.0406,  ..., -0.0107,  0.0104, -0.0147],
        [-0.0333, -0.0321,  0.0343,  ...,  0.0107, -0.0064, -0.0447],
        [ 0.0359,  0.0175,  0.0226,  ...,  0.0129,  0.0174,  0.0174],
        ...,
        [-0.0408,  0.0290, -0.0013,  ...,  0.0272,  0.0226,  0.0157],
        [ 0.0324, -0.0294,  0.0010,  ...,  0.0220,  0.0013, -0.0240],
        [-0.0324,  0.0308, -0.0370,  ..., -0.0051, -0.0424, -0.0190]],
       device='cuda:0', requires_grad=True)


-------Epoch 4-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0030], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0375,  0.0256,  0.0408,  ..., -0.0100,  0.0107, -0.0142],
        [-0.0343, -0.0319,  0.0342,  ...,  0.0101, -0.0061, -0.0456],
        [ 0.0355,  0.0184,  0.0214,  ...,  0.0131,  0.0178,  0.0167],
        ...,
        [-0.0413,  0.0297, -0.0009,  ...,  0.0269,  0.0218,  0.0157],
        [ 0.0322, -0.0292,  0.0005,  ...,  0.0219,  0.0004, -0.0237],
        [-0.0332,  0.0312, -0.0379,  ..., -0.0059, -0.0431, -0.0198]],
       device='cuda:0', requires_grad=True)


-------Epoch 5-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0379,  0.0252,  0.0409,  ..., -0.0096,  0.0109, -0.0138],
        [-0.0349, -0.0318,  0.0341,  ...,  0.0097, -0.0058, -0.0462],
        [ 0.0352,  0.0190,  0.0206,  ...,  0.0132,  0.0181,  0.0162],
        ...,
        [-0.0416,  0.0301, -0.0007,  ...,  0.0267,  0.0212,  0.0156],
        [ 0.0320, -0.0290,  0.0002,  ...,  0.0219, -0.0002, -0.0234],
        [-0.0338,  0.0314, -0.0385,  ..., -0.0065, -0.0436, -0.0203]],
       device='cuda:0', requires_grad=True)


-------Epoch 6-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8231e-02,  2.4896e-02,  4.1001e-02,  ..., -9.2883e-03,
          1.1062e-02, -1.3583e-02],
        [-3.5359e-02, -3.1750e-02,  3.4067e-02,  ...,  9.4561e-03,
         -5.6238e-03, -4.6548e-02],
        [ 3.5021e-02,  1.9462e-02,  2.0046e-02,  ...,  1.3282e-02,
          1.8327e-02,  1.5917e-02],
        ...,
        [-4.1864e-02,  3.0460e-02, -4.7785e-04,  ...,  2.6545e-02,
          2.0810e-02,  1.5570e-02],
        [ 3.1949e-02, -2.8915e-02,  2.6380e-05,  ...,  2.1831e-02,
         -6.0484e-04, -2.3257e-02],
        [-3.4125e-02,  3.1602e-02, -3.8853e-02,  ..., -6.8542e-03,
         -4.3953e-02, -2.0704e-02]], device='cuda:0', requires_grad=True)


-------Epoch 7-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0384,  0.0247,  0.0411,  ..., -0.0091,  0.0112, -0.0134],
        [-0.0357, -0.0317,  0.0340,  ...,  0.0093, -0.0055, -0.0468],
        [ 0.0349,  0.0198,  0.0197,  ...,  0.0133,  0.0185,  0.0157],
        ...,
        [-0.0420,  0.0307, -0.0004,  ...,  0.0264,  0.0205,  0.0155],
        [ 0.0319, -0.0288, -0.0001,  ...,  0.0218, -0.0009, -0.0231],
        [-0.0344,  0.0317, -0.0391,  ..., -0.0071, -0.0442, -0.0210]],
       device='cuda:0', requires_grad=True)


-------Epoch 8-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0386,  0.0246,  0.0411,  ..., -0.0089,  0.0112, -0.0133],
        [-0.0359, -0.0317,  0.0340,  ...,  0.0091, -0.0054, -0.0470],
        [ 0.0348,  0.0200,  0.0194,  ...,  0.0134,  0.0186,  0.0155],
        ...,
        [-0.0421,  0.0308, -0.0003,  ...,  0.0264,  0.0204,  0.0155],
        [ 0.0318, -0.0288, -0.0002,  ...,  0.0218, -0.0011, -0.0231],
        [-0.0346,  0.0318, -0.0393,  ..., -0.0073, -0.0443, -0.0211]],
       device='cuda:0', requires_grad=True)


-------Epoch 9-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0387,  0.0245,  0.0411,  ..., -0.0088,  0.0113, -0.0132],
        [-0.0360, -0.0316,  0.0340,  ...,  0.0091, -0.0054, -0.0471],
        [ 0.0347,  0.0201,  0.0192,  ...,  0.0134,  0.0186,  0.0154],
        ...,
        [-0.0422,  0.0309, -0.0002,  ...,  0.0263,  0.0202,  0.0155],
        [ 0.0318, -0.0287, -0.0003,  ...,  0.0218, -0.0012, -0.0230],
        [-0.0347,  0.0318, -0.0394,  ..., -0.0074, -0.0445, -0.0212]],
       device='cuda:0', requires_grad=True)


-------Epoch 10-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0026], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0387,  0.0244,  0.0411,  ..., -0.0088,  0.0113, -0.0132],
        [-0.0361, -0.0316,  0.0340,  ...,  0.0090, -0.0053, -0.0472],
        [ 0.0347,  0.0202,  0.0191,  ...,  0.0134,  0.0187,  0.0154],
        ...,
        [-0.0423,  0.0310, -0.0002,  ...,  0.0263,  0.0202,  0.0155],
        [ 0.0318, -0.0287, -0.0003,  ...,  0.0218, -0.0013, -0.0230],
        [-0.0348,  0.0319, -0.0395,  ..., -0.0075, -0.0445, -0.0213]],
       device='cuda:0', requires_grad=True)


-------Epoch 11-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0019], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (10)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0388,  0.0243,  0.0411,  ..., -0.0087,  0.0113, -0.0131],
        [-0.0362, -0.0316,  0.0340,  ...,  0.0090, -0.0053, -0.0473],
        [ 0.0347,  0.0203,  0.0190,  ...,  0.0134,  0.0187,  0.0153],
        ...,
        [-0.0423,  0.0311, -0.0001,  ...,  0.0263,  0.0201,  0.0155],
        [ 0.0318, -0.0287, -0.0004,  ...,  0.0217, -0.0014, -0.0229],
        [-0.0348,  0.0319, -0.0396,  ..., -0.0076, -0.0446, -0.0214]],
       device='cuda:0', requires_grad=True)


-------Epoch 12-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0018], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0388,  0.0243,  0.0412,  ..., -0.0087,  0.0114, -0.0131],
        [-0.0362, -0.0316,  0.0340,  ...,  0.0089, -0.0053, -0.0473],
        [ 0.0346,  0.0203,  0.0190,  ...,  0.0134,  0.0187,  0.0153],
        ...,
        [-0.0423,  0.0311, -0.0001,  ...,  0.0262,  0.0201,  0.0155],
        [ 0.0318, -0.0287, -0.0004,  ...,  0.0217, -0.0014, -0.0229],
        [-0.0349,  0.0319, -0.0396,  ..., -0.0076, -0.0446, -0.0214]],
       device='cuda:0', requires_grad=True)


-------Epoch 13-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0026], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0388,  0.0243,  0.0412,  ..., -0.0087,  0.0114, -0.0131],
        [-0.0363, -0.0316,  0.0340,  ...,  0.0089, -0.0053, -0.0473],
        [ 0.0346,  0.0203,  0.0189,  ...,  0.0135,  0.0187,  0.0152],
        ...,
        [-0.0423,  0.0311, -0.0001,  ...,  0.0262,  0.0200,  0.0155],
        [ 0.0317, -0.0287, -0.0004,  ...,  0.0217, -0.0015, -0.0229],
        [-0.0349,  0.0319, -0.0397,  ..., -0.0076, -0.0446, -0.0214]],
       device='cuda:0', requires_grad=True)


-------Epoch 14-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0022], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0388,  0.0243,  0.0412,  ..., -0.0086,  0.0114, -0.0131],
        [-0.0363, -0.0316,  0.0340,  ...,  0.0089, -0.0053, -0.0474],
        [ 0.0346,  0.0204,  0.0189,  ...,  0.0135,  0.0188,  0.0152],
        ...,
        [-0.0423,  0.0311, -0.0001,  ...,  0.0262,  0.0200,  0.0155],
        [ 0.0317, -0.0287, -0.0004,  ...,  0.0217, -0.0015, -0.0229],
        [-0.0349,  0.0319, -0.0397,  ..., -0.0077, -0.0447, -0.0215]],
       device='cuda:0', requires_grad=True)


-------Epoch 15-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0017], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0388,  0.0243,  0.0412,  ..., -0.0086,  0.0114, -0.0131],
        [-0.0363, -0.0316,  0.0340,  ...,  0.0089, -0.0053, -0.0474],
        [ 0.0346,  0.0204,  0.0189,  ...,  0.0135,  0.0188,  0.0152],
        ...,
        [-0.0423,  0.0311, -0.0001,  ...,  0.0262,  0.0200,  0.0155],
        [ 0.0317, -0.0287, -0.0004,  ...,  0.0217, -0.0015, -0.0229],
        [-0.0349,  0.0320, -0.0397,  ..., -0.0077, -0.0447, -0.0215]],
       device='cuda:0', requires_grad=True)


-------Epoch 16-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0023], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8839e-02,  2.4264e-02,  4.1170e-02,  ..., -8.6297e-03,
          1.1382e-02, -1.3080e-02],
        [-3.6317e-02, -3.1602e-02,  3.3967e-02,  ...,  8.8775e-03,
         -5.2505e-03, -4.7383e-02],
        [ 3.4609e-02,  2.0393e-02,  1.8880e-02,  ...,  1.3464e-02,
          1.8766e-02,  1.5207e-02],
        ...,
        [-4.2349e-02,  3.1157e-02, -9.6059e-05,  ...,  2.6218e-02,
          1.9985e-02,  1.5498e-02],
        [ 3.1737e-02, -2.8670e-02, -4.0897e-04,  ...,  2.1736e-02,
         -1.4960e-03, -2.2893e-02],
        [-3.4915e-02,  3.1958e-02, -3.9704e-02,  ..., -7.6808e-03,
         -4.4674e-02, -2.1488e-02]], device='cuda:0', requires_grad=True)


-------Epoch 17-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0019], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8843e-02,  2.4260e-02,  4.1171e-02,  ..., -8.6251e-03,
          1.1384e-02, -1.3076e-02],
        [-3.6324e-02, -3.1601e-02,  3.3966e-02,  ...,  8.8734e-03,
         -5.2479e-03, -4.7389e-02],
        [ 3.4606e-02,  2.0399e-02,  1.8872e-02,  ...,  1.3465e-02,
          1.8769e-02,  1.5202e-02],
        ...,
        [-4.2353e-02,  3.1162e-02, -9.3344e-05,  ...,  2.6216e-02,
          1.9979e-02,  1.5497e-02],
        [ 3.1736e-02, -2.8668e-02, -4.1205e-04,  ...,  2.1735e-02,
         -1.5023e-03, -2.2890e-02],
        [-3.4921e-02,  3.1961e-02, -3.9710e-02,  ..., -7.6867e-03,
         -4.4679e-02, -2.1494e-02]], device='cuda:0', requires_grad=True)


-------Epoch 18-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8846e-02,  2.4257e-02,  4.1172e-02,  ..., -8.6219e-03,
          1.1386e-02, -1.3074e-02],
        [-3.6328e-02, -3.1600e-02,  3.3966e-02,  ...,  8.8707e-03,
         -5.2461e-03, -4.7393e-02],
        [ 3.4604e-02,  2.0404e-02,  1.8866e-02,  ...,  1.3466e-02,
          1.8771e-02,  1.5199e-02],
        ...,
        [-4.2355e-02,  3.1165e-02, -9.1499e-05,  ...,  2.6214e-02,
          1.9975e-02,  1.5497e-02],
        [ 3.1735e-02, -2.8667e-02, -4.1414e-04,  ...,  2.1735e-02,
         -1.5066e-03, -2.2889e-02],
        [-3.4924e-02,  3.1962e-02, -3.9715e-02,  ..., -7.6906e-03,
         -4.4683e-02, -2.1497e-02]], device='cuda:0', requires_grad=True)


-------Epoch 19-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0017], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8848e-02,  2.4255e-02,  4.1172e-02,  ..., -8.6198e-03,
          1.1387e-02, -1.3072e-02],
        [-3.6331e-02, -3.1600e-02,  3.3966e-02,  ...,  8.8688e-03,
         -5.2449e-03, -4.7396e-02],
        [ 3.4603e-02,  2.0407e-02,  1.8862e-02,  ...,  1.3467e-02,
          1.8773e-02,  1.5196e-02],
        ...,
        [-4.2357e-02,  3.1167e-02, -9.0252e-05,  ...,  2.6213e-02,
          1.9972e-02,  1.5497e-02],
        [ 3.1734e-02, -2.8666e-02, -4.1555e-04,  ...,  2.1734e-02,
         -1.5095e-03, -2.2888e-02],
        [-3.4927e-02,  3.1963e-02, -3.9717e-02,  ..., -7.6933e-03,
         -4.4685e-02, -2.1500e-02]], device='cuda:0', requires_grad=True)


-------Epoch 20-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8849e-02,  2.4253e-02,  4.1173e-02,  ..., -8.6183e-03,
          1.1387e-02, -1.3071e-02],
        [-3.6333e-02, -3.1600e-02,  3.3965e-02,  ...,  8.8675e-03,
         -5.2441e-03, -4.7397e-02],
        [ 3.4602e-02,  2.0409e-02,  1.8860e-02,  ...,  1.3468e-02,
          1.8773e-02,  1.5195e-02],
        ...,
        [-4.2358e-02,  3.1169e-02, -8.9401e-05,  ...,  2.6212e-02,
          1.9971e-02,  1.5497e-02],
        [ 3.1733e-02, -2.8666e-02, -4.1645e-04,  ...,  2.1734e-02,
         -1.5114e-03, -2.2887e-02],
        [-3.4929e-02,  3.1964e-02, -3.9719e-02,  ..., -7.6951e-03,
         -4.4687e-02, -2.1502e-02]], device='cuda:0', requires_grad=True)


-------Epoch 21-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (20)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 3.8850e-02,  2.4253e-02,  4.1173e-02,  ..., -8.6174e-03,
          1.1388e-02, -1.3070e-02],
        [-3.6335e-02, -3.1600e-02,  3.3965e-02,  ...,  8.8666e-03,
         -5.2436e-03, -4.7398e-02],
        [ 3.4602e-02,  2.0410e-02,  1.8858e-02,  ...,  1.3468e-02,
          1.8774e-02,  1.5194e-02],
        ...,
        [-4.2358e-02,  3.1170e-02, -8.8821e-05,  ...,  2.6212e-02,
          1.9969e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8665e-02, -4.1707e-04,  ...,  2.1734e-02,
         -1.5127e-03, -2.2886e-02],
        [-3.4929e-02,  3.1965e-02, -3.9720e-02,  ..., -7.6963e-03,
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8851e-02,  2.4252e-02,  4.1173e-02,  ..., -8.6167e-03,
          1.1388e-02, -1.3070e-02],
        [-3.6336e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8659e-03,
         -5.2432e-03, -4.7399e-02],
        [ 3.4601e-02,  2.0411e-02,  1.8857e-02,  ...,  1.3468e-02,
          1.8775e-02,  1.5193e-02],
        ...,
        [-4.2359e-02,  3.1170e-02, -8.8433e-05,  ...,  2.6211e-02,
          1.9969e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8665e-02, -4.1748e-04,  ...,  2.1734e-02,
         -1.5136e-03, -2.2886e-02],
        [-3.4930e-02,  3.1965e-02, -3.9721e-02,  ..., -7.6971e-03,
         -4.4688e-02, -2.1503e-02]], device='cuda:0', requires_grad=True)


-------Epoch 23-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8851e-02,  2.4252e-02,  4.1173e-02,  ..., -8.6163e-03,
          1.1389e-02, -1.3070e-02],
        [-3.6337e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8654e-03,
         -5.2429e-03, -4.7400e-02],
        [ 3.4601e-02,  2.0412e-02,  1.8855e-02,  ...,  1.3467e-02,
          1.8775e-02,  1.5192e-02],
        ...,
        [-4.2359e-02,  3.1171e-02, -8.8268e-05,  ...,  2.6211e-02,
          1.9968e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8665e-02, -4.1751e-04,  ...,  2.1734e-02,
         -1.5141e-03, -2.2886e-02],
        [-3.4931e-02,  3.1965e-02, -3.9722e-02,  ..., -7.6977e-03,
         -4.4689e-02, -2.1504e-02]], device='cuda:0', requires_grad=True)


-------Epoch 24-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8851e-02,  2.4251e-02,  4.1173e-02,  ..., -8.6161e-03,
          1.1389e-02, -1.3070e-02],
        [-3.6337e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8651e-03,
         -5.2426e-03, -4.7400e-02],
        [ 3.4600e-02,  2.0413e-02,  1.8853e-02,  ...,  1.3465e-02,
          1.8776e-02,  1.5192e-02],
        ...,
        [-4.2359e-02,  3.1171e-02, -8.8237e-05,  ...,  2.6211e-02,
          1.9968e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8665e-02, -4.1734e-04,  ...,  2.1734e-02,
         -1.5145e-03, -2.2885e-02],
        [-3.4932e-02,  3.1965e-02, -3.9722e-02,  ..., -7.6980e-03,
         -4.4689e-02, -2.1504e-02]], device='cuda:0', requires_grad=True)


-------Epoch 25-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0018], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8851e-02,  2.4251e-02,  4.1173e-02,  ..., -8.6160e-03,
          1.1389e-02, -1.3070e-02],
        [-3.6338e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8648e-03,
         -5.2424e-03, -4.7400e-02],
        [ 3.4600e-02,  2.0413e-02,  1.8852e-02,  ...,  1.3464e-02,
          1.8776e-02,  1.5192e-02],
        ...,
        [-4.2360e-02,  3.1171e-02, -8.8225e-05,  ...,  2.6211e-02,
          1.9967e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8665e-02, -4.1715e-04,  ...,  2.1734e-02,
         -1.5147e-03, -2.2885e-02],
        [-3.4932e-02,  3.1965e-02, -3.9722e-02,  ..., -7.6982e-03,
         -4.4689e-02, -2.1504e-02]], device='cuda:0', requires_grad=True)


-------Epoch 26-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8851e-02,  2.4251e-02,  4.1173e-02,  ..., -8.6159e-03,
          1.1390e-02, -1.3069e-02],
        [-3.6338e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8646e-03,
         -5.2422e-03, -4.7400e-02],
        [ 3.4600e-02,  2.0413e-02,  1.8851e-02,  ...,  1.3463e-02,
          1.8776e-02,  1.5192e-02],
        ...,
        [-4.2360e-02,  3.1172e-02, -8.8221e-05,  ...,  2.6211e-02,
          1.9967e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8665e-02, -4.1703e-04,  ...,  2.1734e-02,
         -1.5148e-03, -2.2885e-02],
        [-3.4933e-02,  3.1966e-02, -3.9722e-02,  ..., -7.6983e-03,
         -4.4689e-02, -2.1505e-02]], device='cuda:0', requires_grad=True)


-------Epoch 27-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0020], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8851e-02,  2.4251e-02,  4.1172e-02,  ..., -8.6159e-03,
          1.1390e-02, -1.3069e-02],
        [-3.6338e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8645e-03,
         -5.2421e-03, -4.7401e-02],
        [ 3.4599e-02,  2.0413e-02,  1.8850e-02,  ...,  1.3462e-02,
          1.8777e-02,  1.5191e-02],
        ...,
        [-4.2360e-02,  3.1172e-02, -8.8217e-05,  ...,  2.6211e-02,
          1.9967e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8664e-02, -4.1698e-04,  ...,  2.1734e-02,
         -1.5150e-03, -2.2885e-02],
        [-3.4933e-02,  3.1966e-02, -3.9722e-02,  ..., -7.6984e-03,
         -4.4689e-02, -2.1505e-02]], device='cuda:0', requires_grad=True)


-------Epoch 28-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0020], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8852e-02,  2.4251e-02,  4.1172e-02,  ..., -8.6158e-03,
          1.1390e-02, -1.3069e-02],
        [-3.6338e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8644e-03,
         -5.2421e-03, -4.7401e-02],
        [ 3.4599e-02,  2.0413e-02,  1.8850e-02,  ...,  1.3462e-02,
          1.8777e-02,  1.5191e-02],
        ...,
        [-4.2360e-02,  3.1172e-02, -8.8108e-05,  ...,  2.6211e-02,
          1.9967e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8664e-02, -4.1676e-04,  ...,  2.1734e-02,
         -1.5150e-03, -2.2885e-02],
        [-3.4933e-02,  3.1966e-02, -3.9723e-02,  ..., -7.6985e-03,
         -4.4690e-02, -2.1505e-02]], device='cuda:0', requires_grad=True)


-------Epoch 29-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0022], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8851e-02,  2.4251e-02,  4.1172e-02,  ..., -8.6157e-03,
          1.1390e-02, -1.3070e-02],
        [-3.6338e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8643e-03,
         -5.2421e-03, -4.7401e-02],
        [ 3.4598e-02,  2.0413e-02,  1.8847e-02,  ...,  1.3463e-02,
          1.8777e-02,  1.5191e-02],
        ...,
        [-4.2360e-02,  3.1172e-02, -8.7932e-05,  ...,  2.6211e-02,
          1.9967e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8664e-02, -4.1639e-04,  ...,  2.1734e-02,
         -1.5150e-03, -2.2885e-02],
        [-3.4934e-02,  3.1966e-02, -3.9723e-02,  ..., -7.6985e-03,
         -4.4690e-02, -2.1505e-02]], device='cuda:0', requires_grad=True)


-------Epoch 30-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0017], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8851e-02,  2.4251e-02,  4.1172e-02,  ..., -8.6156e-03,
          1.1390e-02, -1.3070e-02],
        [-3.6338e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8641e-03,
         -5.2421e-03, -4.7401e-02],
        [ 3.4598e-02,  2.0414e-02,  1.8845e-02,  ...,  1.3465e-02,
          1.8777e-02,  1.5191e-02],
        ...,
        [-4.2360e-02,  3.1172e-02, -8.7688e-05,  ...,  2.6211e-02,
          1.9967e-02,  1.5496e-02],
        [ 3.1732e-02, -2.8664e-02, -4.1701e-04,  ...,  2.1734e-02,
         -1.5151e-03, -2.2885e-02],
        [-3.4933e-02,  3.1965e-02, -3.9723e-02,  ..., -7.6985e-03,
         -4.4690e-02, -2.1505e-02]], device='cuda:0', requires_grad=True)


-------Epoch 31-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0026], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (30)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 3.8852e-02,  2.4251e-02,  4.1171e-02,  ..., -8.6152e-03,
          1.1390e-02, -1.3069e-02],
        [-3.6337e-02, -3.1599e-02,  3.3965e-02,  ...,  8.8639e-03,
         -5.2422e-03, -4.7402e-02],
        [ 3.4599e-02,  2.0415e-02,  1.8842e-02,  ...,  1.3470e-02,
          1.8777e-02,  1.5192e-02],
        ...,
        [-4.2359e-02,  3.1172e-02, -8.6560e-05,  ...,  2.6211e-02,
          1.9967e-02,  1.5496e-02],
        [ 3.1733e-02, -2.8664e-02, -4.1490e-04,  ...,  2.1734e-02,
         -1.5154e-03, -2.2884e-02],
        [-3.4934e-02,  3.1965e-02, -3.9724e-02,  ..., -7.6986e-03,
         

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0021], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8854e-02,  2.4249e-02,  4.1181e-02,  ..., -8.6056e-03,
          1.1384e-02, -1.3061e-02],
        [-3.6341e-02, -3.1599e-02,  3.3961e-02,  ...,  8.8619e-03,
         -5.2396e-03, -4.7420e-02],
        [ 3.4594e-02,  2.0402e-02,  1.8864e-02,  ...,  1.3534e-02,
          1.8776e-02,  1.5194e-02],
        ...,
        [-4.2358e-02,  3.1170e-02, -8.7479e-05,  ...,  2.6210e-02,
          1.9968e-02,  1.5497e-02],
        [ 3.1730e-02, -2.8657e-02, -4.0517e-04,  ...,  2.1737e-02,
         -1.5148e-03, -2.2882e-02],
        [-3.4979e-02,  3.1963e-02, -3.9735e-02,  ..., -7.6991e-03,
         -4.4686e-02, -2.1501e-02]], device='cuda:0', requires_grad=True)


-------Epoch 33-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0019], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8854e-02,  2.4250e-02,  4.1192e-02,  ..., -8.5940e-03,
          1.1382e-02, -1.3059e-02],
        [-3.6345e-02, -3.1601e-02,  3.3967e-02,  ...,  8.8630e-03,
         -5.2430e-03, -4.7455e-02],
        [ 3.4589e-02,  2.0388e-02,  1.8907e-02,  ...,  1.3591e-02,
          1.8773e-02,  1.5190e-02],
        ...,
        [-4.2356e-02,  3.1168e-02, -7.9454e-05,  ...,  2.6211e-02,
          1.9974e-02,  1.5496e-02],
        [ 3.1729e-02, -2.8650e-02, -3.8365e-04,  ...,  2.1738e-02,
         -1.5074e-03, -2.2885e-02],
        [-3.5014e-02,  3.1962e-02, -3.9757e-02,  ..., -7.7014e-03,
         -4.4713e-02, -2.1496e-02]], device='cuda:0', requires_grad=True)


-------Epoch 34-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0024], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8857e-02,  2.4224e-02,  4.1184e-02,  ..., -8.5738e-03,
          1.1423e-02, -1.3096e-02],
        [-3.6356e-02, -3.1587e-02,  3.3978e-02,  ...,  8.8541e-03,
         -5.2606e-03, -4.7422e-02],
        [ 3.4593e-02,  2.0341e-02,  1.8929e-02,  ...,  1.3684e-02,
          1.8792e-02,  1.5160e-02],
        ...,
        [-4.2366e-02,  3.1165e-02, -5.7579e-05,  ...,  2.6214e-02,
          1.9994e-02,  1.5496e-02],
        [ 3.1689e-02, -2.8635e-02, -4.1429e-04,  ...,  2.1744e-02,
         -1.4840e-03, -2.2917e-02],
        [-3.4958e-02,  3.1969e-02, -3.9814e-02,  ..., -7.7108e-03,
         -4.4797e-02, -2.1493e-02]], device='cuda:0', requires_grad=True)


-------Epoch 35-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0016], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8869e-02,  2.4138e-02,  4.1169e-02,  ..., -8.5233e-03,
          1.1530e-02, -1.3188e-02],
        [-3.6391e-02, -3.1536e-02,  3.3987e-02,  ...,  8.8109e-03,
         -5.3074e-03, -4.7267e-02],
        [ 3.4607e-02,  2.0222e-02,  1.8878e-02,  ...,  1.3807e-02,
          1.8844e-02,  1.5090e-02],
        ...,
        [-4.2382e-02,  3.1160e-02, -4.3349e-05,  ...,  2.6217e-02,
          2.0009e-02,  1.5500e-02],
        [ 3.1617e-02, -2.8623e-02, -5.2098e-04,  ...,  2.1750e-02,
         -1.4659e-03, -2.2963e-02],
        [-3.4860e-02,  3.1983e-02, -3.9863e-02,  ..., -7.7201e-03,
         -4.4833e-02, -2.1496e-02]], device='cuda:0', requires_grad=True)


-------Epoch 36-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0016], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8863e-02,  2.4128e-02,  4.1202e-02,  ..., -8.5868e-03,
          1.1624e-02, -1.3313e-02],
        [-3.6396e-02, -3.1532e-02,  3.3975e-02,  ...,  8.8267e-03,
         -5.3411e-03, -4.7136e-02],
        [ 3.4579e-02,  2.0253e-02,  1.8910e-02,  ...,  1.3007e-02,
          1.8910e-02,  1.4959e-02],
        ...,
        [-4.2295e-02,  3.1137e-02, -2.6537e-05,  ...,  2.6218e-02,
          2.0059e-02,  1.5508e-02],
        [ 3.1788e-02, -2.8627e-02, -6.1510e-04,  ...,  2.1777e-02,
         -1.4207e-03, -2.3061e-02],
        [-3.5081e-02,  3.1979e-02, -3.9984e-02,  ..., -7.7048e-03,
         -4.4938e-02, -2.1482e-02]], device='cuda:0', requires_grad=True)


-------Epoch 37-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0016], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0381,  0.0243,  0.0387,  ..., -0.0087,  0.0113, -0.0134],
        [-0.0340, -0.0315,  0.0358,  ...,  0.0088, -0.0049, -0.0476],
        [ 0.0361,  0.0204,  0.0223,  ...,  0.0116,  0.0193,  0.0146],
        ...,
        [-0.0417,  0.0313, -0.0005,  ...,  0.0264,  0.0200,  0.0159],
        [ 0.0332, -0.0282,  0.0010,  ...,  0.0223, -0.0015, -0.0225],
        [-0.0379,  0.0317, -0.0414,  ..., -0.0076, -0.0450, -0.0214]],
       device='cuda:0', requires_grad=True)


-------Epoch 38-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0375,  0.0244,  0.0370,  ..., -0.0088,  0.0111, -0.0135],
        [-0.0325, -0.0316,  0.0370,  ...,  0.0088, -0.0046, -0.0479],
        [ 0.0372,  0.0204,  0.0245,  ...,  0.0105,  0.0196,  0.0143],
        ...,
        [-0.0413,  0.0314, -0.0007,  ...,  0.0265,  0.0200,  0.0161],
        [ 0.0343, -0.0277,  0.0022,  ...,  0.0226, -0.0015, -0.0223],
        [-0.0403,  0.0315, -0.0424,  ..., -0.0074, -0.0452, -0.0213]],
       device='cuda:0', requires_grad=True)


-------Epoch 39-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0372,  0.0244,  0.0359,  ..., -0.0089,  0.0108, -0.0135],
        [-0.0314, -0.0316,  0.0378,  ...,  0.0089, -0.0043, -0.0478],
        [ 0.0380,  0.0205,  0.0260,  ...,  0.0098,  0.0199,  0.0141],
        ...,
        [-0.0409,  0.0315, -0.0008,  ...,  0.0266,  0.0200,  0.0163],
        [ 0.0351, -0.0274,  0.0032,  ...,  0.0229, -0.0015, -0.0220],
        [-0.0419,  0.0314, -0.0431,  ..., -0.0073, -0.0454, -0.0213]],
       device='cuda:0', requires_grad=True)


-------Epoch 40-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0370,  0.0245,  0.0352,  ..., -0.0089,  0.0107, -0.0135],
        [-0.0308, -0.0317,  0.0383,  ...,  0.0091, -0.0041, -0.0474],
        [ 0.0384,  0.0206,  0.0270,  ...,  0.0090,  0.0200,  0.0140],
        ...,
        [-0.0407,  0.0315, -0.0007,  ...,  0.0267,  0.0200,  0.0164],
        [ 0.0356, -0.0272,  0.0044,  ...,  0.0231, -0.0014, -0.0216],
        [-0.0432,  0.0314, -0.0438,  ..., -0.0072, -0.0454, -0.0212]],
       device='cuda:0', requires_grad=True)


-------Epoch 41-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (40)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0369,  0.0241,  0.0373,  ..., -0.0082,  0.0081, -0.0157],
        [-0.0306, -0.0313,  0.0381,  ...,  0.0093, -0.0034, -0.0439],
        [ 0.0387,  0.0202,  0.0306,  ...,  0.0107,  0.0182,  0.0121],
        ...,
        [-0.0384,  0.0288, -0.0033,  ...,  0.0262,  0.0194,  0.0142],
        [ 0.0384, -0.0238,  0.0077,  ...,  0.0216,  0.0003, -0.0184],
        [-0.0470,  0.0348, -0.0472,  ..., -0.0061, -0.0429, -0.0237]],
       device='cuda:0', requires_grad=True)


-------Epoch 42-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0376,  0.0234,  0.0405,  ..., -0.0067,  0.0047, -0.0149],
        [-0.0315, -0.0300,  0.0370,  ...,  0.0077, -0.0013, -0.0451],
        [ 0.0384,  0.0190,  0.0352,  ...,  0.0144,  0.0157,  0.0106],
        ...,
        [-0.0371,  0.0262, -0.0068,  ...,  0.0254,  0.0190,  0.0123],
        [ 0.0424, -0.0216,  0.0121,  ...,  0.0206,  0.0010, -0.0142],
        [-0.0507,  0.0376, -0.0508,  ..., -0.0051, -0.0385, -0.0270]],
       device='cuda:0', requires_grad=True)


-------Epoch 43-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0382,  0.0233,  0.0432,  ..., -0.0059,  0.0024, -0.0141],
        [-0.0324, -0.0291,  0.0363,  ...,  0.0065,  0.0002, -0.0460],
        [ 0.0383,  0.0186,  0.0385,  ...,  0.0165,  0.0141,  0.0097],
        ...,
        [-0.0360,  0.0245, -0.0091,  ...,  0.0249,  0.0185,  0.0112],
        [ 0.0456, -0.0203,  0.0148,  ...,  0.0192,  0.0004, -0.0105],
        [-0.0533,  0.0397, -0.0528,  ..., -0.0042, -0.0348, -0.0299]],
       device='cuda:0', requires_grad=True)


-------Epoch 44-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.8820e-02,  2.2929e-02,  4.5084e-02,  ..., -5.1118e-03,
          7.6726e-04, -1.3432e-02],
        [-3.3494e-02, -2.8150e-02,  3.5720e-02,  ...,  5.5419e-03,
          1.1918e-03, -4.6667e-02],
        [ 3.8000e-02,  1.8470e-02,  4.0597e-02,  ...,  1.7490e-02,
          1.2995e-02,  9.1028e-03],
        ...,
        [-3.5703e-02,  2.3289e-02, -1.0761e-02,  ...,  2.4482e-02,
          1.8096e-02,  1.0514e-02],
        [ 4.7963e-02, -1.9562e-02,  1.7682e-02,  ...,  1.7859e-02,
          5.1540e-05, -7.8330e-03],
        [-5.4915e-02,  4.1143e-02, -5.4978e-02,  ..., -3.6172e-03,
         -3.2298e-02, -3.1885e-02]], device='cuda:0', requires_grad=True)


-------Epoch 45-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0392,  0.0229,  0.0463,  ..., -0.0044, -0.0010, -0.0125],
        [-0.0341, -0.0276,  0.0354,  ...,  0.0048,  0.0020, -0.0474],
        [ 0.0376,  0.0186,  0.0420,  ...,  0.0179,  0.0120,  0.0090],
        ...,
        [-0.0361,  0.0224, -0.0122,  ...,  0.0241,  0.0178,  0.0100],
        [ 0.0495, -0.0197,  0.0219,  ...,  0.0165, -0.0002, -0.0055],
        [-0.0560,  0.0424, -0.0585,  ..., -0.0028, -0.0306, -0.0337]],
       device='cuda:0', requires_grad=True)


-------Epoch 46-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0395,  0.0229,  0.0471,  ..., -0.0039, -0.0023, -0.0118],
        [-0.0345, -0.0273,  0.0351,  ...,  0.0043,  0.0026, -0.0479],
        [ 0.0373,  0.0188,  0.0430,  ...,  0.0181,  0.0112,  0.0090],
        ...,
        [-0.0362,  0.0218, -0.0125,  ...,  0.0238,  0.0172,  0.0093],
        [ 0.0503, -0.0195,  0.0238,  ...,  0.0156, -0.0002, -0.0043],
        [-0.0566,  0.0432, -0.0602,  ..., -0.0022, -0.0299, -0.0348]],
       device='cuda:0', requires_grad=True)


-------Epoch 47-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0397,  0.0230,  0.0477,  ..., -0.0035, -0.0032, -0.0114],
        [-0.0348, -0.0270,  0.0350,  ...,  0.0040,  0.0030, -0.0482],
        [ 0.0371,  0.0189,  0.0436,  ...,  0.0183,  0.0107,  0.0090],
        ...,
        [-0.0363,  0.0215, -0.0127,  ...,  0.0237,  0.0167,  0.0088],
        [ 0.0510, -0.0194,  0.0251,  ...,  0.0150, -0.0002, -0.0036],
        [-0.0571,  0.0436, -0.0614,  ..., -0.0018, -0.0294, -0.0356]],
       device='cuda:0', requires_grad=True)


-------Epoch 48-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0398,  0.0230,  0.0480,  ..., -0.0034, -0.0038, -0.0111],
        [-0.0349, -0.0269,  0.0349,  ...,  0.0040,  0.0033, -0.0484],
        [ 0.0369,  0.0189,  0.0441,  ...,  0.0184,  0.0103,  0.0090],
        ...,
        [-0.0359,  0.0212, -0.0115,  ...,  0.0234,  0.0157,  0.0090],
        [ 0.0524, -0.0193,  0.0270,  ...,  0.0141, -0.0007, -0.0024],
        [-0.0563,  0.0439, -0.0601,  ..., -0.0023, -0.0310, -0.0350]],
       device='cuda:0', requires_grad=True)


-------Epoch 49-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0386,  0.0243,  0.0493,  ..., -0.0047, -0.0059, -0.0122],
        [-0.0340, -0.0281,  0.0339,  ...,  0.0053,  0.0056, -0.0472],
        [ 0.0355,  0.0203,  0.0454,  ...,  0.0171,  0.0085,  0.0077],
        ...,
        [-0.0368,  0.0208, -0.0121,  ...,  0.0223,  0.0146,  0.0080],
        [ 0.0537, -0.0192,  0.0287,  ...,  0.0141, -0.0010, -0.0014],
        [-0.0563,  0.0439, -0.0603,  ..., -0.0034, -0.0322, -0.0351]],
       device='cuda:0', requires_grad=True)


-------Epoch 50-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 3.4515e-02,  2.8354e-02,  5.3384e-02,  ..., -8.8421e-03,
         -1.0132e-02, -1.6242e-02],
        [-2.9888e-02, -3.2272e-02,  2.9899e-02,  ...,  9.5127e-03,
          9.8566e-03, -4.3173e-02],
        [ 3.1342e-02,  2.4434e-02,  4.9637e-02,  ...,  1.3016e-02,
          4.3564e-03,  3.5362e-03],
        ...,
        [-4.0041e-02,  2.0069e-02, -1.6036e-02,  ...,  1.9340e-02,
          1.2422e-02,  4.7378e-03],
        [ 5.5508e-02, -1.9000e-02,  3.1228e-02,  ...,  1.5500e-02,
         -1.0959e-03, -4.5078e-05],
        [-5.7716e-02,  4.3380e-02, -6.3140e-02,  ..., -6.0961e-03,
         -3.3867e-02, -3.6441e-02]], device='cuda:0', requires_grad=True)


-------Epoch 51-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (50)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0318,  0.0314,  0.0562,  ..., -0.0115, -0.0130, -0.0187],
        [-0.0273, -0.0349,  0.0275,  ...,  0.0125,  0.0126, -0.0406],
        [ 0.0285,  0.0268,  0.0524,  ...,  0.0103,  0.0016,  0.0006],
        ...,
        [-0.0412,  0.0196, -0.0181,  ...,  0.0170,  0.0098,  0.0022],
        [ 0.0581, -0.0197,  0.0344,  ...,  0.0161,  0.0004,  0.0002],
        [-0.0588,  0.0433, -0.0653,  ..., -0.0083, -0.0375, -0.0371]],
       device='cuda:0', requires_grad=True)


-------Epoch 52-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0020], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0300,  0.0337,  0.0583,  ..., -0.0131, -0.0149, -0.0203],
        [-0.0255, -0.0367,  0.0259,  ...,  0.0145,  0.0145, -0.0388],
        [ 0.0267,  0.0281,  0.0540,  ...,  0.0084, -0.0001, -0.0015],
        ...,
        [-0.0430,  0.0204, -0.0193,  ...,  0.0153,  0.0100, -0.0006],
        [ 0.0626, -0.0219,  0.0375,  ...,  0.0151,  0.0010,  0.0001],
        [-0.0617,  0.0451, -0.0672,  ..., -0.0091, -0.0378, -0.0384]],
       device='cuda:0', requires_grad=True)


-------Epoch 53-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0288,  0.0353,  0.0596,  ..., -0.0142, -0.0161, -0.0213],
        [-0.0243, -0.0378,  0.0249,  ...,  0.0158,  0.0157, -0.0377],
        [ 0.0255,  0.0289,  0.0551,  ...,  0.0071, -0.0013, -0.0029],
        ...,
        [-0.0457,  0.0218, -0.0202,  ...,  0.0143,  0.0110, -0.0030],
        [ 0.0678, -0.0245,  0.0387,  ...,  0.0140,  0.0012,  0.0003],
        [-0.0656,  0.0477, -0.0683,  ..., -0.0092, -0.0368, -0.0401]],
       device='cuda:0', requires_grad=True)


-------Epoch 54-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0281,  0.0363,  0.0605,  ..., -0.0149, -0.0169, -0.0220],
        [-0.0236, -0.0386,  0.0242,  ...,  0.0166,  0.0165, -0.0369],
        [ 0.0246,  0.0295,  0.0558,  ...,  0.0063, -0.0021, -0.0038],
        ...,
        [-0.0474,  0.0227, -0.0208,  ...,  0.0137,  0.0117, -0.0047],
        [ 0.0713, -0.0262,  0.0394,  ...,  0.0133,  0.0013,  0.0005],
        [-0.0681,  0.0493, -0.0691,  ..., -0.0092, -0.0361, -0.0413]],
       device='cuda:0', requires_grad=True)


-------Epoch 55-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0275,  0.0369,  0.0611,  ..., -0.0153, -0.0175, -0.0225],
        [-0.0228, -0.0389,  0.0238,  ...,  0.0171,  0.0170, -0.0364],
        [ 0.0240,  0.0299,  0.0563,  ...,  0.0057, -0.0026, -0.0044],
        ...,
        [-0.0486,  0.0233, -0.0212,  ...,  0.0133,  0.0122, -0.0058],
        [ 0.0736, -0.0273,  0.0400,  ...,  0.0128,  0.0014,  0.0005],
        [-0.0698,  0.0505, -0.0696,  ..., -0.0092, -0.0356, -0.0420]],
       device='cuda:0', requires_grad=True)


-------Epoch 56-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0246,  0.0346,  0.0615,  ..., -0.0181, -0.0151, -0.0228],
        [-0.0236, -0.0405,  0.0235,  ...,  0.0164,  0.0188, -0.0361],
        [ 0.0212,  0.0274,  0.0566,  ...,  0.0031, -0.0002, -0.0048],
        ...,
        [-0.0494,  0.0237, -0.0215,  ...,  0.0131,  0.0125, -0.0066],
        [ 0.0750, -0.0279,  0.0401,  ...,  0.0125,  0.0015,  0.0004],
        [-0.0710,  0.0512, -0.0699,  ..., -0.0092, -0.0353, -0.0426]],
       device='cuda:0', requires_grad=True)


-------Epoch 57-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0016], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0207,  0.0307,  0.0617,  ..., -0.0219, -0.0112, -0.0230],
        [-0.0250, -0.0426,  0.0233,  ...,  0.0151,  0.0213, -0.0359],
        [ 0.0174,  0.0237,  0.0568,  ..., -0.0006,  0.0036, -0.0051],
        ...,
        [-0.0499,  0.0268, -0.0199,  ...,  0.0151,  0.0126, -0.0089],
        [ 0.0760, -0.0240,  0.0410,  ...,  0.0158,  0.0014, -0.0025],
        [-0.0717,  0.0532, -0.0678,  ..., -0.0065, -0.0352, -0.0437]],
       device='cuda:0', requires_grad=True)


-------Epoch 58-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0181,  0.0282,  0.0619,  ..., -0.0245, -0.0086, -0.0231],
        [-0.0260, -0.0441,  0.0232,  ...,  0.0142,  0.0230, -0.0357],
        [ 0.0148,  0.0212,  0.0570,  ..., -0.0031,  0.0061, -0.0053],
        ...,
        [-0.0502,  0.0288, -0.0188,  ...,  0.0165,  0.0126, -0.0104],
        [ 0.0768, -0.0215,  0.0417,  ...,  0.0178,  0.0013, -0.0045],
        [-0.0722,  0.0544, -0.0664,  ..., -0.0047, -0.0351, -0.0444]],
       device='cuda:0', requires_grad=True)


-------Epoch 59-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0164,  0.0265,  0.0619,  ..., -0.0262, -0.0069, -0.0232],
        [-0.0267, -0.0450,  0.0231,  ...,  0.0136,  0.0241, -0.0356],
        [ 0.0131,  0.0196,  0.0569,  ..., -0.0047,  0.0078, -0.0054],
        ...,
        [-0.0504,  0.0319, -0.0176,  ...,  0.0173,  0.0126, -0.0126],
        [ 0.0775, -0.0182,  0.0438,  ...,  0.0190,  0.0016, -0.0068],
        [-0.0725,  0.0561, -0.0653,  ..., -0.0037, -0.0351, -0.0454]],
       device='cuda:0', requires_grad=True)


-------Epoch 60-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0152,  0.0254,  0.0617,  ..., -0.0273, -0.0058, -0.0233],
        [-0.0271, -0.0456,  0.0231,  ...,  0.0133,  0.0248, -0.0355],
        [ 0.0120,  0.0185,  0.0568,  ..., -0.0058,  0.0089, -0.0055],
        ...,
        [-0.0502,  0.0352, -0.0162,  ...,  0.0178,  0.0127, -0.0150],
        [ 0.0786, -0.0149,  0.0466,  ...,  0.0197,  0.0021, -0.0093],
        [-0.0726,  0.0580, -0.0645,  ..., -0.0030, -0.0349, -0.0465]],
       device='cuda:0', requires_grad=True)


-------Epoch 61-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (60)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0145,  0.0237,  0.0623,  ..., -0.0281, -0.0051, -0.0222],
        [-0.0274, -0.0449,  0.0228,  ...,  0.0131,  0.0253, -0.0362],
        [ 0.0112,  0.0167,  0.0574,  ..., -0.0065,  0.0096, -0.0043],
        ...,
        [-0.0512,  0.0383, -0.0169,  ...,  0.0167,  0.0144, -0.0177],
        [ 0.0787, -0.0125,  0.0469,  ...,  0.0191,  0.0012, -0.0121],
        [-0.0709,  0.0573, -0.0625,  ..., -0.0039, -0.0362, -0.0486]],
       device='cuda:0', requires_grad=True)


-------Epoch 62-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0140,  0.0204,  0.0646,  ..., -0.0286, -0.0047, -0.0183],
        [-0.0276, -0.0416,  0.0219,  ...,  0.0129,  0.0258, -0.0385],
        [ 0.0108,  0.0131,  0.0596,  ..., -0.0070,  0.0099, -0.0005],
        ...,
        [-0.0558,  0.0430, -0.0214,  ...,  0.0121,  0.0189, -0.0223],
        [ 0.0741, -0.0094,  0.0425,  ...,  0.0145, -0.0034, -0.0166],
        [-0.0663,  0.0528, -0.0579,  ..., -0.0085, -0.0408, -0.0532]],
       device='cuda:0', requires_grad=True)


-------Epoch 63-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0136,  0.0181,  0.0661,  ..., -0.0289, -0.0044, -0.0157],
        [-0.0277, -0.0394,  0.0213,  ...,  0.0128,  0.0262, -0.0400],
        [ 0.0104,  0.0106,  0.0611,  ..., -0.0073,  0.0102,  0.0021],
        ...,
        [-0.0588,  0.0460, -0.0244,  ...,  0.0091,  0.0219, -0.0254],
        [ 0.0711, -0.0074,  0.0396,  ...,  0.0115, -0.0064, -0.0197],
        [-0.0633,  0.0497, -0.0549,  ..., -0.0115, -0.0438, -0.0562]],
       device='cuda:0', requires_grad=True)


-------Epoch 64-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0134,  0.0166,  0.0671,  ..., -0.0292, -0.0043, -0.0139],
        [-0.0278, -0.0379,  0.0209,  ...,  0.0128,  0.0264, -0.0410],
        [ 0.0102,  0.0090,  0.0621,  ..., -0.0075,  0.0103,  0.0038],
        ...,
        [-0.0608,  0.0480, -0.0264,  ...,  0.0070,  0.0239, -0.0274],
        [ 0.0691, -0.0061,  0.0376,  ...,  0.0094, -0.0085, -0.0217],
        [-0.0613,  0.0477, -0.0528,  ..., -0.0135, -0.0458, -0.0582]],
       device='cuda:0', requires_grad=True)


-------Epoch 65-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0136,  0.0156,  0.0670,  ..., -0.0293, -0.0042, -0.0123],
        [-0.0277, -0.0370,  0.0213,  ...,  0.0127,  0.0261, -0.0421],
        [ 0.0097,  0.0080,  0.0614,  ..., -0.0076,  0.0108,  0.0052],
        ...,
        [-0.0621,  0.0494, -0.0277,  ...,  0.0057,  0.0251, -0.0287],
        [ 0.0678, -0.0053,  0.0364,  ...,  0.0081, -0.0098, -0.0230],
        [-0.0599,  0.0464, -0.0515,  ..., -0.0149, -0.0471, -0.0596]],
       device='cuda:0', requires_grad=True)


-------Epoch 66-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0017], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0139,  0.0144,  0.0659,  ..., -0.0297, -0.0043, -0.0112],
        [-0.0271, -0.0379,  0.0204,  ...,  0.0115,  0.0249, -0.0431],
        [ 0.0098,  0.0061,  0.0592,  ..., -0.0089,  0.0103,  0.0059],
        ...,
        [-0.0629,  0.0503, -0.0285,  ...,  0.0049,  0.0258, -0.0296],
        [ 0.0670, -0.0039,  0.0355,  ...,  0.0072, -0.0107, -0.0239],
        [-0.0590,  0.0455, -0.0506,  ..., -0.0157, -0.0480, -0.0604]],
       device='cuda:0', requires_grad=True)


-------Epoch 67-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0017], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0143,  0.0125,  0.0630,  ..., -0.0307, -0.0048, -0.0107],
        [-0.0256, -0.0419,  0.0167,  ...,  0.0080,  0.0218, -0.0444],
        [ 0.0110,  0.0017,  0.0545,  ..., -0.0126,  0.0079,  0.0054],
        ...,
        [-0.0635,  0.0511, -0.0291,  ...,  0.0043,  0.0267, -0.0302],
        [ 0.0663, -0.0013,  0.0347,  ...,  0.0067, -0.0113, -0.0245],
        [-0.0584,  0.0449, -0.0501,  ..., -0.0163, -0.0485, -0.0611]],
       device='cuda:0', requires_grad=True)


-------Epoch 68-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0146,  0.0112,  0.0611,  ..., -0.0314, -0.0051, -0.0103],
        [-0.0246, -0.0446,  0.0143,  ...,  0.0056,  0.0197, -0.0452],
        [ 0.0119, -0.0012,  0.0514,  ..., -0.0150,  0.0063,  0.0051],
        ...,
        [-0.0639,  0.0518, -0.0291,  ...,  0.0040,  0.0276, -0.0306],
        [ 0.0659,  0.0017,  0.0345,  ...,  0.0063, -0.0116, -0.0249],
        [-0.0580,  0.0444, -0.0498,  ..., -0.0168, -0.0489, -0.0615]],
       device='cuda:0', requires_grad=True)


-------Epoch 69-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0147,  0.0104,  0.0598,  ..., -0.0318, -0.0053, -0.0101],
        [-0.0239, -0.0463,  0.0127,  ...,  0.0041,  0.0183, -0.0458],
        [ 0.0124, -0.0031,  0.0493,  ..., -0.0167,  0.0052,  0.0048],
        ...,
        [-0.0641,  0.0523, -0.0291,  ...,  0.0038,  0.0282, -0.0309],
        [ 0.0656,  0.0035,  0.0344,  ...,  0.0060, -0.0117, -0.0251],
        [-0.0577,  0.0442, -0.0497,  ..., -0.0170, -0.0492, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 70-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0019], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0148,  0.0098,  0.0590,  ..., -0.0321, -0.0054, -0.0099],
        [-0.0234, -0.0475,  0.0116,  ...,  0.0031,  0.0174, -0.0462],
        [ 0.0128, -0.0043,  0.0479,  ..., -0.0178,  0.0045,  0.0047],
        ...,
        [-0.0643,  0.0526, -0.0291,  ...,  0.0036,  0.0286, -0.0311],
        [ 0.0654,  0.0046,  0.0344,  ...,  0.0059, -0.0118, -0.0253],
        [-0.0576,  0.0440, -0.0496,  ..., -0.0172, -0.0494, -0.0619]],
       device='cuda:0', requires_grad=True)


-------Epoch 71-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (70)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0149,  0.0095,  0.0585,  ..., -0.0323, -0.0055, -0.0098],
        [-0.0231, -0.0483,  0.0109,  ...,  0.0024,  0.0168, -0.0464],
        [ 0.0130, -0.0052,  0.0470,  ..., -0.0185,  0.0041,  0.0046],
        ...,
        [-0.0644,  0.0528, -0.0290,  ...,  0.0035,  0.0288, -0.0312],
        [ 0.0653,  0.0055,  0.0344,  ...,  0.0058, -0.0119, -0.0254],
        [-0.0575,  0.0438, -0.0495,  ..., -0.0173, -0.0495, -0.0620]],
       device='cuda:0', requires_grad=True)


-------Epoch 72-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0092,  0.0581,  ..., -0.0324, -0.0055, -0.0097],
        [-0.0230, -0.0488,  0.0104,  ...,  0.0020,  0.0164, -0.0466],
        [ 0.0132, -0.0057,  0.0464,  ..., -0.0189,  0.0037,  0.0045],
        ...,
        [-0.0645,  0.0530, -0.0290,  ...,  0.0035,  0.0290, -0.0313],
        [ 0.0652,  0.0060,  0.0344,  ...,  0.0057, -0.0120, -0.0255],
        [-0.0574,  0.0438, -0.0495,  ..., -0.0174, -0.0496, -0.0621]],
       device='cuda:0', requires_grad=True)


-------Epoch 73-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0091,  0.0579,  ..., -0.0325, -0.0056, -0.0096],
        [-0.0228, -0.0491,  0.0101,  ...,  0.0017,  0.0162, -0.0467],
        [ 0.0133, -0.0061,  0.0460,  ..., -0.0193,  0.0035,  0.0044],
        ...,
        [-0.0645,  0.0531, -0.0288,  ...,  0.0034,  0.0294, -0.0313],
        [ 0.0650,  0.0060,  0.0341,  ...,  0.0056, -0.0121, -0.0255],
        [-0.0574,  0.0437, -0.0497,  ..., -0.0175, -0.0497, -0.0622]],
       device='cuda:0', requires_grad=True)


-------Epoch 74-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0090,  0.0578,  ..., -0.0326, -0.0056, -0.0095],
        [-0.0227, -0.0493,  0.0099,  ...,  0.0015,  0.0160, -0.0468],
        [ 0.0134, -0.0064,  0.0458,  ..., -0.0195,  0.0034,  0.0044],
        ...,
        [-0.0644,  0.0531, -0.0285,  ...,  0.0034,  0.0299, -0.0313],
        [ 0.0648,  0.0050,  0.0336,  ...,  0.0056, -0.0123, -0.0256],
        [-0.0575,  0.0437, -0.0500,  ..., -0.0176, -0.0499, -0.0622]],
       device='cuda:0', requires_grad=True)


-------Epoch 75-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0577,  ..., -0.0326, -0.0056, -0.0095],
        [-0.0226, -0.0495,  0.0098,  ...,  0.0014,  0.0159, -0.0469],
        [ 0.0135, -0.0066,  0.0456,  ..., -0.0196,  0.0033,  0.0044],
        ...,
        [-0.0642,  0.0531, -0.0281,  ...,  0.0034,  0.0299, -0.0314],
        [ 0.0648,  0.0029,  0.0334,  ...,  0.0056, -0.0124, -0.0256],
        [-0.0576,  0.0437, -0.0504,  ..., -0.0176, -0.0499, -0.0623]],
       device='cuda:0', requires_grad=True)


-------Epoch 76-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0016], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0088,  0.0576,  ..., -0.0327, -0.0056, -0.0095],
        [-0.0225, -0.0496,  0.0097,  ...,  0.0014,  0.0158, -0.0469],
        [ 0.0136, -0.0067,  0.0455,  ..., -0.0197,  0.0033,  0.0044],
        ...,
        [-0.0641,  0.0532, -0.0278,  ...,  0.0034,  0.0301, -0.0314],
        [ 0.0648,  0.0046,  0.0339,  ...,  0.0056, -0.0124, -0.0256],
        [-0.0577,  0.0436, -0.0511,  ..., -0.0177, -0.0502, -0.0621]],
       device='cuda:0', requires_grad=True)


-------Epoch 77-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0088,  0.0576,  ..., -0.0327, -0.0056, -0.0094],
        [-0.0224, -0.0496,  0.0096,  ...,  0.0013,  0.0158, -0.0470],
        [ 0.0136, -0.0068,  0.0454,  ..., -0.0197,  0.0032,  0.0044],
        ...,
        [-0.0640,  0.0531, -0.0276,  ...,  0.0034,  0.0306, -0.0314],
        [ 0.0648,  0.0047,  0.0340,  ...,  0.0056, -0.0123, -0.0256],
        [-0.0577,  0.0436, -0.0514,  ..., -0.0177, -0.0505, -0.0620]],
       device='cuda:0', requires_grad=True)


-------Epoch 78-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0088,  0.0576,  ..., -0.0327, -0.0056, -0.0094],
        [-0.0224, -0.0497,  0.0095,  ...,  0.0013,  0.0157, -0.0470],
        [ 0.0136, -0.0068,  0.0453,  ..., -0.0198,  0.0032,  0.0045],
        ...,
        [-0.0640,  0.0530, -0.0276,  ...,  0.0035,  0.0314, -0.0314],
        [ 0.0649,  0.0051,  0.0338,  ...,  0.0056, -0.0122, -0.0257],
        [-0.0578,  0.0435, -0.0515,  ..., -0.0178, -0.0507, -0.0619]],
       device='cuda:0', requires_grad=True)


-------Epoch 79-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0088,  0.0575,  ..., -0.0327, -0.0056, -0.0095],
        [-0.0224, -0.0497,  0.0095,  ...,  0.0012,  0.0157, -0.0469],
        [ 0.0137, -0.0069,  0.0453,  ..., -0.0198,  0.0032,  0.0044],
        ...,
        [-0.0639,  0.0529, -0.0276,  ...,  0.0035,  0.0319, -0.0314],
        [ 0.0650,  0.0062,  0.0336,  ...,  0.0056, -0.0123, -0.0257],
        [-0.0578,  0.0435, -0.0516,  ..., -0.0178, -0.0508, -0.0619]],
       device='cuda:0', requires_grad=True)


-------Epoch 80-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0088,  0.0575,  ..., -0.0326, -0.0056, -0.0099],
        [-0.0223, -0.0498,  0.0095,  ...,  0.0011,  0.0157, -0.0464],
        [ 0.0137, -0.0069,  0.0453,  ..., -0.0198,  0.0032,  0.0042],
        ...,
        [-0.0639,  0.0528, -0.0276,  ...,  0.0035,  0.0322, -0.0314],
        [ 0.0650,  0.0067,  0.0335,  ...,  0.0056, -0.0123, -0.0257],
        [-0.0579,  0.0435, -0.0516,  ..., -0.0178, -0.0509, -0.0618]],
       device='cuda:0', requires_grad=True)


-------Epoch 81-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (80)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0151,  0.0088,  0.0575,  ..., -0.0325, -0.0056, -0.0101],
        [-0.0223, -0.0499,  0.0095,  ...,  0.0010,  0.0157, -0.0461],
        [ 0.0137, -0.0069,  0.0453,  ..., -0.0198,  0.0032,  0.0041],
        ...,
        [-0.0638,  0.0527, -0.0276,  ...,  0.0035,  0.0327, -0.0314],
        [ 0.0651,  0.0068,  0.0334,  ...,  0.0055, -0.0123, -0.0257],
        [-0.0579,  0.0435, -0.0516,  ..., -0.0178, -0.0509, -0.0618]],
       device='cuda:0', requires_grad=True)


-------Epoch 82-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0575,  ..., -0.0325, -0.0056, -0.0103],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0459],
        [ 0.0137, -0.0069,  0.0453,  ..., -0.0198,  0.0032,  0.0040],
        ...,
        [-0.0638,  0.0526, -0.0276,  ...,  0.0035,  0.0330, -0.0314],
        [ 0.0651,  0.0070,  0.0333,  ...,  0.0055, -0.0123, -0.0257],
        [-0.0579,  0.0435, -0.0516,  ..., -0.0178, -0.0509, -0.0618]],
       device='cuda:0', requires_grad=True)


-------Epoch 83-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0575,  ..., -0.0325, -0.0056, -0.0104],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0458],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0032,  0.0040],
        ...,
        [-0.0638,  0.0526, -0.0276,  ...,  0.0035,  0.0332, -0.0314],
        [ 0.0651,  0.0072,  0.0333,  ...,  0.0055, -0.0123, -0.0257],
        [-0.0579,  0.0434, -0.0516,  ..., -0.0179, -0.0509, -0.0618]],
       device='cuda:0', requires_grad=True)


-------Epoch 84-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0575,  ..., -0.0325, -0.0056, -0.0104],
        [-0.0222, -0.0500,  0.0094,  ...,  0.0009,  0.0157, -0.0457],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0032,  0.0040],
        ...,
        [-0.0638,  0.0526, -0.0276,  ...,  0.0035,  0.0333, -0.0314],
        [ 0.0651,  0.0074,  0.0333,  ...,  0.0055, -0.0123, -0.0257],
        [-0.0579,  0.0434, -0.0516,  ..., -0.0179, -0.0509, -0.0618]],
       device='cuda:0', requires_grad=True)


-------Epoch 85-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0575,  ..., -0.0324, -0.0056, -0.0105],
        [-0.0222, -0.0500,  0.0095,  ...,  0.0009,  0.0157, -0.0456],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0032,  0.0039],
        ...,
        [-0.0638,  0.0526, -0.0276,  ...,  0.0035,  0.0334, -0.0314],
        [ 0.0652,  0.0055,  0.0327,  ...,  0.0056, -0.0122, -0.0257],
        [-0.0579,  0.0435, -0.0514,  ..., -0.0179, -0.0510, -0.0618]],
       device='cuda:0', requires_grad=True)


-------Epoch 86-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0575,  ..., -0.0324, -0.0056, -0.0105],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0456],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0032,  0.0039],
        ...,
        [-0.0638,  0.0526, -0.0275,  ...,  0.0035,  0.0333, -0.0314],
        [ 0.0653,  0.0028,  0.0320,  ...,  0.0056, -0.0121, -0.0257],
        [-0.0579,  0.0436, -0.0511,  ..., -0.0180, -0.0511, -0.0618]],
       device='cuda:0', requires_grad=True)


-------Epoch 87-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0575,  ..., -0.0324, -0.0056, -0.0106],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0456],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0032,  0.0039],
        ...,
        [-0.0638,  0.0526, -0.0275,  ...,  0.0035,  0.0331, -0.0314],
        [ 0.0656,  0.0005,  0.0312,  ...,  0.0056, -0.0120, -0.0257],
        [-0.0579,  0.0436, -0.0508,  ..., -0.0180, -0.0513, -0.0618]],
       device='cuda:0', requires_grad=True)


-------Epoch 88-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0008], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0575,  ..., -0.0323, -0.0056, -0.0106],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0456],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0032,  0.0038],
        ...,
        [-0.0638,  0.0526, -0.0275,  ...,  0.0035,  0.0330, -0.0314],
        [ 0.0657, -0.0009,  0.0307,  ...,  0.0056, -0.0119, -0.0257],
        [-0.0578,  0.0436, -0.0506,  ..., -0.0180, -0.0514, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 89-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0575,  ..., -0.0323, -0.0056, -0.0106],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0456],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0032,  0.0038],
        ...,
        [-0.0637,  0.0526, -0.0275,  ...,  0.0035,  0.0328, -0.0314],
        [ 0.0658, -0.0019,  0.0304,  ...,  0.0056, -0.0119, -0.0257],
        [-0.0578,  0.0435, -0.0505,  ..., -0.0180, -0.0515, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 90-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0323, -0.0056, -0.0106],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0456],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0032,  0.0038],
        ...,
        [-0.0637,  0.0526, -0.0276,  ...,  0.0035,  0.0327, -0.0314],
        [ 0.0659, -0.0024,  0.0302,  ...,  0.0056, -0.0119, -0.0257],
        [-0.0578,  0.0435, -0.0504,  ..., -0.0180, -0.0515, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 91-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0021], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (90)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0323, -0.0056, -0.0106],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0456],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0032,  0.0038],
        ...,
        [-0.0637,  0.0526, -0.0276,  ...,  0.0035,  0.0326, -0.0314],
        [ 0.0659, -0.0027,  0.0300,  ...,  0.0056, -0.0119, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0515, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 92-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0016], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0323, -0.0056, -0.0106],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0031,  0.0038],
        ...,
        [-0.0638,  0.0526, -0.0276,  ...,  0.0035,  0.0325, -0.0314],
        [ 0.0659, -0.0029,  0.0299,  ...,  0.0056, -0.0119, -0.0257],
        [-0.0577,  0.0435, -0.0503,  ..., -0.0180, -0.0516, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 93-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0324, -0.0056, -0.0107],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0031,  0.0038],
        ...,
        [-0.0638,  0.0526, -0.0277,  ...,  0.0035,  0.0326, -0.0314],
        [ 0.0660, -0.0029,  0.0299,  ...,  0.0056, -0.0118, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0516, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 94-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0056, -0.0107],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0157, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0197,  0.0031,  0.0038],
        ...,
        [-0.0637,  0.0527, -0.0277,  ...,  0.0035,  0.0328, -0.0314],
        [ 0.0661, -0.0027,  0.0300,  ...,  0.0056, -0.0118, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0516, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 95-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0038],
        ...,
        [-0.0637,  0.0527, -0.0278,  ...,  0.0035,  0.0329, -0.0314],
        [ 0.0662, -0.0025,  0.0301,  ...,  0.0056, -0.0118, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0516, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 96-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0017], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0037],
        ...,
        [-0.0637,  0.0527, -0.0278,  ...,  0.0035,  0.0328, -0.0314],
        [ 0.0663, -0.0029,  0.0301,  ...,  0.0056, -0.0117, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0516, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 97-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0037],
        ...,
        [-0.0637,  0.0527, -0.0278,  ...,  0.0035,  0.0328, -0.0314],
        [ 0.0663, -0.0031,  0.0301,  ...,  0.0056, -0.0117, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 98-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0037],
        ...,
        [-0.0637,  0.0527, -0.0278,  ...,  0.0035,  0.0328, -0.0314],
        [ 0.0663, -0.0032,  0.0301,  ...,  0.0056, -0.0117, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 99-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0037],
        ...,
        [-0.0637,  0.0528, -0.0278,  ...,  0.0035,  0.0328, -0.0314],
        [ 0.0663, -0.0033,  0.0301,  ...,  0.0056, -0.0117, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 100-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0037],
        ...,
        [-0.0637,  0.0528, -0.0278,  ...,  0.0035,  0.0329, -0.0314],
        [ 0.0663, -0.0033,  0.0302,  ...,  0.0056, -0.0117, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 101-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (100)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0094,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0037],
        ...,
        [-0.0637,  0.0528, -0.0280,  ...,  0.0035,  0.0328, -0.0314],
        [ 0.0663, -0.0033,  0.0302,  ...,  0.0056, -0.0117, -0.0257],
        [-0.0578,  0.0435, -0.0503,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 102-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0094,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0037],
        ...,
        [-0.0637,  0.0528, -0.0280,  ...,  0.0035,  0.0327, -0.0314],
        [ 0.0663, -0.0033,  0.0302,  ...,  0.0056, -0.0117, -0.0257],
        [-0.0578,  0.0435, -0.0504,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 103-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0574,  ..., -0.0324, -0.0057, -0.0107],
        [-0.0222, -0.0499,  0.0094,  ...,  0.0009,  0.0158, -0.0455],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0037],
        ...,
        [-0.0637,  0.0528, -0.0281,  ...,  0.0035,  0.0327, -0.0314],
        [ 0.0663, -0.0033,  0.0302,  ...,  0.0056, -0.0116, -0.0257],
        [-0.0578,  0.0435, -0.0504,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 104-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0057, -0.0108],
        [-0.0223, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0454],
        [ 0.0137, -0.0069,  0.0452,  ..., -0.0198,  0.0031,  0.0036],
        ...,
        [-0.0637,  0.0528, -0.0281,  ...,  0.0035,  0.0326, -0.0314],
        [ 0.0663, -0.0033,  0.0302,  ...,  0.0056, -0.0116, -0.0257],
        [-0.0578,  0.0435, -0.0504,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 105-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0057, -0.0108],
        [-0.0223, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0454],
        [ 0.0137, -0.0069,  0.0451,  ..., -0.0198,  0.0031,  0.0035],
        ...,
        [-0.0637,  0.0528, -0.0281,  ...,  0.0035,  0.0326, -0.0314],
        [ 0.0663, -0.0032,  0.0302,  ...,  0.0056, -0.0116, -0.0257],
        [-0.0578,  0.0436, -0.0504,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 106-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0057, -0.0108],
        [-0.0223, -0.0499,  0.0095,  ...,  0.0009,  0.0158, -0.0454],
        [ 0.0137, -0.0069,  0.0451,  ..., -0.0198,  0.0031,  0.0035],
        ...,
        [-0.0637,  0.0528, -0.0282,  ...,  0.0035,  0.0324, -0.0314],
        [ 0.0663, -0.0036,  0.0305,  ...,  0.0056, -0.0117, -0.0257],
        [-0.0578,  0.0435, -0.0504,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 107-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0057, -0.0108],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0009,  0.0158, -0.0454],
        [ 0.0138, -0.0069,  0.0451,  ..., -0.0198,  0.0031,  0.0034],
        ...,
        [-0.0637,  0.0528, -0.0283,  ...,  0.0035,  0.0320, -0.0314],
        [ 0.0663, -0.0040,  0.0309,  ...,  0.0056, -0.0118, -0.0257],
        [-0.0578,  0.0435, -0.0505,  ..., -0.0180, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 108-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0324, -0.0057, -0.0108],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0009,  0.0158, -0.0453],
        [ 0.0138, -0.0069,  0.0451,  ..., -0.0198,  0.0031,  0.0034],
        ...,
        [-0.0637,  0.0527, -0.0284,  ...,  0.0035,  0.0318, -0.0314],
        [ 0.0663, -0.0044,  0.0311,  ...,  0.0056, -0.0119, -0.0257],
        [-0.0578,  0.0435, -0.0505,  ..., -0.0180, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 109-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0573,  ..., -0.0325, -0.0056, -0.0109],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0453],
        [ 0.0138, -0.0070,  0.0451,  ..., -0.0197,  0.0029,  0.0038],
        ...,
        [-0.0642,  0.0553, -0.0268,  ...,  0.0033,  0.0313, -0.0312],
        [ 0.0667, -0.0093,  0.0301,  ...,  0.0056, -0.0119, -0.0258],
        [-0.0578,  0.0435, -0.0505,  ..., -0.0180, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 110-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0090,  0.0573,  ..., -0.0325, -0.0055, -0.0111],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0453],
        [ 0.0139, -0.0071,  0.0451,  ..., -0.0195,  0.0025,  0.0044],
        ...,
        [-0.0646,  0.0577, -0.0253,  ...,  0.0030,  0.0309, -0.0310],
        [ 0.0671, -0.0139,  0.0291,  ...,  0.0057, -0.0119, -0.0258],
        [-0.0578,  0.0435, -0.0505,  ..., -0.0180, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 111-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (110)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0150,  0.0090,  0.0573,  ..., -0.0326, -0.0055, -0.0113],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0453],
        [ 0.0140, -0.0072,  0.0450,  ..., -0.0194,  0.0023,  0.0049],
        ...,
        [-0.0649,  0.0592, -0.0243,  ...,  0.0029,  0.0306, -0.0309],
        [ 0.0674, -0.0169,  0.0285,  ...,  0.0057, -0.0120, -0.0259],
        [-0.0578,  0.0435, -0.0506,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 112-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0091,  0.0573,  ..., -0.0326, -0.0054, -0.0114],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0072,  0.0450,  ..., -0.0193,  0.0022,  0.0051],
        ...,
        [-0.0651,  0.0603, -0.0236,  ...,  0.0028,  0.0305, -0.0308],
        [ 0.0675, -0.0189,  0.0280,  ...,  0.0057, -0.0120, -0.0259],
        [-0.0578,  0.0434, -0.0506,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 113-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0016], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0091,  0.0573,  ..., -0.0326, -0.0054, -0.0114],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0072,  0.0450,  ..., -0.0192,  0.0021,  0.0053],
        ...,
        [-0.0652,  0.0610, -0.0232,  ...,  0.0027,  0.0303, -0.0307],
        [ 0.0676, -0.0202,  0.0277,  ...,  0.0057, -0.0120, -0.0259],
        [-0.0578,  0.0434, -0.0506,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 114-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0091,  0.0573,  ..., -0.0326, -0.0054, -0.0115],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0020,  0.0054],
        ...,
        [-0.0653,  0.0614, -0.0229,  ...,  0.0026,  0.0303, -0.0307],
        [ 0.0677, -0.0210,  0.0276,  ...,  0.0058, -0.0120, -0.0259],
        [-0.0578,  0.0434, -0.0506,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 115-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0091,  0.0573,  ..., -0.0326, -0.0054, -0.0115],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0020,  0.0055],
        ...,
        [-0.0654,  0.0617, -0.0227,  ...,  0.0026,  0.0302, -0.0307],
        [ 0.0677, -0.0216,  0.0275,  ...,  0.0058, -0.0120, -0.0259],
        [-0.0578,  0.0434, -0.0506,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 116-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0326, -0.0054, -0.0115],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0020,  0.0056],
        ...,
        [-0.0654,  0.0619, -0.0226,  ...,  0.0026,  0.0301, -0.0307],
        [ 0.0678, -0.0221,  0.0276,  ...,  0.0058, -0.0119, -0.0259],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 117-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0326, -0.0053, -0.0115],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0020,  0.0056],
        ...,
        [-0.0654,  0.0620, -0.0226,  ...,  0.0026,  0.0301, -0.0306],
        [ 0.0678, -0.0223,  0.0276,  ...,  0.0058, -0.0119, -0.0259],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0180, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 118-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0327, -0.0053, -0.0115],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0019,  0.0056],
        ...,
        [-0.0654,  0.0621, -0.0225,  ...,  0.0026,  0.0301, -0.0306],
        [ 0.0678, -0.0225,  0.0277,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 119-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0327, -0.0053, -0.0116],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0019,  0.0056],
        ...,
        [-0.0655,  0.0622, -0.0225,  ...,  0.0026,  0.0301, -0.0306],
        [ 0.0678, -0.0226,  0.0277,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 120-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0327, -0.0053, -0.0116],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0019,  0.0056],
        ...,
        [-0.0655,  0.0622, -0.0225,  ...,  0.0026,  0.0300, -0.0306],
        [ 0.0678, -0.0227,  0.0277,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 121-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (120)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0327, -0.0053, -0.0116],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0019,  0.0056],
        ...,
        [-0.0655,  0.0622, -0.0225,  ...,  0.0026,  0.0300, -0.0306],
        [ 0.0678, -0.0228,  0.0277,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 122-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0327, -0.0053, -0.0116],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0019,  0.0057],
        ...,
        [-0.0655,  0.0623, -0.0224,  ...,  0.0026,  0.0300, -0.0306],
        [ 0.0678, -0.0228,  0.0278,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 123-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0327, -0.0053, -0.0115],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0019,  0.0057],
        ...,
        [-0.0655,  0.0623, -0.0224,  ...,  0.0026,  0.0300, -0.0306],
        [ 0.0678, -0.0228,  0.0278,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 124-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0327, -0.0053, -0.0115],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0019,  0.0057],
        ...,
        [-0.0655,  0.0623, -0.0224,  ...,  0.0026,  0.0300, -0.0306],
        [ 0.0678, -0.0229,  0.0278,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 125-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0573,  ..., -0.0327, -0.0053, -0.0115],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0450,  ..., -0.0192,  0.0019,  0.0057],
        ...,
        [-0.0655,  0.0623, -0.0224,  ...,  0.0026,  0.0300, -0.0306],
        [ 0.0678, -0.0229,  0.0278,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 126-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0576,  ..., -0.0326, -0.0053, -0.0114],
        [-0.0223, -0.0499,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0451,  ..., -0.0192,  0.0020,  0.0058],
        ...,
        [-0.0655,  0.0623, -0.0225,  ...,  0.0026,  0.0299, -0.0306],
        [ 0.0678, -0.0229,  0.0278,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 127-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0578,  ..., -0.0326, -0.0053, -0.0113],
        [-0.0223, -0.0500,  0.0096,  ...,  0.0010,  0.0157, -0.0452],
        [ 0.0140, -0.0073,  0.0451,  ..., -0.0192,  0.0020,  0.0058],
        ...,
        [-0.0655,  0.0623, -0.0225,  ...,  0.0026,  0.0298, -0.0306],
        [ 0.0678, -0.0229,  0.0278,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 128-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0579,  ..., -0.0326, -0.0053, -0.0112],
        [-0.0223, -0.0500,  0.0097,  ...,  0.0011,  0.0157, -0.0453],
        [ 0.0140, -0.0073,  0.0451,  ..., -0.0192,  0.0020,  0.0059],
        ...,
        [-0.0656,  0.0623, -0.0225,  ...,  0.0026,  0.0297, -0.0306],
        [ 0.0678, -0.0229,  0.0278,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 129-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0580,  ..., -0.0326, -0.0052, -0.0112],
        [-0.0223, -0.0500,  0.0097,  ...,  0.0011,  0.0157, -0.0453],
        [ 0.0140, -0.0073,  0.0451,  ..., -0.0192,  0.0020,  0.0059],
        ...,
        [-0.0655,  0.0623, -0.0225,  ...,  0.0025,  0.0298, -0.0306],
        [ 0.0678, -0.0229,  0.0278,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0578,  0.0434, -0.0507,  ..., -0.0179, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 130-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0580,  ..., -0.0326, -0.0052, -0.0112],
        [-0.0223, -0.0500,  0.0097,  ...,  0.0011,  0.0157, -0.0454],
        [ 0.0140, -0.0073,  0.0452,  ..., -0.0192,  0.0020,  0.0059],
        ...,
        [-0.0655,  0.0623, -0.0229,  ...,  0.0025,  0.0299, -0.0306],
        [ 0.0677, -0.0227,  0.0273,  ...,  0.0058, -0.0119, -0.0260],
        [-0.0577,  0.0433, -0.0505,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 131-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (130)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0149,  0.0091,  0.0580,  ..., -0.0326, -0.0052, -0.0111],
        [-0.0223, -0.0500,  0.0097,  ...,  0.0011,  0.0157, -0.0454],
        [ 0.0140, -0.0073,  0.0452,  ..., -0.0192,  0.0020,  0.0059],
        ...,
        [-0.0655,  0.0623, -0.0233,  ...,  0.0025,  0.0298, -0.0306],
        [ 0.0677, -0.0224,  0.0266,  ...,  0.0057, -0.0119, -0.0260],
        [-0.0577,  0.0433, -0.0502,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 132-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0089,  0.0597,  ..., -0.0325, -0.0057, -0.0097],
        [-0.0222, -0.0502,  0.0102,  ...,  0.0012,  0.0150, -0.0444],
        [ 0.0141, -0.0074,  0.0456,  ..., -0.0191,  0.0016,  0.0069],
        ...,
        [-0.0655,  0.0623, -0.0237,  ...,  0.0025,  0.0297, -0.0306],
        [ 0.0676, -0.0221,  0.0261,  ...,  0.0057, -0.0120, -0.0260],
        [-0.0577,  0.0432, -0.0501,  ..., -0.0179, -0.0516, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 133-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0612,  ..., -0.0324, -0.0061, -0.0084],
        [-0.0221, -0.0503,  0.0107,  ...,  0.0014,  0.0144, -0.0434],
        [ 0.0142, -0.0075,  0.0460,  ..., -0.0190,  0.0013,  0.0079],
        ...,
        [-0.0655,  0.0623, -0.0239,  ...,  0.0024,  0.0297, -0.0306],
        [ 0.0676, -0.0220,  0.0258,  ...,  0.0057, -0.0120, -0.0260],
        [-0.0577,  0.0432, -0.0499,  ..., -0.0179, -0.0516, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 134-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0623,  ..., -0.0323, -0.0063, -0.0076],
        [-0.0219, -0.0508,  0.0107,  ...,  0.0015,  0.0135, -0.0428],
        [ 0.0142, -0.0075,  0.0462,  ..., -0.0190,  0.0010,  0.0086],
        ...,
        [-0.0655,  0.0623, -0.0240,  ...,  0.0024,  0.0295, -0.0306],
        [ 0.0675, -0.0214,  0.0259,  ...,  0.0057, -0.0119, -0.0260],
        [-0.0577,  0.0431, -0.0500,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 135-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0629,  ..., -0.0322, -0.0065, -0.0072],
        [-0.0218, -0.0511,  0.0108,  ...,  0.0015,  0.0130, -0.0423],
        [ 0.0142, -0.0076,  0.0464,  ..., -0.0189,  0.0009,  0.0091],
        ...,
        [-0.0655,  0.0623, -0.0241,  ...,  0.0024,  0.0294, -0.0306],
        [ 0.0675, -0.0210,  0.0259,  ...,  0.0057, -0.0118, -0.0260],
        [-0.0577,  0.0431, -0.0500,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 136-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0633,  ..., -0.0321, -0.0066, -0.0069],
        [-0.0217, -0.0513,  0.0108,  ...,  0.0016,  0.0126, -0.0420],
        [ 0.0142, -0.0076,  0.0466,  ..., -0.0189,  0.0008,  0.0094],
        ...,
        [-0.0655,  0.0623, -0.0241,  ...,  0.0024,  0.0294, -0.0306],
        [ 0.0675, -0.0207,  0.0259,  ...,  0.0057, -0.0117, -0.0260],
        [-0.0576,  0.0431, -0.0500,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 137-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0636,  ..., -0.0321, -0.0066, -0.0067],
        [-0.0217, -0.0515,  0.0108,  ...,  0.0016,  0.0123, -0.0417],
        [ 0.0143, -0.0076,  0.0466,  ..., -0.0189,  0.0007,  0.0096],
        ...,
        [-0.0655,  0.0623, -0.0242,  ...,  0.0024,  0.0293, -0.0306],
        [ 0.0674, -0.0205,  0.0260,  ...,  0.0057, -0.0117, -0.0260],
        [-0.0576,  0.0431, -0.0500,  ..., -0.0179, -0.0517, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 138-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0638,  ..., -0.0321, -0.0067, -0.0066],
        [-0.0216, -0.0516,  0.0108,  ...,  0.0016,  0.0122, -0.0416],
        [ 0.0143, -0.0076,  0.0467,  ..., -0.0189,  0.0007,  0.0098],
        ...,
        [-0.0655,  0.0623, -0.0242,  ...,  0.0024,  0.0293, -0.0306],
        [ 0.0674, -0.0204,  0.0260,  ...,  0.0057, -0.0117, -0.0260],
        [-0.0576,  0.0431, -0.0500,  ..., -0.0179, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 139-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0639,  ..., -0.0321, -0.0067, -0.0065],
        [-0.0216, -0.0516,  0.0108,  ...,  0.0016,  0.0121, -0.0415],
        [ 0.0143, -0.0076,  0.0467,  ..., -0.0189,  0.0006,  0.0099],
        ...,
        [-0.0655,  0.0623, -0.0242,  ...,  0.0024,  0.0293, -0.0306],
        [ 0.0673, -0.0202,  0.0260,  ...,  0.0057, -0.0116, -0.0260],
        [-0.0576,  0.0430, -0.0500,  ..., -0.0179, -0.0518, -0.0617]],
       device='cuda:0', requires_grad=True)


-------Epoch 140-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0640,  ..., -0.0321, -0.0067, -0.0064],
        [-0.0216, -0.0517,  0.0108,  ...,  0.0017,  0.0120, -0.0415],
        [ 0.0143, -0.0076,  0.0468,  ..., -0.0189,  0.0006,  0.0099],
        ...,
        [-0.0656,  0.0624, -0.0242,  ...,  0.0024,  0.0292, -0.0306],
        [ 0.0668, -0.0196,  0.0261,  ...,  0.0057, -0.0115, -0.0260],
        [-0.0575,  0.0430, -0.0500,  ..., -0.0179, -0.0519, -0.0616]],
       device='cuda:0', requires_grad=True)


-------Epoch 141-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (140)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0641,  ..., -0.0321, -0.0067, -0.0064],
        [-0.0216, -0.0517,  0.0108,  ...,  0.0017,  0.0119, -0.0414],
        [ 0.0143, -0.0076,  0.0468,  ..., -0.0189,  0.0006,  0.0100],
        ...,
        [-0.0656,  0.0625, -0.0241,  ...,  0.0024,  0.0292, -0.0306],
        [ 0.0665, -0.0195,  0.0265,  ...,  0.0057, -0.0115, -0.0261],
        [-0.0575,  0.0430, -0.0500,  ..., -0.0179, -0.0519, -0.0615]],
       device='cuda:0', requires_grad=True)


-------Epoch 142-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0641,  ..., -0.0321, -0.0067, -0.0064],
        [-0.0216, -0.0518,  0.0108,  ...,  0.0016,  0.0119, -0.0414],
        [ 0.0143, -0.0077,  0.0468,  ..., -0.0189,  0.0006,  0.0101],
        ...,
        [-0.0656,  0.0626, -0.0239,  ...,  0.0024,  0.0291, -0.0307],
        [ 0.0663, -0.0192,  0.0264,  ...,  0.0057, -0.0115, -0.0261],
        [-0.0574,  0.0430, -0.0499,  ..., -0.0179, -0.0519, -0.0614]],
       device='cuda:0', requires_grad=True)


-------Epoch 143-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0641,  ..., -0.0321, -0.0067, -0.0063],
        [-0.0216, -0.0518,  0.0108,  ...,  0.0016,  0.0119, -0.0414],
        [ 0.0143, -0.0077,  0.0467,  ..., -0.0189,  0.0006,  0.0101],
        ...,
        [-0.0656,  0.0626, -0.0238,  ...,  0.0024,  0.0290, -0.0307],
        [ 0.0662, -0.0190,  0.0262,  ...,  0.0057, -0.0115, -0.0262],
        [-0.0574,  0.0430, -0.0497,  ..., -0.0179, -0.0519, -0.0614]],
       device='cuda:0', requires_grad=True)


-------Epoch 144-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0321, -0.0067, -0.0063],
        [-0.0215, -0.0519,  0.0107,  ...,  0.0016,  0.0119, -0.0413],
        [ 0.0143, -0.0078,  0.0467,  ..., -0.0190,  0.0006,  0.0101],
        ...,
        [-0.0656,  0.0626, -0.0237,  ...,  0.0024,  0.0290, -0.0307],
        [ 0.0661, -0.0188,  0.0260,  ...,  0.0057, -0.0115, -0.0262],
        [-0.0574,  0.0430, -0.0496,  ..., -0.0180, -0.0519, -0.0614]],
       device='cuda:0', requires_grad=True)


-------Epoch 145-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0016], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0321, -0.0067, -0.0063],
        [-0.0215, -0.0519,  0.0107,  ...,  0.0015,  0.0119, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0656,  0.0626, -0.0236,  ...,  0.0024,  0.0290, -0.0307],
        [ 0.0660, -0.0186,  0.0259,  ...,  0.0057, -0.0115, -0.0262],
        [-0.0574,  0.0430, -0.0495,  ..., -0.0180, -0.0519, -0.0613]],
       device='cuda:0', requires_grad=True)


-------Epoch 146-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0321, -0.0067, -0.0063],
        [-0.0215, -0.0519,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0656,  0.0627, -0.0236,  ...,  0.0024,  0.0289, -0.0307],
        [ 0.0659, -0.0186,  0.0256,  ...,  0.0057, -0.0115, -0.0262],
        [-0.0573,  0.0430, -0.0494,  ..., -0.0180, -0.0519, -0.0613]],
       device='cuda:0', requires_grad=True)


-------Epoch 147-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0320, -0.0067, -0.0063],
        [-0.0215, -0.0519,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0662,  0.0647, -0.0204,  ...,  0.0023,  0.0308, -0.0322],
        [ 0.0635, -0.0147,  0.0294,  ...,  0.0057, -0.0104, -0.0280],
        [-0.0575,  0.0437, -0.0477,  ..., -0.0180, -0.0517, -0.0625]],
       device='cuda:0', requires_grad=True)


-------Epoch 148-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0320, -0.0067, -0.0063],
        [-0.0215, -0.0519,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0669,  0.0676, -0.0157,  ...,  0.0023,  0.0336, -0.0345],
        [ 0.0599, -0.0088,  0.0352,  ...,  0.0056, -0.0088, -0.0307],
        [-0.0577,  0.0448, -0.0452,  ..., -0.0180, -0.0514, -0.0643]],
       device='cuda:0', requires_grad=True)


-------Epoch 149-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0320, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0674,  0.0695, -0.0127,  ...,  0.0022,  0.0355, -0.0359],
        [ 0.0576, -0.0050,  0.0389,  ...,  0.0056, -0.0078, -0.0325],
        [-0.0578,  0.0455, -0.0436,  ..., -0.0181, -0.0513, -0.0655]],
       device='cuda:0', requires_grad=True)


-------Epoch 150-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0320, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0144, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0678,  0.0708, -0.0106,  ...,  0.0022,  0.0366, -0.0369],
        [ 0.0560, -0.0024,  0.0414,  ...,  0.0056, -0.0071, -0.0337],
        [-0.0579,  0.0459, -0.0425,  ..., -0.0181, -0.0511, -0.0662]],
       device='cuda:0', requires_grad=True)


-------Epoch 151-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (150)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0320, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0144, -0.0079,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0680,  0.0717, -0.0093,  ...,  0.0022,  0.0374, -0.0376],
        [ 0.0550, -0.0008,  0.0431,  ...,  0.0055, -0.0067, -0.0344],
        [-0.0580,  0.0462, -0.0418,  ..., -0.0181, -0.0511, -0.0667]],
       device='cuda:0', requires_grad=True)


-------Epoch 152-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0320, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0119, -0.0413],
        [ 0.0144, -0.0079,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0683,  0.0723, -0.0084,  ...,  0.0022,  0.0379, -0.0380],
        [ 0.0542,  0.0003,  0.0442,  ...,  0.0055, -0.0064, -0.0349],
        [-0.0581,  0.0465, -0.0413,  ..., -0.0181, -0.0510, -0.0671]],
       device='cuda:0', requires_grad=True)


-------Epoch 153-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0321, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0144, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0685,  0.0727, -0.0078,  ...,  0.0022,  0.0382, -0.0383],
        [ 0.0537,  0.0009,  0.0449,  ...,  0.0055, -0.0062, -0.0353],
        [-0.0581,  0.0466, -0.0410,  ..., -0.0181, -0.0510, -0.0673]],
       device='cuda:0', requires_grad=True)


-------Epoch 154-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0320, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0687,  0.0730, -0.0075,  ...,  0.0022,  0.0384, -0.0384],
        [ 0.0534,  0.0014,  0.0454,  ...,  0.0055, -0.0061, -0.0355],
        [-0.0581,  0.0467, -0.0408,  ..., -0.0181, -0.0510, -0.0674]],
       device='cuda:0', requires_grad=True)


-------Epoch 155-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0320, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0688,  0.0731, -0.0072,  ...,  0.0022,  0.0387, -0.0386],
        [ 0.0532,  0.0017,  0.0457,  ...,  0.0055, -0.0060, -0.0356],
        [-0.0581,  0.0468, -0.0407,  ..., -0.0181, -0.0510, -0.0675]],
       device='cuda:0', requires_grad=True)


-------Epoch 156-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0321, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0688,  0.0732, -0.0070,  ...,  0.0022,  0.0389, -0.0386],
        [ 0.0531,  0.0019,  0.0459,  ...,  0.0055, -0.0059, -0.0357],
        [-0.0581,  0.0468, -0.0406,  ..., -0.0181, -0.0510, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 157-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0321, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0688,  0.0733, -0.0069,  ...,  0.0021,  0.0390, -0.0387],
        [ 0.0530,  0.0020,  0.0461,  ...,  0.0055, -0.0059, -0.0358],
        [-0.0581,  0.0468, -0.0405,  ..., -0.0181, -0.0510, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 158-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0642,  ..., -0.0321, -0.0067, -0.0063],
        [-0.0215, -0.0520,  0.0107,  ...,  0.0015,  0.0118, -0.0413],
        [ 0.0143, -0.0078,  0.0466,  ..., -0.0190,  0.0006,  0.0102],
        ...,
        [-0.0689,  0.0734, -0.0069,  ...,  0.0021,  0.0391, -0.0387],
        [ 0.0529,  0.0021,  0.0461,  ...,  0.0055, -0.0058, -0.0358],
        [-0.0581,  0.0468, -0.0405,  ..., -0.0181, -0.0510, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 159-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0636,  ..., -0.0321, -0.0067, -0.0068],
        [-0.0215, -0.0520,  0.0105,  ...,  0.0015,  0.0118, -0.0414],
        [ 0.0143, -0.0078,  0.0469,  ..., -0.0190,  0.0006,  0.0108],
        ...,
        [-0.0689,  0.0734, -0.0069,  ...,  0.0021,  0.0393, -0.0388],
        [ 0.0528,  0.0022,  0.0462,  ...,  0.0055, -0.0058, -0.0358],
        [-0.0581,  0.0468, -0.0404,  ..., -0.0181, -0.0510, -0.0677]],
       device='cuda:0', requires_grad=True)


-------Epoch 160-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0628,  ..., -0.0322, -0.0067, -0.0074],
        [-0.0215, -0.0520,  0.0104,  ...,  0.0015,  0.0118, -0.0417],
        [ 0.0143, -0.0079,  0.0473,  ..., -0.0189,  0.0006,  0.0116],
        ...,
        [-0.0689,  0.0734, -0.0068,  ...,  0.0021,  0.0393, -0.0388],
        [ 0.0528,  0.0022,  0.0462,  ...,  0.0055, -0.0057, -0.0359],
        [-0.0581,  0.0468, -0.0404,  ..., -0.0181, -0.0510, -0.0677]],
       device='cuda:0', requires_grad=True)


-------Epoch 161-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (160)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0623,  ..., -0.0322, -0.0067, -0.0078],
        [-0.0215, -0.0520,  0.0102,  ...,  0.0015,  0.0118, -0.0419],
        [ 0.0143, -0.0079,  0.0475,  ..., -0.0189,  0.0006,  0.0121],
        ...,
        [-0.0689,  0.0734, -0.0068,  ...,  0.0021,  0.0394, -0.0388],
        [ 0.0528,  0.0023,  0.0462,  ...,  0.0055, -0.0057, -0.0359],
        [-0.0581,  0.0469, -0.0404,  ..., -0.0181, -0.0510, -0.0677]],
       device='cuda:0', requires_grad=True)


-------Epoch 162-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0619,  ..., -0.0322, -0.0067, -0.0080],
        [-0.0215, -0.0520,  0.0102,  ...,  0.0014,  0.0118, -0.0420],
        [ 0.0143, -0.0079,  0.0477,  ..., -0.0188,  0.0006,  0.0125],
        ...,
        [-0.0689,  0.0734, -0.0070,  ...,  0.0022,  0.0375, -0.0389],
        [ 0.0531,  0.0022,  0.0465,  ...,  0.0055, -0.0058, -0.0359],
        [-0.0582,  0.0469, -0.0411,  ..., -0.0181, -0.0512, -0.0678]],
       device='cuda:0', requires_grad=True)


-------Epoch 163-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0617,  ..., -0.0323, -0.0067, -0.0082],
        [-0.0215, -0.0520,  0.0101,  ...,  0.0014,  0.0118, -0.0421],
        [ 0.0143, -0.0079,  0.0478,  ..., -0.0188,  0.0006,  0.0127],
        ...,
        [-0.0690,  0.0733, -0.0072,  ...,  0.0022,  0.0361, -0.0389],
        [ 0.0532,  0.0022,  0.0467,  ...,  0.0055, -0.0058, -0.0359],
        [-0.0583,  0.0469, -0.0413,  ..., -0.0182, -0.0513, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 164-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0616,  ..., -0.0323, -0.0067, -0.0083],
        [-0.0215, -0.0520,  0.0101,  ...,  0.0014,  0.0118, -0.0421],
        [ 0.0143, -0.0079,  0.0479,  ..., -0.0188,  0.0006,  0.0128],
        ...,
        [-0.0690,  0.0733, -0.0075,  ...,  0.0022,  0.0351, -0.0389],
        [ 0.0532,  0.0022,  0.0468,  ...,  0.0056, -0.0058, -0.0359],
        [-0.0583,  0.0469, -0.0414,  ..., -0.0181, -0.0513, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 165-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0087,  0.0615,  ..., -0.0323, -0.0067, -0.0084],
        [-0.0215, -0.0520,  0.0101,  ...,  0.0014,  0.0118, -0.0421],
        [ 0.0143, -0.0079,  0.0479,  ..., -0.0188,  0.0006,  0.0129],
        ...,
        [-0.0691,  0.0733, -0.0076,  ...,  0.0023,  0.0344, -0.0390],
        [ 0.0532,  0.0022,  0.0469,  ...,  0.0056, -0.0059, -0.0358],
        [-0.0583,  0.0469, -0.0414,  ..., -0.0181, -0.0513, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 166-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0088,  0.0615,  ..., -0.0323, -0.0067, -0.0084],
        [-0.0215, -0.0520,  0.0101,  ...,  0.0014,  0.0118, -0.0422],
        [ 0.0143, -0.0079,  0.0478,  ..., -0.0188,  0.0006,  0.0129],
        ...,
        [-0.0691,  0.0734, -0.0077,  ...,  0.0023,  0.0340, -0.0390],
        [ 0.0532,  0.0021,  0.0469,  ...,  0.0056, -0.0059, -0.0358],
        [-0.0583,  0.0470, -0.0414,  ..., -0.0181, -0.0513, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 167-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0088,  0.0620,  ..., -0.0321, -0.0066, -0.0087],
        [-0.0215, -0.0520,  0.0096,  ...,  0.0010,  0.0114, -0.0418],
        [ 0.0143, -0.0079,  0.0477,  ..., -0.0188,  0.0006,  0.0129],
        ...,
        [-0.0691,  0.0734, -0.0078,  ...,  0.0023,  0.0337, -0.0390],
        [ 0.0534,  0.0021,  0.0471,  ...,  0.0056, -0.0057, -0.0358],
        [-0.0583,  0.0470, -0.0418,  ..., -0.0182, -0.0515, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 168-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151,  0.0088,  0.0623,  ..., -0.0320, -0.0065, -0.0089],
        [-0.0215, -0.0520,  0.0093,  ...,  0.0007,  0.0111, -0.0416],
        [ 0.0143, -0.0079,  0.0477,  ..., -0.0188,  0.0005,  0.0128],
        ...,
        [-0.0692,  0.0734, -0.0079,  ...,  0.0023,  0.0334, -0.0390],
        [ 0.0538,  0.0020,  0.0476,  ...,  0.0056, -0.0052, -0.0359],
        [-0.0585,  0.0471, -0.0429,  ..., -0.0182, -0.0520, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 169-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150,  0.0088,  0.0616,  ..., -0.0317, -0.0064, -0.0096],
        [-0.0237, -0.0516,  0.0070,  ...,  0.0026,  0.0131, -0.0437],
        [ 0.0165, -0.0082,  0.0499,  ..., -0.0209, -0.0015,  0.0150],
        ...,
        [-0.0692,  0.0734, -0.0079,  ...,  0.0023,  0.0332, -0.0390],
        [ 0.0540,  0.0020,  0.0479,  ...,  0.0056, -0.0049, -0.0359],
        [-0.0586,  0.0471, -0.0437,  ..., -0.0182, -0.0524, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 170-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0145,  0.0088,  0.0590,  ..., -0.0312, -0.0060, -0.0115],
        [-0.0304, -0.0505,  0.0002,  ...,  0.0091,  0.0196, -0.0505],
        [ 0.0231, -0.0090,  0.0566,  ..., -0.0272, -0.0077,  0.0218],
        ...,
        [-0.0692,  0.0734, -0.0079,  ...,  0.0023,  0.0331, -0.0390],
        [ 0.0542,  0.0020,  0.0481,  ...,  0.0056, -0.0046, -0.0359],
        [-0.0586,  0.0471, -0.0442,  ..., -0.0182, -0.0527, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 171-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (170)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0142,  0.0088,  0.0573,  ..., -0.0308, -0.0058, -0.0128],
        [-0.0348, -0.0497, -0.0043,  ...,  0.0133,  0.0239, -0.0550],
        [ 0.0274, -0.0096,  0.0611,  ..., -0.0313, -0.0119,  0.0263],
        ...,
        [-0.0692,  0.0734, -0.0080,  ...,  0.0023,  0.0330, -0.0390],
        [ 0.0543,  0.0020,  0.0482,  ...,  0.0056, -0.0045, -0.0359],
        [-0.0587,  0.0471, -0.0445,  ..., -0.0182, -0.0529, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 172-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0140,  0.0089,  0.0603,  ..., -0.0315, -0.0030, -0.0182],
        [-0.0377, -0.0492, -0.0075,  ...,  0.0163,  0.0259, -0.0575],
        [ 0.0303, -0.0100,  0.0642,  ..., -0.0342, -0.0140,  0.0290],
        ...,
        [-0.0692,  0.0734, -0.0080,  ...,  0.0023,  0.0329, -0.0390],
        [ 0.0544,  0.0020,  0.0483,  ...,  0.0056, -0.0044, -0.0359],
        [-0.0587,  0.0471, -0.0447,  ..., -0.0181, -0.0530, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 173-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0132,  0.0117,  0.0552,  ..., -0.0367, -0.0032, -0.0267],
        [-0.0397, -0.0481, -0.0100,  ...,  0.0177,  0.0265, -0.0593],
        [ 0.0319, -0.0083,  0.0651,  ..., -0.0380, -0.0167,  0.0296],
        ...,
        [-0.0692,  0.0734, -0.0080,  ...,  0.0023,  0.0328, -0.0390],
        [ 0.0544,  0.0020,  0.0484,  ...,  0.0056, -0.0043, -0.0359],
        [-0.0587,  0.0471, -0.0448,  ..., -0.0181, -0.0531, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 174-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0126,  0.0135,  0.0519,  ..., -0.0400, -0.0032, -0.0323],
        [-0.0410, -0.0474, -0.0116,  ...,  0.0186,  0.0269, -0.0606],
        [ 0.0329, -0.0072,  0.0657,  ..., -0.0405, -0.0185,  0.0301],
        ...,
        [-0.0692,  0.0734, -0.0080,  ...,  0.0023,  0.0327, -0.0390],
        [ 0.0544,  0.0020,  0.0484,  ...,  0.0056, -0.0043, -0.0359],
        [-0.0587,  0.0471, -0.0449,  ..., -0.0181, -0.0531, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 175-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0122,  0.0147,  0.0497,  ..., -0.0423, -0.0033, -0.0360],
        [-0.0419, -0.0469, -0.0127,  ...,  0.0191,  0.0272, -0.0613],
        [ 0.0336, -0.0065,  0.0661,  ..., -0.0422, -0.0197,  0.0304],
        ...,
        [-0.0692,  0.0734, -0.0080,  ...,  0.0023,  0.0327, -0.0390],
        [ 0.0544,  0.0020,  0.0484,  ...,  0.0056, -0.0043, -0.0359],
        [-0.0587,  0.0471, -0.0450,  ..., -0.0181, -0.0531, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 176-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0120,  0.0155,  0.0482,  ..., -0.0437, -0.0033, -0.0384],
        [-0.0425, -0.0465, -0.0134,  ...,  0.0195,  0.0273, -0.0619],
        [ 0.0341, -0.0060,  0.0663,  ..., -0.0433, -0.0204,  0.0306],
        ...,
        [-0.0692,  0.0734, -0.0081,  ...,  0.0023,  0.0326, -0.0390],
        [ 0.0544,  0.0020,  0.0484,  ...,  0.0056, -0.0042, -0.0359],
        [-0.0587,  0.0471, -0.0450,  ..., -0.0181, -0.0531, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 177-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0118,  0.0161,  0.0473,  ..., -0.0447, -0.0033, -0.0400],
        [-0.0428, -0.0463, -0.0139,  ...,  0.0198,  0.0275, -0.0622],
        [ 0.0344, -0.0057,  0.0665,  ..., -0.0440, -0.0209,  0.0307],
        ...,
        [-0.0693,  0.0734, -0.0081,  ...,  0.0023,  0.0325, -0.0390],
        [ 0.0544,  0.0020,  0.0484,  ...,  0.0056, -0.0042, -0.0359],
        [-0.0587,  0.0471, -0.0450,  ..., -0.0181, -0.0532, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 178-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0117,  0.0164,  0.0467,  ..., -0.0453, -0.0034, -0.0410],
        [-0.0431, -0.0462, -0.0142,  ...,  0.0200,  0.0275, -0.0624],
        [ 0.0346, -0.0055,  0.0666,  ..., -0.0445, -0.0213,  0.0308],
        ...,
        [-0.0693,  0.0735, -0.0081,  ...,  0.0023,  0.0325, -0.0390],
        [ 0.0544,  0.0020,  0.0484,  ...,  0.0056, -0.0042, -0.0359],
        [-0.0587,  0.0471, -0.0450,  ..., -0.0181, -0.0532, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 179-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0116,  0.0166,  0.0463,  ..., -0.0458, -0.0034, -0.0418],
        [-0.0433, -0.0461, -0.0144,  ...,  0.0201,  0.0276, -0.0626],
        [ 0.0347, -0.0054,  0.0667,  ..., -0.0448, -0.0215,  0.0309],
        ...,
        [-0.0693,  0.0735, -0.0081,  ...,  0.0023,  0.0324, -0.0390],
        [ 0.0545,  0.0020,  0.0484,  ...,  0.0056, -0.0042, -0.0359],
        [-0.0588,  0.0471, -0.0450,  ..., -0.0181, -0.0532, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 180-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0115,  0.0168,  0.0461,  ..., -0.0460, -0.0034, -0.0422],
        [-0.0434, -0.0460, -0.0145,  ...,  0.0201,  0.0276, -0.0627],
        [ 0.0348, -0.0053,  0.0667,  ..., -0.0450, -0.0216,  0.0309],
        ...,
        [-0.0693,  0.0735, -0.0081,  ...,  0.0023,  0.0324, -0.0390],
        [ 0.0545,  0.0020,  0.0484,  ...,  0.0056, -0.0042, -0.0359],
        [-0.0588,  0.0471, -0.0450,  ..., -0.0181, -0.0532, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 181-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (180)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0115,  0.0169,  0.0459,  ..., -0.0462, -0.0034, -0.0425],
        [-0.0435, -0.0460, -0.0146,  ...,  0.0202,  0.0276, -0.0628],
        [ 0.0348, -0.0052,  0.0667,  ..., -0.0451, -0.0217,  0.0309],
        ...,
        [-0.0693,  0.0735, -0.0081,  ...,  0.0023,  0.0324, -0.0390],
        [ 0.0545,  0.0020,  0.0484,  ...,  0.0056, -0.0042, -0.0359],
        [-0.0588,  0.0471, -0.0450,  ..., -0.0181, -0.0532, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 182-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0115,  0.0170,  0.0458,  ..., -0.0463, -0.0034, -0.0427],
        [-0.0435, -0.0459, -0.0146,  ...,  0.0202,  0.0276, -0.0628],
        [ 0.0349, -0.0052,  0.0668,  ..., -0.0452, -0.0218,  0.0309],
        ...,
        [-0.0693,  0.0735, -0.0081,  ...,  0.0023,  0.0324, -0.0390],
        [ 0.0545,  0.0020,  0.0484,  ...,  0.0056, -0.0042, -0.0359],
        [-0.0588,  0.0471, -0.0451,  ..., -0.0181, -0.0532, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 183-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0458,  ..., -0.0464, -0.0034, -0.0429],
        [-0.0435, -0.0459, -0.0147,  ...,  0.0202,  0.0277, -0.0628],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0453, -0.0218,  0.0309],
        ...,
        [-0.0693,  0.0735, -0.0081,  ...,  0.0022,  0.0324, -0.0390],
        [ 0.0545,  0.0020,  0.0484,  ...,  0.0056, -0.0042, -0.0359],
        [-0.0588,  0.0471, -0.0451,  ..., -0.0181, -0.0532, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 184-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0465, -0.0034, -0.0430],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0453, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0735, -0.0080,  ...,  0.0022,  0.0321, -0.0390],
        [ 0.0544,  0.0020,  0.0485,  ...,  0.0056, -0.0043, -0.0359],
        [-0.0587,  0.0471, -0.0451,  ..., -0.0181, -0.0531, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 185-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0465, -0.0034, -0.0430],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0453, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0735, -0.0080,  ...,  0.0022,  0.0319, -0.0390],
        [ 0.0544,  0.0021,  0.0485,  ...,  0.0056, -0.0045, -0.0359],
        [-0.0587,  0.0471, -0.0451,  ..., -0.0182, -0.0530, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 186-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0465, -0.0034, -0.0430],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0022,  0.0317, -0.0390],
        [ 0.0544,  0.0021,  0.0485,  ...,  0.0056, -0.0045, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0530, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 187-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0465, -0.0034, -0.0431],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0022,  0.0316, -0.0390],
        [ 0.0543,  0.0021,  0.0485,  ...,  0.0056, -0.0046, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0530, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 188-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0431],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0022,  0.0315, -0.0390],
        [ 0.0543,  0.0021,  0.0485,  ...,  0.0056, -0.0046, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0529, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 189-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0431],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0022,  0.0314, -0.0390],
        [ 0.0543,  0.0021,  0.0485,  ...,  0.0056, -0.0047, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0529, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 190-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0431],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0022,  0.0314, -0.0390],
        [ 0.0543,  0.0021,  0.0485,  ...,  0.0056, -0.0047, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0529, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 191-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (190)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0431],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0022,  0.0314, -0.0390],
        [ 0.0543,  0.0021,  0.0485,  ...,  0.0056, -0.0047, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0529, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 192-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0431],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0022,  0.0314, -0.0390],
        [ 0.0543,  0.0021,  0.0485,  ...,  0.0056, -0.0047, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0529, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 193-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0432],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0022,  0.0313, -0.0390],
        [ 0.0543,  0.0022,  0.0485,  ...,  0.0056, -0.0047, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0529, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 194-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0432],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0023,  0.0314, -0.0390],
        [ 0.0543,  0.0022,  0.0485,  ...,  0.0056, -0.0047, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0529, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 195-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0432],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0023,  0.0314, -0.0390],
        [ 0.0543,  0.0021,  0.0485,  ...,  0.0056, -0.0046, -0.0359],
        [-0.0587,  0.0470, -0.0452,  ..., -0.0182, -0.0530, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 196-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0432],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0080,  ...,  0.0023,  0.0314, -0.0390],
        [ 0.0544,  0.0021,  0.0485,  ...,  0.0056, -0.0046, -0.0359],
        [-0.0587,  0.0470, -0.0451,  ..., -0.0182, -0.0530, -0.0678]],
       device='cuda:0', requires_grad=True)


-------Epoch 197-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0171,  0.0457,  ..., -0.0466, -0.0034, -0.0432],
        [-0.0436, -0.0459, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0079,  ...,  0.0023,  0.0314, -0.0390],
        [ 0.0545,  0.0021,  0.0485,  ...,  0.0056, -0.0045, -0.0360],
        [-0.0588,  0.0470, -0.0451,  ..., -0.0183, -0.0530, -0.0677]],
       device='cuda:0', requires_grad=True)


-------Epoch 198-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0114,  0.0172,  0.0457,  ..., -0.0466, -0.0034, -0.0431],
        [-0.0436, -0.0458, -0.0147,  ...,  0.0203,  0.0277, -0.0629],
        [ 0.0349, -0.0051,  0.0668,  ..., -0.0454, -0.0219,  0.0310],
        ...,
        [-0.0693,  0.0737, -0.0079,  ...,  0.0023,  0.0314, -0.0390],
        [ 0.0546,  0.0022,  0.0485,  ...,  0.0056, -0.0045, -0.0360],
        [-0.0588,  0.0470, -0.0451,  ..., -0.0183, -0.0531, -0.0677]],
       device='cuda:0', requires_grad=True)


-------Epoch 199-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0126,  0.0141,  0.0500,  ..., -0.0466,  0.0002, -0.0429],
        [-0.0437, -0.0448, -0.0152,  ...,  0.0203,  0.0268, -0.0629],
        [ 0.0352, -0.0065,  0.0678,  ..., -0.0454, -0.0203,  0.0310],
        ...,
        [-0.0693,  0.0737, -0.0079,  ...,  0.0023,  0.0314, -0.0390],
        [ 0.0547,  0.0022,  0.0485,  ...,  0.0056, -0.0045, -0.0360],
        [-0.0588,  0.0469, -0.0452,  ..., -0.0183, -0.0531, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 200-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0144,  0.0093,  0.0563,  ..., -0.0466,  0.0056, -0.0428],
        [-0.0439, -0.0433, -0.0159,  ...,  0.0203,  0.0254, -0.0629],
        [ 0.0355, -0.0087,  0.0693,  ..., -0.0454, -0.0178,  0.0310],
        ...,
        [-0.0693,  0.0737, -0.0079,  ...,  0.0023,  0.0315, -0.0389],
        [ 0.0547,  0.0022,  0.0485,  ...,  0.0056, -0.0044, -0.0360],
        [-0.0588,  0.0469, -0.0452,  ..., -0.0183, -0.0531, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 201-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (200)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0155,  0.0062,  0.0603,  ..., -0.0466,  0.0092, -0.0427],
        [-0.0440, -0.0423, -0.0164,  ...,  0.0203,  0.0246, -0.0629],
        [ 0.0357, -0.0102,  0.0703,  ..., -0.0454, -0.0162,  0.0310],
        ...,
        [-0.0693,  0.0736, -0.0079,  ...,  0.0023,  0.0315, -0.0390],
        [ 0.0548,  0.0021,  0.0485,  ...,  0.0056, -0.0043, -0.0360],
        [-0.0589,  0.0470, -0.0454,  ..., -0.0183, -0.0532, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 202-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0163,  0.0041,  0.0630,  ..., -0.0466,  0.0115, -0.0427],
        [-0.0441, -0.0416, -0.0167,  ...,  0.0203,  0.0240, -0.0628],
        [ 0.0359, -0.0111,  0.0710,  ..., -0.0454, -0.0151,  0.0310],
        ...,
        [-0.0693,  0.0735, -0.0080,  ...,  0.0023,  0.0316, -0.0390],
        [ 0.0549,  0.0020,  0.0485,  ...,  0.0057, -0.0042, -0.0361],
        [-0.0589,  0.0470, -0.0456,  ..., -0.0183, -0.0533, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 203-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0169,  0.0022,  0.0647,  ..., -0.0457,  0.0131, -0.0411],
        [-0.0441, -0.0403, -0.0167,  ...,  0.0198,  0.0237, -0.0631],
        [ 0.0360, -0.0121,  0.0713,  ..., -0.0451, -0.0144,  0.0312],
        ...,
        [-0.0692,  0.0734, -0.0079,  ...,  0.0023,  0.0317, -0.0390],
        [ 0.0549,  0.0019,  0.0485,  ...,  0.0057, -0.0041, -0.0361],
        [-0.0589,  0.0470, -0.0457,  ..., -0.0183, -0.0534, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 204-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0172,  0.0008,  0.0657,  ..., -0.0447,  0.0142, -0.0398],
        [-0.0441, -0.0392, -0.0168,  ...,  0.0194,  0.0234, -0.0634],
        [ 0.0361, -0.0129,  0.0715,  ..., -0.0448, -0.0139,  0.0314],
        ...,
        [-0.0692,  0.0734, -0.0079,  ...,  0.0023,  0.0318, -0.0390],
        [ 0.0550,  0.0018,  0.0485,  ...,  0.0057, -0.0040, -0.0361],
        [-0.0589,  0.0470, -0.0458,  ..., -0.0183, -0.0535, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 205-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0175, -0.0002,  0.0663,  ..., -0.0441,  0.0148, -0.0389],
        [-0.0441, -0.0384, -0.0168,  ...,  0.0191,  0.0233, -0.0635],
        [ 0.0361, -0.0134,  0.0717,  ..., -0.0446, -0.0136,  0.0316],
        ...,
        [-0.0692,  0.0733, -0.0079,  ...,  0.0023,  0.0318, -0.0391],
        [ 0.0550,  0.0018,  0.0485,  ...,  0.0057, -0.0039, -0.0361],
        [-0.0589,  0.0470, -0.0459,  ..., -0.0183, -0.0535, -0.0675]],
       device='cuda:0', requires_grad=True)


-------Epoch 206-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0176, -0.0008,  0.0668,  ..., -0.0437,  0.0153, -0.0383],
        [-0.0441, -0.0380, -0.0168,  ...,  0.0189,  0.0232, -0.0636],
        [ 0.0361, -0.0137,  0.0718,  ..., -0.0445, -0.0133,  0.0317],
        ...,
        [-0.0692,  0.0733, -0.0079,  ...,  0.0023,  0.0319, -0.0391],
        [ 0.0550,  0.0017,  0.0485,  ...,  0.0057, -0.0039, -0.0361],
        [-0.0589,  0.0470, -0.0459,  ..., -0.0183, -0.0536, -0.0675]],
       device='cuda:0', requires_grad=True)


-------Epoch 207-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0177, -0.0012,  0.0671,  ..., -0.0435,  0.0156, -0.0379],
        [-0.0441, -0.0376, -0.0168,  ...,  0.0188,  0.0231, -0.0637],
        [ 0.0362, -0.0139,  0.0719,  ..., -0.0444, -0.0132,  0.0317],
        ...,
        [-0.0693,  0.0733, -0.0072,  ...,  0.0023,  0.0302, -0.0390],
        [ 0.0548,  0.0017,  0.0487,  ...,  0.0056, -0.0042, -0.0361],
        [-0.0587,  0.0470, -0.0469,  ..., -0.0182, -0.0529, -0.0676]],
       device='cuda:0', requires_grad=True)


-------Epoch 208-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0178, -0.0014,  0.0673,  ..., -0.0433,  0.0158, -0.0377],
        [-0.0441, -0.0374, -0.0168,  ...,  0.0187,  0.0231, -0.0637],
        [ 0.0362, -0.0141,  0.0719,  ..., -0.0444, -0.0131,  0.0317],
        ...,
        [-0.0696,  0.0732, -0.0062,  ...,  0.0022,  0.0277, -0.0390],
        [ 0.0545,  0.0017,  0.0490,  ...,  0.0056, -0.0046, -0.0361],
        [-0.0585,  0.0471, -0.0483,  ..., -0.0179, -0.0519, -0.0678]],
       device='cuda:0', requires_grad=True)


-------Epoch 209-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0173, -0.0016,  0.0674,  ..., -0.0436,  0.0156, -0.0375],
        [-0.0441, -0.0373, -0.0168,  ...,  0.0186,  0.0231, -0.0638],
        [ 0.0360, -0.0142,  0.0719,  ..., -0.0444, -0.0133,  0.0318],
        ...,
        [-0.0697,  0.0732, -0.0056,  ...,  0.0022,  0.0260, -0.0389],
        [ 0.0543,  0.0017,  0.0491,  ...,  0.0056, -0.0049, -0.0361],
        [-0.0583,  0.0471, -0.0492,  ..., -0.0178, -0.0512, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 210-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0165, -0.0017,  0.0675,  ..., -0.0440,  0.0153, -0.0374],
        [-0.0441, -0.0372, -0.0168,  ...,  0.0186,  0.0231, -0.0638],
        [ 0.0358, -0.0142,  0.0719,  ..., -0.0446, -0.0135,  0.0318],
        ...,
        [-0.0698,  0.0732, -0.0052,  ...,  0.0021,  0.0249, -0.0389],
        [ 0.0542,  0.0017,  0.0493,  ...,  0.0056, -0.0051, -0.0361],
        [-0.0581,  0.0471, -0.0498,  ..., -0.0177, -0.0507, -0.0679]],
       device='cuda:0', requires_grad=True)


-------Epoch 211-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (210)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0160, -0.0018,  0.0675,  ..., -0.0443,  0.0150, -0.0373],
        [-0.0441, -0.0371, -0.0168,  ...,  0.0186,  0.0232, -0.0638],
        [ 0.0356, -0.0143,  0.0720,  ..., -0.0447, -0.0137,  0.0318],
        ...,
        [-0.0699,  0.0732, -0.0050,  ...,  0.0021,  0.0241, -0.0389],
        [ 0.0541,  0.0017,  0.0493,  ...,  0.0056, -0.0052, -0.0361],
        [-0.0581,  0.0471, -0.0502,  ..., -0.0177, -0.0504, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 212-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0015], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0157, -0.0019,  0.0676,  ..., -0.0445,  0.0149, -0.0373],
        [-0.0440, -0.0371, -0.0168,  ...,  0.0186,  0.0232, -0.0638],
        [ 0.0355, -0.0143,  0.0720,  ..., -0.0447, -0.0138,  0.0318],
        ...,
        [-0.0700,  0.0732, -0.0048,  ...,  0.0021,  0.0237, -0.0389],
        [ 0.0540,  0.0017,  0.0493,  ...,  0.0056, -0.0053, -0.0361],
        [-0.0580,  0.0471, -0.0505,  ..., -0.0176, -0.0503, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 213-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0155, -0.0019,  0.0676,  ..., -0.0446,  0.0148, -0.0372],
        [-0.0440, -0.0371, -0.0168,  ...,  0.0186,  0.0232, -0.0638],
        [ 0.0354, -0.0143,  0.0720,  ..., -0.0448, -0.0139,  0.0318],
        ...,
        [-0.0700,  0.0732, -0.0048,  ...,  0.0021,  0.0233, -0.0389],
        [ 0.0539,  0.0017,  0.0493,  ...,  0.0056, -0.0054, -0.0361],
        [-0.0580,  0.0471, -0.0506,  ..., -0.0176, -0.0501, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 214-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0153, -0.0019,  0.0676,  ..., -0.0447,  0.0147, -0.0372],
        [-0.0440, -0.0371, -0.0168,  ...,  0.0186,  0.0232, -0.0638],
        [ 0.0354, -0.0143,  0.0720,  ..., -0.0448, -0.0140,  0.0318],
        ...,
        [-0.0700,  0.0732, -0.0047,  ...,  0.0021,  0.0231, -0.0389],
        [ 0.0539,  0.0017,  0.0493,  ...,  0.0056, -0.0054, -0.0360],
        [-0.0579,  0.0471, -0.0507,  ..., -0.0176, -0.0500, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 215-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0152, -0.0019,  0.0676,  ..., -0.0447,  0.0147, -0.0374],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0354, -0.0143,  0.0720,  ..., -0.0448, -0.0140,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0047,  ...,  0.0021,  0.0230, -0.0389],
        [ 0.0538,  0.0017,  0.0493,  ...,  0.0056, -0.0054, -0.0360],
        [-0.0579,  0.0471, -0.0507,  ..., -0.0176, -0.0500, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 216-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151, -0.0018,  0.0677,  ..., -0.0447,  0.0146, -0.0375],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0143,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0047,  ...,  0.0021,  0.0228, -0.0389],
        [ 0.0538,  0.0017,  0.0493,  ...,  0.0056, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0507,  ..., -0.0176, -0.0500, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 217-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151, -0.0018,  0.0677,  ..., -0.0447,  0.0146, -0.0376],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0047,  ...,  0.0021,  0.0228, -0.0389],
        [ 0.0538,  0.0017,  0.0493,  ...,  0.0056, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0508,  ..., -0.0176, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 218-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0376],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0047,  ...,  0.0021,  0.0227, -0.0389],
        [ 0.0538,  0.0017,  0.0493,  ...,  0.0056, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0508,  ..., -0.0176, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 219-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0376],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0047,  ...,  0.0021,  0.0227, -0.0389],
        [ 0.0538,  0.0017,  0.0493,  ...,  0.0056, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0508,  ..., -0.0176, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 220-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0376],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0046,  ...,  0.0021,  0.0226, -0.0389],
        [ 0.0538,  0.0017,  0.0493,  ...,  0.0056, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0508,  ..., -0.0176, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 221-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (220)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0046,  ...,  0.0021,  0.0224, -0.0389],
        [ 0.0537,  0.0017,  0.0493,  ...,  0.0056, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0508,  ..., -0.0176, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 222-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0046,  ...,  0.0021,  0.0224, -0.0389],
        [ 0.0537,  0.0017,  0.0493,  ...,  0.0056, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0509,  ..., -0.0176, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 223-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0046,  ...,  0.0021,  0.0223, -0.0389],
        [ 0.0537,  0.0017,  0.0493,  ...,  0.0056, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0509,  ..., -0.0176, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 224-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0701,  0.0732, -0.0046,  ...,  0.0021,  0.0223, -0.0389],
        [ 0.0537,  0.0017,  0.0493,  ...,  0.0056, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0509,  ..., -0.0176, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 225-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0702,  0.0732, -0.0046,  ...,  0.0020,  0.0225, -0.0389],
        [ 0.0536,  0.0017,  0.0493,  ...,  0.0056, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0509,  ..., -0.0175, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 226-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0702,  0.0732, -0.0044,  ...,  0.0020,  0.0226, -0.0389],
        [ 0.0535,  0.0017,  0.0494,  ...,  0.0055, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0510,  ..., -0.0175, -0.0499, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 227-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0702,  0.0732, -0.0043,  ...,  0.0020,  0.0226, -0.0389],
        [ 0.0535,  0.0016,  0.0496,  ...,  0.0055, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0511,  ..., -0.0175, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 228-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0702,  0.0732, -0.0042,  ...,  0.0020,  0.0225, -0.0389],
        [ 0.0535,  0.0016,  0.0497,  ...,  0.0055, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0512,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 229-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0041,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0498,  ...,  0.0055, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0513,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 230-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0041,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0498,  ...,  0.0055, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 231-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (230)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0041,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 232-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0041,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 233-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0448, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 234-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0449, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 235-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0168,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0449, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 236-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0377],
        [-0.0440, -0.0370, -0.0167,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0449, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 237-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0018,  0.0677,  ..., -0.0447,  0.0145, -0.0376],
        [-0.0440, -0.0370, -0.0167,  ...,  0.0186,  0.0232, -0.0639],
        [ 0.0353, -0.0142,  0.0720,  ..., -0.0449, -0.0141,  0.0318],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 238-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151, -0.0019,  0.0679,  ..., -0.0450,  0.0144, -0.0370],
        [-0.0440, -0.0371, -0.0167,  ...,  0.0186,  0.0231, -0.0638],
        [ 0.0353, -0.0143,  0.0721,  ..., -0.0450, -0.0143,  0.0320],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0016,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 239-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151, -0.0021,  0.0680,  ..., -0.0452,  0.0143, -0.0364],
        [-0.0440, -0.0372, -0.0167,  ...,  0.0185,  0.0230, -0.0638],
        [ 0.0353, -0.0145,  0.0722,  ..., -0.0451, -0.0144,  0.0321],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0471, -0.0514,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 240-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0152, -0.0022,  0.0682,  ..., -0.0454,  0.0142, -0.0360],
        [-0.0440, -0.0373, -0.0166,  ...,  0.0185,  0.0230, -0.0637],
        [ 0.0354, -0.0146,  0.0722,  ..., -0.0452, -0.0144,  0.0322],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0472, -0.0514,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 241-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (240)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0152, -0.0022,  0.0682,  ..., -0.0456,  0.0141, -0.0357],
        [-0.0440, -0.0373, -0.0166,  ...,  0.0185,  0.0230, -0.0637],
        [ 0.0354, -0.0146,  0.0723,  ..., -0.0453, -0.0145,  0.0323],
        ...,
        [-0.0703,  0.0732, -0.0040,  ...,  0.0020,  0.0224, -0.0389],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0472, -0.0514,  ..., -0.0176, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 242-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0152, -0.0023,  0.0683,  ..., -0.0456,  0.0141, -0.0355],
        [-0.0440, -0.0374, -0.0166,  ...,  0.0185,  0.0229, -0.0637],
        [ 0.0354, -0.0147,  0.0723,  ..., -0.0453, -0.0145,  0.0324],
        ...,
        [-0.0703,  0.0732, -0.0039,  ...,  0.0020,  0.0225, -0.0388],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0472, -0.0514,  ..., -0.0175, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 243-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0152, -0.0023,  0.0683,  ..., -0.0456,  0.0141, -0.0354],
        [-0.0440, -0.0374, -0.0166,  ...,  0.0185,  0.0229, -0.0637],
        [ 0.0354, -0.0147,  0.0723,  ..., -0.0454, -0.0146,  0.0324],
        ...,
        [-0.0703,  0.0732, -0.0039,  ...,  0.0020,  0.0227, -0.0388],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0472, -0.0514,  ..., -0.0175, -0.0498, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 244-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0153, -0.0023,  0.0683,  ..., -0.0456,  0.0140, -0.0354],
        [-0.0440, -0.0375, -0.0166,  ...,  0.0185,  0.0229, -0.0637],
        [ 0.0354, -0.0147,  0.0723,  ..., -0.0454, -0.0146,  0.0324],
        ...,
        [-0.0703,  0.0732, -0.0038,  ...,  0.0020,  0.0229, -0.0388],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0472, -0.0514,  ..., -0.0175, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 245-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0153, -0.0024,  0.0683,  ..., -0.0456,  0.0140, -0.0353],
        [-0.0439, -0.0375, -0.0166,  ...,  0.0185,  0.0229, -0.0637],
        [ 0.0354, -0.0147,  0.0723,  ..., -0.0454, -0.0146,  0.0324],
        ...,
        [-0.0703,  0.0732, -0.0038,  ...,  0.0020,  0.0230, -0.0388],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0472, -0.0514,  ..., -0.0175, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 246-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0153, -0.0024,  0.0683,  ..., -0.0456,  0.0140, -0.0353],
        [-0.0439, -0.0375, -0.0166,  ...,  0.0185,  0.0229, -0.0637],
        [ 0.0354, -0.0147,  0.0723,  ..., -0.0454, -0.0146,  0.0324],
        ...,
        [-0.0703,  0.0732, -0.0038,  ...,  0.0020,  0.0230, -0.0388],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0472, -0.0513,  ..., -0.0175, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 247-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0153, -0.0024,  0.0683,  ..., -0.0456,  0.0140, -0.0353],
        [-0.0439, -0.0375, -0.0166,  ...,  0.0185,  0.0229, -0.0637],
        [ 0.0354, -0.0147,  0.0723,  ..., -0.0454, -0.0146,  0.0324],
        ...,
        [-0.0703,  0.0732, -0.0038,  ...,  0.0020,  0.0231, -0.0388],
        [ 0.0535,  0.0015,  0.0499,  ...,  0.0055, -0.0056, -0.0360],
        [-0.0579,  0.0472, -0.0513,  ..., -0.0175, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 248-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0151, -0.0024,  0.0683,  ..., -0.0455,  0.0140, -0.0353],
        [-0.0439, -0.0375, -0.0166,  ...,  0.0185,  0.0229, -0.0637],
        [ 0.0354, -0.0147,  0.0723,  ..., -0.0454, -0.0146,  0.0324],
        ...,
        [-0.0702,  0.0729, -0.0039,  ...,  0.0019,  0.0243, -0.0388],
        [ 0.0535,  0.0014,  0.0498,  ...,  0.0055, -0.0055, -0.0361],
        [-0.0579,  0.0472, -0.0513,  ..., -0.0175, -0.0497, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 249-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0150, -0.0024,  0.0683,  ..., -0.0454,  0.0140, -0.0353],
        [-0.0439, -0.0376, -0.0166,  ...,  0.0186,  0.0230, -0.0637],
        [ 0.0353, -0.0147,  0.0723,  ..., -0.0454, -0.0147,  0.0325],
        ...,
        [-0.0702,  0.0727, -0.0040,  ...,  0.0019,  0.0250, -0.0389],
        [ 0.0535,  0.0013,  0.0497,  ...,  0.0055, -0.0054, -0.0361],
        [-0.0579,  0.0471, -0.0513,  ..., -0.0176, -0.0496, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 250-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0169, -0.0013,  0.0675,  ..., -0.0463,  0.0123, -0.0329],
        [-0.0461, -0.0400, -0.0152,  ...,  0.0201,  0.0254, -0.0661],
        [ 0.0377, -0.0123,  0.0707,  ..., -0.0471, -0.0171,  0.0349],
        ...,
        [-0.0702,  0.0726, -0.0041,  ...,  0.0019,  0.0255, -0.0389],
        [ 0.0535,  0.0013,  0.0497,  ...,  0.0055, -0.0055, -0.0360],
        [-0.0579,  0.0471, -0.0513,  ..., -0.0176, -0.0496, -0.0681]],
       device='cuda:0', requires_grad=True)


-------Epoch 251-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (250)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0230,  0.0022,  0.0652,  ..., -0.0492,  0.0072, -0.0255],
        [-0.0532, -0.0473, -0.0109,  ...,  0.0249,  0.0330, -0.0734],
        [ 0.0449, -0.0051,  0.0658,  ..., -0.0523, -0.0247,  0.0424],
        ...,
        [-0.0701,  0.0725, -0.0041,  ...,  0.0019,  0.0259, -0.0389],
        [ 0.0533,  0.0015,  0.0496,  ...,  0.0055, -0.0057, -0.0360],
        [-0.0578,  0.0469, -0.0512,  ..., -0.0176, -0.0494, -0.0683]],
       device='cuda:0', requires_grad=True)


-------Epoch 252-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0269,  0.0044,  0.0637,  ..., -0.0510,  0.0039, -0.0206],
        [-0.0578, -0.0522, -0.0080,  ...,  0.0280,  0.0380, -0.0783],
        [ 0.0497, -0.0003,  0.0626,  ..., -0.0558, -0.0297,  0.0473],
        ...,
        [-0.0701,  0.0724, -0.0041,  ...,  0.0018,  0.0261, -0.0389],
        [ 0.0533,  0.0016,  0.0495,  ...,  0.0055, -0.0058, -0.0359],
        [-0.0578,  0.0468, -0.0512,  ..., -0.0176, -0.0493, -0.0684]],
       device='cuda:0', requires_grad=True)


-------Epoch 253-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0295,  0.0059,  0.0627,  ..., -0.0523,  0.0017, -0.0174],
        [-0.0609, -0.0554, -0.0061,  ...,  0.0300,  0.0412, -0.0814],
        [ 0.0529,  0.0028,  0.0605,  ..., -0.0580, -0.0330,  0.0506],
        ...,
        [-0.0701,  0.0724, -0.0041,  ...,  0.0018,  0.0262, -0.0389],
        [ 0.0532,  0.0017,  0.0495,  ...,  0.0055, -0.0059, -0.0359],
        [-0.0578,  0.0467, -0.0511,  ..., -0.0176, -0.0492, -0.0684]],
       device='cuda:0', requires_grad=True)


-------Epoch 254-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0312,  0.0068,  0.0620,  ..., -0.0531,  0.0003, -0.0153],
        [-0.0629, -0.0575, -0.0049,  ...,  0.0314,  0.0434, -0.0835],
        [ 0.0550,  0.0049,  0.0591,  ..., -0.0595, -0.0352,  0.0527],
        ...,
        [-0.0701,  0.0723, -0.0041,  ...,  0.0018,  0.0263, -0.0389],
        [ 0.0532,  0.0018,  0.0495,  ...,  0.0055, -0.0060, -0.0359],
        [-0.0578,  0.0467, -0.0511,  ..., -0.0177, -0.0491, -0.0685]],
       device='cuda:0', requires_grad=True)


-------Epoch 255-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0324,  0.0074,  0.0615,  ..., -0.0536, -0.0007, -0.0139],
        [-0.0642, -0.0589, -0.0041,  ...,  0.0323,  0.0448, -0.0849],
        [ 0.0563,  0.0062,  0.0581,  ..., -0.0605, -0.0366,  0.0541],
        ...,
        [-0.0701,  0.0723, -0.0041,  ...,  0.0018,  0.0264, -0.0389],
        [ 0.0532,  0.0018,  0.0495,  ...,  0.0055, -0.0061, -0.0359],
        [-0.0578,  0.0467, -0.0511,  ..., -0.0177, -0.0491, -0.0685]],
       device='cuda:0', requires_grad=True)


-------Epoch 256-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0331,  0.0078,  0.0612,  ..., -0.0540, -0.0013, -0.0130],
        [-0.0651, -0.0598, -0.0035,  ...,  0.0328,  0.0457, -0.0858],
        [ 0.0572,  0.0071,  0.0575,  ..., -0.0611, -0.0376,  0.0550],
        ...,
        [-0.0701,  0.0723, -0.0041,  ...,  0.0018,  0.0265, -0.0389],
        [ 0.0531,  0.0018,  0.0495,  ...,  0.0055, -0.0061, -0.0359],
        [-0.0578,  0.0466, -0.0511,  ..., -0.0177, -0.0491, -0.0685]],
       device='cuda:0', requires_grad=True)


-------Epoch 257-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0336,  0.0080,  0.0611,  ..., -0.0542, -0.0017, -0.0124],
        [-0.0657, -0.0604, -0.0032,  ...,  0.0332,  0.0463, -0.0864],
        [ 0.0578,  0.0077,  0.0571,  ..., -0.0616, -0.0382,  0.0556],
        ...,
        [-0.0701,  0.0723, -0.0041,  ...,  0.0018,  0.0265, -0.0389],
        [ 0.0531,  0.0018,  0.0495,  ...,  0.0055, -0.0061, -0.0359],
        [-0.0578,  0.0466, -0.0511,  ..., -0.0177, -0.0490, -0.0685]],
       device='cuda:0', requires_grad=True)


-------Epoch 258-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0339,  0.0083,  0.0609,  ..., -0.0544, -0.0020, -0.0120],
        [-0.0661, -0.0608, -0.0030,  ...,  0.0335,  0.0468, -0.0868],
        [ 0.0582,  0.0081,  0.0569,  ..., -0.0618, -0.0386,  0.0560],
        ...,
        [-0.0701,  0.0723, -0.0041,  ...,  0.0018,  0.0266, -0.0389],
        [ 0.0531,  0.0018,  0.0495,  ...,  0.0055, -0.0061, -0.0359],
        [-0.0578,  0.0466, -0.0511,  ..., -0.0177, -0.0490, -0.0685]],
       device='cuda:0', requires_grad=True)


-------Epoch 259-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0341,  0.0086,  0.0608,  ..., -0.0546, -0.0021, -0.0118],
        [-0.0663, -0.0608, -0.0028,  ...,  0.0336,  0.0470, -0.0871],
        [ 0.0585,  0.0084,  0.0567,  ..., -0.0620, -0.0388,  0.0563],
        ...,
        [-0.0700,  0.0716, -0.0045,  ...,  0.0018,  0.0275, -0.0386],
        [ 0.0530,  0.0020,  0.0495,  ...,  0.0055, -0.0062, -0.0359],
        [-0.0577,  0.0460, -0.0516,  ..., -0.0177, -0.0488, -0.0680]],
       device='cuda:0', requires_grad=True)


-------Epoch 260-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0342,  0.0089,  0.0608,  ..., -0.0547, -0.0023, -0.0117],
        [-0.0665, -0.0607, -0.0027,  ...,  0.0337,  0.0471, -0.0873],
        [ 0.0586,  0.0085,  0.0566,  ..., -0.0621, -0.0390,  0.0565],
        ...,
        [-0.0697,  0.0705, -0.0051,  ...,  0.0018,  0.0289, -0.0381],
        [ 0.0529,  0.0022,  0.0496,  ...,  0.0055, -0.0063, -0.0360],
        [-0.0575,  0.0451, -0.0522,  ..., -0.0177, -0.0484, -0.0672]],
       device='cuda:0', requires_grad=True)


-------Epoch 261-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (260)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0343,  0.0091,  0.0607,  ..., -0.0548, -0.0024, -0.0116],
        [-0.0666, -0.0606, -0.0026,  ...,  0.0337,  0.0472, -0.0874],
        [ 0.0587,  0.0086,  0.0565,  ..., -0.0622, -0.0391,  0.0566],
        ...,
        [-0.0695,  0.0698, -0.0054,  ...,  0.0018,  0.0298, -0.0378],
        [ 0.0528,  0.0024,  0.0497,  ...,  0.0055, -0.0064, -0.0360],
        [-0.0574,  0.0446, -0.0526,  ..., -0.0177, -0.0482, -0.0667]],
       device='cuda:0', requires_grad=True)


-------Epoch 262-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0092,  0.0607,  ..., -0.0548, -0.0025, -0.0115],
        [-0.0667, -0.0606, -0.0026,  ...,  0.0338,  0.0472, -0.0875],
        [ 0.0588,  0.0087,  0.0565,  ..., -0.0623, -0.0392,  0.0567],
        ...,
        [-0.0694,  0.0693, -0.0057,  ...,  0.0018,  0.0304, -0.0376],
        [ 0.0527,  0.0025,  0.0497,  ...,  0.0055, -0.0065, -0.0360],
        [-0.0573,  0.0442, -0.0528,  ..., -0.0177, -0.0480, -0.0664]],
       device='cuda:0', requires_grad=True)


-------Epoch 263-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0093,  0.0607,  ..., -0.0548, -0.0025, -0.0115],
        [-0.0667, -0.0605, -0.0026,  ...,  0.0338,  0.0473, -0.0875],
        [ 0.0589,  0.0087,  0.0564,  ..., -0.0623, -0.0392,  0.0567],
        ...,
        [-0.0694,  0.0690, -0.0058,  ...,  0.0018,  0.0307, -0.0375],
        [ 0.0527,  0.0026,  0.0497,  ...,  0.0055, -0.0065, -0.0361],
        [-0.0573,  0.0439, -0.0530,  ..., -0.0177, -0.0479, -0.0661]],
       device='cuda:0', requires_grad=True)


-------Epoch 264-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0094,  0.0607,  ..., -0.0548, -0.0026, -0.0115],
        [-0.0667, -0.0605, -0.0026,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0589,  0.0087,  0.0564,  ..., -0.0623, -0.0393,  0.0567],
        ...,
        [-0.0693,  0.0688, -0.0059,  ...,  0.0018,  0.0310, -0.0374],
        [ 0.0527,  0.0027,  0.0498,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0573,  0.0438, -0.0531,  ..., -0.0177, -0.0479, -0.0660]],
       device='cuda:0', requires_grad=True)


-------Epoch 265-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0094,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0589,  0.0087,  0.0564,  ..., -0.0623, -0.0393,  0.0568],
        ...,
        [-0.0693,  0.0687, -0.0060,  ...,  0.0018,  0.0312, -0.0373],
        [ 0.0526,  0.0027,  0.0498,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0437, -0.0532,  ..., -0.0177, -0.0478, -0.0659]],
       device='cuda:0', requires_grad=True)


-------Epoch 266-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0094,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0589,  0.0088,  0.0564,  ..., -0.0624, -0.0393,  0.0568],
        ...,
        [-0.0693,  0.0686, -0.0060,  ...,  0.0018,  0.0313, -0.0373],
        [ 0.0526,  0.0027,  0.0498,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0436, -0.0533,  ..., -0.0177, -0.0478, -0.0658]],
       device='cuda:0', requires_grad=True)


-------Epoch 267-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0094,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0589,  0.0088,  0.0564,  ..., -0.0624, -0.0393,  0.0568],
        ...,
        [-0.0693,  0.0685, -0.0061,  ...,  0.0018,  0.0313, -0.0373],
        [ 0.0526,  0.0027,  0.0498,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0436, -0.0533,  ..., -0.0177, -0.0478, -0.0658]],
       device='cuda:0', requires_grad=True)


-------Epoch 268-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0094,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0589,  0.0088,  0.0564,  ..., -0.0624, -0.0393,  0.0568],
        ...,
        [-0.0693,  0.0685, -0.0062,  ...,  0.0018,  0.0314, -0.0372],
        [ 0.0526,  0.0027,  0.0498,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0533,  ..., -0.0177, -0.0478, -0.0658]],
       device='cuda:0', requires_grad=True)


-------Epoch 269-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0094,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0589,  0.0088,  0.0564,  ..., -0.0624, -0.0393,  0.0568],
        ...,
        [-0.0693,  0.0685, -0.0062,  ...,  0.0018,  0.0314, -0.0372],
        [ 0.0526,  0.0027,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0532,  ..., -0.0177, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 270-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0095,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0590,  0.0088,  0.0564,  ..., -0.0624, -0.0393,  0.0568],
        ...,
        [-0.0693,  0.0685, -0.0063,  ...,  0.0018,  0.0313, -0.0372],
        [ 0.0527,  0.0027,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0532,  ..., -0.0177, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 271-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (270)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0345,  0.0095,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0590,  0.0087,  0.0564,  ..., -0.0623, -0.0393,  0.0568],
        ...,
        [-0.0693,  0.0685, -0.0063,  ...,  0.0018,  0.0313, -0.0372],
        [ 0.0527,  0.0027,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0532,  ..., -0.0178, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 272-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0095,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0590,  0.0087,  0.0563,  ..., -0.0623, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0685, -0.0063,  ...,  0.0018,  0.0313, -0.0372],
        [ 0.0527,  0.0027,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0532,  ..., -0.0178, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 273-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0095,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0590,  0.0086,  0.0563,  ..., -0.0622, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0685, -0.0064,  ...,  0.0018,  0.0313, -0.0372],
        [ 0.0527,  0.0028,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0532,  ..., -0.0178, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 274-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0095,  0.0607,  ..., -0.0549, -0.0026, -0.0114],
        [-0.0668, -0.0605, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0590,  0.0086,  0.0563,  ..., -0.0622, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0685, -0.0064,  ...,  0.0018,  0.0313, -0.0372],
        [ 0.0527,  0.0028,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0531,  ..., -0.0178, -0.0478, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 275-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0096,  0.0607,  ..., -0.0550, -0.0026, -0.0114],
        [-0.0668, -0.0604, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0590,  0.0085,  0.0563,  ..., -0.0622, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0685, -0.0064,  ...,  0.0018,  0.0313, -0.0372],
        [ 0.0526,  0.0028,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0531,  ..., -0.0177, -0.0478, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 276-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0096,  0.0607,  ..., -0.0550, -0.0026, -0.0114],
        [-0.0668, -0.0604, -0.0025,  ...,  0.0338,  0.0473, -0.0876],
        [ 0.0590,  0.0085,  0.0563,  ..., -0.0622, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0685, -0.0064,  ...,  0.0018,  0.0313, -0.0372],
        [ 0.0526,  0.0028,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0531,  ..., -0.0177, -0.0478, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 277-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0345,  0.0096,  0.0607,  ..., -0.0552, -0.0026, -0.0114],
        [-0.0668, -0.0604, -0.0025,  ...,  0.0339,  0.0473, -0.0876],
        [ 0.0590,  0.0085,  0.0563,  ..., -0.0622, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0685, -0.0064,  ...,  0.0018,  0.0313, -0.0372],
        [ 0.0526,  0.0028,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0531,  ..., -0.0177, -0.0478, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 278-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0607,  ..., -0.0555, -0.0026, -0.0114],
        [-0.0668, -0.0604, -0.0025,  ...,  0.0341,  0.0473, -0.0876],
        [ 0.0590,  0.0085,  0.0563,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0685, -0.0064,  ...,  0.0018,  0.0314, -0.0372],
        [ 0.0526,  0.0028,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0435, -0.0531,  ..., -0.0177, -0.0478, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 279-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0009], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0608,  ..., -0.0557, -0.0026, -0.0114],
        [-0.0668, -0.0604, -0.0026,  ...,  0.0341,  0.0473, -0.0876],
        [ 0.0590,  0.0085,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0684, -0.0064,  ...,  0.0018,  0.0314, -0.0372],
        [ 0.0526,  0.0028,  0.0497,  ...,  0.0055, -0.0066, -0.0361],
        [-0.0572,  0.0434, -0.0531,  ..., -0.0177, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 280-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0608,  ..., -0.0559, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0342,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0684, -0.0064,  ...,  0.0018,  0.0314, -0.0372],
        [ 0.0526,  0.0029,  0.0497,  ...,  0.0055, -0.0067, -0.0361],
        [-0.0572,  0.0434, -0.0531,  ..., -0.0178, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 281-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (280)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.00      0.00      0.00      1497
           1       0.25      1.00      0.40       490

    accuracy                           0.25      1987
   macro avg       0.12      0.50      0.20      1987
weighted avg       0.06      0.25      0.10      1987

Confusion Matrix: 
 [[   0 1497]
 [   0  490]]
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0608,  ..., -0.0560, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0342,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0692,  0.0681, -0.0065,  ...,  0.0017,  0.0315, -0.0372],
        [ 0.0525,  0.0030,  0.0497,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0433, -0.0532,  ..., -0.0178, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 282-------




  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0608,  ..., -0.0560, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0342,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0691,  0.0678, -0.0067,  ...,  0.0016,  0.0315, -0.0372],
        [ 0.0525,  0.0030,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0433, -0.0532,  ..., -0.0178, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 283-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0690,  0.0676, -0.0067,  ...,  0.0015,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0532,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 284-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0690,  0.0675, -0.0068,  ...,  0.0014,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0571,  0.0432, -0.0533,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 285-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0690,  0.0674, -0.0068,  ...,  0.0014,  0.0315, -0.0372],
        [ 0.0524,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0571,  0.0432, -0.0533,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 286-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0096,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0690,  0.0673, -0.0068,  ...,  0.0014,  0.0315, -0.0372],
        [ 0.0524,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0571,  0.0432, -0.0533,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 287-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0690,  0.0673, -0.0068,  ...,  0.0014,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0534,  ..., -0.0179, -0.0477, -0.0658]],
       device='cuda:0', requires_grad=True)


-------Epoch 288-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0690,  0.0673, -0.0069,  ...,  0.0014,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0534,  ..., -0.0179, -0.0477, -0.0658]],
       device='cuda:0', requires_grad=True)


-------Epoch 289-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0690,  0.0673, -0.0069,  ...,  0.0013,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0535,  ..., -0.0179, -0.0477, -0.0658]],
       device='cuda:0', requires_grad=True)


-------Epoch 290-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0673, -0.0069,  ...,  0.0013,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0535,  ..., -0.0179, -0.0477, -0.0658]],
       device='cuda:0', requires_grad=True)


-------Epoch 291-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)


***VALIDATION (290)***




0it [00:00, ?it/s]

              precision    recall  f1-score   support

           0       0.76      0.95      0.85      1497
           1       0.41      0.10      0.16       490

    accuracy                           0.74      1987
   macro avg       0.59      0.53      0.50      1987
weighted avg       0.68      0.74      0.68      1987

Confusion Matrix: 
 [[1427   70]
 [ 441   49]]
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0069,  ...,  0.0013,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0535,  ..., -0.0179, -0.0477, -0.0658]],
       device='cuda:0', requires_grad=True)


-------Epoch 292-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0069,  ...,  0.0013,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0535,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 293-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0010], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0069,  ...,  0.0013,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0535,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 294-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0069,  ...,  0.0013,  0.0315, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0535,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 295-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0069,  ...,  0.0013,  0.0314, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0066, -0.0361],
        [-0.0572,  0.0432, -0.0535,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 296-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0069,  ...,  0.0013,  0.0314, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0066, -0.0361],
        [-0.0572,  0.0432, -0.0535,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 297-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0012], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0068,  ...,  0.0013,  0.0314, -0.0372],
        [ 0.0525,  0.0031,  0.0498,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0536,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 298-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0014], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0344,  0.0095,  0.0608,  ..., -0.0561, -0.0026, -0.0114],
        [-0.0668, -0.0603, -0.0026,  ...,  0.0343,  0.0473, -0.0876],
        [ 0.0590,  0.0084,  0.0562,  ..., -0.0621, -0.0393,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0068,  ...,  0.0013,  0.0314, -0.0373],
        [ 0.0525,  0.0031,  0.0499,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0537,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 299-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0011], device='cuda:0', grad_fn=<DivBackward0>)
Parameter containing:
tensor([[ 0.0349,  0.0095,  0.0614,  ..., -0.0562, -0.0029, -0.0115],
        [-0.0670, -0.0603, -0.0029,  ...,  0.0344,  0.0475, -0.0876],
        [ 0.0592,  0.0084,  0.0566,  ..., -0.0622, -0.0395,  0.0568],
        ...,
        [-0.0689,  0.0672, -0.0067,  ...,  0.0013,  0.0314, -0.0373],
        [ 0.0526,  0.0031,  0.0499,  ...,  0.0056, -0.0067, -0.0361],
        [-0.0572,  0.0432, -0.0538,  ..., -0.0179, -0.0477, -0.0657]],
       device='cuda:0', requires_grad=True)


-------Epoch 300-------




0it [00:00, ?it/s]

Average Loss =  tensor([0.0013], device='cuda:0', grad_fn=<DivBackward0>)
