In [22]:
import torch
import torchvision
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import csv
import random
import re
import os
import unicodedata
import codecs
import itertools
from scapy.all import *
import math
import pandas as pd
import numpy as np
from torchtext.data import Field, Dataset, Example,TabularDataset, BucketIterator
from torchtext import data
from torch.autograd import Variable
import sys
from sklearn.metrics import f1_score,precision_score,recall_score

In [3]:
CUDA = torch.cuda.is_available()
device=torch.device("cuda" if CUDA else "cpu")

# Data Preprocessing

In [4]:
# we remove all the packet header of the datagram, including the Ethernet header, the IP header, and the TCP header. 
# Only the payload can be transformed to segments.

# we read all the datagrams in binary type for convenience.

# Extracting Payload from pcap
def get_payload(pcap):
    if(hasattr(pcap,'load')):
        return pcap.load
    elif type(pcap)==scapy.packet.NoPayload:
        return None
    elif hasattr(pcap,'payload'):
        return get_payload(pcap.payload)
    return None

# List of Pcaps --> List of Payloads
def extract_payload(pcaps,class_name):
    # pcaps: raw pcaps data read from .pcap file (rdpcap)
    # class_name: the category of this pcap file
    raw_payload=pd.DataFrame({'payload':[get_payload(pcaps[i]) for i in range(len(pcaps))],'class':class_name})
    # drop rows with None payload
    return raw_payload.dropna()

def train_validate_test_split(df, train_percent=.8, validate_percent=.1, seed=0):
    np.random.seed(seed)
    perm = np.random.permutation(df.index)
    m = len(df.index)
    train_end = int(train_percent * m)
    validate_end = int(validate_percent * m) + train_end
    train = df.loc[perm[:train_end]]
    validate = df.loc[perm[train_end:validate_end]]
    test = df.loc[perm[validate_end:]]
    return train, validate, test

In [192]:
AIMChat_pcaps=rdpcap('./data/AIMchat2.pcapng')

# email
email_pcaps=rdpcap('./data/email1b.pcap')

# skype
skype_pcaps=rdpcap('./data/skype_chat1a.pcap')

# extract payload from pcaps file, label the payload, and store in csv file.
AIMChat_payload=extract_payload(AIMChat_pcaps,'AIMChat')
print("AIMChat payload size:",len(AIMChat_payload))
    
email_payload=extract_payload(email_pcaps,'email')
print("email payload size:",len(email_payload))
    
skype_payload=extract_payload(skype_pcaps,'skype')
print("skype payload size:",len(skype_payload))


email_payload=email_payload[0:len(AIMChat_payload)]
skype_payload=skype_payload[0:len(AIMChat_payload)]


# TODO: read more pcap files
# concatenate dataset from different cateogries
dataset=pd.concat([AIMChat_payload,email_payload,skype_payload])


    
# split dataset into train, validate (default ratio=8:1:1)
train_df,validate_df,test_df=train_validate_test_split(dataset)  
print("Train dataset length:",len(train_df))
print("Validate dataset length:",len(validate_df))
print("Test dataset length:",len(test_df))
    
# store to csv file
print("Storing dataset...")
train_df.to_csv('./data/train.csv')
validate_df.to_csv('./data/validate.csv')
test_df.to_csv('./data/test.csv')
print("Done.")

AIMChat payload size: 243
email payload size: 20151
skype payload size: 35326
Train dataset length: 1206
Validate dataset length: 154
Test dataset length: 157
Storing dataset...
Done.


### Segment Generator
Example.
![img](./image/segment-generator.jpg)

In [193]:
def fill(lst,N):
    length=len(lst)
    expected=N*math.ceil(length/N)
    lst.extend([0]*(expected-length))
    return lst

def unfold(lst):
    newlst=[]
    for l in lst:
        for s in l:
            newlst.append(s)
    return newlst;

def breakLength(length,N):
    fullCount=math.ceil(length/N)
    if(fullCount==1 or fullCount==0):
        return [length]
    last=length-N*(fullCount-1)
    return [N if i<fullCount-1 else last for i in range(fullCount)]

def expand_list(nested_list):
    for item in nested_list:
        if isinstance(item, (list, tuple)):
            for sub_item in expand_list(item):
                yield sub_item
        else:
            yield item
            
def transformLength(length,N):
    if(isinstance(length,torch.Tensor)):
        length=length.tolist()
    newlength=[[breakLength(length[i],N)] for i in range(len(length))]
    return torch.LongTensor(list(expand_list(newlength)))
    
def transformInputSeq(input_seq,length,N):
    payload_t=input_seq.transpose(0,1) # [batch, seq_len]
    segments=[[fill(payload_t[j].tolist()[i:i+N if i+N<length[j] else length[j]],N) for i in range(0, length[j], N)] for j in range(len(length))]
    # transform into (N, batch_size*ceil(seq_len/N))
    segments=torch.LongTensor(unfold(segments)).view(-1,N).transpose(0,1)
    return segments

def breakIntoSegments(input_seq,length,N):
    # break into segments
    segments=transformInputSeq(input_seq,length,N)
    newlength=transformLength(length,N)
    return (segments,newlength)

def concatSegments(segments,length,N):
    if(isinstance(segments ,torch.Tensor)):
        segments=segments.tolist()
    if(isinstance(length,torch.Tensor)):
        length=length.tolist()
    datagram_length=[math.ceil(length[i]/N) for i in range(len(length))]
    max_len=max(datagram_length)
    seg_len=len(segments[0])
    
    sum_count=0
    batch=[]
    datagram=[]
    for i in range(len(datagram_length)):
        for j in range(datagram_length[i]):
            datagram.append(segments[sum_count+j])
        while(len(datagram)<max_len):
            datagram.append([0]*seg_len)
        sum_count+=datagram_length[i]
        batch.append(datagram)
        datagram=[]
        
    datagrams=torch.Tensor(batch).transpose(0,1)
    datagram_length=torch.LongTensor(datagram_length)
    return datagrams,datagram_length

In [194]:
# a byte is transformed to an integer between 0 to 255
def SegmentGenerator(datagram,N=8):
    # datagram: binary, without packet header, payload
    # N : the length of each segments.
    # if datagram is read from csv file, it needs to be transformed in to byte data.
    if type(datagram)==str:
        datagram=eval(datagram)
    numerization=[datagram[i] for i in range(len(datagram))]
    return numerization

In [195]:
LABEL=data.LabelField()
PAYLOAD=Field(
    sequential=True,
    tokenize=SegmentGenerator,
    use_vocab=False,
    pad_token=0,
    include_lengths=True
    
)

In [196]:
train_data, valid_data,test_data = TabularDataset.splits(
                       path="data",
                       train='train.csv', validation="validate.csv", test="test.csv",
                       format='csv',
                       skip_header=True,
                       fields=[('index',None),('payload',PAYLOAD),('label',LABEL)])

LABEL.build_vocab(train_data)

In [197]:
batch_size=5
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data), 
    batch_size = batch_size, 
    sort_key=lambda x:len(x.payload),
    sort_within_batch = True,
    device = device
)

# Focal Loss and Classification
For training loss, we involve the **focal loss** in BSNN, because it shows strong advantages on the imbalance multiclassification problem 


In [198]:
class FocalLoss(nn.Module):
    r"""
        This criterion is a implemenation of Focal Loss, which is proposed in 
        Focal Loss for Dense Object Detection.

            Loss(x, class) = - \alpha (1-softmax(x)[class])^gamma \log(softmax(x)[class])

        The losses are averaged across observations for each minibatch.

        Args:
            alpha(1D Tensor, Variable) : the scalar factor for this criterion
            gamma(float, double) : gamma > 0; reduces the relative loss for well-classiﬁed examples (p > .5), 
                                   putting more focus on hard, misclassiﬁed examples
            size_average(bool): By default, the losses are averaged over observations for each minibatch.
                                However, if the field size_average is set to False, the losses are
                                instead summed for each minibatch.
    """
    def __init__(self, class_num, alpha=None, gamma=2, size_average=True):
        super(FocalLoss, self).__init__()
        if isinstance(alpha,(float,int,int)): alpha = torch.Tensor([alpha])
        if isinstance(alpha,list): alpha = torch.Tensor(alpha)
        if alpha is None:
            self.alpha = Variable(torch.ones(class_num, 1))
        else:
            if isinstance(alpha, Variable):
                self.alpha = alpha
            else:
                self.alpha = Variable(alpha)
        self.gamma = gamma
        self.class_num = class_num
        self.size_average = size_average

    def forward(self, inputs, targets):
        N = inputs.size(0)  # batch size
        C = inputs.size(1)  # class number
        P = F.softmax(inputs) 

        class_mask = inputs.data.new(N, C).fill_(0)
        class_mask = Variable(class_mask)
        ids = targets.view(-1, 1)  # [batch size, 1]
        class_mask.scatter_(1, ids, 1.)

        if inputs.is_cuda and not self.alpha.is_cuda:
            self.alpha = self.alpha.cuda()
        alpha = self.alpha[ids.view(-1)]

        probs = (P*class_mask).sum(1).view(-1,1)

        log_p = probs.log()
        #print('probs size= {}'.format(probs.size()))
        #print(probs)

        batch_loss = -alpha*(torch.pow((1-probs), self.gamma))*log_p 
        #print('-----bacth_loss------')
        #print(batch_loss)

        if self.size_average:
            loss = batch_loss.mean()
        else:
            loss = batch_loss.sum()
        return loss

# Attention Mechanism
Since different character may have different importance in traffic classification, we apply attention mechanism to integrate the outputs.
First, we need to calculate the weight wi,n for every character in segment i.
![image.png](./image/weight.jpg)
where hi,n is:
![image.png](./image/hidden.jpg)
By a multilayer perceptron of one layer, the hide state
hi,n of character n can be achieved. Then, the weight of character n is set as the similarity of hi,n and the context vector hc. The context vector is an abstract representation of the most informative character in the segment. 

## Context Vector[[1]](https://www.aclweb.org/anthology/N16-1174)
That is, we first feed the word annotation o_in through a **one-layer MLP** to get h_in as a hidden representation of o_in, then we measure the importance of the word as the similarity of h_in with a word level context vector h_c and get a normalized importance weight αit through a **softmax function**. 

The context vector h_c can be seen as a high level representation of a fixed query “what is the informative word” over the words like that used in memory networks. The word context vector h_c is **randomly initialized and jointly learned during the training process**.

# Segment representation
Finally, the representation of the segment is:
![segment representation formula](./image/weighted_sum.jpg)
In this way, we can get representations of all the segments.

In [199]:
# Attention Encoder using Bidirectional GRU
class AttentionEncoder(nn.Module):
    def __init__(self,input_size, hidden_size, n_layers=1, dropout=0, bidirectional=True):
        super(AttentionEncoder, self).__init__()
        self.n_layers=n_layers
        self.hidden_size=hidden_size
        self.bidirectional=bidirectional
        # randomly initialize context vector
        self.h_c=Variable(torch.randn((2 if self.bidirectional else 1)*hidden_size).to(device))
        ## Initialize RNN
        self.gru = nn.GRU(input_size,hidden_size,n_layers,dropout=(0 if n_layers==1 else dropout), bidirectional=self.bidirectional,bias=False)
        self.fc=nn.Linear((2 if self.bidirectional else 1)*hidden_size, 2*hidden_size)
        
    def forward(self, input_seq, length=None):
        # input_seq=[seq_len,batch_size,256]
        
        seq_len=input_seq.size(0)
        batch_size=input_seq.size(1)
        
        # initialize initial hidden state
        self.h_0 = torch.zeros((2 if self.bidirectional else 1)*self.n_layers, batch_size , self.hidden_size).to(device)
        
        if(not length is None):
            packed_input = nn.utils.rnn.pack_padded_sequence(input_seq,length,enforce_sorted=False)
            # GRU
            (packed_o_in,h_n)=self.gru(packed_input,self.h_0) 
            # o_in:[seq_len, batch_size, hidden_size*2]   
            # h_n: [num_layers*2,batch_size, hidden_size]
            o_in, o_in_lengths = nn.utils.rnn.pad_packed_sequence(packed_o_in)
        else:
            o_in,h_n=self.gru(input_seq,self.h_0)
        
        ## Attention
        # MLP layer
        h_in=torch.tanh(self.fc(o_in))  # [seq_len, batch_size, hidden_size*2]
        
        # weight
        in_soft=torch.matmul(h_in,self.h_c) #[seq_len, batch_size]
        weight=F.softmax(in_soft,dim=0)  #[seq_len,batch_size]
        
        # weighted sum
#         print("batch_size:",batch_size)
        S=torch.Tensor([list(torch.mul(o_in[:,i,:],weight[:,i].unsqueeze(1)).sum(dim=0)) for i in range(batch_size)])
        
        return S

# BSNN
Overall structure
![BSNN](./image/bsnn.jpg)

we can feed S,the representation of all segments, to another attention encoder. After this, we can get a representation vector d of the whole datagram.

As we already have the representation d of a datagram, we can turn to the classification problem. For K classification, d can be transformed to a K-dimension vector y = Wxd + bx

In [200]:
# BSNN
class BSNN(nn.Module):
    def __init__(self,segment_encoder, datagram_encoder,hidden_size,num_class,N=8):
        super(BSNN,self).__init__()
        self.hidden_size=hidden_size
        self.num_class=num_class
        self.N=N
        self.segment_encoder=segment_encoder
        self.datagram_encoder=datagram_encoder
        self.fc=nn.Linear(2*hidden_size,num_class)
        
    def forward(self,segments,length):
        # segments=[seq_len,batch_size]
#         print("input_seq before transform:",segments.shape)
#         print("length before trans:",length.shape)
        
        # break into segments  [8, ceil(seq_len/8)*batch_size]
        (segments,newlength)=breakIntoSegments(segments,length,self.N)
#         print("input_seq after transform:",segments.shape)
#         print("length after trans:",newlength.shape)

        # one hot encoding
        #segments = [N ,ceil(seq_len/N)*batch_size,256]
        segments=F.one_hot(segments,256).float()
#         print("input_seq length after one-hot encoding:",segments.shape)
        
        # segments encoding
#         print("Segment encoder-------------------------------")
        # segment_embed=[new batch size, hidden size]
        segment_embed=self.segment_encoder(segments,length=newlength)
#         print("results from segment encoder:",segment_embed.shape)
        
        # concat segment vectors
        # datagram embed=[ batch size, hidden size]
        datagram_embed,data_length=concatSegments(segment_embed,length,self.N)
        
        # datagram encoding     
#         print("Datagram encoder-------------------------------")
        # datagram encoder input=[1, batch size, hidden size]
        D=self.datagram_encoder(datagram_embed,data_length)
#         print("datagram encoder results:",D.shape)
        
        # transform to K dimension
        K=self.fc(D)
        
        # multi-classification
        result=F.softmax(K,dim=1)
#         print(result.shape)
        return result

In [201]:
# length of segment
N=8
num_class=len(LABEL.vocab)

# Hyperparameters for neural networks
hidden_size=100
input_size=256

learning_rate=0.001
dropout=0.5

epochs=20

seg_encoder=AttentionEncoder(input_size,hidden_size)
data_encoder=AttentionEncoder(2*hidden_size,hidden_size)
model=BSNN(seg_encoder,data_encoder,hidden_size,num_class)

In [202]:
# initialize model weight
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)

BSNN(
  (segment_encoder): AttentionEncoder(
    (gru): GRU(256, 100, bias=False, bidirectional=True)
    (fc): Linear(in_features=200, out_features=200, bias=True)
  )
  (datagram_encoder): AttentionEncoder(
    (gru): GRU(200, 100, bias=False, bidirectional=True)
    (fc): Linear(in_features=200, out_features=200, bias=True)
  )
  (fc): Linear(in_features=200, out_features=3, bias=True)
)

In [203]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 474,603 trainable parameters


In [284]:
# Focal Loss Hyperparameters
# α refers to the class weight, which can be calculated by the percentage of every class. 
alpha=0.25
alpha=torch.Tensor([alpha for i in range(num_class)])
gamma=2

# optimizer Adam
optimizer = optim.Adam(model.parameters(),lr=learning_rate)

# Focal Loss
criterion = FocalLoss(alpha=alpha,gamma=gamma,class_num=num_class)

labels=[i for i in range(num_class)]

In [297]:
def train(model, iterator, optimizer, criterion,labels, clip):
    
    model.train()
    
    epoch_loss = 0
    epoch_f1_score = torch.Tensor(num_class*[0])
    epoch_precision= torch.Tensor(num_class*[0])
    epoch_recall =  torch.Tensor(num_class*[0])

    for i, batch in enumerate(iterator):
        
        src,length=batch.payload
        trg = batch.label
        
        optimizer.zero_grad()
        
        output = model(src,length)

        loss = criterion(output, trg)
        
        probability,predictions=output.max(dim=1)

        f1 = torch.Tensor(f1_score(trg,predictions, average=None,labels=labels))
        precision = torch.Tensor(precision_score(trg,predictions,average=None,labels=labels))
        recall= torch.Tensor(recall_score(trg,predictions,average=None,labels=labels))
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        epoch_f1_score+=f1
        epoch_precision+= precision
        epoch_recall+=recall
        
    return epoch_loss / len(iterator), epoch_f1_score / len(iterator) , epoch_precision / len(iterator) , epoch_recall / len(iterator)

In [295]:
def evaluate(model, iterator, criterion,labels):
    
    epoch_loss = 0
    epoch_f1_score = torch.Tensor(num_class*[0])
    epoch_precision= torch.Tensor(num_class*[0])
    epoch_recall =  torch.Tensor(num_class*[0])
    
    model.eval()
    
    with torch.no_grad():
    
        for batch in iterator:
            
            src,length=batch.payload
            trg=batch.label
            
            predictions = model(src,length)
            
            loss = criterion(predictions, trg)
            probability,predictions=predictions.max(dim=1)
            
            f1 = torch.Tensor(f1_score(trg,predictions,average=None,labels=labels))
            precision = torch.Tensor(precision_score(trg,predictions,average=None,labels=labels))
            recall= torch.Tensor(recall_score(trg,predictions, average=None,labels=labels))

            epoch_f1_score+=f1
            epoch_precision+= precision
            epoch_recall+=recall
        
    return epoch_loss / len(iterator), epoch_f1_score / len(iterator) , epoch_precision / len(iterator) , epoch_recall / len(iterator)

In [296]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [293]:
N_EPOCHS = 10
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss,train_f1,train_precision,train_recall = train(model, train_iterator, optimizer, criterion,labels, CLIP)
    valid_loss,valid_f1,valid_precision,valid_recall = evaluate(model, valid_iterator, criterion,labels)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        if(not os.path.exists('./model/')):
            os.makedirs('./model/')
        torch.save(model.state_dict(),'./model/model.pt')

    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f} | f1: {train_f1} | precision:{train_precision} | recall:{train_recall}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f} | f1: {valid_f1} | precision:{valid_precision} | recall:{valid_recall}')



trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 0, 1, 0, 0])


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


trg: tensor([0, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 2, 2, 1])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 2, 1, 1, 1])
pre: tensor([1, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 1, 0])
trg: tensor([0, 0, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 0, 0, 0, 1])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 2, 0, 1, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 1, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1

trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 1, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 1, 1, 1, 1])
trg: tensor([1, 2, 2, 0, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 1, 0, 0, 0])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0

pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 2, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 2, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 2, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 0, 1, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 0, 0])
pre: tensor([1, 1, 1, 0, 0])
trg: tensor([0, 1, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 1, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 1, 0, 2, 0])
pre: tensor([0

pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 2, 0, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 1, 2, 2, 1])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 0, 0, 0, 0])
pre: tensor([1

trg: tensor([2, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 2, 2, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 2, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 1, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 2, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1

trg: tensor([0, 0, 2, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1])
pre: tensor([1])
trg: tensor([1, 2, 1, 1, 1])
pre: tensor([1, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 1, 1, 0])
trg: tensor([1, 1, 1, 2, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 2, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 2])
pre: tensor([0, 0, 1, 0, 0])
trg: tensor([1, 1, 1, 1, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 2, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tens

pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 2, 2, 0, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 1, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 1, 0, 0, 1])
trg: tensor([0, 2, 2, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0

	 Val. Loss: 0.000 |  Val. PPL:   1.000 | f1: tensor([0.2450, 0.1966, 0.0000]) | precision:tensor([0.1710, 0.2032, 0.0000]) | recall:tensor([0.4839, 0.2516, 0.0000])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 1, 0, 0, 1])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 0, 1])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 

trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 2, 1])
pre: tensor([1, 1, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 1, 0, 1, 0])
trg: tensor([1, 2, 2, 1, 2])
pre: tensor([1, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 0, 2, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 1, 1, 1, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 1, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2

pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 1, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 1, 0, 0, 0])
trg: tensor([0, 0, 2, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 0, 0])
trg: tensor([0, 2, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 2, 2])
pre: tensor([0

pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 0, 1])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([1, 1, 1, 1, 0])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 2, 0])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0

trg: tensor([0, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 1, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 1, 1])
trg: tensor([0, 2, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 1, 1, 1, 1])
pre: tensor([0, 1, 0, 1, 0])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 0, 1])
trg: tensor([2, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 2, 2, 1])
pre: tensor([1, 1, 0, 0, 0])
trg: tensor([2, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0

pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 2, 1, 1, 1])
pre: tensor([0, 0, 1, 1, 0])
trg: tensor([2, 2, 1, 1, 1])
pre: tensor([0, 0, 0, 0, 1])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 1, 1, 1])
pre: tensor([0, 0, 1, 1, 1])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 0, 0])
trg: tensor([1, 1, 1, 1, 2])
pre: tensor([1

pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 2, 0, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 0, 1, 1, 0])
trg: tensor([0, 0, 2, 2, 1])
pre: tensor([0, 0, 0, 0, 1])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 1, 0, 0, 0])
trg: tensor([1, 1, 1, 2, 1])
pre: tensor([1, 0, 1, 0, 0])
trg: tensor([2, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1

Epoch: 08 | Time: 0m 32s
	Train Loss: 0.101 | Train PPL:   1.106 | f1: tensor([0.4155, 0.2932, 0.0431]) | precision:tensor([0.3534, 0.3465, 0.0620]) | recall:tensor([0.5468, 0.3085, 0.0357])
	 Val. Loss: 0.000 |  Val. PPL:   1.000 | f1: tensor([0.2450, 0.1966, 0.0000]) | precision:tensor([0.1710, 0.2032, 0.0000]) | recall:tensor([0.4839, 0.2516, 0.0000])
trg: tensor([2, 2, 1, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 0, 2])
pre: tensor([0, 0, 2, 0, 2])
trg: tensor([2, 0, 2, 0, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 2, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 0, 0, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 2, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: 

trg: tensor([0, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 2])
trg: tensor([1, 1, 1, 0, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 2, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 2])
trg: tensor([0, 0, 2, 2, 2])
pre: tensor([0, 0, 0, 2, 0])
trg: tensor([2, 2, 2, 0, 0])
pre: tensor([2, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 2, 0])
pre: tensor([0, 0, 0, 2, 0])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 0, 1])
trg: tensor([0, 0, 2, 0, 2])
pre: tensor([0, 0, 2, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 1, 0, 1, 1])
trg: tensor([0, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 0, 0])
pre: tensor([2, 2, 0, 0, 0])
trg: tensor([1, 1, 2, 2, 1])
pre: tensor([0, 1, 0, 0, 0])
trg: tensor([2

pre: tensor([0, 0, 1, 0, 1])
trg: tensor([0, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([2, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 1, 1])
trg: tensor([0, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 0, 0, 2, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 2, 0, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 0, 2, 0, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 2])
pre: tensor([1, 0, 0, 0, 0])
trg: tensor([0, 2, 0, 2, 0])
pre: tensor([0, 2, 0, 0, 0])
trg: tensor([0, 0, 2, 2, 0])
pre: tensor([1, 1, 1, 0, 0])
trg: tensor([0, 0, 2, 0, 0])
pre: tensor([0

pre: tensor([0, 2, 0, 0, 0])
trg: tensor([0, 2, 2, 0, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 2])
pre: tensor([0, 0, 0, 0, 2])
trg: tensor([0, 2, 2, 2, 0])
pre: tensor([0, 0, 2, 0, 0])
trg: tensor([1, 1, 1, 1, 2])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([0, 0, 0, 0, 1])
trg: tensor([1, 1, 1, 2, 1])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 2, 1, 1])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 0, 0, 0])
trg: tensor([0, 0, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 2, 2])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([2, 2, 0, 0, 0])
pre: tensor([0, 0, 0, 0, 0])
trg: tensor([1, 1, 1, 1, 1])
pre: tensor([1, 1, 1, 1, 1])
trg: tensor([0, 2, 0, 0, 2])
pre: tensor([1, 1, 0, 0, 0])
trg: tensor([0, 2, 1, 0, 0])
pre: tensor([0

In [270]:
_,gt=torch.randn(30,3).max(dim=1)
pr=torch.Tensor([1]*30)

In [271]:
gt

tensor([2, 1, 2, 0, 2, 0, 0, 1, 2, 1, 1, 1, 2, 0, 2, 0, 2, 1, 0, 1, 0, 2, 1, 2,
        1, 0, 2, 0, 2, 0])

In [276]:
pr

tensor([1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [272]:
pr[1]=0

In [285]:
pr

tensor([1., 0., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [286]:
gt

tensor([2, 1, 2, 0, 2, 0, 0, 1, 2, 1, 1, 1, 2, 0, 2, 0, 2, 1, 0, 1, 0, 2, 1, 2,
        1, 0, 2, 0, 2, 0])

In [288]:
f1_score(pr.tolist(),pr.tolist(),average=None,labels=labels)

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


array([1., 1., 0.])