In [34]:
# built-in
import re
import codecs
import random

# common libs
import pandas as pd
import numpy as np 
from tqdm import tqdm

# plotting libs
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style(style='dark')

# nltk
from nltk.tokenize import RegexpTokenizer
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer


# torch
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

#sklearn
from sklearn.metrics import classification_report
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV


#bayes_opt
from bayes_opt import BayesianOptimization


# import EarlyStopping
from pytorchtools import EarlyStopping

# PART I

## Loading the data

In [36]:
# Read the two tab-separated files: the held-aside sub-task B test set and the labelled training set.
data_aside = pd.read_csv("Test_B_Release/testset-taskb.tsv", sep='\t', engine='c')
data = pd.read_csv("training-v1/offenseval-training-v1.tsv", sep='\t', engine='c')
# Keep only the rows whose sub-task A label is 'OFF'.
data = data.loc[data.subtask_a == 'OFF']

## Exploration

In [37]:
# Number of rows labelled 'TIN' in sub-task B.
data.subtask_b.eq('TIN').sum()

3876

In [38]:
# Number of rows labelled 'UNT' in sub-task B.
data.subtask_b.eq('UNT').sum()

524

In [40]:
# Shuffle the labelled rows once. The train/valid/test split further down is purely
# positional, so the shuffle is seeded to make the split (and every score computed
# from it) reproducible after a kernel restart.
SHUFFLE_SEED = 42
data = data.sample(frac=1, random_state=SHUFFLE_SEED)

# Raw tweet texts, in the (shuffled) row order of `data` and the file order of `data_aside`.
tweets = np.array(data.tweet)
tweets_aside = np.array(data_aside.tweet)

## Cleaning

In [43]:
# Mentions (handles may contain underscores) and http(s) links.
regexp1 = r'@[A-Za-z0-9_]+'
regexp2 = r'https?://[A-Za-z0-9./]+'

combined_regexp = r'|'.join((regexp1, regexp2)) # getting rid of @USER and potential URLs

# Built once and shared by every call instead of being re-created per tweet.
_word_tokenizer = RegexpTokenizer('[a-z]+')  # keeps runs of lowercase letters only (drops digits, '#', punctuation)
_porter_stemmer = PorterStemmer()            # suffix stripping
_stop_words_cache = {}                       # filled lazily so SW=False never touches the stopwords corpus


def low_stemmed_token_sentence(string, regexp = combined_regexp, SW = False):
    """Clean, lowercase, tokenize and stem one tweet.

    Parameters
    ----------
    string : str
        Raw tweet text.
    regexp : str
        Pattern whose matches are deleted before tokenizing
        (by default mentions and http(s) links).
    SW : bool
        When true, English stop words are removed before stemming.

    Returns
    -------
    list of str
        Porter-stemmed alphabetic tokens, in their original order.

    Notes
    -----
    The placeholder token ``url`` is dropped as a whole token. The previous
    ``str.replace('url', '')`` also deleted the letters "url" inside ordinary
    words (e.g. "curly" became "cy").
    """
    cleaned = re.sub(regexp, '', string)
    tokens = [tok for tok in _word_tokenizer.tokenize(cleaned.lower()) if tok != 'url']

    if SW:
        if 'english' not in _stop_words_cache:
            _stop_words_cache['english'] = frozenset(stopwords.words('english'))
        stop_words = _stop_words_cache['english']
        tokens = [tok for tok in tokens if tok not in stop_words]

    return [_porter_stemmer.stem(tok) for tok in tokens]

#low_stemmed_token_sentence(tweets[1])

# Token lists (stop words removed, stemmed) for every labelled tweet.
cleaned_tweets = list(map(lambda raw: low_stemmed_token_sentence(raw, SW=True), tweets))
print(len(cleaned_tweets))

4400


In [44]:
# Same cleaning pipeline applied to the held-aside tweets.
cleaned_tweets_aside = list(map(lambda raw: low_stemmed_token_sentence(raw, SW=True), tweets_aside))
print(len(cleaned_tweets_aside))

240


## Preprocessing for PyTorch

In [45]:
# Combine the labelled and aside token lists into one 1-D object array
# (one Python list of tokens per element, labelled tweets first).
# np.concatenate on ragged lists of lists is rejected by recent NumPy releases
# ("inhomogeneous shape"), so the object array is filled element by element.
_all_token_lists = list(cleaned_tweets) + list(cleaned_tweets_aside)
all_cleaned_tweets = np.empty(len(_all_token_lists), dtype=object)
for _pos, _tokens in enumerate(_all_token_lists):
    all_cleaned_tweets[_pos] = _tokens
print(all_cleaned_tweets.shape)

(4640,)


In [46]:
def get_word2idx(tokenized_corpus):
    """Build a token -> integer index mapping for a tokenized corpus.

    Parameters
    ----------
    tokenized_corpus : iterable of iterables of str
        One sequence of tokens per sentence.

    Returns
    -------
    dict
        Each distinct token mapped to 1, 2, 3, ... in order of first
        appearance, plus ``'<pad>'`` mapped to 0.
    """
    word2idx = {}
    for sentence in tokenized_corpus:
        for token in sentence:
            # dict membership is O(1); the previous list-based lookup made
            # vocabulary construction quadratic in the vocabulary size
            if token not in word2idx:
                word2idx[token] = len(word2idx) + 1
    word2idx['<pad>'] = 0     # we reserve the 0 index for the placeholder token
    return word2idx

def get_model_inputs(tokenized_corpus, word2idx, labels, max_len):
    """Turn tokenized sentences and their labels into padded tensors.

    Parameters
    ----------
    tokenized_corpus : iterable of iterables of str
        One sequence of tokens per sentence.
    word2idx : dict
        Token -> index mapping; tokens missing from it are silently dropped.
    labels : sequence of numbers
        Labels, converted as-is to a float tensor.
    max_len : int
        Width of the padded index tensor.

    Returns
    -------
    (sent_tensor, label_tensor)
        ``sent_tensor`` is a long tensor of shape ``(n_sentences, max_len)``
        holding token indices, right-padded with 0; ``label_tensor`` is
        ``torch.FloatTensor(labels)``.

    Notes
    -----
    Sentences longer than ``max_len`` are truncated (they previously raised a
    shape error). An empty corpus now returns an empty ``(0, max_len)`` tensor
    instead of failing on an undefined label tensor.
    """
    max_len = int(max_len)

    # we index our sentences, dropping out-of-vocabulary tokens
    vectorized_sents = [
        [word2idx[tok] for tok in sent if tok in word2idx][:max_len]
        for sent in tokenized_corpus
    ]

    # we create a tensor of a fixed size filled with zeroes for padding
    sent_tensor = torch.zeros((len(vectorized_sents), max_len), dtype=torch.long)

    # we fill it with our vectorized sentences
    for idx, sent in enumerate(vectorized_sents):
        if sent:
            sent_tensor[idx, :len(sent)] = torch.tensor(sent, dtype=torch.long)

    # built once, outside the loop, so it exists even for an empty corpus
    label_tensor = torch.FloatTensor(labels)

    return sent_tensor, label_tensor

In [47]:
# Vocabulary over labelled + aside tweets, and the longest tweet (in tokens), used as padding width.
word2idx = get_word2idx(all_cleaned_tweets)
sent_lengths = list(map(len, all_cleaned_tweets))
max_len = np.array(sent_lengths).max()

# Train-Valid-Test & Aside

In [48]:
# The combined array holds the labelled tweets first and the aside tweets after them.
# Take the boundary from the data instead of hard-coding 4400.
n_labelled = len(data)
regrouped = all_cleaned_tweets[:n_labelled]
aside = all_cleaned_tweets[n_labelled:]
assert len(aside) == len(data_aside), "aside token lists do not line up with data_aside"

# Replace the raw tweet text with its token list (positional assignment).
data.tweet = regrouped
print(data.shape)

(4400, 5)


In [51]:
# Encode the sub-task B labels as integers: 'TIN' -> 1, 'UNT' -> 0.
labels_string = np.array(data.subtask_b)
labels_string[labels_string == 'TIN'] = 1
labels_string[labels_string == 'UNT'] = 0
data.subtask_b = labels_string

# Positional split of the shuffled frame.
# (A stray trailing `a` on this line previously made the whole cell a SyntaxError.)
train, valid, test = data[:3000], data[3000:3700], data[3700:4400]

# Upsample class 0 in the training split by appending 4 extra copies of its rows.
train_zeroes = train[train.subtask_b == 0]
train_upsampled = pd.concat([train] + [train_zeroes] * 4, axis = 0)

# Same idea on the full labelled set, with 5 extra copies.
data_zeroes = data[data.subtask_b == 0]
data_upsampled = pd.concat([data] + [data_zeroes] * 5, axis = 0)

# 3000 rows + 4 x (class-0 rows in train); 4400 in the recorded run
print(train_upsampled.shape)
# 4400 rows + 5 x 524 class-0 rows = 7020
print(data_upsampled.shape)

(4400, 5)
(7020, 5)


In [52]:
def _sent_and_labels(frame):
    """Return the `tweet` and `subtask_b` columns of `frame` as two NumPy arrays."""
    return np.array(frame.tweet), np.array(frame.subtask_b)


# Token lists and labels for each split, as plain arrays.
train_sent, train_labels = _sent_and_labels(train)
train_sent_upsampled, train_labels_upsampled = _sent_and_labels(train_upsampled)

valid_sent, valid_labels = _sent_and_labels(valid)
test_sent, test_labels = _sent_and_labels(test)
data_upsampled_sent, data_upsampled_labels = _sent_and_labels(data_upsampled)

# Sanity check: each split must have as many sentences as labels.
for _name, _sents, _labels in (
    ('train', train_sent, train_labels),
    ('train', train_sent_upsampled, train_labels_upsampled),
    ('valid', valid_sent, valid_labels),
    ('test', test_sent, test_labels),
):
    print(f'{_name}: {len(_sents)} and {len(_labels)}')
print(f'aide: {len(aside)} and None')
print(f'data_upsampled: {len(data_upsampled_sent)} and {len(data_upsampled_labels)}')


train: 3000 and 3000
train: 4400 and 4400
valid: 700 and 700
test: 700 and 700
aide: 240 and None
data_upsampled: 7020 and 7020


In [53]:
# Padded index tensors (and float label tensors) for every split.
train_sent_tensor, train_label_tensor = \
        get_model_inputs(train_sent, word2idx, list(train_labels), max_len)
train_sent_upsampled_tensor, train_label_upsampled_tensor = \
        get_model_inputs(train_sent_upsampled, word2idx, list(train_labels_upsampled), max_len)
valid_sent_tensor, valid_label_tensor = \
        get_model_inputs(valid_sent, word2idx, list(valid_labels), max_len)
test_sent_tensor, test_label_tensor = \
        get_model_inputs(test_sent, word2idx, list(test_labels), max_len)
# The aside set has no labels: pass zero placeholders sized to the set itself
# (instead of borrowing the first 240 test labels) and discard the label tensor.
aside_sent_tensor, _ = \
        get_model_inputs(aside, word2idx, [0] * len(aside), max_len)
data_upsampled_sent_tensor, data_upsampled_label_tensor = \
        get_model_inputs(data_upsampled_sent, word2idx, list(data_upsampled_labels), max_len)

# PART II

## Deep Learning

In [54]:
# One 100-d row per vocabulary index; rows whose word is never matched in the file stay all-zero.
wvecs = np.zeros((len(word2idx), 100))

# Stream the file line by line instead of loading every line with readlines(),
# and split each line only once.
with codecs.open('glove/glove.6B/glove.6B.100d.txt', 'r','utf-8') as f:
    for line in tqdm(f):
        fields = line.strip().split()
        if len(fields) > 3:
            word = fields[0]
            if word in word2idx:
                wvecs[word2idx[word]] = list(map(float, fields[1:]))

print(wvecs)


100%|██████████| 400000/400000 [00:06<00:00, 65033.04it/s]


[[ 0.        0.        0.       ...  0.        0.        0.      ]
 [ 0.10735  -0.13863   0.057066 ... -0.51945   0.88829   0.5476  ]
 [-0.14465   0.45569   0.36791  ... -0.85594   0.52813   0.27255 ]
 ...
 [ 0.        0.        0.       ...  0.        0.        0.      ]
 [ 0.24479  -0.15516  -0.71726  ...  0.35571  -0.50608   0.26447 ]
 [ 0.        0.        0.       ...  0.        0.        0.      ]]


In [55]:
class CNN(nn.Module):
    """Convolutional sentence classifier with two parallel kernel heights.

    Token indices are embedded, passed through two convolutions whose kernels
    span ``window_size`` and ``window_size + 1`` tokens over the full
    embedding width, max-pooled over the sequence, concatenated, put through
    dropout and projected to ``output_dim`` logits.

    Parameters
    ----------
    vocab_size : int
        Number of rows of the embedding table.
    embedding_dim : int
        Width of each embedding vector.
    out_channels : int
        Number of filters per convolution.
    window_size : int
        Kernel height of the first convolution (the second uses one more).
    output_dim : int
        Number of output logits.
    dropout : float
        Dropout probability applied to the pooled features.
    embedding_matrix : numpy.ndarray, optional
        Initial embedding weights of shape ``(vocab_size, embedding_dim)``.
        When omitted, the notebook-level ``wvecs`` array is looked up at
        construction time, so a rebuilt matrix is picked up (a default bound
        in the signature would stay frozen at class-definition time).
    non_trainable : bool
        When true, the embedding weights are excluded from gradient updates.
    """

    def __init__(self, vocab_size, embedding_dim, out_channels, window_size, output_dim, dropout, embedding_matrix = None, non_trainable = False):

        super().__init__()

        if embedding_matrix is None:
            embedding_matrix = wvecs

        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.embedding.weight.data.copy_(torch.from_numpy(embedding_matrix))
        if non_trainable:
            self.embedding.weight.requires_grad = False

        self.conv1 = nn.Conv2d(
            in_channels=1, out_channels=out_channels, kernel_size=(window_size, embedding_dim))
        self.conv2 = nn.Conv2d(
            in_channels=1, out_channels=out_channels, kernel_size=(window_size + 1, embedding_dim))
        # The pooled features are 2-D (batch_size, 2*out_channels). nn.Dropout2d is meant
        # for 3-D/4-D channel-wise dropout and is deprecated on 2-D input; element-wise
        # nn.Dropout is the documented equivalent.
        self.dropout = nn.Dropout(dropout)
        self.label = nn.Linear(2*out_channels, output_dim)

    def conv_block(self, x, conv_layer):
        """Apply one convolution, ReLU, and max-pooling over the sequence axis.

        Returns a tensor of size (batch_size, out_channels).
        """
        conv_out = conv_layer(x)  # conv_out.size() = (batch_size, out_channels, dim, 1)
        activation = F.relu(conv_out.squeeze(3))  # activation.size() = (batch_size, out_channels, dim)
        # pooling window covers the whole remaining axis -> one value per filter
        max_out = F.max_pool1d(activation, activation.size()[2]).squeeze(2)

        return max_out

    def forward(self, x):
        """Map a (batch_size, num_seq) tensor of token indices to (batch_size, output_dim) logits."""
        embedded = self.embedding(x)
        # add the single input channel: (batch_size, 1, num_seq, embedding_length)
        embedded = embedded.unsqueeze(1)
        max_out1 = self.conv_block(embedded, self.conv1)
        max_out2 = self.conv_block(embedded, self.conv2)

        # all_out.size() = (batch_size, num_kernels*out_channels)
        all_out = torch.cat((max_out1, max_out2), 1)
        fc_in = self.dropout(all_out)
        logits = self.label(fc_in)

        return logits

In [56]:
def accuracy(output, target):
    """Fraction of entries where the rounded sigmoid of `output` equals `target`.

    `output` holds raw logits. The result is a 0-dim tensor equal to the
    number of matches divided by ``len`` of the comparison tensor.
    """
    predicted = torch.sigmoid(output).round()
    hits = predicted.eq(target).float()
    return hits.sum() / len(hits)

In [59]:
epochs= 100

INPUT_DIM = len(word2idx)
EMBEDDING_DIM = 100
OUTPUT_DIM = 1

# hyperparameters specific to the CNN
N_OUT_CHANNELS = 100   # number of filters per convolution
WINDOW_SIZE = 1        # kernel height of the first convolution
DROPOUT = 0.2          # dropout probability applied before the linear layer
patience = 20          # forwarded to EarlyStopping

# Seed torch so weight initialisation and dropout masks repeat from run to run.
TORCH_SEED = 0
torch.manual_seed(TORCH_SEED)

model = CNN(INPUT_DIM, EMBEDDING_DIM, N_OUT_CHANNELS, WINDOW_SIZE, OUTPUT_DIM, DROPOUT)

optimizer = optim.Adam(model.parameters(), lr= 0.0093, weight_decay=0.0003)
loss_fn = nn.BCEWithLogitsLoss()

feature_train = train_sent_upsampled_tensor
target_train = train_label_upsampled_tensor

feature_valid = valid_sent_tensor
target_valid = valid_label_tensor

# Hold-out tensors for the final evaluation. They used to be re-assigned on every
# epoch inside the loop, which left them undefined if the loop body never ran.
feature = test_sent_tensor
target = test_label_tensor

early_stopping = EarlyStopping(patience=patience, verbose=True)

# Per-epoch history, stored as plain floats. Appending the loss tensors themselves
# kept every epoch's autograd graph alive.
train_acc, val_acc = [], []
train_loss, val_loss = [], []
for epoch in range(1, epochs+1):

    # one full-batch optimisation step
    model.train()
    optimizer.zero_grad()

    predictions = model(feature_train).squeeze(1)
    loss = loss_fn(predictions, target_train)
    acc = accuracy(predictions, target_train)
    loss.backward()
    optimizer.step()

    epoch_loss = loss.item()
    epoch_acc = acc.item()
    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)

    # validation pass
    model.eval()
    with torch.no_grad():

        predictions_valid = model(feature_valid).squeeze(1)
        valid_loss = loss_fn(predictions_valid, target_valid).item()
        valid_acc = accuracy(predictions_valid, target_valid).item()
        val_loss.append(valid_loss)
        val_acc.append(valid_acc)
        predicted_valid = torch.round(torch.sigmoid(predictions_valid))
        f1 = classification_report(target_valid.numpy(), predicted_valid.numpy(), output_dict=True)['macro avg']['f1-score']
    print(f'| Epoch: {epoch:02} | Train Loss: {epoch_loss:.3f} | Train Acc: {epoch_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |Macro avg f1 {f1} |')

    # The negated macro-F1 is passed as the monitored value, so a higher F1 counts as
    # an improvement; the helper's own messages still call it "validation loss".
    early_stopping(-f1, model)

    if early_stopping.early_stop:
        print("Early stopping")
        break

# Hold-out evaluation with the weights of the LAST epoch that ran.
model.eval()
with torch.no_grad():

    predictions = model(feature).squeeze(1)
    loss = loss_fn(predictions, target)
    acc = accuracy(predictions, target)
    print(f'| Test Loss: {loss:.3f} | Test Acc: {acc*100:.2f}%')


  'precision', 'predicted', average, warn_for)


| Epoch: 01 | Train Loss: 0.699 | Train Acc: 48.48% | Val. Loss: 0.367 | Val. Acc: 88.29% |Macro avg f1 0.46889226100151743 |
Validation loss decreased (inf --> -0.468892).  Saving model ...
| Epoch: 02 | Train Loss: 0.770 | Train Acc: 60.14% | Val. Loss: 0.413 | Val. Acc: 88.00% |Macro avg f1 0.5210322244306148 |
Validation loss decreased (-0.468892 --> -0.521032).  Saving model ...
| Epoch: 03 | Train Loss: 0.644 | Train Acc: 63.80% | Val. Loss: 0.558 | Val. Acc: 77.14% |Macro avg f1 0.586679263108172 |
Validation loss decreased (-0.521032 --> -0.586679).  Saving model ...
| Epoch: 04 | Train Loss: 0.606 | Train Acc: 67.86% | Val. Loss: 0.682 | Val. Acc: 61.71% |Macro avg f1 0.50943475168403 |
EarlyStopping counter: 1 out of 20
| Epoch: 05 | Train Loss: 0.632 | Train Acc: 66.00% | Val. Loss: 0.661 | Val. Acc: 63.86% |Macro avg f1 0.5243735077923787 |
EarlyStopping counter: 2 out of 20
| Epoch: 06 | Train Loss: 0.617 | Train Acc: 67.86% | Val. Loss: 0.575 | Val. Acc: 73.29% |Macro avg

KeyboardInterrupt: 

## Tests on holdout test set

In [414]:
# Reload the weights stored in 'checkpoint.pt' and score them on the hold-out test split.
# The test tensors are referenced directly rather than through the `feature`/`target`
# names left behind by the training cell, so this cell does not depend on that state.
model.load_state_dict(torch.load('checkpoint.pt'))
model.eval()
with torch.no_grad():
    predictions = model(test_sent_tensor).squeeze(1)

predicted_test = torch.round(torch.sigmoid(predictions))
print(classification_report(test_label_tensor.numpy(), predicted_test.numpy()))

              precision    recall  f1-score   support

         0.0       0.94      1.00      0.97        81
         1.0       1.00      0.99      1.00       619

   micro avg       0.99      0.99      0.99       700
   macro avg       0.97      1.00      0.98       700
weighted avg       0.99      0.99      0.99       700



## Tests on official test set

In [420]:
# Predict on the aside set in inference mode (dropout off, no autograd graph).
model.eval()
with torch.no_grad():
    predictions_off = model(aside_sent_tensor).squeeze(1)
predictions_to_submit = torch.round(torch.sigmoid(predictions_off))

# Map 1 -> 'TIN' and everything else -> 'UNT'.
p = predictions_to_submit.detach().numpy()
p = ['TIN' if x == 1 else 'UNT' for x in p]
df_to_submit = pd.DataFrame(p)

# `data_test` is never defined in this notebook; the aside tweets come from `data_aside`,
# in the same row order, so its `id` column is the matching index.
df_to_submit.index = data_aside.id
df_to_submit.to_csv('test_3_b.csv',sep=',', header = False)

In [421]:
# Preview the first five rows of the submission frame.
df_to_submit.iloc[:5]

Unnamed: 0_level_0,0
id,Unnamed: 1_level_1
15923,TIN
60133,UNT
83681,UNT
65507,TIN
12588,TIN


# Bayesian Optimization

In [84]:
def f(lr, wd):
    """Objective for the hyper-parameter search: train a CNN and return validation macro-F1.

    A fresh CNN is trained for 45 full-batch epochs with Adam on the
    non-upsampled training tensors, printing metrics after every epoch and
    the hold-out test loss/accuracy at the end.

    Parameters
    ----------
    lr : float
        Adam learning rate.
    wd : float
        Adam weight decay.

    Returns
    -------
    float
        Macro-averaged F1 on the validation split after the last epoch.

    Notes
    -----
    NOTE(review): the main training cell fits on the upsampled training
    tensors while this objective uses the original imbalanced ones; confirm
    that the difference is intended.
    """
    epochs=45

    INPUT_DIM = len(word2idx)
    EMBEDDING_DIM = 100
    OUTPUT_DIM = 1

    # hyperparameters specific to the CNN
    N_OUT_CHANNELS = 100   # number of filters per convolution
    WINDOW_SIZE = 1        # kernel height of the first convolution
    DROPOUT = 0.2          # dropout probability applied before the linear layer

    # Same seed for every trial, so differences between trials come from (lr, wd)
    # rather than from the random initialisation.
    torch.manual_seed(0)

    model = CNN(INPUT_DIM, EMBEDDING_DIM, N_OUT_CHANNELS, WINDOW_SIZE, OUTPUT_DIM, DROPOUT)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    loss_fn = nn.BCEWithLogitsLoss()

    feature_train = train_sent_tensor
    target_train = train_label_tensor

    feature_valid = valid_sent_tensor
    target_valid = valid_label_tensor

    # hold-out tensors for the final report (previously re-assigned on every epoch)
    feature = test_sent_tensor
    target = test_label_tensor

    for epoch in range(1, epochs+1):

        # one full-batch optimisation step
        model.train()
        optimizer.zero_grad()

        predictions = model(feature_train).squeeze(1)
        loss = loss_fn(predictions, target_train)
        acc = accuracy(predictions, target_train)
        loss.backward()
        optimizer.step()

        epoch_loss = loss.item()
        epoch_acc = acc.item()

        # validation pass
        model.eval()
        with torch.no_grad():

            predictions_valid = model(feature_valid).squeeze(1)
            valid_loss = loss_fn(predictions_valid, target_valid).item()
            valid_acc = accuracy(predictions_valid, target_valid).item()
            predicted_valid = torch.round(torch.sigmoid(predictions_valid))
            f1 = classification_report(target_valid.numpy(), predicted_valid.numpy(), output_dict=True)['macro avg']['f1-score']
        # f1 is a fraction in [0, 1], so it is printed without a percent sign
        print(f'| Epoch: {epoch:02} | Train Loss: {epoch_loss:.3f} | Train Acc: {epoch_acc*100:.2f}% | Val. Loss: {valid_loss:.3f} | Val. Acc: {valid_acc*100:.2f}% |Macro avg f1 {f1} |')

    # Informational only: the test metrics are printed but not returned.
    model.eval()
    with torch.no_grad():

        predictions = model(feature).squeeze(1)
        loss = loss_fn(predictions, target)
        acc = accuracy(predictions, target)
        print(f'| Test Loss: {loss:.3f} | Test Acc: {acc*100:.2f}%')
    return f1

In [85]:
# Bounded region of parameter space
pbounds = {'lr': (0.00001, 0.05), 'wd': (0.000001, 0.01)}

optimizer = BayesianOptimization(
    f=f,
    pbounds=pbounds,
    random_state=1,
)

optimizer.maximize(
    init_points=8,
    n_iter=8,
)

Initialization
-----------------------------------------------------
 Step |   Time |      Value |        lr |        wd | 


  'precision', 'predicted', average, warn_for)


| Epoch: 01 | Train Loss: 0.692 | Train Acc: 51.97% | Val. Loss: 1.015 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 02 | Train Loss: 0.753 | Train Acc: 88.83% | Val. Loss: 1.030 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 03 | Train Loss: 0.761 | Train Acc: 88.83% | Val. Loss: 0.755 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 04 | Train Loss: 0.558 | Train Acc: 88.83% | Val. Loss: 0.510 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 05 | Train Loss: 0.385 | Train Acc: 88.83% | Val. Loss: 0.410 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 06 | Train Loss: 0.334 | Train Acc: 88.83% | Val. Loss: 0.421 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 07 | Train Loss: 0.370 | Train Acc: 88.83% | Val. Loss: 0.446 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 08 | Train Loss: 0.403 | Train Acc: 88.83% | Val. Loss: 0.438 | Val. Acc: 84.86% |Macro avg f1 0.45904

| Epoch: 37 | Train Loss: 0.679 | Train Acc: 59.40% | Val. Loss: 0.680 | Val. Acc: 63.43% |Macro avg f1 0.459583348411923% |
| Epoch: 38 | Train Loss: 0.677 | Train Acc: 61.33% | Val. Loss: 0.678 | Val. Acc: 64.29% |Macro avg f1 0.45910192374264375% |
| Epoch: 39 | Train Loss: 0.677 | Train Acc: 61.30% | Val. Loss: 0.677 | Val. Acc: 65.57% |Macro avg f1 0.4634889438015004% |
| Epoch: 40 | Train Loss: 0.671 | Train Acc: 63.57% | Val. Loss: 0.675 | Val. Acc: 66.43% |Macro avg f1 0.46533797465474475% |
| Epoch: 41 | Train Loss: 0.671 | Train Acc: 63.57% | Val. Loss: 0.673 | Val. Acc: 67.57% |Macro avg f1 0.468616966133946% |
| Epoch: 42 | Train Loss: 0.671 | Train Acc: 63.87% | Val. Loss: 0.672 | Val. Acc: 68.71% |Macro avg f1 0.47172724171321645% |
| Epoch: 43 | Train Loss: 0.669 | Train Acc: 66.47% | Val. Loss: 0.670 | Val. Acc: 70.29% |Macro avg f1 0.47701149425287354% |
| Epoch: 44 | Train Loss: 0.666 | Train Acc: 67.57% | Val. Loss: 0.668 | Val. Acc: 71.86% |Macro avg f1 0.4746086234

| Epoch: 28 | Train Loss: 0.327 | Train Acc: 88.83% | Val. Loss: 0.407 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 29 | Train Loss: 0.327 | Train Acc: 88.83% | Val. Loss: 0.403 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 30 | Train Loss: 0.326 | Train Acc: 88.83% | Val. Loss: 0.401 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 31 | Train Loss: 0.327 | Train Acc: 88.83% | Val. Loss: 0.399 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 32 | Train Loss: 0.328 | Train Acc: 88.83% | Val. Loss: 0.399 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 33 | Train Loss: 0.329 | Train Acc: 88.83% | Val. Loss: 0.398 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 34 | Train Loss: 0.329 | Train Acc: 88.83% | Val. Loss: 0.399 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 35 | Train Loss: 0.329 | Train Acc: 88.83% | Val. Loss: 0.399 | Val. Acc: 84.86% |Macro avg f1 0.45904

  'precision', 'predicted', average, warn_for)


| Epoch: 01 | Train Loss: 0.888 | Train Acc: 12.57% | Val. Loss: 3.428 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 02 | Train Loss: 2.500 | Train Acc: 88.83% | Val. Loss: 2.894 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 03 | Train Loss: 2.042 | Train Acc: 88.83% | Val. Loss: 1.400 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 04 | Train Loss: 0.930 | Train Acc: 88.83% | Val. Loss: 0.638 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 05 | Train Loss: 0.383 | Train Acc: 88.83% | Val. Loss: 0.624 | Val. Acc: 81.29% |Macro avg f1 0.47003716097115555% |
| Epoch: 06 | Train Loss: 0.475 | Train Acc: 84.17% | Val. Loss: 0.629 | Val. Acc: 82.00% |Macro avg f1 0.48008771309330117% |
| Epoch: 07 | Train Loss: 0.387 | Train Acc: 87.43% | Val. Loss: 0.688 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 08 | Train Loss: 0.289 | Train Acc: 88.83% | Val. Loss: 0.894 | Val. Acc: 84.86% |Macro avg f1 0.45904

  'precision', 'predicted', average, warn_for)


| Epoch: 01 | Train Loss: 0.549 | Train Acc: 88.33% | Val. Loss: 3.527 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 02 | Train Loss: 2.570 | Train Acc: 88.83% | Val. Loss: 1.769 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 03 | Train Loss: 1.295 | Train Acc: 88.83% | Val. Loss: 0.554 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 04 | Train Loss: 0.413 | Train Acc: 88.83% | Val. Loss: 0.482 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 05 | Train Loss: 0.440 | Train Acc: 88.83% | Val. Loss: 0.505 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 06 | Train Loss: 0.472 | Train Acc: 88.83% | Val. Loss: 0.428 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 07 | Train Loss: 0.346 | Train Acc: 88.83% | Val. Loss: 0.559 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 08 | Train Loss: 0.407 | Train Acc: 88.83% | Val. Loss: 0.447 | Val. Acc: 84.86% |Macro avg f1 0.45904

  'precision', 'predicted', average, warn_for)


| Epoch: 01 | Train Loss: 0.686 | Train Acc: 57.40% | Val. Loss: 2.651 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 02 | Train Loss: 1.937 | Train Acc: 88.83% | Val. Loss: 2.013 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 03 | Train Loss: 1.434 | Train Acc: 88.83% | Val. Loss: 0.979 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 04 | Train Loss: 0.669 | Train Acc: 88.83% | Val. Loss: 0.499 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 05 | Train Loss: 0.329 | Train Acc: 88.83% | Val. Loss: 0.534 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 06 | Train Loss: 0.412 | Train Acc: 88.73% | Val. Loss: 0.597 | Val. Acc: 83.14% |Macro avg f1 0.47022755842889763% |
| Epoch: 07 | Train Loss: 0.405 | Train Acc: 87.17% | Val. Loss: 0.634 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 08 | Train Loss: 0.290 | Train Acc: 88.80% | Val. Loss: 0.831 | Val. Acc: 84.86% |Macro avg f1 0.45904

  'precision', 'predicted', average, warn_for)


| Epoch: 01 | Train Loss: 0.645 | Train Acc: 74.90% | Val. Loss: 1.928 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 02 | Train Loss: 1.409 | Train Acc: 88.83% | Val. Loss: 1.657 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 03 | Train Loss: 1.185 | Train Acc: 88.83% | Val. Loss: 0.987 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 04 | Train Loss: 0.695 | Train Acc: 88.83% | Val. Loss: 0.554 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 05 | Train Loss: 0.378 | Train Acc: 88.83% | Val. Loss: 0.436 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 06 | Train Loss: 0.324 | Train Acc: 88.83% | Val. Loss: 0.518 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 07 | Train Loss: 0.408 | Train Acc: 88.83% | Val. Loss: 0.529 | Val. Acc: 84.71% |Macro avg f1 0.4586233565351895% |
| Epoch: 08 | Train Loss: 0.356 | Train Acc: 88.70% | Val. Loss: 0.566 | Val. Acc: 84.86% |Macro avg f1 0.459041

  'precision', 'predicted', average, warn_for)


| Epoch: 01 | Train Loss: 0.657 | Train Acc: 71.03% | Val. Loss: 0.824 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 02 | Train Loss: 0.613 | Train Acc: 88.83% | Val. Loss: 0.920 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 03 | Train Loss: 0.673 | Train Acc: 88.83% | Val. Loss: 0.766 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 04 | Train Loss: 0.559 | Train Acc: 88.83% | Val. Loss: 0.578 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 05 | Train Loss: 0.426 | Train Acc: 88.83% | Val. Loss: 0.449 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 06 | Train Loss: 0.342 | Train Acc: 88.83% | Val. Loss: 0.404 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 07 | Train Loss: 0.322 | Train Acc: 88.83% | Val. Loss: 0.409 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 08 | Train Loss: 0.338 | Train Acc: 88.83% | Val. Loss: 0.421 | Val. Acc: 84.86% |Macro avg f1 0.45904

  'precision', 'predicted', average, warn_for)


| Epoch: 33 | Train Loss: 0.551 | Train Acc: 88.33% | Val. Loss: 0.558 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 34 | Train Loss: 0.547 | Train Acc: 88.43% | Val. Loss: 0.557 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 35 | Train Loss: 0.545 | Train Acc: 88.53% | Val. Loss: 0.556 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 36 | Train Loss: 0.545 | Train Acc: 88.40% | Val. Loss: 0.556 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 37 | Train Loss: 0.544 | Train Acc: 88.53% | Val. Loss: 0.555 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 38 | Train Loss: 0.544 | Train Acc: 88.40% | Val. Loss: 0.554 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 39 | Train Loss: 0.541 | Train Acc: 88.70% | Val. Loss: 0.553 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 40 | Train Loss: 0.542 | Train Acc: 88.60% | Val. Loss: 0.552 | Val. Acc: 84.86% |Macro avg f1 0.45904

  'precision', 'predicted', average, warn_for)


| Epoch: 01 | Train Loss: 0.805 | Train Acc: 15.97% | Val. Loss: 1.268 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 02 | Train Loss: 0.934 | Train Acc: 88.83% | Val. Loss: 1.248 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 03 | Train Loss: 0.917 | Train Acc: 88.83% | Val. Loss: 0.847 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 04 | Train Loss: 0.623 | Train Acc: 88.83% | Val. Loss: 0.514 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 05 | Train Loss: 0.391 | Train Acc: 88.83% | Val. Loss: 0.413 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 06 | Train Loss: 0.344 | Train Acc: 88.83% | Val. Loss: 0.442 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 07 | Train Loss: 0.396 | Train Acc: 88.83% | Val. Loss: 0.445 | Val. Acc: 84.86% |Macro avg f1 0.45904173106646057% |
| Epoch: 08 | Train Loss: 0.398 | Train Acc: 88.83% | Val. Loss: 0.418 | Val. Acc: 84.86% |Macro avg f1 0.45904