In [1]:
import pandas as pd
import os
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, RobertaConfig

from smart_pytorch import SMARTLoss, kl_loss, sym_kl_loss
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
from torch.utils.data import (
    Dataset, 
    DataLoader, 
    RandomSampler, 
    SequentialSampler
)
import math 
from transformers.optimization import (
    AdamW, 
    get_linear_schedule_with_warmup
)
from sklearn.metrics import (
    confusion_matrix,
    matthews_corrcoef,
    accuracy_score,
    roc_curve,
    auc,
    average_precision_score,
    f1_score,
)
from scipy.special import softmax
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
import datasets

In [2]:
print(torch.version.cuda)
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.backends.cudnn.enabled)

11.7
1.13.1
True
True


In [3]:
train_data = datasets.load_dataset('zeroshot/twitter-financial-news-sentiment', split='train')
test_data = datasets.load_dataset('zeroshot/twitter-financial-news-sentiment', split='validation')

Using custom data configuration zeroshot--twitter-financial-news-sentiment-ccca0f3c622c5b67
Found cached dataset csv (C:/Users/ruoxinli3/.cache/huggingface/datasets/zeroshot___csv/zeroshot--twitter-financial-news-sentiment-ccca0f3c622c5b67/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)
Using custom data configuration zeroshot--twitter-financial-news-sentiment-ccca0f3c622c5b67
Found cached dataset csv (C:/Users/ruoxinli3/.cache/huggingface/datasets/zeroshot___csv/zeroshot--twitter-financial-news-sentiment-ccca0f3c622c5b67/0.0.0/6b34fb8fcf56f7c8ba51dc895bfa2bfbe43546f190a60fcf74bb5e8afdcc2317)


In [4]:
train_data['text']

['$BYND - JPMorgan reels in expectations on Beyond Meat https://t.co/bd0xbFGjkT',
 '$CCL $RCL - Nomura points to bookings weakness at Carnival and Royal Caribbean https://t.co/yGjpT2ReD3',
 '$CX - Cemex cut at Credit Suisse, J.P. Morgan on weak building outlook https://t.co/KN1g4AWFIb',
 '$ESS: BTIG Research cuts to Neutral https://t.co/MCyfTsXc2N',
 '$FNKO - Funko slides after Piper Jaffray PT cut https://t.co/z37IJmCQzB',
 '$FTI - TechnipFMC downgraded at Berenberg but called Top Pick at Deutsche Bank https://t.co/XKcPDilIuU',
 '$GM - GM loses a bull https://t.co/tdUfG5HbXy',
 '$GM: Deutsche Bank cuts to Hold https://t.co/7Fv1ZiFZBS',
 '$GTT: Cowen cuts to Market Perform',
 '$HNHAF $HNHPD $AAPL - Trendforce cuts iPhone estimate after Foxconn delay https://t.co/rlnEwzlzzS',
 "$HOG - Moody's warns on Harley-Davidson https://t.co/LurHBEadeU",
 '$HXL - Citing aero ties, Wells slashes PT on Hexcel https://t.co/wU5P2i8WBU',
 '$I - Intelsat cut to Market Perform at Raymond James https://t.c

In [5]:
model_name = "roberta-large"

num_labels = 3
device = torch.device("cuda")

tokenizer_name = model_name

max_seq_length = 128
train_batch_size = 8
test_batch_size = 8
warmup_ratio = 0.06
weight_decay=0.0
gradient_accumulation_steps = 1
num_train_epochs = 15
learning_rate = 1e-05
adam_epsilon = 1e-08

In [6]:
class SMARTRobertaClassificationModel(nn.Module):
    
    def __init__(self, model, weight = 0.02):
        super().__init__()
        self.model = model 
        self.weight = weight

    def forward(self, input_ids, attention_mask, labels):

        # Get initial embeddings 
        embed = self.model.roberta.embeddings(input_ids) 

        # Define eval function 
        def eval(embed):
            outputs = self.model.roberta(inputs_embeds=embed, attention_mask=attention_mask)
            pooled = outputs[0] 
            logits = self.model.classifier(pooled) 
            return logits 
        
        # Define SMART loss
        smart_loss_fn = SMARTLoss(eval_fn = eval, loss_fn = kl_loss, loss_last_fn = sym_kl_loss)
        # Compute initial (unperturbed) state 
        state = eval(embed)
        # Apply classification loss 
        loss = F.cross_entropy(state.view(-1, 3), labels.view(-1))
        # Apply smart loss 
        loss += self.weight * smart_loss_fn(embed, state)
        
        return state, loss
    
tokenizer = AutoTokenizer.from_pretrained(model_name)
# tokenizer = AutoTokenizer.from_pretrained('./roberta_pretrained_fin')

config = RobertaConfig.from_pretrained(model_name, num_labels=num_labels)
model = AutoModelForSequenceClassification.from_pretrained('./roberta_pretrained_fin_0.5_e1', config = config)

model_smart = SMARTRobertaClassificationModel(model)

Some weights of the model checkpoint at ./roberta_pretrained_fin_0.5_e1 were not used when initializing RobertaForSequenceClassification: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.bias', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at ./roberta_pretrained_fin_0.5_e1 and are newly initialized: ['classifier.out_proj.weight', 'clas

In [7]:
print('Model=\n',model_smart,'\n')

Model=
 SMARTRobertaClassificationModel(
  (model): RobertaForSequenceClassification(
    (roberta): RobertaModel(
      (embeddings): RobertaEmbeddings(
        (word_embeddings): Embedding(50265, 1024, padding_idx=1)
        (position_embeddings): Embedding(514, 1024, padding_idx=1)
        (token_type_embeddings): Embedding(1, 1024)
        (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): RobertaEncoder(
        (layer): ModuleList(
          (0): RobertaLayer(
            (attention): RobertaAttention(
              (self): RobertaSelfAttention(
                (query): Linear(in_features=1024, out_features=1024, bias=True)
                (key): Linear(in_features=1024, out_features=1024, bias=True)
                (value): Linear(in_features=1024, out_features=1024, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output): RobertaSelfOutpu

In [8]:
class MyClassificationDataset(Dataset):
    
    def __init__(self, data, tokenizer):
        text, labels = data
        self.examples = tokenizer(text=text,text_pair=None,truncation=True,padding="max_length",
                                  max_length=max_seq_length,return_tensors="pt")
        self.labels = torch.tensor(labels, dtype=torch.long)
        

    def __len__(self):
        return len(self.examples["input_ids"])

    def __getitem__(self, index):
        return {key: self.examples[key][index] for key in self.examples}, self.labels[index]

train_examples = (train_data['text'], train_data['label'])
train_dataset = MyClassificationDataset(train_examples,tokenizer)

test_examples = (test_data['text'], test_data['label'])
test_dataset = MyClassificationDataset(test_examples,tokenizer)

In [9]:
def get_inputs_dict(batch):
    inputs = {key: value.squeeze(1).to(device) for key, value in batch[0].items()}
    inputs["labels"] = batch[1].to(device)
    return inputs

train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset,shuffle=True,batch_size=train_batch_size)

test_sampler = SequentialSampler(test_dataset)
test_dataloader = DataLoader(test_dataset,sampler=test_sampler, batch_size=test_batch_size)

#Extract a batch as sanity-check
# batch = get_inputs_dict(next(iter(train_dataloader)))
# input_ids = batch['input_ids'].to(device)
# attention_mask = batch['attention_mask'].to(device)
# labels = batch['labels'].to(device)

# print(batch)

In [10]:
t_total = len(train_dataloader) // gradient_accumulation_steps * num_train_epochs
optimizer_grouped_parameters = []
custom_parameter_names = set()
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters.extend(
    [
        {
            "params": [
                p
                for n, p in model_smart.named_parameters()
                if n not in custom_parameter_names and not any(nd in n for nd in no_decay)
            ],
            "weight_decay": weight_decay,
        },
        {
            "params": [
                p
                for n, p in model_smart.named_parameters()
                if n not in custom_parameter_names and any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
)

warmup_steps = math.ceil(t_total * warmup_ratio)
optimizer = AdamW(optimizer_grouped_parameters, lr=learning_rate, eps=adam_epsilon)
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=warmup_steps, num_training_steps=t_total)




In [11]:
def compute_metrics(preds, model_outputs, labels, eval_examples=None, multi_label=True):
    assert len(preds) == len(labels)
    mismatched = labels != preds
    #wrong = [i for (i, v) in zip(eval_examples, mismatched) if v.any()]
    mcc = matthews_corrcoef(labels, preds)
    acc = accuracy_score(labels, preds)
    f1 = f1_score(labels, preds, average='macro')
    con_m = confusion_matrix(labels, preds, labels=[0, 1, 2])
#     scores = np.array([softmax(element)[1] for element in model_outputs])
#     fpr, tpr, thresholds = roc_curve(labels, scores)
#     auroc = auc(fpr, tpr)
#     auprc = average_precision_score(labels, scores)
    return (
        {
            **{"mcc": mcc, "acc":acc, "f1": f1},
        },
        con_m
    )

def print_confusion_matrix(result):
    print('confusion matrix:')
    print('            predicted    ')
    print('          0     |     1')
    print('    ----------------------')
    print('   0 | ',format(result['tn'],'5d'),' | ',format(result['fp'],'5d'))
    print('gt -----------------------')
    print('   1 | ',format(result['fn'],'5d'),' | ',format(result['tp'],'5d'))
    print('---------------------------------------------------')


In [12]:
torch.cuda.empty_cache()

In [13]:
model_smart.to(device)


PATH = "SMART_Roberta_large_FinancialTweets/exp8-0.5/"+str(9)
model_smart.load_state_dict(torch.load(PATH))
model_smart.zero_grad()

for epoch in range(num_train_epochs):

    model_smart.train()
    epoch_loss = []
    
    for batch in tqdm(train_dataloader):
        batch = get_inputs_dict(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        logits, loss = model_smart(input_ids, attention_mask=attention_mask, labels=labels)
#         loss = outputs[0]
        loss.backward()
        optimizer.step()
        scheduler.step()
        model_smart.zero_grad()
        epoch_loss.append(loss.item())
        
#    SAVE
    PATH = "SMART_Roberta_large_FinancialTweets/exp8-0.5/"+str(epoch)
    torch.save(model_smart.state_dict(), PATH)
    
        
    
    print('epoch',epoch,'Training avg loss',np.mean(epoch_loss))


100%|██████████████████████████████████████| 1193/1193 [12:10<00:00,  1.63it/s]


epoch 10 Training avg loss 0.04041001987160631


100%|██████████████████████████████████████| 1193/1193 [12:20<00:00,  1.61it/s]


epoch 11 Training avg loss 0.06085722152343662


100%|██████████████████████████████████████| 1193/1193 [13:37<00:00,  1.46it/s]


epoch 12 Training avg loss 0.03338276115022654


100%|██████████████████████████████████████| 1193/1193 [13:03<00:00,  1.52it/s]


epoch 13 Training avg loss 0.024115451914765728


100%|██████████████████████████████████████| 1193/1193 [20:03<00:00,  1.01s/it]


epoch 14 Training avg loss 0.02282985494535905


In [13]:
model_smart.to(device)

for epoch in range(15):
    
    #evaluate model with test_df at the end of the epoch.
    eval_loss = 0.0
    nb_eval_steps = 0
    n_batches = len(test_dataloader)
    preds = np.empty((len(test_dataset), num_labels))
    out_label_ids = np.empty((len(test_dataset)))
    PATH = "SMART_Roberta_large_FinancialTweets/exp1/"+str(epoch)
    model_smart.load_state_dict(torch.load(PATH))
    model_smart.eval()
    
    for i,test_batch in enumerate(test_dataloader):
#         with torch.no_grad():
        test_batch = get_inputs_dict(test_batch)
        input_ids = test_batch['input_ids'].to(device)
        attention_mask = test_batch['attention_mask'].to(device)
        labels = test_batch['labels'].to(device)
        logits, tmp_eval_loss = model_smart(input_ids, attention_mask=attention_mask, labels=labels)
    #             tmp_eval_loss, logits = outputs[:2]
        eval_loss += tmp_eval_loss.item()
            
        nb_eval_steps += 1
        start_index = test_batch_size * i
        end_index = start_index + test_batch_size if i != (n_batches - 1) else len(test_dataset)
#         print(logits)
        preds[start_index:end_index] = logits.detach().cpu().numpy()
        out_label_ids[start_index:end_index] = test_batch["labels"].detach().cpu().numpy()
        
    eval_loss = eval_loss / nb_eval_steps
    model_outputs = preds
    preds = np.argmax(preds, axis=1)
    result, con_m = compute_metrics(preds, model_outputs, out_label_ids)
    
    #print('epoch',epoch,'Training avg loss',np.mean(epoch_loss))
    print('epoch',epoch,'Testing  avg loss',eval_loss)
    print(result)
    print(con_m)
    print('---------------------------------------------------\n')

epoch 0 Testing  avg loss 0.2677054530489305
{'mcc': 0.7892972013023432, 'acc': 0.8919597989949749, 'f1': 0.864619295016417}
[[ 299    8   40]
 [   1  395   79]
 [  81   49 1436]]
---------------------------------------------------

epoch 1 Testing  avg loss 0.275108687666846
{'mcc': 0.8062561640207809, 'acc': 0.8919597989949749, 'f1': 0.8706068385881022}
[[ 329    6   12]
 [   4  435   36]
 [ 127   73 1366]]
---------------------------------------------------

epoch 2 Testing  avg loss 0.2285561748127372
{'mcc': 0.8299002979594252, 'acc': 0.9108040201005025, 'f1': 0.8900869308358702}
[[ 311    5   31]
 [   4  430   41]
 [  62   70 1434]]
---------------------------------------------------

epoch 3 Testing  avg loss 0.26236413746260817
{'mcc': 0.8234544963666155, 'acc': 0.9095477386934674, 'f1': 0.8871531305327931}
[[ 298    7   42]
 [   2  419   54]
 [  45   66 1455]]
---------------------------------------------------

epoch 4 Testing  avg loss 0.3012679757383523
{'mcc': 0.8258712896

In [13]:
# df_sample =  pd.read_csv("../data/tweets/stockerbot-export-test-2.csv")

df_sample =  pd.read_csv("../data/tweets.csv")
df_sample

new_labels=np.zeros(704)

# for l in df_sample['label'].tolist():
#     if l == 2:
#         new_labels.append(1)
#     elif l==1:
#         new_labels.append(2)
#     else:
#         new_labels.append(0)
# print(new_labels)
sample_examples = (df_sample['text'].astype(str).tolist(), new_labels)
# sample_examples = (df_sample['clean_text'].astype(str).tolist(), new_labels)
sample_dataset = MyClassificationDataset(sample_examples,tokenizer)

sample_dataloader = DataLoader(sample_dataset,shuffle=False,batch_size=test_batch_size)

print(sample_dataloader)

<torch.utils.data.dataloader.DataLoader object at 0x000001C8E880EBB0>


In [18]:
model_smart.to(device)
pred_final = []
for epoch in range(5,7):
    
    #evaluate model with test_df at the end of the epoch.
    eval_loss = 0.0
    nb_eval_steps = 0
    n_batches = len(sample_dataloader)
    preds = np.empty((len(sample_dataset), num_labels))
    out_label_ids = np.empty((len(sample_dataset)))
    
    PATH = "SMART_Roberta_large_FinancialTweets/exp8-0.5/"+str(epoch)
    model_smart.load_state_dict(torch.load(PATH))
    model_smart.eval()
    
    for i,test_batch in enumerate(sample_dataloader):
#         with torch.no_grad():
        test_batch = get_inputs_dict(test_batch)
        input_ids = test_batch['input_ids'].to(device)
        attention_mask = test_batch['attention_mask'].to(device)
        labels = test_batch['labels'].to(device)
        logits, tmp_eval_loss = model_smart(input_ids, attention_mask=attention_mask, labels=labels)
    #             tmp_eval_loss, logits = outputs[:2]
        eval_loss += tmp_eval_loss.item()
            
        nb_eval_steps += 1
        start_index = test_batch_size * i
        end_index = start_index + test_batch_size if i != (n_batches - 1) else len(sample_dataset)
#         print(logits)
        preds[start_index:end_index] = logits.detach().cpu().numpy()
        out_label_ids[start_index:end_index] = test_batch["labels"].detach().cpu().numpy()
        
    eval_loss = eval_loss / nb_eval_steps
    model_outputs = preds
    preds = np.argmax(preds, axis=1)
    result, con_m = compute_metrics(preds, model_outputs, out_label_ids)
    if epoch == 6:
        pred_final = preds 
    
    #print('epoch',epoch,'Training avg loss',np.mean(epoch_loss))
#     print('epoch',epoch,'Testing  avg loss',eval_loss)
#     print(result)
#     print(con_m)
#     print('---------------------------------------------------\n')

epoch 5 Testing  avg loss 6.138319451700557
{'mcc': 0.0, 'acc': 0.16761363636363635, 'f1': 0.09570154095701543}
[[118  96 490]
 [  0   0   0]
 [  0   0   0]]
---------------------------------------------------

epoch 6 Testing  avg loss 5.798707563768733
{'mcc': 0.0, 'acc': 0.21022727272727273, 'f1': 0.11580594679186229}
[[148  79 477]
 [  0   0   0]
 [  0   0   0]]
---------------------------------------------------



In [20]:
pred_final_df=pd.DataFrame(pred_final)
pred_final_df.columns = ['pred_sen']
df_sample = df_sample.join(pred_final_df)

In [21]:
df_sample.to_csv("../data/tweets.csv",index=False)

In [22]:
df_sample

Unnamed: 0,id,text,timestamp,source,symbols,company_names,url,verified,pred_sen
0,1.629760e+18,As travel demand continues to surge Marriott I...,Sun Feb 26 08:31:37 +0000 2023,YahooFinance,DDR-MAR,DDR*Marriott International,https://twitter.com/i/web/status/1629761089310...,,2
1,1.629770e+18,Frustration with PowerPoint the iconic slide p...,Sun Feb 26 09:20:08 +0000 2023,Forbes,MSFT,Microsoft,https://twitter.com/i/web/status/1629773296039...,,2
2,1.629790e+18,Twitter lays off at least 50 in relentless cos...,Sun Feb 26 10:25:05 +0000 2023,Reuters,TWTR,Twitter,http://reut.rs/3xUHiAktrue,,0
3,1.629810e+18,Tyler “Ninja” Blevins Joins GameSquare As Inno...,Sun Feb 26 11:40:07 +0000 2023,Forbes,SQ,Square,https://twitter.com/i/web/status/1629808523470...,,2
4,1.629810e+18,BlackRock's Stephen Laipply offers his predict...,Sun Feb 26 11:56:05 +0000 2023,business,BLK-PPL,BlackRock*PPL,https://trib.al/wwpt9fvtrue,,2
...,...,...,...,...,...,...,...,...,...
699,1.640000e+18,The probes are piling up for Tesla https://t.c...,Mon Mar 13 05:00:10 +0000 2023,TheStreet,TSLA,Tesla,https://trib.al/TvXRG0Xtrue,,0
700,1.640000e+18,Caterpillar union workers vote in favor of six...,Mon Mar 13 05:40:23 +0000 2023,Reuters,CAT,Caterpillar,http://reut.rs/3yukmbvtrue,,2
701,1.640000e+18,Carl Icahn prepares for proxy fight at Illumin...,Mon Mar 13 06:45:21 +0000 2023,Reuters,ILMN,Illumina,http://reut.rs/3JeR2uvtrue,,2
702,1.640000e+18,Caterpillar union workers vote in favor of six...,Mon Mar 13 08:45:26 +0000 2023,Reuters,CAT,Caterpillar,http://reut.rs/427UxLFtrue,,2


In [12]:
model.to(device)

model.zero_grad()

for epoch in range(num_train_epochs):

    model.train()
    epoch_loss = []
    
    for batch in tqdm(train_dataloader):
        batch = get_inputs_dict(batch)
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs[0]
        loss.backward()
        optimizer.step()
        scheduler.step()
        model.zero_grad()
        epoch_loss.append(loss.item())
        
#    SAVE
    PATH = "SMART_Roberta_large_FinancialTweets/exp_roberta/"+str(epoch)
    torch.save(model.state_dict(), PATH)
    
        
#     evaluate model with test_df at the end of the epoch.
    eval_loss = 0.0
    nb_eval_steps = 0
    n_batches = len(test_dataloader)
    preds = np.empty((len(test_dataset), num_labels))
    out_label_ids = np.empty((len(test_dataset)))
    model.eval()
    
    for i,test_batch in enumerate(test_dataloader):
        with torch.no_grad():
            test_batch = get_inputs_dict(test_batch)
            input_ids = test_batch['input_ids'].to(device)
            attention_mask = test_batch['attention_mask'].to(device)
            labels = test_batch['labels'].to(device)
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
            tmp_eval_loss, logits = outputs[:2]
            eval_loss += tmp_eval_loss.item()
            
        nb_eval_steps += 1
        start_index = test_batch_size * i
        end_index = start_index + test_batch_size if i != (n_batches - 1) else len(test_dataset)
        preds[start_index:end_index] = logits.detach().cpu().numpy()
        out_label_ids[start_index:end_index] = test_batch["labels"].detach().cpu().numpy()
        
    eval_loss = eval_loss / nb_eval_steps
    model_outputs = preds
    preds = np.argmax(preds, axis=1)
    result, con_m = compute_metrics(preds, model_outputs, out_label_ids)
    
    print('epoch',epoch,'Training avg loss',np.mean(epoch_loss))
    print('epoch',epoch,'Testing  avg loss',eval_loss)
    print(result) 
    print(con_m)
    print('---------------------------------------------------\n')


100%|██████████████████████████████████████| 1193/1193 [05:15<00:00,  3.78it/s]


epoch 0 Training avg loss 0.5037835334355054
epoch 0 Testing  avg loss 0.2689392535252665
{'mcc': 0.7885300602234105, 'acc': 0.8944723618090452, 'f1': 0.8622334200815785}
[[ 272    2   73]
 [   9  386   80]
 [  39   49 1478]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:16<00:00,  3.77it/s]


epoch 1 Training avg loss 0.23639989512221543
epoch 1 Testing  avg loss 0.2511897973948311
{'mcc': 0.8180867914851837, 'acc': 0.9045226130653267, 'f1': 0.8822751462945799}
[[ 303    7   37]
 [   8  429   38]
 [  46   92 1428]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:15<00:00,  3.78it/s]


epoch 2 Training avg loss 0.12902271561012335
epoch 2 Testing  avg loss 0.2894112297107371
{'mcc': 0.8338169709276786, 'acc': 0.911641541038526, 'f1': 0.89026986318448}
[[ 313    8   26]
 [   8  437   30]
 [  60   79 1427]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:13<00:00,  3.81it/s]


epoch 3 Training avg loss 0.07416152946036411
epoch 3 Testing  avg loss 0.30955295761763724
{'mcc': 0.8369685492612786, 'acc': 0.9149916247906198, 'f1': 0.8938815062077428}
[[ 308    7   32]
 [   6  431   38]
 [  50   70 1446]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:13<00:00,  3.81it/s]


epoch 4 Training avg loss 0.05168614226681464
epoch 4 Testing  avg loss 0.31890937290456145
{'mcc': 0.822476828480644, 'acc': 0.9074539363484088, 'f1': 0.8820196896457718}
[[ 295   19   33]
 [   6  429   40]
 [  42   81 1443]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:13<00:00,  3.81it/s]


epoch 5 Training avg loss 0.034925281233695815
epoch 5 Testing  avg loss 0.37698892763370795
{'mcc': 0.8221804262482968, 'acc': 0.907035175879397, 'f1': 0.8835201140367973}
[[ 311    9   27]
 [   7  419   49]
 [  66   64 1436]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:13<00:00,  3.81it/s]


epoch 6 Training avg loss 0.01687900119808072
epoch 6 Testing  avg loss 0.43579950665687733
{'mcc': 0.8235048975929922, 'acc': 0.9078726968174204, 'f1': 0.8852007786381844}
[[ 309    8   30]
 [   6  422   47]
 [  60   69 1437]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:13<00:00,  3.81it/s]


epoch 7 Training avg loss 0.01539710214739581
epoch 7 Testing  avg loss 0.44542310417844505
{'mcc': 0.8315574421580846, 'acc': 0.9128978224455612, 'f1': 0.8903290355473}
[[ 313    4   30]
 [   8  414   53]
 [  64   49 1453]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:13<00:00,  3.80it/s]


epoch 8 Training avg loss 0.007365325339466286
epoch 8 Testing  avg loss 0.4729526738369759
{'mcc': 0.8343226767992002, 'acc': 0.9137353433835846, 'f1': 0.8921025441483978}
[[ 312    5   30]
 [   7  424   44]
 [  60   60 1446]]
---------------------------------------------------



100%|██████████████████████████████████████| 1193/1193 [05:13<00:00,  3.81it/s]


epoch 9 Training avg loss 0.0038552889222936795
epoch 9 Testing  avg loss 0.4913047444435786
{'mcc': 0.8315585816316732, 'acc': 0.9120603015075377, 'f1': 0.8900421908188898}
[[ 310    7   30]
 [   7  426   42]
 [  57   67 1442]]
---------------------------------------------------

