In [3]:
import torch
import numpy as np
import os
import pandas as pd
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score

# 저장된 prediction test

In [4]:
model_name = 'bert-base-uncased'

train_df = pd.read_pickle(os.path.join(f'{model_name}_dataset', 'train.pkl'))
val_df = pd.read_pickle(os.path.join(f'{model_name}_dataset', 'val.pkl'))
# test_df = pd.read_pickle(os.path.join(f'{model_name}_dataset', 'test.pkl'))

train_true_label = train_df['label']
val_true_label = val_df['label']
# test_true_label = test_df['label']

In [5]:
def label_acc(df, predict, label):
    index = df.loc[df['label']==label].index
    return (predict[index] == label).sum() / len(index)

from collections import Counter

def document_acc(df, predict):
    is_correct = []
    docs_index = df['docs_index'].drop_duplicates()
    for i in docs_index:
        index = df.loc[df['docs_index']==i].index
        docs_label = df.loc[df['docs_index']==i]['label'].iloc[0]
        predicted_label = Counter(predict[index]).most_common(1)[0][0]
        is_correct.append(predicted_label==docs_label)
    
    return np.array(is_correct).sum() / len(is_correct)

In [5]:
train_f1 = []
train_acc = {
    'label0': [],
    'label1': [],
    'label2': [],
    'whole': [],
    'docs': []
}
val_f1 = []
val_acc = {
    'label0': [],
    'label1': [],
    'label2': [],
    'whole': [],
    'docs': []
}

for i in range(20):
    valid_predict = torch.load(os.path.join(f'{model_name}_val_predictions', f'{i}valid.pkl'), map_location=torch.device('cpu'))
    val_flatten_predictions = np.stack([torch.softmax(torch.Tensor(p), dim=0).cpu().detach().numpy() for batch in valid_predict for p in batch])

    valid_predicts_label = np.argmax(val_flatten_predictions, axis=1)

    val_f1.append(f1_score(val_true_label, valid_predicts_label, average='macro'))
    val_acc['whole'].append(accuracy_score(val_true_label, valid_predicts_label))

    val_acc['docs'].append(document_acc(val_df, valid_predicts_label))

    print(f"{i}번째 점수")
    print(f"{i}train f1: {train_f1[i]}")
    print(f"{i}val f1: {val_f1[i]}")
    print(f"{i}train acc: {train_acc['whole'][i]}")
    print(f"{i}val acc: {val_acc['whole'][i]}")
    print()

0번째 점수
0train f1: 0.2692649636770139
0val f1: 0.12162456515965192
0train acc: 0.29083102283883355
0val acc: 0.2112343563043689

1번째 점수
1train f1: 0.8332731324034351
1val f1: 0.9478280006304473
1train acc: 0.8575189064977938
1val acc: 0.9556317304271901

2번째 점수
2train f1: 0.9446358521658021
2val f1: 0.9686612874636973
2train acc: 0.9527040876873119
2val acc: 0.9751641173236749

3번째 점수
3train f1: 0.9604778430950581
3val f1: 0.9743916530696569
3train acc: 0.9665131131800675
3val acc: 0.979626814992077

4번째 점수
4train f1: 0.9715238744833451
4val f1: 0.9705406627336955
4train acc: 0.9764707151564843
4val acc: 0.9764576528797335

5번째 점수
5train f1: 0.9844364237579004
5val f1: 0.9737055194558649
5train acc: 0.9871187684075389
5val acc: 0.9785919865472302

6번째 점수
6train f1: 0.9884969691059466
6val f1: 0.9690714968749287
6train acc: 0.9903660470585697
6val acc: 0.9752934708792808

7번째 점수
7train f1: 0.9898479882421728
7val f1: 0.9707131755862329
7train acc: 0.9917577379090114
7val acc: 0.979465123