In [None]:
import torch
from transformers import AdamW, AutoTokenizer, AutoModelForSequenceClassification, BertTokenizer, BertConfig, BertForSequenceClassification, BertPreTrainedModel, BertModel
import json
from torch.utils.data import TensorDataset, random_split, Subset
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import TrainingArguments, Trainer
from torch import nn
import torch.nn.functional as F
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import f1_score, precision_recall_fscore_support
import pickle
import time
import numpy as np
import os
from collections import Counter
import datetime
# Force synchronous CUDA kernel launches so a failing kernel is reported at the
# call that triggered it. Set before the first CUDA call made by this notebook.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"
# Prefer the GPU, but fall back to the CPU when no CUDA device is usable so the
# notebook can still run (slowly) on a machine without one.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [4]:
# Retrieval-result JSON that supplies both the evaluation rows and their labels.
# Built with os.path.join so the separators are never parsed as string escapes
# and the path resolves on any platform.
RETRIEVAL_RESULT_PATH = os.path.join(
    '..',
    'Sentence_retrieval_result',
    'CHEF_wiki_train_0511SBDA50e0519_BM25Fv3ALL_0607.json',
)
# Rows before this index are skipped; only the tail of the file is evaluated.
# NOTE(review): presumably the start of the held-out split — confirm.
EVAL_START_INDEX = 11620

# Context managers close each file handle as soon as parsing finishes.
with open(RETRIEVAL_RESULT_PATH, 'r', encoding='utf-8') as f:
    test = json.load(f)
test = test[EVAL_START_INDEX:]
# One label per evaluated row, in file order.
labels_ = [row['label'] for row in test]

# Parsed a second time so `sentence` holds objects independent of `test`.
with open(RETRIEVAL_RESULT_PATH, 'r', encoding='utf-8') as f:
    sentence = json.load(f)
sentence = sentence[EVAL_START_INDEX:]

In [None]:
# Fill in: folder located under /CHEF/Predict model_training/predict model
# (the fine-tuned weights to evaluate).
# NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
# map_location follows `device` so the weights land where the model will run.
model_state_dict = torch.load('', map_location=device)
# Fill in: folder located under /sbert/model (the pretrained base model).
model = BertForSequenceClassification.from_pretrained(
    '',
    state_dict=model_state_dict,  # replace the pretrained weights with the loaded ones
    num_labels=3,
    output_attentions=False,
    output_hidden_states=False,
)
model.to(device)
# NOTE(review): the tokenizer path is also left blank — presumably the same
# folder as the base model; confirm.
tokenizer = BertTokenizer.from_pretrained('')

In [None]:
# Tokenize every entry of `sentence` to a fixed length and collect one label
# per entry, keeping the three lists aligned by position.
# NOTE(review): `sentence` is sliced from the same JSON as `test`, whose rows are
# indexed with row['label']; confirm each entry is an input encode_plus accepts.
tokenizer_options = dict(
    add_special_tokens=False,    # special tokens ('[CLS]' / '[SEP]') are NOT inserted
    max_length=512,              # pad & truncate every entry to this length
    padding='max_length',
    return_attention_mask=True,  # also build the attention mask
    return_tensors='pt',         # return PyTorch tensors
    truncation=True,
)

input_ids = []
attention_masks = []
labels = []
for position, entry in enumerate(sentence):
    encoding = tokenizer.encode_plus(entry, **tokenizer_options)
    # Token ids of the encoded entry.
    input_ids.append(encoding['input_ids'])
    # Matching attention mask (differentiates padding from non-padding).
    attention_masks.append(encoding['attention_mask'])
    # Label taken from the same position in `labels_`.
    labels.append(labels_[position])

In [None]:
# Join the per-entry tensors along dim 0 and move them to `device`.
# Note: this rebinds `input_ids`, `attention_masks` and `labels` from lists to
# tensors, so re-run the tokenization cell before re-running this one.
input_ids = torch.cat(input_ids, dim=0).to(device)
attention_masks = torch.cat(attention_masks, dim=0).to(device)
# Follow `device` instead of a hard-coded 'cuda' so all three tensors always
# live on the same device.
labels = torch.tensor(labels, device=device)
test_dataset = TensorDataset(input_ids, attention_masks, labels)
# SequentialSampler iterates the dataset in order, so predictions stay aligned
# with the input rows.
test_dataloader = DataLoader(
    test_dataset,
    sampler=SequentialSampler(test_dataset),
    batch_size=8,
)

In [None]:
def format_time(elapsed):
    """Render a duration given in seconds as an ``h:mm:ss`` string.

    Args:
        elapsed: Duration in seconds; fractional values are rounded to the
            nearest whole second with the built-in ``round``.

    Returns:
        ``str()`` of the matching ``datetime.timedelta``, for example
        ``'0:01:05'`` for 65 seconds.
    """
    whole_seconds = int(round(elapsed))
    duration = datetime.timedelta(seconds=whole_seconds)
    return str(duration)

In [None]:
# Best-so-far trackers. They are only initialised in this cell (never updated
# here) and are kept so that any other cell reading them still finds them.
best_microf1 = 0
best_macrof1 = 0
best_recall = 0
best_precision = 0
best_prediction = None
best_ground_truth = None

t0 = time.time()
# Put the model in evaluation mode.
model.eval()
print("Running Validation...")

# Tracking variables (total_eval_accuracy / nb_eval_steps are initialised but
# not updated in this cell).
total_eval_accuracy = 0
total_eval_loss = 0
nb_eval_steps = 0
# Per-batch results are collected in lists and concatenated once after the
# loop, instead of re-copying ever-growing arrays on every batch.
prediction_chunks = []
ground_truth_chunks = []
logit_chunks = []

# Evaluate the data for one pass over the dataloader.
for batch in test_dataloader:
    # Unpack this evaluation batch: (input ids, attention mask, labels).
    b_input_ids = batch[0].to(device)
    b_input_mask = batch[1].to(device)
    b_labels = batch[2].to(device)
    # No gradients are needed for evaluation.
    with torch.no_grad():
        outputs = model(
            b_input_ids,
            token_type_ids=None,
            attention_mask=b_input_mask,
            labels=b_labels
        )
        # Labels were passed in, so the first two outputs are used as loss and logits.
        loss, logits = outputs[0], outputs[1]
    # Accumulate the validation loss.
    total_eval_loss += loss.sum().item()
    # Move logits and labels to CPU.
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to('cpu').numpy()

    # Predicted class = index of the largest logit in each row.
    prediction_chunks.append(np.argmax(logits, axis=1).flatten())
    ground_truth_chunks.append(label_ids.flatten())
    logit_chunks.append(logits)

# Starting from an empty float array keeps the resulting dtype the same as when
# the arrays were grown batch by batch (and yields an empty array for no batches).
all_prediction = np.concatenate([np.array([])] + prediction_chunks, axis=None)
all_ground_truth = np.concatenate([np.array([])] + ground_truth_chunks, axis=None)
all_logits = np.concatenate(logit_chunks, axis=0) if logit_chunks else np.array([])

# Calculate the average loss over all of the batches.
avg_val_loss = total_eval_loss / len(test_dataloader)
# Measure how long the validation run took.
validation_time = format_time(time.time() - t0)
print('Validation Elapsed: {:}.'.format(validation_time))

# Show how many times each class was predicted.
c = Counter()
for pred in all_prediction:
    c[int(pred)] += 1
print(c)

# Micro-averaged scores: only F1 is reported.
pre, recall, f1, _ = precision_recall_fscore_support(all_ground_truth, all_prediction, average='micro')
print("       F1 (micro): {:.2%}".format(f1))
microf1 = f1
# Macro-averaged scores: precision, recall and F1 are reported.
pre, recall, f1, _ = precision_recall_fscore_support(all_ground_truth, all_prediction, average='macro')
print("Precision (macro): {:.2%}".format(pre))
print("   Recall (macro): {:.2%}".format(recall))
print("       F1 (macro): {:.2%}".format(f1))

In [None]:
# Fill in: folder located under \CHEF\predict_label\predict_result.
# The path is assembled with os.path.join rather than written as one
# backslash-separated literal: in a plain string "\a" (as in "\all_prediction")
# is the BEL control character, which corrupts the file name.
prediction_dir = os.path.join('.', 'CHEF', 'predict_label', 'predict_result')
prediction_path = os.path.join(
    prediction_dir,
    'all_prediction_0511SBDA50e0519_epoch16_BM25Fv3ALL_r_0530.pickle',
)
# Create the output folder if it is missing so a finished evaluation run is not
# lost at save time.
os.makedirs(prediction_dir, exist_ok=True)
with open(prediction_path, 'wb') as f:
    pickle.dump(all_prediction, f)

In [None]:
# Bare expression at the end of the cell: shows `all_prediction` as the cell output.
all_prediction